diff --git a/generate/generate.py b/generate/generate.py index e55020df4efe532e85fe4a88ca00d191c02a1373..edb7e7201458d07f2b601503661c5aebfb3ef0f1 100644 --- a/generate/generate.py +++ b/generate/generate.py @@ -382,7 +382,6 @@ def generate_operator( operator = operator_generation.operators.HyTeGElementwiseOperator( name, symbolizer, - opts=optimizations, kernel_wrapper_types=kernel_types, type_descriptor=type_descriptor, ) @@ -400,20 +399,20 @@ def generate_operator( blending=blending, # type: ignore[call-arg] # kw-args are not supported by Callable ) - operator.add_integral( + operator.add_volume_integral( name="".join(name.split()), - dim=geometry.dimensions, - geometry=geometry, - integration_domain=operator_generation.operators.MacroIntegrationDomain.VOLUME, + volume_geometry=geometry, quad=quad, blending=blending, form=form, loop_strategy=loop_strategies[spec["loop-strategy"]], + optimizations=optimizations, ) dir_path = os.path.join(args.output, form_str) operator.generate_class_code( dir_path, + class_files=operator_generation.operators.CppClassFiles.HEADER_IMPL_AND_VARIANTS, clang_format_binary=args.clang_format_binary, ) diff --git a/generate/requirements.txt b/generate/requirements.txt index 0afd3ab81459c8545735cf5707725caf5efa4f29..930783ac627ac69ff650396df520bbb1b126ac49 100644 --- a/generate/requirements.txt +++ b/generate/requirements.txt @@ -1,5 +1,5 @@ --extra-index-url https://test.pypi.org/simple/ -hog @ git+ssh://git@i10git.cs.fau.de/hyteg/hog@26f110bc235ad20bff58416a4dba4e1730e74c4e +hog @ git+https://i10git.cs.fau.de/hyteg/hog@516f33ba88809c2174d316883f09221ed0e7ce02 tomli >= 1.1.0 ; python_version < "3.11" clang-format diff --git a/operators.toml b/operators.toml index 01bda864a762f0fb1adfa6c0598778d3815dd0dc..806f89a228fb64cc1de7478b8ff617b4db4443d8 100644 --- a/operators.toml +++ b/operators.toml @@ -367,7 +367,7 @@ dimensions = [2, 3] quadrature = 3 blending = "IdentityMap" loop-strategy = "sawtooth" -optimizations = ["quadloops"] +optimizations = ["moveconstants", "vectorize", "quadloops"] [[grad_rho_by_rho_dot_u]] trial-space = "P2Vector" @@ -377,7 +377,7 @@ dimensions = [2] quadrature = 3 blending = "AnnulusMap" loop-strategy = "sawtooth" -optimizations = ["quadloops"] +optimizations = ["moveconstants", "vectorize", "quadloops"] [[grad_rho_by_rho_dot_u]] trial-space = "P2Vector" @@ -387,4 +387,4 @@ dimensions = [3] quadrature = 3 blending = "IcosahedralShellMap" loop-strategy = "sawtooth" -optimizations = ["quadloops"] +optimizations = ["moveconstants", "vectorize", "quadloops"] diff --git a/operators/curl_curl/CMakeLists.txt b/operators/curl_curl/CMakeLists.txt index 8599561feaa0d3a86177fc89fa6838ccac9a1888..56728228ebc11283efcdf0ec4b2d86753d61eb27 100644 --- a/operators/curl_curl/CMakeLists.txt +++ b/operators/curl_curl/CMakeLists.txt @@ -7,15 +7,15 @@ add_library( opgen-curl_curl if(HYTEG_BUILD_WITH_AVX AND WALBERLA_DOUBLE_ACCURACY) target_sources(opgen-curl_curl PRIVATE - avx/N1E1ElementwiseCurlCurl_apply_macro_3D.cpp - avx/N1E1ElementwiseCurlCurl_computeInverseDiagonalOperatorValues_macro_3D.cpp - noarch/N1E1ElementwiseCurlCurl_toMatrix_macro_3D.cpp + avx/N1E1ElementwiseCurlCurl_apply_N1E1ElementwiseCurlCurl_macro_3D.cpp + avx/N1E1ElementwiseCurlCurl_computeInverseDiagonalOperatorValues_N1E1ElementwiseCurlCurl_macro_3D.cpp + noarch/N1E1ElementwiseCurlCurl_toMatrix_N1E1ElementwiseCurlCurl_macro_3D.cpp ) set_source_files_properties( - avx/N1E1ElementwiseCurlCurl_apply_macro_3D.cpp - avx/N1E1ElementwiseCurlCurl_computeInverseDiagonalOperatorValues_macro_3D.cpp + avx/N1E1ElementwiseCurlCurl_apply_N1E1ElementwiseCurlCurl_macro_3D.cpp + avx/N1E1ElementwiseCurlCurl_computeInverseDiagonalOperatorValues_N1E1ElementwiseCurlCurl_macro_3D.cpp PROPERTIES COMPILE_OPTIONS ${HYTEG_COMPILER_NATIVE_FLAGS} ) @@ -26,9 +26,9 @@ else() target_sources(opgen-curl_curl PRIVATE - noarch/N1E1ElementwiseCurlCurl_apply_macro_3D.cpp - noarch/N1E1ElementwiseCurlCurl_computeInverseDiagonalOperatorValues_macro_3D.cpp - noarch/N1E1ElementwiseCurlCurl_toMatrix_macro_3D.cpp + noarch/N1E1ElementwiseCurlCurl_apply_N1E1ElementwiseCurlCurl_macro_3D.cpp + noarch/N1E1ElementwiseCurlCurl_computeInverseDiagonalOperatorValues_N1E1ElementwiseCurlCurl_macro_3D.cpp + noarch/N1E1ElementwiseCurlCurl_toMatrix_N1E1ElementwiseCurlCurl_macro_3D.cpp ) endif() diff --git a/operators/curl_curl/N1E1ElementwiseCurlCurl.cpp b/operators/curl_curl/N1E1ElementwiseCurlCurl.cpp index 84d74b5353ef2e33fadee89d22327bf8343aa0e5..92232dfbae3e9f3601392844202d80abd0d520d3 100644 --- a/operators/curl_curl/N1E1ElementwiseCurlCurl.cpp +++ b/operators/curl_curl/N1E1ElementwiseCurlCurl.cpp @@ -118,7 +118,7 @@ void N1E1ElementwiseCurlCurl::apply( const n1e1::N1E1VectorFunction< real_t >& s this->timingTree_->start( "kernel" ); - apply_macro_3D( + apply_N1E1ElementwiseCurlCurl_macro_3D( _data_dst, _data_src, @@ -136,6 +136,7 @@ void N1E1ElementwiseCurlCurl::apply( const n1e1::N1E1VectorFunction< real_t >& s macro_vertex_coord_id_3comp2, micro_edges_per_macro_edge, micro_edges_per_macro_edge_float ); + this->timingTree_->stop( "kernel" ); } @@ -200,7 +201,7 @@ void N1E1ElementwiseCurlCurl::toMatrix( const std::shared_ptr< SparseMatrixProxy this->timingTree_->start( "kernel" ); - toMatrix_macro_3D( + toMatrix_N1E1ElementwiseCurlCurl_macro_3D( _data_dst, _data_src, @@ -221,6 +222,7 @@ void N1E1ElementwiseCurlCurl::toMatrix( const std::shared_ptr< SparseMatrixProxy mat, micro_edges_per_macro_edge, micro_edges_per_macro_edge_float ); + this->timingTree_->stop( "kernel" ); } } @@ -278,7 +280,7 @@ void N1E1ElementwiseCurlCurl::computeInverseDiagonalOperatorValues() this->timingTree_->start( "kernel" ); - computeInverseDiagonalOperatorValues_macro_3D( + computeInverseDiagonalOperatorValues_N1E1ElementwiseCurlCurl_macro_3D( _data_invDiag_, macro_vertex_coord_id_0comp0, @@ -295,6 +297,7 @@ void N1E1ElementwiseCurlCurl::computeInverseDiagonalOperatorValues() macro_vertex_coord_id_3comp2, micro_edges_per_macro_edge, micro_edges_per_macro_edge_float ); + this->timingTree_->stop( "kernel" ); } diff --git a/operators/curl_curl/N1E1ElementwiseCurlCurl.hpp b/operators/curl_curl/N1E1ElementwiseCurlCurl.hpp index b66be43cd8ef200ec89cc5b3cf7bf48dbe286acc..5560334fdac13473c8a8786a05c04b6913447ab4 100644 --- a/operators/curl_curl/N1E1ElementwiseCurlCurl.hpp +++ b/operators/curl_curl/N1E1ElementwiseCurlCurl.hpp @@ -29,6 +29,7 @@ #include "core/DataTypes.h" #include "hyteg/LikwidWrapper.hpp" +#include "hyteg/boundary/BoundaryConditions.hpp" #include "hyteg/communication/Syncing.hpp" #include "hyteg/edgedofspace/EdgeDoFMacroCell.hpp" #include "hyteg/n1e1functionspace/N1E1MacroCell.hpp" @@ -37,6 +38,7 @@ #include "hyteg/primitivestorage/PrimitiveStorage.hpp" #include "hyteg/solvers/Smoothables.hpp" #include "hyteg/sparseassembly/SparseMatrixProxy.hpp" +#include "hyteg/types/types.hpp" #define FUNC_PREFIX @@ -68,74 +70,88 @@ class N1E1ElementwiseCurlCurl : public Operator< n1e1::N1E1VectorFunction< real_ protected: private: - /// Kernel type: apply + /// Integral: N1E1ElementwiseCurlCurl + /// - volume element: tetrahedron, dim: 3, vertices: 4, spacedim: 3 + /// - kernel type: apply + /// - loop strategy: SAWTOOTH /// - quadrature rule: Keast 0 | points: 1, degree: 1 + /// - blending map: IdentityMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 180 202 37 0 0 0 0 1 - void apply_macro_3D( real_t* RESTRICT _data_dst, - real_t* RESTRICT _data_src, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_0comp2, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_1comp2, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - real_t macro_vertex_coord_id_2comp2, - real_t macro_vertex_coord_id_3comp0, - real_t macro_vertex_coord_id_3comp1, - real_t macro_vertex_coord_id_3comp2, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float ) const; - /// Kernel type: toMatrix + void apply_N1E1ElementwiseCurlCurl_macro_3D( real_t* RESTRICT _data_dst, + real_t* RESTRICT _data_src, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_0comp2, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_1comp2, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + real_t macro_vertex_coord_id_2comp2, + real_t macro_vertex_coord_id_3comp0, + real_t macro_vertex_coord_id_3comp1, + real_t macro_vertex_coord_id_3comp2, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float ) const; + + /// Integral: N1E1ElementwiseCurlCurl + /// - volume element: tetrahedron, dim: 3, vertices: 4, spacedim: 3 + /// - kernel type: toMatrix + /// - loop strategy: SAWTOOTH /// - quadrature rule: Keast 0 | points: 1, degree: 1 + /// - blending map: IdentityMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 144 253 37 0 0 0 0 4 - void toMatrix_macro_3D( idx_t* RESTRICT _data_dst, - idx_t* RESTRICT _data_src, - const Cell& cell, - const uint_t level, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_0comp2, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_1comp2, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - real_t macro_vertex_coord_id_2comp2, - real_t macro_vertex_coord_id_3comp0, - real_t macro_vertex_coord_id_3comp1, - real_t macro_vertex_coord_id_3comp2, - std::shared_ptr< SparseMatrixProxy > mat, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float ) const; - /// Kernel type: computeInverseDiagonalOperatorValues + void toMatrix_N1E1ElementwiseCurlCurl_macro_3D( idx_t* RESTRICT _data_dst, + idx_t* RESTRICT _data_src, + const Cell& cell, + const uint_t level, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_0comp2, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_1comp2, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + real_t macro_vertex_coord_id_2comp2, + real_t macro_vertex_coord_id_3comp0, + real_t macro_vertex_coord_id_3comp1, + real_t macro_vertex_coord_id_3comp2, + std::shared_ptr< SparseMatrixProxy > mat, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float ) const; + + /// Integral: N1E1ElementwiseCurlCurl + /// - volume element: tetrahedron, dim: 3, vertices: 4, spacedim: 3 + /// - kernel type: computeInverseDiagonalOperatorValues + /// - loop strategy: SAWTOOTH /// - quadrature rule: Keast 0 | points: 1, degree: 1 + /// - blending map: IdentityMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 120 115 37 0 0 0 0 1 - void computeInverseDiagonalOperatorValues_macro_3D( real_t* RESTRICT _data_invDiag_, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_0comp2, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_1comp2, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - real_t macro_vertex_coord_id_2comp2, - real_t macro_vertex_coord_id_3comp0, - real_t macro_vertex_coord_id_3comp1, - real_t macro_vertex_coord_id_3comp2, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float ) const; + void computeInverseDiagonalOperatorValues_N1E1ElementwiseCurlCurl_macro_3D( real_t* RESTRICT _data_invDiag_, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_0comp2, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_1comp2, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + real_t macro_vertex_coord_id_2comp2, + real_t macro_vertex_coord_id_3comp0, + real_t macro_vertex_coord_id_3comp1, + real_t macro_vertex_coord_id_3comp2, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float ) const; std::shared_ptr< n1e1::N1E1VectorFunction< real_t > > invDiag_; }; diff --git a/operators/curl_curl/avx/N1E1ElementwiseCurlCurl_apply_macro_3D.cpp b/operators/curl_curl/avx/N1E1ElementwiseCurlCurl_apply_N1E1ElementwiseCurlCurl_macro_3D.cpp similarity index 99% rename from operators/curl_curl/avx/N1E1ElementwiseCurlCurl_apply_macro_3D.cpp rename to operators/curl_curl/avx/N1E1ElementwiseCurlCurl_apply_N1E1ElementwiseCurlCurl_macro_3D.cpp index 25560670b4911b58d4e788a4f79a42f90a037f98..16022bdaafdcf9bf4222ff1342b4434091c19e90 100644 --- a/operators/curl_curl/avx/N1E1ElementwiseCurlCurl_apply_macro_3D.cpp +++ b/operators/curl_curl/avx/N1E1ElementwiseCurlCurl_apply_N1E1ElementwiseCurlCurl_macro_3D.cpp @@ -45,6 +45,10 @@ + + + + @@ -58,7 +62,7 @@ namespace hyteg { namespace operatorgeneration { -void N1E1ElementwiseCurlCurl::apply_macro_3D( real_t * RESTRICT _data_dst, real_t * RESTRICT _data_src, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void N1E1ElementwiseCurlCurl::apply_N1E1ElementwiseCurlCurl_macro_3D( real_t * RESTRICT _data_dst, real_t * RESTRICT _data_src, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t tmp_coords_jac_0_WHITE_UP = 1.0 / (micro_edges_per_macro_edge_float)*1.0; diff --git a/operators/curl_curl/avx/N1E1ElementwiseCurlCurl_computeInverseDiagonalOperatorValues_macro_3D.cpp b/operators/curl_curl/avx/N1E1ElementwiseCurlCurl_computeInverseDiagonalOperatorValues_N1E1ElementwiseCurlCurl_macro_3D.cpp similarity index 99% rename from operators/curl_curl/avx/N1E1ElementwiseCurlCurl_computeInverseDiagonalOperatorValues_macro_3D.cpp rename to operators/curl_curl/avx/N1E1ElementwiseCurlCurl_computeInverseDiagonalOperatorValues_N1E1ElementwiseCurlCurl_macro_3D.cpp index 4397e23702b412df4efde066ee8b6ad2875d8888..9e45450fbe7e3be181ed484e19ba0c27bcee1784 100644 --- a/operators/curl_curl/avx/N1E1ElementwiseCurlCurl_computeInverseDiagonalOperatorValues_macro_3D.cpp +++ b/operators/curl_curl/avx/N1E1ElementwiseCurlCurl_computeInverseDiagonalOperatorValues_N1E1ElementwiseCurlCurl_macro_3D.cpp @@ -45,6 +45,10 @@ + + + + @@ -58,7 +62,7 @@ namespace hyteg { namespace operatorgeneration { -void N1E1ElementwiseCurlCurl::computeInverseDiagonalOperatorValues_macro_3D( real_t * RESTRICT _data_invDiag_, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void N1E1ElementwiseCurlCurl::computeInverseDiagonalOperatorValues_N1E1ElementwiseCurlCurl_macro_3D( real_t * RESTRICT _data_invDiag_, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t tmp_coords_jac_0_WHITE_UP = 1.0 / (micro_edges_per_macro_edge_float)*1.0; diff --git a/operators/curl_curl/noarch/N1E1ElementwiseCurlCurl_apply_macro_3D.cpp b/operators/curl_curl/noarch/N1E1ElementwiseCurlCurl_apply_N1E1ElementwiseCurlCurl_macro_3D.cpp similarity index 99% rename from operators/curl_curl/noarch/N1E1ElementwiseCurlCurl_apply_macro_3D.cpp rename to operators/curl_curl/noarch/N1E1ElementwiseCurlCurl_apply_N1E1ElementwiseCurlCurl_macro_3D.cpp index b2ddf686ff097890b1e8d827a5a4f70532c574df..7a290c00af72131c1aad7db133c52268f73db17f 100644 --- a/operators/curl_curl/noarch/N1E1ElementwiseCurlCurl_apply_macro_3D.cpp +++ b/operators/curl_curl/noarch/N1E1ElementwiseCurlCurl_apply_N1E1ElementwiseCurlCurl_macro_3D.cpp @@ -45,6 +45,10 @@ + + + + @@ -58,7 +62,7 @@ namespace hyteg { namespace operatorgeneration { -void N1E1ElementwiseCurlCurl::apply_macro_3D( real_t * RESTRICT _data_dst, real_t * RESTRICT _data_src, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void N1E1ElementwiseCurlCurl::apply_N1E1ElementwiseCurlCurl_macro_3D( real_t * RESTRICT _data_dst, real_t * RESTRICT _data_src, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t tmp_coords_jac_0_WHITE_UP = 1.0 / (micro_edges_per_macro_edge_float)*1.0; diff --git a/operators/curl_curl/noarch/N1E1ElementwiseCurlCurl_computeInverseDiagonalOperatorValues_macro_3D.cpp b/operators/curl_curl/noarch/N1E1ElementwiseCurlCurl_computeInverseDiagonalOperatorValues_N1E1ElementwiseCurlCurl_macro_3D.cpp similarity index 99% rename from operators/curl_curl/noarch/N1E1ElementwiseCurlCurl_computeInverseDiagonalOperatorValues_macro_3D.cpp rename to operators/curl_curl/noarch/N1E1ElementwiseCurlCurl_computeInverseDiagonalOperatorValues_N1E1ElementwiseCurlCurl_macro_3D.cpp index f8a8c7e263a1fdb02e68e4e76598a98eef6a39a2..7279d4a689cd12e84e192d5acbaa4798ab9465fa 100644 --- a/operators/curl_curl/noarch/N1E1ElementwiseCurlCurl_computeInverseDiagonalOperatorValues_macro_3D.cpp +++ b/operators/curl_curl/noarch/N1E1ElementwiseCurlCurl_computeInverseDiagonalOperatorValues_N1E1ElementwiseCurlCurl_macro_3D.cpp @@ -45,6 +45,10 @@ + + + + @@ -58,7 +62,7 @@ namespace hyteg { namespace operatorgeneration { -void N1E1ElementwiseCurlCurl::computeInverseDiagonalOperatorValues_macro_3D( real_t * RESTRICT _data_invDiag_, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void N1E1ElementwiseCurlCurl::computeInverseDiagonalOperatorValues_N1E1ElementwiseCurlCurl_macro_3D( real_t * RESTRICT _data_invDiag_, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t tmp_coords_jac_0_WHITE_UP = 1.0 / (micro_edges_per_macro_edge_float)*1.0; diff --git a/operators/curl_curl/noarch/N1E1ElementwiseCurlCurl_toMatrix_macro_3D.cpp b/operators/curl_curl/noarch/N1E1ElementwiseCurlCurl_toMatrix_N1E1ElementwiseCurlCurl_macro_3D.cpp similarity index 99% rename from operators/curl_curl/noarch/N1E1ElementwiseCurlCurl_toMatrix_macro_3D.cpp rename to operators/curl_curl/noarch/N1E1ElementwiseCurlCurl_toMatrix_N1E1ElementwiseCurlCurl_macro_3D.cpp index a46f961149e4456e7026e7be41114ea8e68d8e02..cf06b5a582770b9eca5c6be5766cc6e4ab74439a 100644 --- a/operators/curl_curl/noarch/N1E1ElementwiseCurlCurl_toMatrix_macro_3D.cpp +++ b/operators/curl_curl/noarch/N1E1ElementwiseCurlCurl_toMatrix_N1E1ElementwiseCurlCurl_macro_3D.cpp @@ -45,6 +45,10 @@ + + + + @@ -58,7 +62,7 @@ namespace hyteg { namespace operatorgeneration { -void N1E1ElementwiseCurlCurl::toMatrix_macro_3D( idx_t * RESTRICT _data_dst, idx_t * RESTRICT _data_src, const Cell& cell, const uint_t level, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void N1E1ElementwiseCurlCurl::toMatrix_N1E1ElementwiseCurlCurl_macro_3D( idx_t * RESTRICT _data_dst, idx_t * RESTRICT _data_src, const Cell& cell, const uint_t level, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t tmp_coords_jac_0_WHITE_UP = 1.0 / (micro_edges_per_macro_edge_float)*1.0; diff --git a/operators/diffusion/CMakeLists.txt b/operators/diffusion/CMakeLists.txt index 89d33aa9a3b8ac707365596968b9e8c635358134..694448a638bfd672eff3e4ae3fe0ed369fa4ae47 100644 --- a/operators/diffusion/CMakeLists.txt +++ b/operators/diffusion/CMakeLists.txt @@ -13,40 +13,40 @@ add_library( opgen-diffusion if(HYTEG_BUILD_WITH_AVX AND WALBERLA_DOUBLE_ACCURACY) target_sources(opgen-diffusion PRIVATE - avx/P1ElementwiseDiffusion_apply_macro_2D.cpp - avx/P1ElementwiseDiffusion_apply_macro_3D.cpp - avx/P1ElementwiseDiffusion_computeInverseDiagonalOperatorValues_macro_2D.cpp - avx/P1ElementwiseDiffusion_computeInverseDiagonalOperatorValues_macro_3D.cpp - avx/P2ElementwiseDiffusionAnnulusMap_apply_macro_2D.cpp - avx/P2ElementwiseDiffusionAnnulusMap_computeInverseDiagonalOperatorValues_macro_2D.cpp - avx/P2ElementwiseDiffusionIcosahedralShellMap_apply_macro_3D.cpp - avx/P2ElementwiseDiffusionIcosahedralShellMap_computeInverseDiagonalOperatorValues_macro_3D.cpp - avx/P2ElementwiseDiffusion_apply_macro_2D.cpp - avx/P2ElementwiseDiffusion_apply_macro_3D.cpp - avx/P2ElementwiseDiffusion_computeInverseDiagonalOperatorValues_macro_2D.cpp - avx/P2ElementwiseDiffusion_computeInverseDiagonalOperatorValues_macro_3D.cpp - noarch/P1ElementwiseDiffusion_toMatrix_macro_2D.cpp - noarch/P1ElementwiseDiffusion_toMatrix_macro_3D.cpp - noarch/P2ElementwiseDiffusionAnnulusMap_toMatrix_macro_2D.cpp - noarch/P2ElementwiseDiffusionIcosahedralShellMap_toMatrix_macro_3D.cpp - noarch/P2ElementwiseDiffusion_toMatrix_macro_2D.cpp - noarch/P2ElementwiseDiffusion_toMatrix_macro_3D.cpp + avx/P1ElementwiseDiffusion_apply_P1ElementwiseDiffusion_macro_2D.cpp + avx/P1ElementwiseDiffusion_apply_P1ElementwiseDiffusion_macro_3D.cpp + avx/P1ElementwiseDiffusion_computeInverseDiagonalOperatorValues_P1ElementwiseDiffusion_macro_2D.cpp + avx/P1ElementwiseDiffusion_computeInverseDiagonalOperatorValues_P1ElementwiseDiffusion_macro_3D.cpp + avx/P2ElementwiseDiffusionAnnulusMap_apply_P2ElementwiseDiffusionAnnulusMap_macro_2D.cpp + avx/P2ElementwiseDiffusionAnnulusMap_computeInverseDiagonalOperatorValues_P2ElementwiseDiffusionAnnulusMap_macro_2D.cpp + avx/P2ElementwiseDiffusionIcosahedralShellMap_apply_P2ElementwiseDiffusionIcosahedralShellMap_macro_3D.cpp + avx/P2ElementwiseDiffusionIcosahedralShellMap_computeInverseDiagonalOperatorValues_P2ElementwiseDiffusionIcosahedralShellMap_macro_3D.cpp + avx/P2ElementwiseDiffusion_apply_P2ElementwiseDiffusion_macro_2D.cpp + avx/P2ElementwiseDiffusion_apply_P2ElementwiseDiffusion_macro_3D.cpp + avx/P2ElementwiseDiffusion_computeInverseDiagonalOperatorValues_P2ElementwiseDiffusion_macro_2D.cpp + avx/P2ElementwiseDiffusion_computeInverseDiagonalOperatorValues_P2ElementwiseDiffusion_macro_3D.cpp + noarch/P1ElementwiseDiffusion_toMatrix_P1ElementwiseDiffusion_macro_2D.cpp + noarch/P1ElementwiseDiffusion_toMatrix_P1ElementwiseDiffusion_macro_3D.cpp + noarch/P2ElementwiseDiffusionAnnulusMap_toMatrix_P2ElementwiseDiffusionAnnulusMap_macro_2D.cpp + noarch/P2ElementwiseDiffusionIcosahedralShellMap_toMatrix_P2ElementwiseDiffusionIcosahedralShellMap_macro_3D.cpp + noarch/P2ElementwiseDiffusion_toMatrix_P2ElementwiseDiffusion_macro_2D.cpp + noarch/P2ElementwiseDiffusion_toMatrix_P2ElementwiseDiffusion_macro_3D.cpp ) set_source_files_properties( - avx/P1ElementwiseDiffusion_apply_macro_2D.cpp - avx/P1ElementwiseDiffusion_apply_macro_3D.cpp - avx/P1ElementwiseDiffusion_computeInverseDiagonalOperatorValues_macro_2D.cpp - avx/P1ElementwiseDiffusion_computeInverseDiagonalOperatorValues_macro_3D.cpp - avx/P2ElementwiseDiffusionAnnulusMap_apply_macro_2D.cpp - avx/P2ElementwiseDiffusionAnnulusMap_computeInverseDiagonalOperatorValues_macro_2D.cpp - avx/P2ElementwiseDiffusionIcosahedralShellMap_apply_macro_3D.cpp - avx/P2ElementwiseDiffusionIcosahedralShellMap_computeInverseDiagonalOperatorValues_macro_3D.cpp - avx/P2ElementwiseDiffusion_apply_macro_2D.cpp - avx/P2ElementwiseDiffusion_apply_macro_3D.cpp - avx/P2ElementwiseDiffusion_computeInverseDiagonalOperatorValues_macro_2D.cpp - avx/P2ElementwiseDiffusion_computeInverseDiagonalOperatorValues_macro_3D.cpp + avx/P1ElementwiseDiffusion_apply_P1ElementwiseDiffusion_macro_2D.cpp + avx/P1ElementwiseDiffusion_apply_P1ElementwiseDiffusion_macro_3D.cpp + avx/P1ElementwiseDiffusion_computeInverseDiagonalOperatorValues_P1ElementwiseDiffusion_macro_2D.cpp + avx/P1ElementwiseDiffusion_computeInverseDiagonalOperatorValues_P1ElementwiseDiffusion_macro_3D.cpp + avx/P2ElementwiseDiffusionAnnulusMap_apply_P2ElementwiseDiffusionAnnulusMap_macro_2D.cpp + avx/P2ElementwiseDiffusionAnnulusMap_computeInverseDiagonalOperatorValues_P2ElementwiseDiffusionAnnulusMap_macro_2D.cpp + avx/P2ElementwiseDiffusionIcosahedralShellMap_apply_P2ElementwiseDiffusionIcosahedralShellMap_macro_3D.cpp + avx/P2ElementwiseDiffusionIcosahedralShellMap_computeInverseDiagonalOperatorValues_P2ElementwiseDiffusionIcosahedralShellMap_macro_3D.cpp + avx/P2ElementwiseDiffusion_apply_P2ElementwiseDiffusion_macro_2D.cpp + avx/P2ElementwiseDiffusion_apply_P2ElementwiseDiffusion_macro_3D.cpp + avx/P2ElementwiseDiffusion_computeInverseDiagonalOperatorValues_P2ElementwiseDiffusion_macro_2D.cpp + avx/P2ElementwiseDiffusion_computeInverseDiagonalOperatorValues_P2ElementwiseDiffusion_macro_3D.cpp PROPERTIES COMPILE_OPTIONS ${HYTEG_COMPILER_NATIVE_FLAGS} ) @@ -57,24 +57,24 @@ else() target_sources(opgen-diffusion PRIVATE - noarch/P1ElementwiseDiffusion_apply_macro_2D.cpp - noarch/P1ElementwiseDiffusion_apply_macro_3D.cpp - noarch/P1ElementwiseDiffusion_computeInverseDiagonalOperatorValues_macro_2D.cpp - noarch/P1ElementwiseDiffusion_computeInverseDiagonalOperatorValues_macro_3D.cpp - noarch/P1ElementwiseDiffusion_toMatrix_macro_2D.cpp - noarch/P1ElementwiseDiffusion_toMatrix_macro_3D.cpp - noarch/P2ElementwiseDiffusionAnnulusMap_apply_macro_2D.cpp - noarch/P2ElementwiseDiffusionAnnulusMap_computeInverseDiagonalOperatorValues_macro_2D.cpp - noarch/P2ElementwiseDiffusionAnnulusMap_toMatrix_macro_2D.cpp - noarch/P2ElementwiseDiffusionIcosahedralShellMap_apply_macro_3D.cpp - noarch/P2ElementwiseDiffusionIcosahedralShellMap_computeInverseDiagonalOperatorValues_macro_3D.cpp - noarch/P2ElementwiseDiffusionIcosahedralShellMap_toMatrix_macro_3D.cpp - noarch/P2ElementwiseDiffusion_apply_macro_2D.cpp - noarch/P2ElementwiseDiffusion_apply_macro_3D.cpp - noarch/P2ElementwiseDiffusion_computeInverseDiagonalOperatorValues_macro_2D.cpp - noarch/P2ElementwiseDiffusion_computeInverseDiagonalOperatorValues_macro_3D.cpp - noarch/P2ElementwiseDiffusion_toMatrix_macro_2D.cpp - noarch/P2ElementwiseDiffusion_toMatrix_macro_3D.cpp + noarch/P1ElementwiseDiffusion_apply_P1ElementwiseDiffusion_macro_2D.cpp + noarch/P1ElementwiseDiffusion_apply_P1ElementwiseDiffusion_macro_3D.cpp + noarch/P1ElementwiseDiffusion_computeInverseDiagonalOperatorValues_P1ElementwiseDiffusion_macro_2D.cpp + noarch/P1ElementwiseDiffusion_computeInverseDiagonalOperatorValues_P1ElementwiseDiffusion_macro_3D.cpp + noarch/P1ElementwiseDiffusion_toMatrix_P1ElementwiseDiffusion_macro_2D.cpp + noarch/P1ElementwiseDiffusion_toMatrix_P1ElementwiseDiffusion_macro_3D.cpp + noarch/P2ElementwiseDiffusionAnnulusMap_apply_P2ElementwiseDiffusionAnnulusMap_macro_2D.cpp + noarch/P2ElementwiseDiffusionAnnulusMap_computeInverseDiagonalOperatorValues_P2ElementwiseDiffusionAnnulusMap_macro_2D.cpp + noarch/P2ElementwiseDiffusionAnnulusMap_toMatrix_P2ElementwiseDiffusionAnnulusMap_macro_2D.cpp + noarch/P2ElementwiseDiffusionIcosahedralShellMap_apply_P2ElementwiseDiffusionIcosahedralShellMap_macro_3D.cpp + noarch/P2ElementwiseDiffusionIcosahedralShellMap_computeInverseDiagonalOperatorValues_P2ElementwiseDiffusionIcosahedralShellMap_macro_3D.cpp + noarch/P2ElementwiseDiffusionIcosahedralShellMap_toMatrix_P2ElementwiseDiffusionIcosahedralShellMap_macro_3D.cpp + noarch/P2ElementwiseDiffusion_apply_P2ElementwiseDiffusion_macro_2D.cpp + noarch/P2ElementwiseDiffusion_apply_P2ElementwiseDiffusion_macro_3D.cpp + noarch/P2ElementwiseDiffusion_computeInverseDiagonalOperatorValues_P2ElementwiseDiffusion_macro_2D.cpp + noarch/P2ElementwiseDiffusion_computeInverseDiagonalOperatorValues_P2ElementwiseDiffusion_macro_3D.cpp + noarch/P2ElementwiseDiffusion_toMatrix_P2ElementwiseDiffusion_macro_2D.cpp + noarch/P2ElementwiseDiffusion_toMatrix_P2ElementwiseDiffusion_macro_3D.cpp ) endif() diff --git a/operators/diffusion/P1ElementwiseDiffusion.cpp b/operators/diffusion/P1ElementwiseDiffusion.cpp index f61cda1095c0563f9faabba1cc16200b980d8e30..48131e467fd1bd164b58b1be6e1bcea85a32e6e7 100644 --- a/operators/diffusion/P1ElementwiseDiffusion.cpp +++ b/operators/diffusion/P1ElementwiseDiffusion.cpp @@ -126,7 +126,7 @@ void P1ElementwiseDiffusion::apply( const P1Function< real_t >& src, this->timingTree_->start( "kernel" ); - apply_macro_3D( + apply_P1ElementwiseDiffusion_macro_3D( _data_dst, _data_src, @@ -144,6 +144,7 @@ void P1ElementwiseDiffusion::apply( const P1Function< real_t >& src, macro_vertex_coord_id_3comp2, micro_edges_per_macro_edge, micro_edges_per_macro_edge_float ); + this->timingTree_->stop( "kernel" ); } @@ -191,7 +192,7 @@ void P1ElementwiseDiffusion::apply( const P1Function< real_t >& src, this->timingTree_->start( "kernel" ); - apply_macro_2D( + apply_P1ElementwiseDiffusion_macro_2D( _data_dst, _data_src, @@ -203,6 +204,7 @@ void P1ElementwiseDiffusion::apply( const P1Function< real_t >& src, macro_vertex_coord_id_2comp1, micro_edges_per_macro_edge, micro_edges_per_macro_edge_float ); + this->timingTree_->stop( "kernel" ); } @@ -263,7 +265,7 @@ void P1ElementwiseDiffusion::toMatrix( const std::shared_ptr< SparseMatrixProxy this->timingTree_->start( "kernel" ); - toMatrix_macro_3D( + toMatrix_P1ElementwiseDiffusion_macro_3D( _data_dst, _data_src, @@ -282,6 +284,7 @@ void P1ElementwiseDiffusion::toMatrix( const std::shared_ptr< SparseMatrixProxy mat, micro_edges_per_macro_edge, micro_edges_per_macro_edge_float ); + this->timingTree_->stop( "kernel" ); } } @@ -310,7 +313,7 @@ void P1ElementwiseDiffusion::toMatrix( const std::shared_ptr< SparseMatrixProxy this->timingTree_->start( "kernel" ); - toMatrix_macro_2D( + toMatrix_P1ElementwiseDiffusion_macro_2D( _data_dst, _data_src, @@ -323,6 +326,7 @@ void P1ElementwiseDiffusion::toMatrix( const std::shared_ptr< SparseMatrixProxy mat, micro_edges_per_macro_edge, micro_edges_per_macro_edge_float ); + this->timingTree_->stop( "kernel" ); } } @@ -371,7 +375,7 @@ void P1ElementwiseDiffusion::computeInverseDiagonalOperatorValues() this->timingTree_->start( "kernel" ); - computeInverseDiagonalOperatorValues_macro_3D( + computeInverseDiagonalOperatorValues_P1ElementwiseDiffusion_macro_3D( _data_invDiag_, macro_vertex_coord_id_0comp0, @@ -388,6 +392,7 @@ void P1ElementwiseDiffusion::computeInverseDiagonalOperatorValues() macro_vertex_coord_id_3comp2, micro_edges_per_macro_edge, micro_edges_per_macro_edge_float ); + this->timingTree_->stop( "kernel" ); } @@ -426,7 +431,7 @@ void P1ElementwiseDiffusion::computeInverseDiagonalOperatorValues() this->timingTree_->start( "kernel" ); - computeInverseDiagonalOperatorValues_macro_2D( + computeInverseDiagonalOperatorValues_P1ElementwiseDiffusion_macro_2D( _data_invDiag_, macro_vertex_coord_id_0comp0, @@ -437,6 +442,7 @@ void P1ElementwiseDiffusion::computeInverseDiagonalOperatorValues() macro_vertex_coord_id_2comp1, micro_edges_per_macro_edge, micro_edges_per_macro_edge_float ); + this->timingTree_->stop( "kernel" ); } diff --git a/operators/diffusion/P1ElementwiseDiffusion.hpp b/operators/diffusion/P1ElementwiseDiffusion.hpp index 3829a0034996e9a7fc21bb8f6b7563c895d7ee02..af6e18e07469122b7231706a0efb912f17a7ae1d 100644 --- a/operators/diffusion/P1ElementwiseDiffusion.hpp +++ b/operators/diffusion/P1ElementwiseDiffusion.hpp @@ -29,6 +29,7 @@ #include "core/DataTypes.h" #include "hyteg/LikwidWrapper.hpp" +#include "hyteg/boundary/BoundaryConditions.hpp" #include "hyteg/communication/Syncing.hpp" #include "hyteg/edgedofspace/EdgeDoFMacroCell.hpp" #include "hyteg/operators/Operator.hpp" @@ -36,6 +37,7 @@ #include "hyteg/primitivestorage/PrimitiveStorage.hpp" #include "hyteg/solvers/Smoothables.hpp" #include "hyteg/sparseassembly/SparseMatrixProxy.hpp" +#include "hyteg/types/types.hpp" #define FUNC_PREFIX @@ -82,120 +84,149 @@ class P1ElementwiseDiffusion : public Operator< P1Function< real_t >, P1Function protected: private: - /// Kernel type: apply + /// Integral: P1ElementwiseDiffusion + /// - volume element: triangle, dim: 2, vertices: 3, spacedim: 2 + /// - kernel type: apply + /// - loop strategy: CUBES /// - quadrature rule: Centroid rule | points: 1, degree: 1 + /// - blending map: IdentityMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 49 49 12 0 0 0 0 1 - void apply_macro_2D( real_t* RESTRICT _data_dst, - real_t* RESTRICT _data_src, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float ) const; - /// Kernel type: apply + void apply_P1ElementwiseDiffusion_macro_2D( real_t* RESTRICT _data_dst, + real_t* RESTRICT _data_src, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float ) const; + + /// Integral: P1ElementwiseDiffusion + /// - volume element: tetrahedron, dim: 3, vertices: 4, spacedim: 3 + /// - kernel type: apply + /// - loop strategy: CUBES /// - quadrature rule: Keast 0 | points: 1, degree: 1 + /// - blending map: IdentityMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 135 123 36 0 0 0 0 1 - void apply_macro_3D( real_t* RESTRICT _data_dst, - real_t* RESTRICT _data_src, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_0comp2, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_1comp2, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - real_t macro_vertex_coord_id_2comp2, - real_t macro_vertex_coord_id_3comp0, - real_t macro_vertex_coord_id_3comp1, - real_t macro_vertex_coord_id_3comp2, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float ) const; - /// Kernel type: toMatrix + void apply_P1ElementwiseDiffusion_macro_3D( real_t* RESTRICT _data_dst, + real_t* RESTRICT _data_src, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_0comp2, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_1comp2, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + real_t macro_vertex_coord_id_2comp2, + real_t macro_vertex_coord_id_3comp0, + real_t macro_vertex_coord_id_3comp1, + real_t macro_vertex_coord_id_3comp2, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float ) const; + + /// Integral: P1ElementwiseDiffusion + /// - volume element: triangle, dim: 2, vertices: 3, spacedim: 2 + /// - kernel type: toMatrix + /// - loop strategy: CUBES /// - quadrature rule: Centroid rule | points: 1, degree: 1 + /// - blending map: IdentityMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 40 43 12 0 0 0 0 4 - void toMatrix_macro_2D( idx_t* RESTRICT _data_dst, - idx_t* RESTRICT _data_src, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - std::shared_ptr< SparseMatrixProxy > mat, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float ) const; - /// Kernel type: toMatrix + void toMatrix_P1ElementwiseDiffusion_macro_2D( idx_t* RESTRICT _data_dst, + idx_t* RESTRICT _data_src, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + std::shared_ptr< SparseMatrixProxy > mat, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float ) const; + + /// Integral: P1ElementwiseDiffusion + /// - volume element: tetrahedron, dim: 3, vertices: 4, spacedim: 3 + /// - kernel type: toMatrix + /// - loop strategy: CUBES /// - quadrature rule: Keast 0 | points: 1, degree: 1 + /// - blending map: IdentityMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 119 113 36 0 0 0 0 4 - void toMatrix_macro_3D( idx_t* RESTRICT _data_dst, - idx_t* RESTRICT _data_src, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_0comp2, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_1comp2, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - real_t macro_vertex_coord_id_2comp2, - real_t macro_vertex_coord_id_3comp0, - real_t macro_vertex_coord_id_3comp1, - real_t macro_vertex_coord_id_3comp2, - std::shared_ptr< SparseMatrixProxy > mat, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float ) const; - /// Kernel type: computeInverseDiagonalOperatorValues + void toMatrix_P1ElementwiseDiffusion_macro_3D( idx_t* RESTRICT _data_dst, + idx_t* RESTRICT _data_src, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_0comp2, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_1comp2, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + real_t macro_vertex_coord_id_2comp2, + real_t macro_vertex_coord_id_3comp0, + real_t macro_vertex_coord_id_3comp1, + real_t macro_vertex_coord_id_3comp2, + std::shared_ptr< SparseMatrixProxy > mat, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float ) const; + + /// Integral: P1ElementwiseDiffusion + /// - volume element: triangle, dim: 2, vertices: 3, spacedim: 2 + /// - kernel type: computeInverseDiagonalOperatorValues + /// - loop strategy: CUBES /// - quadrature rule: Centroid rule | points: 1, degree: 1 + /// - blending map: IdentityMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 40 34 12 0 0 0 0 1 - void computeInverseDiagonalOperatorValues_macro_2D( real_t* RESTRICT _data_invDiag_, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float ) const; - /// Kernel type: computeInverseDiagonalOperatorValues + void computeInverseDiagonalOperatorValues_P1ElementwiseDiffusion_macro_2D( real_t* RESTRICT _data_invDiag_, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float ) const; + + /// Integral: P1ElementwiseDiffusion + /// - volume element: tetrahedron, dim: 3, vertices: 4, spacedim: 3 + /// - kernel type: computeInverseDiagonalOperatorValues + /// - loop strategy: CUBES /// - quadrature rule: Keast 0 | points: 1, degree: 1 + /// - blending map: IdentityMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 111 89 36 0 0 0 0 1 - void computeInverseDiagonalOperatorValues_macro_3D( real_t* RESTRICT _data_invDiag_, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_0comp2, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_1comp2, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - real_t macro_vertex_coord_id_2comp2, - real_t macro_vertex_coord_id_3comp0, - real_t macro_vertex_coord_id_3comp1, - real_t macro_vertex_coord_id_3comp2, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float ) const; + void computeInverseDiagonalOperatorValues_P1ElementwiseDiffusion_macro_3D( real_t* RESTRICT _data_invDiag_, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_0comp2, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_1comp2, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + real_t macro_vertex_coord_id_2comp2, + real_t macro_vertex_coord_id_3comp0, + real_t macro_vertex_coord_id_3comp1, + real_t macro_vertex_coord_id_3comp2, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float ) const; std::shared_ptr< P1Function< real_t > > invDiag_; }; diff --git a/operators/diffusion/P2ElementwiseDiffusion.cpp b/operators/diffusion/P2ElementwiseDiffusion.cpp index 672809709371a68d1f044f996cf9317e28606c23..1a42c56397c7cdd88f0ee8e2ffc08bed1ca55fea 100644 --- a/operators/diffusion/P2ElementwiseDiffusion.cpp +++ b/operators/diffusion/P2ElementwiseDiffusion.cpp @@ -129,7 +129,7 @@ void P2ElementwiseDiffusion::apply( const P2Function< real_t >& src, this->timingTree_->start( "kernel" ); - apply_macro_3D( + apply_P2ElementwiseDiffusion_macro_3D( _data_dstEdge, _data_dstVertex, @@ -149,6 +149,7 @@ void P2ElementwiseDiffusion::apply( const P2Function< real_t >& src, macro_vertex_coord_id_3comp2, micro_edges_per_macro_edge, micro_edges_per_macro_edge_float ); + this->timingTree_->stop( "kernel" ); } @@ -216,7 +217,7 @@ void P2ElementwiseDiffusion::apply( const P2Function< real_t >& src, this->timingTree_->start( "kernel" ); - apply_macro_2D( + apply_P2ElementwiseDiffusion_macro_2D( _data_dstEdge, _data_dstVertex, @@ -230,6 +231,7 @@ void P2ElementwiseDiffusion::apply( const P2Function< real_t >& src, macro_vertex_coord_id_2comp1, micro_edges_per_macro_edge, micro_edges_per_macro_edge_float ); + this->timingTree_->stop( "kernel" ); } @@ -296,7 +298,7 @@ void P2ElementwiseDiffusion::toMatrix( const std::shared_ptr< SparseMatrixProxy this->timingTree_->start( "kernel" ); - toMatrix_macro_3D( + toMatrix_P2ElementwiseDiffusion_macro_3D( _data_dstEdge, _data_dstVertex, @@ -317,6 +319,7 @@ void P2ElementwiseDiffusion::toMatrix( const std::shared_ptr< SparseMatrixProxy mat, micro_edges_per_macro_edge, micro_edges_per_macro_edge_float ); + this->timingTree_->stop( "kernel" ); } } @@ -347,7 +350,7 @@ void P2ElementwiseDiffusion::toMatrix( const std::shared_ptr< SparseMatrixProxy this->timingTree_->start( "kernel" ); - toMatrix_macro_2D( + toMatrix_P2ElementwiseDiffusion_macro_2D( _data_dstEdge, _data_dstVertex, @@ -362,6 +365,7 @@ void P2ElementwiseDiffusion::toMatrix( const std::shared_ptr< SparseMatrixProxy mat, micro_edges_per_macro_edge, micro_edges_per_macro_edge_float ); + this->timingTree_->stop( "kernel" ); } } @@ -412,7 +416,7 @@ void P2ElementwiseDiffusion::computeInverseDiagonalOperatorValues() this->timingTree_->start( "kernel" ); - computeInverseDiagonalOperatorValues_macro_3D( + computeInverseDiagonalOperatorValues_P2ElementwiseDiffusion_macro_3D( _data_invDiag_Edge, _data_invDiag_Vertex, @@ -430,6 +434,7 @@ void P2ElementwiseDiffusion::computeInverseDiagonalOperatorValues() macro_vertex_coord_id_3comp2, micro_edges_per_macro_edge, micro_edges_per_macro_edge_float ); + this->timingTree_->stop( "kernel" ); } @@ -472,7 +477,7 @@ void P2ElementwiseDiffusion::computeInverseDiagonalOperatorValues() this->timingTree_->start( "kernel" ); - computeInverseDiagonalOperatorValues_macro_2D( + computeInverseDiagonalOperatorValues_P2ElementwiseDiffusion_macro_2D( _data_invDiag_Edge, _data_invDiag_Vertex, @@ -484,6 +489,7 @@ void P2ElementwiseDiffusion::computeInverseDiagonalOperatorValues() macro_vertex_coord_id_2comp1, micro_edges_per_macro_edge, micro_edges_per_macro_edge_float ); + this->timingTree_->stop( "kernel" ); } diff --git a/operators/diffusion/P2ElementwiseDiffusion.hpp b/operators/diffusion/P2ElementwiseDiffusion.hpp index 6eca940a587c85e18d966339bee38e2626242eb1..919440af72f60cb972f33ab923c674b5f14acd5a 100644 --- a/operators/diffusion/P2ElementwiseDiffusion.hpp +++ b/operators/diffusion/P2ElementwiseDiffusion.hpp @@ -29,6 +29,7 @@ #include "core/DataTypes.h" #include "hyteg/LikwidWrapper.hpp" +#include "hyteg/boundary/BoundaryConditions.hpp" #include "hyteg/communication/Syncing.hpp" #include "hyteg/edgedofspace/EdgeDoFMacroCell.hpp" #include "hyteg/operators/Operator.hpp" @@ -36,6 +37,7 @@ #include "hyteg/primitivestorage/PrimitiveStorage.hpp" #include "hyteg/solvers/Smoothables.hpp" #include "hyteg/sparseassembly/SparseMatrixProxy.hpp" +#include "hyteg/types/types.hpp" #define FUNC_PREFIX @@ -82,130 +84,159 @@ class P2ElementwiseDiffusion : public Operator< P2Function< real_t >, P2Function protected: private: - /// Kernel type: apply + /// Integral: P2ElementwiseDiffusion + /// - volume element: triangle, dim: 2, vertices: 3, spacedim: 2 + /// - kernel type: apply + /// - loop strategy: CUBES /// - quadrature rule: Dunavant 2 | points: 3, degree: 2 + /// - blending map: IdentityMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 215 310 12 0 0 0 0 1 - void apply_macro_2D( real_t* RESTRICT _data_dstEdge, - real_t* RESTRICT _data_dstVertex, - real_t* RESTRICT _data_srcEdge, - real_t* RESTRICT _data_srcVertex, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float ) const; - /// Kernel type: apply + void apply_P2ElementwiseDiffusion_macro_2D( real_t* RESTRICT _data_dstEdge, + real_t* RESTRICT _data_dstVertex, + real_t* RESTRICT _data_srcEdge, + real_t* RESTRICT _data_srcVertex, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float ) const; + + /// Integral: P2ElementwiseDiffusion + /// - volume element: tetrahedron, dim: 3, vertices: 4, spacedim: 3 + /// - kernel type: apply + /// - loop strategy: CUBES /// - quadrature rule: Hammer-Marlowe-Stroud 1 | points: 4, degree: 2 + /// - blending map: IdentityMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 1086 1461 36 0 0 0 0 1 - void apply_macro_3D( real_t* RESTRICT _data_dstEdge, - real_t* RESTRICT _data_dstVertex, - real_t* RESTRICT _data_srcEdge, - real_t* RESTRICT _data_srcVertex, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_0comp2, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_1comp2, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - real_t macro_vertex_coord_id_2comp2, - real_t macro_vertex_coord_id_3comp0, - real_t macro_vertex_coord_id_3comp1, - real_t macro_vertex_coord_id_3comp2, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float ) const; - /// Kernel type: toMatrix + void apply_P2ElementwiseDiffusion_macro_3D( real_t* RESTRICT _data_dstEdge, + real_t* RESTRICT _data_dstVertex, + real_t* RESTRICT _data_srcEdge, + real_t* RESTRICT _data_srcVertex, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_0comp2, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_1comp2, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + real_t macro_vertex_coord_id_2comp2, + real_t macro_vertex_coord_id_3comp0, + real_t macro_vertex_coord_id_3comp1, + real_t macro_vertex_coord_id_3comp2, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float ) const; + + /// Integral: P2ElementwiseDiffusion + /// - volume element: triangle, dim: 2, vertices: 3, spacedim: 2 + /// - kernel type: toMatrix + /// - loop strategy: CUBES /// - quadrature rule: Dunavant 2 | points: 3, degree: 2 + /// - blending map: IdentityMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 179 274 12 0 0 0 0 4 - void toMatrix_macro_2D( idx_t* RESTRICT _data_dstEdge, - idx_t* RESTRICT _data_dstVertex, - idx_t* RESTRICT _data_srcEdge, - idx_t* RESTRICT _data_srcVertex, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - std::shared_ptr< SparseMatrixProxy > mat, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float ) const; - /// Kernel type: toMatrix + void toMatrix_P2ElementwiseDiffusion_macro_2D( idx_t* RESTRICT _data_dstEdge, + idx_t* RESTRICT _data_dstVertex, + idx_t* RESTRICT _data_srcEdge, + idx_t* RESTRICT _data_srcVertex, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + std::shared_ptr< SparseMatrixProxy > mat, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float ) const; + + /// Integral: P2ElementwiseDiffusion + /// - volume element: tetrahedron, dim: 3, vertices: 4, spacedim: 3 + /// - kernel type: toMatrix + /// - loop strategy: CUBES /// - quadrature rule: Hammer-Marlowe-Stroud 1 | points: 4, degree: 2 + /// - blending map: IdentityMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 986 1361 36 0 0 0 0 4 - void toMatrix_macro_3D( idx_t* RESTRICT _data_dstEdge, - idx_t* RESTRICT _data_dstVertex, - idx_t* RESTRICT _data_srcEdge, - idx_t* RESTRICT _data_srcVertex, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_0comp2, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_1comp2, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - real_t macro_vertex_coord_id_2comp2, - real_t macro_vertex_coord_id_3comp0, - real_t macro_vertex_coord_id_3comp1, - real_t macro_vertex_coord_id_3comp2, - std::shared_ptr< SparseMatrixProxy > mat, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float ) const; - /// Kernel type: computeInverseDiagonalOperatorValues + void toMatrix_P2ElementwiseDiffusion_macro_3D( idx_t* RESTRICT _data_dstEdge, + idx_t* RESTRICT _data_dstVertex, + idx_t* RESTRICT _data_srcEdge, + idx_t* RESTRICT _data_srcVertex, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_0comp2, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_1comp2, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + real_t macro_vertex_coord_id_2comp2, + real_t macro_vertex_coord_id_3comp0, + real_t macro_vertex_coord_id_3comp1, + real_t macro_vertex_coord_id_3comp2, + std::shared_ptr< SparseMatrixProxy > mat, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float ) const; + + /// Integral: P2ElementwiseDiffusion + /// - volume element: triangle, dim: 2, vertices: 3, spacedim: 2 + /// - kernel type: computeInverseDiagonalOperatorValues + /// - loop strategy: CUBES /// - quadrature rule: Dunavant 2 | points: 3, degree: 2 + /// - blending map: IdentityMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 110 127 12 0 0 0 0 1 - void computeInverseDiagonalOperatorValues_macro_2D( real_t* RESTRICT _data_invDiag_Edge, - real_t* RESTRICT _data_invDiag_Vertex, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float ) const; - /// Kernel type: computeInverseDiagonalOperatorValues + void computeInverseDiagonalOperatorValues_P2ElementwiseDiffusion_macro_2D( real_t* RESTRICT _data_invDiag_Edge, + real_t* RESTRICT _data_invDiag_Vertex, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float ) const; + + /// Integral: P2ElementwiseDiffusion + /// - volume element: tetrahedron, dim: 3, vertices: 4, spacedim: 3 + /// - kernel type: computeInverseDiagonalOperatorValues + /// - loop strategy: CUBES /// - quadrature rule: Hammer-Marlowe-Stroud 1 | points: 4, degree: 2 + /// - blending map: IdentityMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 381 497 36 0 0 0 0 1 - void computeInverseDiagonalOperatorValues_macro_3D( real_t* RESTRICT _data_invDiag_Edge, - real_t* RESTRICT _data_invDiag_Vertex, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_0comp2, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_1comp2, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - real_t macro_vertex_coord_id_2comp2, - real_t macro_vertex_coord_id_3comp0, - real_t macro_vertex_coord_id_3comp1, - real_t macro_vertex_coord_id_3comp2, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float ) const; + void computeInverseDiagonalOperatorValues_P2ElementwiseDiffusion_macro_3D( real_t* RESTRICT _data_invDiag_Edge, + real_t* RESTRICT _data_invDiag_Vertex, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_0comp2, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_1comp2, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + real_t macro_vertex_coord_id_2comp2, + real_t macro_vertex_coord_id_3comp0, + real_t macro_vertex_coord_id_3comp1, + real_t macro_vertex_coord_id_3comp2, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float ) const; std::shared_ptr< P2Function< real_t > > invDiag_; }; diff --git a/operators/diffusion/P2ElementwiseDiffusionAnnulusMap.cpp b/operators/diffusion/P2ElementwiseDiffusionAnnulusMap.cpp index 5733867e09bfea94888572eb383ee8db8dd1caa1..8dd27ac92e85c002442bcaf967f87b024b6eb8d4 100644 --- a/operators/diffusion/P2ElementwiseDiffusionAnnulusMap.cpp +++ b/operators/diffusion/P2ElementwiseDiffusionAnnulusMap.cpp @@ -144,7 +144,7 @@ void P2ElementwiseDiffusionAnnulusMap::apply( const P2Function< real_t >& src, this->timingTree_->start( "kernel" ); - apply_macro_2D( + apply_P2ElementwiseDiffusionAnnulusMap_macro_2D( _data_dstEdge, _data_dstVertex, @@ -166,6 +166,7 @@ void P2ElementwiseDiffusionAnnulusMap::apply( const P2Function< real_t >& src, refVertex_1, thrVertex_0, thrVertex_1 ); + this->timingTree_->stop( "kernel" ); } @@ -245,7 +246,7 @@ void P2ElementwiseDiffusionAnnulusMap::toMatrix( const std::shared_ptr< SparseMa this->timingTree_->start( "kernel" ); - toMatrix_macro_2D( + toMatrix_P2ElementwiseDiffusionAnnulusMap_macro_2D( _data_dstEdge, _data_dstVertex, @@ -268,6 +269,7 @@ void P2ElementwiseDiffusionAnnulusMap::toMatrix( const std::shared_ptr< SparseMa refVertex_1, thrVertex_0, thrVertex_1 ); + this->timingTree_->stop( "kernel" ); } } @@ -332,7 +334,7 @@ void P2ElementwiseDiffusionAnnulusMap::computeInverseDiagonalOperatorValues() this->timingTree_->start( "kernel" ); - computeInverseDiagonalOperatorValues_macro_2D( + computeInverseDiagonalOperatorValues_P2ElementwiseDiffusionAnnulusMap_macro_2D( _data_invDiag_Edge, _data_invDiag_Vertex, @@ -352,6 +354,7 @@ void P2ElementwiseDiffusionAnnulusMap::computeInverseDiagonalOperatorValues() refVertex_1, thrVertex_0, thrVertex_1 ); + this->timingTree_->stop( "kernel" ); } diff --git a/operators/diffusion/P2ElementwiseDiffusionAnnulusMap.hpp b/operators/diffusion/P2ElementwiseDiffusionAnnulusMap.hpp index 5a5fa0bf412343b15cbc4b81a6d0950ac9acc902..f08fa6df53f1e09c46efcbcee68831cb5298c5b5 100644 --- a/operators/diffusion/P2ElementwiseDiffusionAnnulusMap.hpp +++ b/operators/diffusion/P2ElementwiseDiffusionAnnulusMap.hpp @@ -29,6 +29,7 @@ #include "core/DataTypes.h" #include "hyteg/LikwidWrapper.hpp" +#include "hyteg/boundary/BoundaryConditions.hpp" #include "hyteg/communication/Syncing.hpp" #include "hyteg/edgedofspace/EdgeDoFMacroCell.hpp" #include "hyteg/geometry/AnnulusMap.hpp" @@ -37,6 +38,7 @@ #include "hyteg/primitivestorage/PrimitiveStorage.hpp" #include "hyteg/solvers/Smoothables.hpp" #include "hyteg/sparseassembly/SparseMatrixProxy.hpp" +#include "hyteg/types/types.hpp" #define FUNC_PREFIX @@ -83,83 +85,97 @@ class P2ElementwiseDiffusionAnnulusMap : public Operator< P2Function< real_t >, protected: private: - /// Kernel type: apply + /// Integral: P2ElementwiseDiffusionAnnulusMap + /// - volume element: triangle, dim: 2, vertices: 3, spacedim: 2 + /// - kernel type: apply + /// - loop strategy: SAWTOOTH /// - quadrature rule: Dunavant 3 | points: 4, degree: 3 + /// - blending map: AnnulusMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 385 607 17 8 0 0 0 1 - void apply_macro_2D( real_t* RESTRICT _data_dstEdge, - real_t* RESTRICT _data_dstVertex, - real_t* RESTRICT _data_srcEdge, - real_t* RESTRICT _data_srcVertex, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float, - real_t radRayVertex, - real_t radRefVertex, - real_t rayVertex_0, - real_t rayVertex_1, - real_t refVertex_0, - real_t refVertex_1, - real_t thrVertex_0, - real_t thrVertex_1 ) const; - /// Kernel type: toMatrix + void apply_P2ElementwiseDiffusionAnnulusMap_macro_2D( real_t* RESTRICT _data_dstEdge, + real_t* RESTRICT _data_dstVertex, + real_t* RESTRICT _data_srcEdge, + real_t* RESTRICT _data_srcVertex, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float, + real_t radRayVertex, + real_t radRefVertex, + real_t rayVertex_0, + real_t rayVertex_1, + real_t refVertex_0, + real_t refVertex_1, + real_t thrVertex_0, + real_t thrVertex_1 ) const; + + /// Integral: P2ElementwiseDiffusionAnnulusMap + /// - volume element: triangle, dim: 2, vertices: 3, spacedim: 2 + /// - kernel type: toMatrix + /// - loop strategy: SAWTOOTH /// - quadrature rule: Dunavant 3 | points: 4, degree: 3 + /// - blending map: AnnulusMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 349 571 17 8 0 0 0 4 - void toMatrix_macro_2D( idx_t* RESTRICT _data_dstEdge, - idx_t* RESTRICT _data_dstVertex, - idx_t* RESTRICT _data_srcEdge, - idx_t* RESTRICT _data_srcVertex, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - std::shared_ptr< SparseMatrixProxy > mat, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float, - real_t radRayVertex, - real_t radRefVertex, - real_t rayVertex_0, - real_t rayVertex_1, - real_t refVertex_0, - real_t refVertex_1, - real_t thrVertex_0, - real_t thrVertex_1 ) const; - /// Kernel type: computeInverseDiagonalOperatorValues + void toMatrix_P2ElementwiseDiffusionAnnulusMap_macro_2D( idx_t* RESTRICT _data_dstEdge, + idx_t* RESTRICT _data_dstVertex, + idx_t* RESTRICT _data_srcEdge, + idx_t* RESTRICT _data_srcVertex, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + std::shared_ptr< SparseMatrixProxy > mat, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float, + real_t radRayVertex, + real_t radRefVertex, + real_t rayVertex_0, + real_t rayVertex_1, + real_t refVertex_0, + real_t refVertex_1, + real_t thrVertex_0, + real_t thrVertex_1 ) const; + + /// Integral: P2ElementwiseDiffusionAnnulusMap + /// - volume element: triangle, dim: 2, vertices: 3, spacedim: 2 + /// - kernel type: computeInverseDiagonalOperatorValues + /// - loop strategy: SAWTOOTH /// - quadrature rule: Dunavant 3 | points: 4, degree: 3 + /// - blending map: AnnulusMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 250 391 17 8 0 0 0 1 - void computeInverseDiagonalOperatorValues_macro_2D( real_t* RESTRICT _data_invDiag_Edge, - real_t* RESTRICT _data_invDiag_Vertex, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float, - real_t radRayVertex, - real_t radRefVertex, - real_t rayVertex_0, - real_t rayVertex_1, - real_t refVertex_0, - real_t refVertex_1, - real_t thrVertex_0, - real_t thrVertex_1 ) const; + void computeInverseDiagonalOperatorValues_P2ElementwiseDiffusionAnnulusMap_macro_2D( real_t* RESTRICT _data_invDiag_Edge, + real_t* RESTRICT _data_invDiag_Vertex, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float, + real_t radRayVertex, + real_t radRefVertex, + real_t rayVertex_0, + real_t rayVertex_1, + real_t refVertex_0, + real_t refVertex_1, + real_t thrVertex_0, + real_t thrVertex_1 ) const; std::shared_ptr< P2Function< real_t > > invDiag_; }; diff --git a/operators/diffusion/P2ElementwiseDiffusionIcosahedralShellMap.cpp b/operators/diffusion/P2ElementwiseDiffusionIcosahedralShellMap.cpp index 4f8d29bc184c45e499b2623f4e8d135acd295a75..f639eaae04d5ec18a830abecc08262d2985f6e78 100644 --- a/operators/diffusion/P2ElementwiseDiffusionIcosahedralShellMap.cpp +++ b/operators/diffusion/P2ElementwiseDiffusionIcosahedralShellMap.cpp @@ -147,7 +147,7 @@ void P2ElementwiseDiffusionIcosahedralShellMap::apply( const P2Function< real_t this->timingTree_->start( "kernel" ); - apply_macro_3D( + apply_P2ElementwiseDiffusionIcosahedralShellMap_macro_3D( _data_dstEdge, _data_dstVertex, @@ -181,6 +181,7 @@ void P2ElementwiseDiffusionIcosahedralShellMap::apply( const P2Function< real_t thrVertex_0, thrVertex_1, thrVertex_2 ); + this->timingTree_->stop( "kernel" ); } @@ -272,7 +273,7 @@ void P2ElementwiseDiffusionIcosahedralShellMap::toMatrix( const std::shared_ptr< this->timingTree_->start( "kernel" ); - toMatrix_macro_3D( + toMatrix_P2ElementwiseDiffusionIcosahedralShellMap_macro_3D( _data_dstEdge, _data_dstVertex, @@ -307,6 +308,7 @@ void P2ElementwiseDiffusionIcosahedralShellMap::toMatrix( const std::shared_ptr< thrVertex_0, thrVertex_1, thrVertex_2 ); + this->timingTree_->stop( "kernel" ); } } @@ -382,7 +384,7 @@ void P2ElementwiseDiffusionIcosahedralShellMap::computeInverseDiagonalOperatorVa this->timingTree_->start( "kernel" ); - computeInverseDiagonalOperatorValues_macro_3D( + computeInverseDiagonalOperatorValues_P2ElementwiseDiffusionIcosahedralShellMap_macro_3D( _data_invDiag_Edge, _data_invDiag_Vertex, @@ -414,6 +416,7 @@ void P2ElementwiseDiffusionIcosahedralShellMap::computeInverseDiagonalOperatorVa thrVertex_0, thrVertex_1, thrVertex_2 ); + this->timingTree_->stop( "kernel" ); } diff --git a/operators/diffusion/P2ElementwiseDiffusionIcosahedralShellMap.hpp b/operators/diffusion/P2ElementwiseDiffusionIcosahedralShellMap.hpp index e5ef874b863944145021a9e0cdb6ffd0a0ce2855..7062812609c4c31dfbb13f15177dbd0349ea0e02 100644 --- a/operators/diffusion/P2ElementwiseDiffusionIcosahedralShellMap.hpp +++ b/operators/diffusion/P2ElementwiseDiffusionIcosahedralShellMap.hpp @@ -29,6 +29,7 @@ #include "core/DataTypes.h" #include "hyteg/LikwidWrapper.hpp" +#include "hyteg/boundary/BoundaryConditions.hpp" #include "hyteg/communication/Syncing.hpp" #include "hyteg/edgedofspace/EdgeDoFMacroCell.hpp" #include "hyteg/geometry/IcosahedralShellMap.hpp" @@ -37,6 +38,7 @@ #include "hyteg/primitivestorage/PrimitiveStorage.hpp" #include "hyteg/solvers/Smoothables.hpp" #include "hyteg/sparseassembly/SparseMatrixProxy.hpp" +#include "hyteg/types/types.hpp" #define FUNC_PREFIX @@ -85,119 +87,134 @@ class P2ElementwiseDiffusionIcosahedralShellMap : public Operator< P2Function< r protected: private: - /// Kernel type: apply + /// Integral: P2ElementwiseDiffusionIcosahedralShellMap + /// - volume element: tetrahedron, dim: 3, vertices: 4, spacedim: 3 + /// - kernel type: apply + /// - loop strategy: SAWTOOTH /// - quadrature rule: Hammer-Marlowe-Stroud 3 | points: 5, degree: 3 + /// - blending map: IcosahedralShellMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 1729 2398 42 5 0 0 0 1 - void apply_macro_3D( real_t* RESTRICT _data_dstEdge, - real_t* RESTRICT _data_dstVertex, - real_t* RESTRICT _data_srcEdge, - real_t* RESTRICT _data_srcVertex, - real_t forVertex_0, - real_t forVertex_1, - real_t forVertex_2, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_0comp2, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_1comp2, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - real_t macro_vertex_coord_id_2comp2, - real_t macro_vertex_coord_id_3comp0, - real_t macro_vertex_coord_id_3comp1, - real_t macro_vertex_coord_id_3comp2, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float, - real_t radRayVertex, - real_t radRefVertex, - real_t rayVertex_0, - real_t rayVertex_1, - real_t rayVertex_2, - real_t refVertex_0, - real_t refVertex_1, - real_t refVertex_2, - real_t thrVertex_0, - real_t thrVertex_1, - real_t thrVertex_2 ) const; - /// Kernel type: toMatrix + void apply_P2ElementwiseDiffusionIcosahedralShellMap_macro_3D( real_t* RESTRICT _data_dstEdge, + real_t* RESTRICT _data_dstVertex, + real_t* RESTRICT _data_srcEdge, + real_t* RESTRICT _data_srcVertex, + real_t forVertex_0, + real_t forVertex_1, + real_t forVertex_2, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_0comp2, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_1comp2, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + real_t macro_vertex_coord_id_2comp2, + real_t macro_vertex_coord_id_3comp0, + real_t macro_vertex_coord_id_3comp1, + real_t macro_vertex_coord_id_3comp2, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float, + real_t radRayVertex, + real_t radRefVertex, + real_t rayVertex_0, + real_t rayVertex_1, + real_t rayVertex_2, + real_t refVertex_0, + real_t refVertex_1, + real_t refVertex_2, + real_t thrVertex_0, + real_t thrVertex_1, + real_t thrVertex_2 ) const; + + /// Integral: P2ElementwiseDiffusionIcosahedralShellMap + /// - volume element: tetrahedron, dim: 3, vertices: 4, spacedim: 3 + /// - kernel type: toMatrix + /// - loop strategy: SAWTOOTH /// - quadrature rule: Hammer-Marlowe-Stroud 3 | points: 5, degree: 3 + /// - blending map: IcosahedralShellMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 1629 2298 42 5 0 0 0 4 - void toMatrix_macro_3D( idx_t* RESTRICT _data_dstEdge, - idx_t* RESTRICT _data_dstVertex, - idx_t* RESTRICT _data_srcEdge, - idx_t* RESTRICT _data_srcVertex, - real_t forVertex_0, - real_t forVertex_1, - real_t forVertex_2, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_0comp2, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_1comp2, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - real_t macro_vertex_coord_id_2comp2, - real_t macro_vertex_coord_id_3comp0, - real_t macro_vertex_coord_id_3comp1, - real_t macro_vertex_coord_id_3comp2, - std::shared_ptr< SparseMatrixProxy > mat, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float, - real_t radRayVertex, - real_t radRefVertex, - real_t rayVertex_0, - real_t rayVertex_1, - real_t rayVertex_2, - real_t refVertex_0, - real_t refVertex_1, - real_t refVertex_2, - real_t thrVertex_0, - real_t thrVertex_1, - real_t thrVertex_2 ) const; - /// Kernel type: computeInverseDiagonalOperatorValues + void toMatrix_P2ElementwiseDiffusionIcosahedralShellMap_macro_3D( idx_t* RESTRICT _data_dstEdge, + idx_t* RESTRICT _data_dstVertex, + idx_t* RESTRICT _data_srcEdge, + idx_t* RESTRICT _data_srcVertex, + real_t forVertex_0, + real_t forVertex_1, + real_t forVertex_2, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_0comp2, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_1comp2, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + real_t macro_vertex_coord_id_2comp2, + real_t macro_vertex_coord_id_3comp0, + real_t macro_vertex_coord_id_3comp1, + real_t macro_vertex_coord_id_3comp2, + std::shared_ptr< SparseMatrixProxy > mat, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float, + real_t radRayVertex, + real_t radRefVertex, + real_t rayVertex_0, + real_t rayVertex_1, + real_t rayVertex_2, + real_t refVertex_0, + real_t refVertex_1, + real_t refVertex_2, + real_t thrVertex_0, + real_t thrVertex_1, + real_t thrVertex_2 ) const; + + /// Integral: P2ElementwiseDiffusionIcosahedralShellMap + /// - volume element: tetrahedron, dim: 3, vertices: 4, spacedim: 3 + /// - kernel type: computeInverseDiagonalOperatorValues + /// - loop strategy: SAWTOOTH /// - quadrature rule: Hammer-Marlowe-Stroud 3 | points: 5, degree: 3 + /// - blending map: IcosahedralShellMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 1009 1398 42 5 0 0 0 1 - void computeInverseDiagonalOperatorValues_macro_3D( real_t* RESTRICT _data_invDiag_Edge, - real_t* RESTRICT _data_invDiag_Vertex, - real_t forVertex_0, - real_t forVertex_1, - real_t forVertex_2, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_0comp2, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_1comp2, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - real_t macro_vertex_coord_id_2comp2, - real_t macro_vertex_coord_id_3comp0, - real_t macro_vertex_coord_id_3comp1, - real_t macro_vertex_coord_id_3comp2, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float, - real_t radRayVertex, - real_t radRefVertex, - real_t rayVertex_0, - real_t rayVertex_1, - real_t rayVertex_2, - real_t refVertex_0, - real_t refVertex_1, - real_t refVertex_2, - real_t thrVertex_0, - real_t thrVertex_1, - real_t thrVertex_2 ) const; + void computeInverseDiagonalOperatorValues_P2ElementwiseDiffusionIcosahedralShellMap_macro_3D( + real_t* RESTRICT _data_invDiag_Edge, + real_t* RESTRICT _data_invDiag_Vertex, + real_t forVertex_0, + real_t forVertex_1, + real_t forVertex_2, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_0comp2, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_1comp2, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + real_t macro_vertex_coord_id_2comp2, + real_t macro_vertex_coord_id_3comp0, + real_t macro_vertex_coord_id_3comp1, + real_t macro_vertex_coord_id_3comp2, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float, + real_t radRayVertex, + real_t radRefVertex, + real_t rayVertex_0, + real_t rayVertex_1, + real_t rayVertex_2, + real_t refVertex_0, + real_t refVertex_1, + real_t refVertex_2, + real_t thrVertex_0, + real_t thrVertex_1, + real_t thrVertex_2 ) const; std::shared_ptr< P2Function< real_t > > invDiag_; }; diff --git a/operators/diffusion/avx/P1ElementwiseDiffusion_apply_macro_2D.cpp b/operators/diffusion/avx/P1ElementwiseDiffusion_apply_P1ElementwiseDiffusion_macro_2D.cpp similarity index 99% rename from operators/diffusion/avx/P1ElementwiseDiffusion_apply_macro_2D.cpp rename to operators/diffusion/avx/P1ElementwiseDiffusion_apply_P1ElementwiseDiffusion_macro_2D.cpp index d9b0c4dbcfc808199fa35fb16839c4ac29d2ff78..60b87ab9f430f1e78e0c27d55ceec43ec90adba2 100644 --- a/operators/diffusion/avx/P1ElementwiseDiffusion_apply_macro_2D.cpp +++ b/operators/diffusion/avx/P1ElementwiseDiffusion_apply_P1ElementwiseDiffusion_macro_2D.cpp @@ -43,6 +43,10 @@ + + + + @@ -56,7 +60,7 @@ namespace hyteg { namespace operatorgeneration { -void P1ElementwiseDiffusion::apply_macro_2D( real_t * RESTRICT _data_dst, real_t * RESTRICT _data_src, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P1ElementwiseDiffusion::apply_P1ElementwiseDiffusion_macro_2D( real_t * RESTRICT _data_dst, real_t * RESTRICT _data_src, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t tmp_coords_jac_0_BLUE = 1.0 / (micro_edges_per_macro_edge_float)*1.0; diff --git a/operators/diffusion/avx/P1ElementwiseDiffusion_apply_macro_3D.cpp b/operators/diffusion/avx/P1ElementwiseDiffusion_apply_P1ElementwiseDiffusion_macro_3D.cpp similarity index 99% rename from operators/diffusion/avx/P1ElementwiseDiffusion_apply_macro_3D.cpp rename to operators/diffusion/avx/P1ElementwiseDiffusion_apply_P1ElementwiseDiffusion_macro_3D.cpp index f6023630c89b07c278f9e9a66a086f26fbbc5061..b41a0d92469562e33e4f1ade414a41ccf6fe5cfa 100644 --- a/operators/diffusion/avx/P1ElementwiseDiffusion_apply_macro_3D.cpp +++ b/operators/diffusion/avx/P1ElementwiseDiffusion_apply_P1ElementwiseDiffusion_macro_3D.cpp @@ -43,6 +43,10 @@ + + + + @@ -56,7 +60,7 @@ namespace hyteg { namespace operatorgeneration { -void P1ElementwiseDiffusion::apply_macro_3D( real_t * RESTRICT _data_dst, real_t * RESTRICT _data_src, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P1ElementwiseDiffusion::apply_P1ElementwiseDiffusion_macro_3D( real_t * RESTRICT _data_dst, real_t * RESTRICT _data_src, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t tmp_coords_jac_0_GREEN_DOWN = 1.0 / (micro_edges_per_macro_edge_float)*1.0; diff --git a/operators/diffusion/avx/P1ElementwiseDiffusion_computeInverseDiagonalOperatorValues_macro_2D.cpp b/operators/diffusion/avx/P1ElementwiseDiffusion_computeInverseDiagonalOperatorValues_P1ElementwiseDiffusion_macro_2D.cpp similarity index 99% rename from operators/diffusion/avx/P1ElementwiseDiffusion_computeInverseDiagonalOperatorValues_macro_2D.cpp rename to operators/diffusion/avx/P1ElementwiseDiffusion_computeInverseDiagonalOperatorValues_P1ElementwiseDiffusion_macro_2D.cpp index 0f3d04a19a4633e01ab087a4b2b96f4aac1b8c3a..7f00b2b8009b77d48048bc79dda2aa23a117853c 100644 --- a/operators/diffusion/avx/P1ElementwiseDiffusion_computeInverseDiagonalOperatorValues_macro_2D.cpp +++ b/operators/diffusion/avx/P1ElementwiseDiffusion_computeInverseDiagonalOperatorValues_P1ElementwiseDiffusion_macro_2D.cpp @@ -43,6 +43,10 @@ + + + + @@ -56,7 +60,7 @@ namespace hyteg { namespace operatorgeneration { -void P1ElementwiseDiffusion::computeInverseDiagonalOperatorValues_macro_2D( real_t * RESTRICT _data_invDiag_, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P1ElementwiseDiffusion::computeInverseDiagonalOperatorValues_P1ElementwiseDiffusion_macro_2D( real_t * RESTRICT _data_invDiag_, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t tmp_coords_jac_0_BLUE = 1.0 / (micro_edges_per_macro_edge_float)*1.0; diff --git a/operators/diffusion/avx/P1ElementwiseDiffusion_computeInverseDiagonalOperatorValues_macro_3D.cpp b/operators/diffusion/avx/P1ElementwiseDiffusion_computeInverseDiagonalOperatorValues_P1ElementwiseDiffusion_macro_3D.cpp similarity index 99% rename from operators/diffusion/avx/P1ElementwiseDiffusion_computeInverseDiagonalOperatorValues_macro_3D.cpp rename to operators/diffusion/avx/P1ElementwiseDiffusion_computeInverseDiagonalOperatorValues_P1ElementwiseDiffusion_macro_3D.cpp index afaf36e1a9f3bdc4d880facbd899b6cf1b20dd42..9d11044c99a559d6dfa787735304c89eadc2d175 100644 --- a/operators/diffusion/avx/P1ElementwiseDiffusion_computeInverseDiagonalOperatorValues_macro_3D.cpp +++ b/operators/diffusion/avx/P1ElementwiseDiffusion_computeInverseDiagonalOperatorValues_P1ElementwiseDiffusion_macro_3D.cpp @@ -43,6 +43,10 @@ + + + + @@ -56,7 +60,7 @@ namespace hyteg { namespace operatorgeneration { -void P1ElementwiseDiffusion::computeInverseDiagonalOperatorValues_macro_3D( real_t * RESTRICT _data_invDiag_, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P1ElementwiseDiffusion::computeInverseDiagonalOperatorValues_P1ElementwiseDiffusion_macro_3D( real_t * RESTRICT _data_invDiag_, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t tmp_coords_jac_0_GREEN_DOWN = 1.0 / (micro_edges_per_macro_edge_float)*1.0; diff --git a/operators/diffusion/avx/P2ElementwiseDiffusionAnnulusMap_apply_macro_2D.cpp b/operators/diffusion/avx/P2ElementwiseDiffusionAnnulusMap_apply_P2ElementwiseDiffusionAnnulusMap_macro_2D.cpp similarity index 99% rename from operators/diffusion/avx/P2ElementwiseDiffusionAnnulusMap_apply_macro_2D.cpp rename to operators/diffusion/avx/P2ElementwiseDiffusionAnnulusMap_apply_P2ElementwiseDiffusionAnnulusMap_macro_2D.cpp index 3cc1128942514414f37677fd0c20b37190286f7d..ac0e010443b0da9efa164d4cd1e6b8c163795639 100644 --- a/operators/diffusion/avx/P2ElementwiseDiffusionAnnulusMap_apply_macro_2D.cpp +++ b/operators/diffusion/avx/P2ElementwiseDiffusionAnnulusMap_apply_P2ElementwiseDiffusionAnnulusMap_macro_2D.cpp @@ -45,6 +45,10 @@ + + + + @@ -58,7 +62,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseDiffusionAnnulusMap::apply_macro_2D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const +void P2ElementwiseDiffusionAnnulusMap::apply_P2ElementwiseDiffusionAnnulusMap_macro_2D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const { { const real_t tmp_coords_jac_0_GRAY = 1.0 / (micro_edges_per_macro_edge_float)*1.0; diff --git a/operators/diffusion/avx/P2ElementwiseDiffusionAnnulusMap_computeInverseDiagonalOperatorValues_macro_2D.cpp b/operators/diffusion/avx/P2ElementwiseDiffusionAnnulusMap_computeInverseDiagonalOperatorValues_P2ElementwiseDiffusionAnnulusMap_macro_2D.cpp similarity index 99% rename from operators/diffusion/avx/P2ElementwiseDiffusionAnnulusMap_computeInverseDiagonalOperatorValues_macro_2D.cpp rename to operators/diffusion/avx/P2ElementwiseDiffusionAnnulusMap_computeInverseDiagonalOperatorValues_P2ElementwiseDiffusionAnnulusMap_macro_2D.cpp index 2b6fdcb84f41cae6506959f6c069d821a5bccaaa..bdcda6ad22444ff6f5aa3c736003cf4f97ca79a2 100644 --- a/operators/diffusion/avx/P2ElementwiseDiffusionAnnulusMap_computeInverseDiagonalOperatorValues_macro_2D.cpp +++ b/operators/diffusion/avx/P2ElementwiseDiffusionAnnulusMap_computeInverseDiagonalOperatorValues_P2ElementwiseDiffusionAnnulusMap_macro_2D.cpp @@ -45,6 +45,10 @@ + + + + @@ -58,7 +62,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseDiffusionAnnulusMap::computeInverseDiagonalOperatorValues_macro_2D( real_t * RESTRICT _data_invDiag_Edge, real_t * RESTRICT _data_invDiag_Vertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const +void P2ElementwiseDiffusionAnnulusMap::computeInverseDiagonalOperatorValues_P2ElementwiseDiffusionAnnulusMap_macro_2D( real_t * RESTRICT _data_invDiag_Edge, real_t * RESTRICT _data_invDiag_Vertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const { { const real_t tmp_coords_jac_0_GRAY = 1.0 / (micro_edges_per_macro_edge_float)*1.0; diff --git a/operators/diffusion/avx/P2ElementwiseDiffusionIcosahedralShellMap_apply_macro_3D.cpp b/operators/diffusion/avx/P2ElementwiseDiffusionIcosahedralShellMap_apply_P2ElementwiseDiffusionIcosahedralShellMap_macro_3D.cpp similarity index 99% rename from operators/diffusion/avx/P2ElementwiseDiffusionIcosahedralShellMap_apply_macro_3D.cpp rename to operators/diffusion/avx/P2ElementwiseDiffusionIcosahedralShellMap_apply_P2ElementwiseDiffusionIcosahedralShellMap_macro_3D.cpp index 371da2f5f7932ddf88780de5380d5ed08272f414..79e9770d77c15ccff9dc21f1a6096ce13994c12f 100644 --- a/operators/diffusion/avx/P2ElementwiseDiffusionIcosahedralShellMap_apply_macro_3D.cpp +++ b/operators/diffusion/avx/P2ElementwiseDiffusionIcosahedralShellMap_apply_P2ElementwiseDiffusionIcosahedralShellMap_macro_3D.cpp @@ -45,6 +45,10 @@ + + + + @@ -58,7 +62,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseDiffusionIcosahedralShellMap::apply_macro_3D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const +void P2ElementwiseDiffusionIcosahedralShellMap::apply_P2ElementwiseDiffusionIcosahedralShellMap_macro_3D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const { { const real_t tmp_coords_jac_0_WHITE_UP = 1.0 / (micro_edges_per_macro_edge_float)*1.0; diff --git a/operators/diffusion/avx/P2ElementwiseDiffusionIcosahedralShellMap_computeInverseDiagonalOperatorValues_macro_3D.cpp b/operators/diffusion/avx/P2ElementwiseDiffusionIcosahedralShellMap_computeInverseDiagonalOperatorValues_P2ElementwiseDiffusionIcosahedralShellMap_macro_3D.cpp similarity index 99% rename from operators/diffusion/avx/P2ElementwiseDiffusionIcosahedralShellMap_computeInverseDiagonalOperatorValues_macro_3D.cpp rename to operators/diffusion/avx/P2ElementwiseDiffusionIcosahedralShellMap_computeInverseDiagonalOperatorValues_P2ElementwiseDiffusionIcosahedralShellMap_macro_3D.cpp index 334bc5307eab81dd4a3141beae85e838389342d1..911470e4287f5a90ff7d7a2e3e50275bc17dac56 100644 --- a/operators/diffusion/avx/P2ElementwiseDiffusionIcosahedralShellMap_computeInverseDiagonalOperatorValues_macro_3D.cpp +++ b/operators/diffusion/avx/P2ElementwiseDiffusionIcosahedralShellMap_computeInverseDiagonalOperatorValues_P2ElementwiseDiffusionIcosahedralShellMap_macro_3D.cpp @@ -45,6 +45,10 @@ + + + + @@ -58,7 +62,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseDiffusionIcosahedralShellMap::computeInverseDiagonalOperatorValues_macro_3D( real_t * RESTRICT _data_invDiag_Edge, real_t * RESTRICT _data_invDiag_Vertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const +void P2ElementwiseDiffusionIcosahedralShellMap::computeInverseDiagonalOperatorValues_P2ElementwiseDiffusionIcosahedralShellMap_macro_3D( real_t * RESTRICT _data_invDiag_Edge, real_t * RESTRICT _data_invDiag_Vertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const { { const real_t tmp_coords_jac_0_WHITE_UP = 1.0 / (micro_edges_per_macro_edge_float)*1.0; diff --git a/operators/diffusion/avx/P2ElementwiseDiffusion_apply_macro_2D.cpp b/operators/diffusion/avx/P2ElementwiseDiffusion_apply_P2ElementwiseDiffusion_macro_2D.cpp similarity index 99% rename from operators/diffusion/avx/P2ElementwiseDiffusion_apply_macro_2D.cpp rename to operators/diffusion/avx/P2ElementwiseDiffusion_apply_P2ElementwiseDiffusion_macro_2D.cpp index 35800f7769fd4b750c32c455b5c93cd8535f4311..631a97d605a8f7dd90573f1aaf70d76f49263040 100644 --- a/operators/diffusion/avx/P2ElementwiseDiffusion_apply_macro_2D.cpp +++ b/operators/diffusion/avx/P2ElementwiseDiffusion_apply_P2ElementwiseDiffusion_macro_2D.cpp @@ -43,6 +43,10 @@ + + + + @@ -56,7 +60,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseDiffusion::apply_macro_2D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P2ElementwiseDiffusion::apply_P2ElementwiseDiffusion_macro_2D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t tmp_coords_jac_0_BLUE = 1.0 / (micro_edges_per_macro_edge_float)*1.0; diff --git a/operators/diffusion/avx/P2ElementwiseDiffusion_apply_macro_3D.cpp b/operators/diffusion/avx/P2ElementwiseDiffusion_apply_P2ElementwiseDiffusion_macro_3D.cpp similarity index 99% rename from operators/diffusion/avx/P2ElementwiseDiffusion_apply_macro_3D.cpp rename to operators/diffusion/avx/P2ElementwiseDiffusion_apply_P2ElementwiseDiffusion_macro_3D.cpp index 5420835afddf49ca08e2264c6c4ac96d41beb459..50b747de580d56f5ddd67bf5ab1dd40ffcf3d688 100644 --- a/operators/diffusion/avx/P2ElementwiseDiffusion_apply_macro_3D.cpp +++ b/operators/diffusion/avx/P2ElementwiseDiffusion_apply_P2ElementwiseDiffusion_macro_3D.cpp @@ -43,6 +43,10 @@ + + + + @@ -56,7 +60,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseDiffusion::apply_macro_3D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P2ElementwiseDiffusion::apply_P2ElementwiseDiffusion_macro_3D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t tmp_coords_jac_0_GREEN_DOWN = 1.0 / (micro_edges_per_macro_edge_float)*1.0; diff --git a/operators/diffusion/avx/P2ElementwiseDiffusion_computeInverseDiagonalOperatorValues_macro_2D.cpp b/operators/diffusion/avx/P2ElementwiseDiffusion_computeInverseDiagonalOperatorValues_P2ElementwiseDiffusion_macro_2D.cpp similarity index 99% rename from operators/diffusion/avx/P2ElementwiseDiffusion_computeInverseDiagonalOperatorValues_macro_2D.cpp rename to operators/diffusion/avx/P2ElementwiseDiffusion_computeInverseDiagonalOperatorValues_P2ElementwiseDiffusion_macro_2D.cpp index 96c7ff88c8dff7063309f47cdd038522f1d77961..6438fa3d74827c0c0afad96892d4828b0c4781e3 100644 --- a/operators/diffusion/avx/P2ElementwiseDiffusion_computeInverseDiagonalOperatorValues_macro_2D.cpp +++ b/operators/diffusion/avx/P2ElementwiseDiffusion_computeInverseDiagonalOperatorValues_P2ElementwiseDiffusion_macro_2D.cpp @@ -43,6 +43,10 @@ + + + + @@ -56,7 +60,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseDiffusion::computeInverseDiagonalOperatorValues_macro_2D( real_t * RESTRICT _data_invDiag_Edge, real_t * RESTRICT _data_invDiag_Vertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P2ElementwiseDiffusion::computeInverseDiagonalOperatorValues_P2ElementwiseDiffusion_macro_2D( real_t * RESTRICT _data_invDiag_Edge, real_t * RESTRICT _data_invDiag_Vertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t tmp_coords_jac_0_BLUE = 1.0 / (micro_edges_per_macro_edge_float)*1.0; diff --git a/operators/diffusion/avx/P2ElementwiseDiffusion_computeInverseDiagonalOperatorValues_macro_3D.cpp b/operators/diffusion/avx/P2ElementwiseDiffusion_computeInverseDiagonalOperatorValues_P2ElementwiseDiffusion_macro_3D.cpp similarity index 99% rename from operators/diffusion/avx/P2ElementwiseDiffusion_computeInverseDiagonalOperatorValues_macro_3D.cpp rename to operators/diffusion/avx/P2ElementwiseDiffusion_computeInverseDiagonalOperatorValues_P2ElementwiseDiffusion_macro_3D.cpp index 114ae31fe2a96f13f8496f183a75dfce790ec5ad..0b778cc7586290b06072e9aeed9789bd525cda2d 100644 --- a/operators/diffusion/avx/P2ElementwiseDiffusion_computeInverseDiagonalOperatorValues_macro_3D.cpp +++ b/operators/diffusion/avx/P2ElementwiseDiffusion_computeInverseDiagonalOperatorValues_P2ElementwiseDiffusion_macro_3D.cpp @@ -43,6 +43,10 @@ + + + + @@ -56,7 +60,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseDiffusion::computeInverseDiagonalOperatorValues_macro_3D( real_t * RESTRICT _data_invDiag_Edge, real_t * RESTRICT _data_invDiag_Vertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P2ElementwiseDiffusion::computeInverseDiagonalOperatorValues_P2ElementwiseDiffusion_macro_3D( real_t * RESTRICT _data_invDiag_Edge, real_t * RESTRICT _data_invDiag_Vertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t tmp_coords_jac_0_GREEN_DOWN = 1.0 / (micro_edges_per_macro_edge_float)*1.0; diff --git a/operators/diffusion/noarch/P1ElementwiseDiffusion_apply_macro_2D.cpp b/operators/diffusion/noarch/P1ElementwiseDiffusion_apply_P1ElementwiseDiffusion_macro_2D.cpp similarity index 98% rename from operators/diffusion/noarch/P1ElementwiseDiffusion_apply_macro_2D.cpp rename to operators/diffusion/noarch/P1ElementwiseDiffusion_apply_P1ElementwiseDiffusion_macro_2D.cpp index ae6d97a5affa4d0118631d73d6254119bfef2580..e6389f85be260e88589e05ca4497e92eae3c1613 100644 --- a/operators/diffusion/noarch/P1ElementwiseDiffusion_apply_macro_2D.cpp +++ b/operators/diffusion/noarch/P1ElementwiseDiffusion_apply_P1ElementwiseDiffusion_macro_2D.cpp @@ -43,6 +43,10 @@ + + + + @@ -56,7 +60,7 @@ namespace hyteg { namespace operatorgeneration { -void P1ElementwiseDiffusion::apply_macro_2D( real_t * RESTRICT _data_dst, real_t * RESTRICT _data_src, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P1ElementwiseDiffusion::apply_P1ElementwiseDiffusion_macro_2D( real_t * RESTRICT _data_dst, real_t * RESTRICT _data_src, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t tmp_coords_jac_0_BLUE = 1.0 / (micro_edges_per_macro_edge_float)*1.0; diff --git a/operators/diffusion/noarch/P1ElementwiseDiffusion_apply_macro_3D.cpp b/operators/diffusion/noarch/P1ElementwiseDiffusion_apply_P1ElementwiseDiffusion_macro_3D.cpp similarity index 99% rename from operators/diffusion/noarch/P1ElementwiseDiffusion_apply_macro_3D.cpp rename to operators/diffusion/noarch/P1ElementwiseDiffusion_apply_P1ElementwiseDiffusion_macro_3D.cpp index ff9d3ee6b1305d6a581a9cbe3a822ae94bf55644..90d1f0f20ce11d12e25e6f4e8c400537986505bb 100644 --- a/operators/diffusion/noarch/P1ElementwiseDiffusion_apply_macro_3D.cpp +++ b/operators/diffusion/noarch/P1ElementwiseDiffusion_apply_P1ElementwiseDiffusion_macro_3D.cpp @@ -43,6 +43,10 @@ + + + + @@ -56,7 +60,7 @@ namespace hyteg { namespace operatorgeneration { -void P1ElementwiseDiffusion::apply_macro_3D( real_t * RESTRICT _data_dst, real_t * RESTRICT _data_src, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P1ElementwiseDiffusion::apply_P1ElementwiseDiffusion_macro_3D( real_t * RESTRICT _data_dst, real_t * RESTRICT _data_src, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t tmp_coords_jac_0_GREEN_DOWN = 1.0 / (micro_edges_per_macro_edge_float)*1.0; diff --git a/operators/diffusion/noarch/P1ElementwiseDiffusion_computeInverseDiagonalOperatorValues_macro_2D.cpp b/operators/diffusion/noarch/P1ElementwiseDiffusion_computeInverseDiagonalOperatorValues_P1ElementwiseDiffusion_macro_2D.cpp similarity index 98% rename from operators/diffusion/noarch/P1ElementwiseDiffusion_computeInverseDiagonalOperatorValues_macro_2D.cpp rename to operators/diffusion/noarch/P1ElementwiseDiffusion_computeInverseDiagonalOperatorValues_P1ElementwiseDiffusion_macro_2D.cpp index 842962196c763ad3c70fe6da5664cc130e4b5c6e..8d5dc9bba7525c42cbb6387e7fd161e9d230b3c1 100644 --- a/operators/diffusion/noarch/P1ElementwiseDiffusion_computeInverseDiagonalOperatorValues_macro_2D.cpp +++ b/operators/diffusion/noarch/P1ElementwiseDiffusion_computeInverseDiagonalOperatorValues_P1ElementwiseDiffusion_macro_2D.cpp @@ -43,6 +43,10 @@ + + + + @@ -56,7 +60,7 @@ namespace hyteg { namespace operatorgeneration { -void P1ElementwiseDiffusion::computeInverseDiagonalOperatorValues_macro_2D( real_t * RESTRICT _data_invDiag_, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P1ElementwiseDiffusion::computeInverseDiagonalOperatorValues_P1ElementwiseDiffusion_macro_2D( real_t * RESTRICT _data_invDiag_, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t tmp_coords_jac_0_BLUE = 1.0 / (micro_edges_per_macro_edge_float)*1.0; diff --git a/operators/diffusion/noarch/P1ElementwiseDiffusion_computeInverseDiagonalOperatorValues_macro_3D.cpp b/operators/diffusion/noarch/P1ElementwiseDiffusion_computeInverseDiagonalOperatorValues_P1ElementwiseDiffusion_macro_3D.cpp similarity index 99% rename from operators/diffusion/noarch/P1ElementwiseDiffusion_computeInverseDiagonalOperatorValues_macro_3D.cpp rename to operators/diffusion/noarch/P1ElementwiseDiffusion_computeInverseDiagonalOperatorValues_P1ElementwiseDiffusion_macro_3D.cpp index b6973f15ce99d2480d1b851fe21a857d50cf6e68..c4d8f5117f2dc96f2e508ac7288ed077e1c803ec 100644 --- a/operators/diffusion/noarch/P1ElementwiseDiffusion_computeInverseDiagonalOperatorValues_macro_3D.cpp +++ b/operators/diffusion/noarch/P1ElementwiseDiffusion_computeInverseDiagonalOperatorValues_P1ElementwiseDiffusion_macro_3D.cpp @@ -43,6 +43,10 @@ + + + + @@ -56,7 +60,7 @@ namespace hyteg { namespace operatorgeneration { -void P1ElementwiseDiffusion::computeInverseDiagonalOperatorValues_macro_3D( real_t * RESTRICT _data_invDiag_, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P1ElementwiseDiffusion::computeInverseDiagonalOperatorValues_P1ElementwiseDiffusion_macro_3D( real_t * RESTRICT _data_invDiag_, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t tmp_coords_jac_0_GREEN_DOWN = 1.0 / (micro_edges_per_macro_edge_float)*1.0; diff --git a/operators/diffusion/noarch/P1ElementwiseDiffusion_toMatrix_macro_2D.cpp b/operators/diffusion/noarch/P1ElementwiseDiffusion_toMatrix_P1ElementwiseDiffusion_macro_2D.cpp similarity index 98% rename from operators/diffusion/noarch/P1ElementwiseDiffusion_toMatrix_macro_2D.cpp rename to operators/diffusion/noarch/P1ElementwiseDiffusion_toMatrix_P1ElementwiseDiffusion_macro_2D.cpp index a8c60c59f7c673cccba2d160fe7c08b0b8ba6d6e..3260de76ef2acbd5dde219b33b06f4338bc66a19 100644 --- a/operators/diffusion/noarch/P1ElementwiseDiffusion_toMatrix_macro_2D.cpp +++ b/operators/diffusion/noarch/P1ElementwiseDiffusion_toMatrix_P1ElementwiseDiffusion_macro_2D.cpp @@ -43,6 +43,10 @@ + + + + @@ -56,7 +60,7 @@ namespace hyteg { namespace operatorgeneration { -void P1ElementwiseDiffusion::toMatrix_macro_2D( idx_t * RESTRICT _data_dst, idx_t * RESTRICT _data_src, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P1ElementwiseDiffusion::toMatrix_P1ElementwiseDiffusion_macro_2D( idx_t * RESTRICT _data_dst, idx_t * RESTRICT _data_src, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t tmp_coords_jac_0_BLUE = 1.0 / (micro_edges_per_macro_edge_float)*1.0; diff --git a/operators/diffusion/noarch/P1ElementwiseDiffusion_toMatrix_macro_3D.cpp b/operators/diffusion/noarch/P1ElementwiseDiffusion_toMatrix_P1ElementwiseDiffusion_macro_3D.cpp similarity index 99% rename from operators/diffusion/noarch/P1ElementwiseDiffusion_toMatrix_macro_3D.cpp rename to operators/diffusion/noarch/P1ElementwiseDiffusion_toMatrix_P1ElementwiseDiffusion_macro_3D.cpp index 5190dd49462c530b7085afd74bf58fcb87ae183e..b4b506a9ac9fe8c9c0218e2f38a8c8da00d2cbb6 100644 --- a/operators/diffusion/noarch/P1ElementwiseDiffusion_toMatrix_macro_3D.cpp +++ b/operators/diffusion/noarch/P1ElementwiseDiffusion_toMatrix_P1ElementwiseDiffusion_macro_3D.cpp @@ -43,6 +43,10 @@ + + + + @@ -56,7 +60,7 @@ namespace hyteg { namespace operatorgeneration { -void P1ElementwiseDiffusion::toMatrix_macro_3D( idx_t * RESTRICT _data_dst, idx_t * RESTRICT _data_src, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P1ElementwiseDiffusion::toMatrix_P1ElementwiseDiffusion_macro_3D( idx_t * RESTRICT _data_dst, idx_t * RESTRICT _data_src, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t tmp_coords_jac_0_GREEN_DOWN = 1.0 / (micro_edges_per_macro_edge_float)*1.0; diff --git a/operators/diffusion/noarch/P2ElementwiseDiffusionAnnulusMap_apply_macro_2D.cpp b/operators/diffusion/noarch/P2ElementwiseDiffusionAnnulusMap_apply_P2ElementwiseDiffusionAnnulusMap_macro_2D.cpp similarity index 99% rename from operators/diffusion/noarch/P2ElementwiseDiffusionAnnulusMap_apply_macro_2D.cpp rename to operators/diffusion/noarch/P2ElementwiseDiffusionAnnulusMap_apply_P2ElementwiseDiffusionAnnulusMap_macro_2D.cpp index 365494c87ffef74a460fa1c4b4f60390d7bec889..7896b44696c4be3c82289e06f7ebf7721e390b73 100644 --- a/operators/diffusion/noarch/P2ElementwiseDiffusionAnnulusMap_apply_macro_2D.cpp +++ b/operators/diffusion/noarch/P2ElementwiseDiffusionAnnulusMap_apply_P2ElementwiseDiffusionAnnulusMap_macro_2D.cpp @@ -45,6 +45,10 @@ + + + + @@ -58,7 +62,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseDiffusionAnnulusMap::apply_macro_2D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const +void P2ElementwiseDiffusionAnnulusMap::apply_P2ElementwiseDiffusionAnnulusMap_macro_2D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const { { const real_t tmp_coords_jac_0_GRAY = 1.0 / (micro_edges_per_macro_edge_float)*1.0; diff --git a/operators/diffusion/noarch/P2ElementwiseDiffusionAnnulusMap_computeInverseDiagonalOperatorValues_macro_2D.cpp b/operators/diffusion/noarch/P2ElementwiseDiffusionAnnulusMap_computeInverseDiagonalOperatorValues_P2ElementwiseDiffusionAnnulusMap_macro_2D.cpp similarity index 99% rename from operators/diffusion/noarch/P2ElementwiseDiffusionAnnulusMap_computeInverseDiagonalOperatorValues_macro_2D.cpp rename to operators/diffusion/noarch/P2ElementwiseDiffusionAnnulusMap_computeInverseDiagonalOperatorValues_P2ElementwiseDiffusionAnnulusMap_macro_2D.cpp index e1d257fcb9749d0594b1f7a46857f44503b80880..59c2a8835a37fb3169e46964349f600149e49eba 100644 --- a/operators/diffusion/noarch/P2ElementwiseDiffusionAnnulusMap_computeInverseDiagonalOperatorValues_macro_2D.cpp +++ b/operators/diffusion/noarch/P2ElementwiseDiffusionAnnulusMap_computeInverseDiagonalOperatorValues_P2ElementwiseDiffusionAnnulusMap_macro_2D.cpp @@ -45,6 +45,10 @@ + + + + @@ -58,7 +62,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseDiffusionAnnulusMap::computeInverseDiagonalOperatorValues_macro_2D( real_t * RESTRICT _data_invDiag_Edge, real_t * RESTRICT _data_invDiag_Vertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const +void P2ElementwiseDiffusionAnnulusMap::computeInverseDiagonalOperatorValues_P2ElementwiseDiffusionAnnulusMap_macro_2D( real_t * RESTRICT _data_invDiag_Edge, real_t * RESTRICT _data_invDiag_Vertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const { { const real_t tmp_coords_jac_0_GRAY = 1.0 / (micro_edges_per_macro_edge_float)*1.0; diff --git a/operators/diffusion/noarch/P2ElementwiseDiffusionAnnulusMap_toMatrix_macro_2D.cpp b/operators/diffusion/noarch/P2ElementwiseDiffusionAnnulusMap_toMatrix_P2ElementwiseDiffusionAnnulusMap_macro_2D.cpp similarity index 99% rename from operators/diffusion/noarch/P2ElementwiseDiffusionAnnulusMap_toMatrix_macro_2D.cpp rename to operators/diffusion/noarch/P2ElementwiseDiffusionAnnulusMap_toMatrix_P2ElementwiseDiffusionAnnulusMap_macro_2D.cpp index 82d4f6a6447095023ee384158d777c74e543301b..982ff306b4b826aa6f807479e7c105e3c38fecf2 100644 --- a/operators/diffusion/noarch/P2ElementwiseDiffusionAnnulusMap_toMatrix_macro_2D.cpp +++ b/operators/diffusion/noarch/P2ElementwiseDiffusionAnnulusMap_toMatrix_P2ElementwiseDiffusionAnnulusMap_macro_2D.cpp @@ -45,6 +45,10 @@ + + + + @@ -58,7 +62,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseDiffusionAnnulusMap::toMatrix_macro_2D( idx_t * RESTRICT _data_dstEdge, idx_t * RESTRICT _data_dstVertex, idx_t * RESTRICT _data_srcEdge, idx_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const +void P2ElementwiseDiffusionAnnulusMap::toMatrix_P2ElementwiseDiffusionAnnulusMap_macro_2D( idx_t * RESTRICT _data_dstEdge, idx_t * RESTRICT _data_dstVertex, idx_t * RESTRICT _data_srcEdge, idx_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const { { const real_t tmp_coords_jac_0_GRAY = 1.0 / (micro_edges_per_macro_edge_float)*1.0; diff --git a/operators/diffusion/noarch/P2ElementwiseDiffusionIcosahedralShellMap_apply_macro_3D.cpp b/operators/diffusion/noarch/P2ElementwiseDiffusionIcosahedralShellMap_apply_P2ElementwiseDiffusionIcosahedralShellMap_macro_3D.cpp similarity index 99% rename from operators/diffusion/noarch/P2ElementwiseDiffusionIcosahedralShellMap_apply_macro_3D.cpp rename to operators/diffusion/noarch/P2ElementwiseDiffusionIcosahedralShellMap_apply_P2ElementwiseDiffusionIcosahedralShellMap_macro_3D.cpp index 4645c9f6fe3d30dd68d715abff6e685eb7b0dce4..6860a492f9f805e9052802ff0cbc055b10ad64db 100644 --- a/operators/diffusion/noarch/P2ElementwiseDiffusionIcosahedralShellMap_apply_macro_3D.cpp +++ b/operators/diffusion/noarch/P2ElementwiseDiffusionIcosahedralShellMap_apply_P2ElementwiseDiffusionIcosahedralShellMap_macro_3D.cpp @@ -45,6 +45,10 @@ + + + + @@ -58,7 +62,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseDiffusionIcosahedralShellMap::apply_macro_3D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const +void P2ElementwiseDiffusionIcosahedralShellMap::apply_P2ElementwiseDiffusionIcosahedralShellMap_macro_3D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const { { const real_t tmp_coords_jac_0_WHITE_UP = 1.0 / (micro_edges_per_macro_edge_float)*1.0; diff --git a/operators/diffusion/noarch/P2ElementwiseDiffusionIcosahedralShellMap_computeInverseDiagonalOperatorValues_macro_3D.cpp b/operators/diffusion/noarch/P2ElementwiseDiffusionIcosahedralShellMap_computeInverseDiagonalOperatorValues_P2ElementwiseDiffusionIcosahedralShellMap_macro_3D.cpp similarity index 99% rename from operators/diffusion/noarch/P2ElementwiseDiffusionIcosahedralShellMap_computeInverseDiagonalOperatorValues_macro_3D.cpp rename to operators/diffusion/noarch/P2ElementwiseDiffusionIcosahedralShellMap_computeInverseDiagonalOperatorValues_P2ElementwiseDiffusionIcosahedralShellMap_macro_3D.cpp index abd0fc64fbd1ff53911fa73c80f711efd7be399f..d532a9352aa9de0e38c24434acc13b5e6c751ee6 100644 --- a/operators/diffusion/noarch/P2ElementwiseDiffusionIcosahedralShellMap_computeInverseDiagonalOperatorValues_macro_3D.cpp +++ b/operators/diffusion/noarch/P2ElementwiseDiffusionIcosahedralShellMap_computeInverseDiagonalOperatorValues_P2ElementwiseDiffusionIcosahedralShellMap_macro_3D.cpp @@ -45,6 +45,10 @@ + + + + @@ -58,7 +62,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseDiffusionIcosahedralShellMap::computeInverseDiagonalOperatorValues_macro_3D( real_t * RESTRICT _data_invDiag_Edge, real_t * RESTRICT _data_invDiag_Vertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const +void P2ElementwiseDiffusionIcosahedralShellMap::computeInverseDiagonalOperatorValues_P2ElementwiseDiffusionIcosahedralShellMap_macro_3D( real_t * RESTRICT _data_invDiag_Edge, real_t * RESTRICT _data_invDiag_Vertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const { { const real_t tmp_coords_jac_0_WHITE_UP = 1.0 / (micro_edges_per_macro_edge_float)*1.0; diff --git a/operators/diffusion/noarch/P2ElementwiseDiffusionIcosahedralShellMap_toMatrix_macro_3D.cpp b/operators/diffusion/noarch/P2ElementwiseDiffusionIcosahedralShellMap_toMatrix_P2ElementwiseDiffusionIcosahedralShellMap_macro_3D.cpp similarity index 99% rename from operators/diffusion/noarch/P2ElementwiseDiffusionIcosahedralShellMap_toMatrix_macro_3D.cpp rename to operators/diffusion/noarch/P2ElementwiseDiffusionIcosahedralShellMap_toMatrix_P2ElementwiseDiffusionIcosahedralShellMap_macro_3D.cpp index 2c546d5df490ade680d86537fbbbaf3d79d6b77e..89f5f78aec1568f49ffa4cb92b111131363d8d81 100644 --- a/operators/diffusion/noarch/P2ElementwiseDiffusionIcosahedralShellMap_toMatrix_macro_3D.cpp +++ b/operators/diffusion/noarch/P2ElementwiseDiffusionIcosahedralShellMap_toMatrix_P2ElementwiseDiffusionIcosahedralShellMap_macro_3D.cpp @@ -45,6 +45,10 @@ + + + + @@ -58,7 +62,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseDiffusionIcosahedralShellMap::toMatrix_macro_3D( idx_t * RESTRICT _data_dstEdge, idx_t * RESTRICT _data_dstVertex, idx_t * RESTRICT _data_srcEdge, idx_t * RESTRICT _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const +void P2ElementwiseDiffusionIcosahedralShellMap::toMatrix_P2ElementwiseDiffusionIcosahedralShellMap_macro_3D( idx_t * RESTRICT _data_dstEdge, idx_t * RESTRICT _data_dstVertex, idx_t * RESTRICT _data_srcEdge, idx_t * RESTRICT _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const { { const real_t tmp_coords_jac_0_WHITE_UP = 1.0 / (micro_edges_per_macro_edge_float)*1.0; diff --git a/operators/diffusion/noarch/P2ElementwiseDiffusion_apply_macro_2D.cpp b/operators/diffusion/noarch/P2ElementwiseDiffusion_apply_P2ElementwiseDiffusion_macro_2D.cpp similarity index 99% rename from operators/diffusion/noarch/P2ElementwiseDiffusion_apply_macro_2D.cpp rename to operators/diffusion/noarch/P2ElementwiseDiffusion_apply_P2ElementwiseDiffusion_macro_2D.cpp index 27134c90ac5eef2aaa1a801075ece366f5f80012..b2ae9b4c6e20950471dc238046d8d9bde0fcc4cf 100644 --- a/operators/diffusion/noarch/P2ElementwiseDiffusion_apply_macro_2D.cpp +++ b/operators/diffusion/noarch/P2ElementwiseDiffusion_apply_P2ElementwiseDiffusion_macro_2D.cpp @@ -43,6 +43,10 @@ + + + + @@ -56,7 +60,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseDiffusion::apply_macro_2D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P2ElementwiseDiffusion::apply_P2ElementwiseDiffusion_macro_2D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t tmp_coords_jac_0_BLUE = 1.0 / (micro_edges_per_macro_edge_float)*1.0; diff --git a/operators/diffusion/noarch/P2ElementwiseDiffusion_apply_macro_3D.cpp b/operators/diffusion/noarch/P2ElementwiseDiffusion_apply_P2ElementwiseDiffusion_macro_3D.cpp similarity index 99% rename from operators/diffusion/noarch/P2ElementwiseDiffusion_apply_macro_3D.cpp rename to operators/diffusion/noarch/P2ElementwiseDiffusion_apply_P2ElementwiseDiffusion_macro_3D.cpp index 2d62014f22c3cd518a5e693e397ec9443874d186..ea6ed321898975458429be8113932d4087f286be 100644 --- a/operators/diffusion/noarch/P2ElementwiseDiffusion_apply_macro_3D.cpp +++ b/operators/diffusion/noarch/P2ElementwiseDiffusion_apply_P2ElementwiseDiffusion_macro_3D.cpp @@ -43,6 +43,10 @@ + + + + @@ -56,7 +60,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseDiffusion::apply_macro_3D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P2ElementwiseDiffusion::apply_P2ElementwiseDiffusion_macro_3D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t tmp_coords_jac_0_GREEN_DOWN = 1.0 / (micro_edges_per_macro_edge_float)*1.0; diff --git a/operators/diffusion/noarch/P2ElementwiseDiffusion_computeInverseDiagonalOperatorValues_macro_2D.cpp b/operators/diffusion/noarch/P2ElementwiseDiffusion_computeInverseDiagonalOperatorValues_P2ElementwiseDiffusion_macro_2D.cpp similarity index 98% rename from operators/diffusion/noarch/P2ElementwiseDiffusion_computeInverseDiagonalOperatorValues_macro_2D.cpp rename to operators/diffusion/noarch/P2ElementwiseDiffusion_computeInverseDiagonalOperatorValues_P2ElementwiseDiffusion_macro_2D.cpp index 5ee134f2f13d2097615f18e69c41bd5725c6267e..00858d29c729a1bea1811571dc51e5a5cf1072d0 100644 --- a/operators/diffusion/noarch/P2ElementwiseDiffusion_computeInverseDiagonalOperatorValues_macro_2D.cpp +++ b/operators/diffusion/noarch/P2ElementwiseDiffusion_computeInverseDiagonalOperatorValues_P2ElementwiseDiffusion_macro_2D.cpp @@ -43,6 +43,10 @@ + + + + @@ -56,7 +60,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseDiffusion::computeInverseDiagonalOperatorValues_macro_2D( real_t * RESTRICT _data_invDiag_Edge, real_t * RESTRICT _data_invDiag_Vertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P2ElementwiseDiffusion::computeInverseDiagonalOperatorValues_P2ElementwiseDiffusion_macro_2D( real_t * RESTRICT _data_invDiag_Edge, real_t * RESTRICT _data_invDiag_Vertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t tmp_coords_jac_0_BLUE = 1.0 / (micro_edges_per_macro_edge_float)*1.0; diff --git a/operators/diffusion/noarch/P2ElementwiseDiffusion_computeInverseDiagonalOperatorValues_macro_3D.cpp b/operators/diffusion/noarch/P2ElementwiseDiffusion_computeInverseDiagonalOperatorValues_P2ElementwiseDiffusion_macro_3D.cpp similarity index 99% rename from operators/diffusion/noarch/P2ElementwiseDiffusion_computeInverseDiagonalOperatorValues_macro_3D.cpp rename to operators/diffusion/noarch/P2ElementwiseDiffusion_computeInverseDiagonalOperatorValues_P2ElementwiseDiffusion_macro_3D.cpp index 9b0bd97e90f124d04db750424a7d7c365e0f164d..195e8de9d854e16d1852c0c19d8b09fc726e2526 100644 --- a/operators/diffusion/noarch/P2ElementwiseDiffusion_computeInverseDiagonalOperatorValues_macro_3D.cpp +++ b/operators/diffusion/noarch/P2ElementwiseDiffusion_computeInverseDiagonalOperatorValues_P2ElementwiseDiffusion_macro_3D.cpp @@ -43,6 +43,10 @@ + + + + @@ -56,7 +60,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseDiffusion::computeInverseDiagonalOperatorValues_macro_3D( real_t * RESTRICT _data_invDiag_Edge, real_t * RESTRICT _data_invDiag_Vertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P2ElementwiseDiffusion::computeInverseDiagonalOperatorValues_P2ElementwiseDiffusion_macro_3D( real_t * RESTRICT _data_invDiag_Edge, real_t * RESTRICT _data_invDiag_Vertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t tmp_coords_jac_0_GREEN_DOWN = 1.0 / (micro_edges_per_macro_edge_float)*1.0; diff --git a/operators/diffusion/noarch/P2ElementwiseDiffusion_toMatrix_macro_2D.cpp b/operators/diffusion/noarch/P2ElementwiseDiffusion_toMatrix_P2ElementwiseDiffusion_macro_2D.cpp similarity index 99% rename from operators/diffusion/noarch/P2ElementwiseDiffusion_toMatrix_macro_2D.cpp rename to operators/diffusion/noarch/P2ElementwiseDiffusion_toMatrix_P2ElementwiseDiffusion_macro_2D.cpp index 23e440f51e7fab4e3e7689a5bb08b0d74b7ba225..81ca2f5b118b1c728cf2fa4dba45bb35abe3da82 100644 --- a/operators/diffusion/noarch/P2ElementwiseDiffusion_toMatrix_macro_2D.cpp +++ b/operators/diffusion/noarch/P2ElementwiseDiffusion_toMatrix_P2ElementwiseDiffusion_macro_2D.cpp @@ -43,6 +43,10 @@ + + + + @@ -56,7 +60,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseDiffusion::toMatrix_macro_2D( idx_t * RESTRICT _data_dstEdge, idx_t * RESTRICT _data_dstVertex, idx_t * RESTRICT _data_srcEdge, idx_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P2ElementwiseDiffusion::toMatrix_P2ElementwiseDiffusion_macro_2D( idx_t * RESTRICT _data_dstEdge, idx_t * RESTRICT _data_dstVertex, idx_t * RESTRICT _data_srcEdge, idx_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t tmp_coords_jac_0_BLUE = 1.0 / (micro_edges_per_macro_edge_float)*1.0; diff --git a/operators/diffusion/noarch/P2ElementwiseDiffusion_toMatrix_macro_3D.cpp b/operators/diffusion/noarch/P2ElementwiseDiffusion_toMatrix_P2ElementwiseDiffusion_macro_3D.cpp similarity index 99% rename from operators/diffusion/noarch/P2ElementwiseDiffusion_toMatrix_macro_3D.cpp rename to operators/diffusion/noarch/P2ElementwiseDiffusion_toMatrix_P2ElementwiseDiffusion_macro_3D.cpp index 0a27a28b26d7d7ee22f9d7ad7e84ebfba3837f14..a687924afd9b46cd0f64ddfb69228bf741b6f0f3 100644 --- a/operators/diffusion/noarch/P2ElementwiseDiffusion_toMatrix_macro_3D.cpp +++ b/operators/diffusion/noarch/P2ElementwiseDiffusion_toMatrix_P2ElementwiseDiffusion_macro_3D.cpp @@ -43,6 +43,10 @@ + + + + @@ -56,7 +60,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseDiffusion::toMatrix_macro_3D( idx_t * RESTRICT _data_dstEdge, idx_t * RESTRICT _data_dstVertex, idx_t * RESTRICT _data_srcEdge, idx_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P2ElementwiseDiffusion::toMatrix_P2ElementwiseDiffusion_macro_3D( idx_t * RESTRICT _data_dstEdge, idx_t * RESTRICT _data_dstVertex, idx_t * RESTRICT _data_srcEdge, idx_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t tmp_coords_jac_0_GREEN_DOWN = 1.0 / (micro_edges_per_macro_edge_float)*1.0; diff --git a/operators/div_k_grad/CMakeLists.txt b/operators/div_k_grad/CMakeLists.txt index bd44475b3b7fb360d644ad7603974ff035886a0c..a917f32691d5831a313911a662c08e060c370573 100644 --- a/operators/div_k_grad/CMakeLists.txt +++ b/operators/div_k_grad/CMakeLists.txt @@ -13,40 +13,40 @@ add_library( opgen-div_k_grad if(HYTEG_BUILD_WITH_AVX AND WALBERLA_DOUBLE_ACCURACY) target_sources(opgen-div_k_grad PRIVATE - avx/P1ElementwiseDivKGrad_apply_macro_2D.cpp - avx/P1ElementwiseDivKGrad_apply_macro_3D.cpp - avx/P1ElementwiseDivKGrad_computeInverseDiagonalOperatorValues_macro_2D.cpp - avx/P1ElementwiseDivKGrad_computeInverseDiagonalOperatorValues_macro_3D.cpp - avx/P2ElementwiseDivKGradAnnulusMap_apply_macro_2D.cpp - avx/P2ElementwiseDivKGradAnnulusMap_computeInverseDiagonalOperatorValues_macro_2D.cpp - avx/P2ElementwiseDivKGradIcosahedralShellMap_apply_macro_3D.cpp - avx/P2ElementwiseDivKGradIcosahedralShellMap_computeInverseDiagonalOperatorValues_macro_3D.cpp - avx/P2ElementwiseDivKGrad_apply_macro_2D.cpp - avx/P2ElementwiseDivKGrad_apply_macro_3D.cpp - avx/P2ElementwiseDivKGrad_computeInverseDiagonalOperatorValues_macro_2D.cpp - avx/P2ElementwiseDivKGrad_computeInverseDiagonalOperatorValues_macro_3D.cpp - noarch/P1ElementwiseDivKGrad_toMatrix_macro_2D.cpp - noarch/P1ElementwiseDivKGrad_toMatrix_macro_3D.cpp - noarch/P2ElementwiseDivKGradAnnulusMap_toMatrix_macro_2D.cpp - noarch/P2ElementwiseDivKGradIcosahedralShellMap_toMatrix_macro_3D.cpp - noarch/P2ElementwiseDivKGrad_toMatrix_macro_2D.cpp - noarch/P2ElementwiseDivKGrad_toMatrix_macro_3D.cpp + avx/P1ElementwiseDivKGrad_apply_P1ElementwiseDivKGrad_macro_2D.cpp + avx/P1ElementwiseDivKGrad_apply_P1ElementwiseDivKGrad_macro_3D.cpp + avx/P1ElementwiseDivKGrad_computeInverseDiagonalOperatorValues_P1ElementwiseDivKGrad_macro_2D.cpp + avx/P1ElementwiseDivKGrad_computeInverseDiagonalOperatorValues_P1ElementwiseDivKGrad_macro_3D.cpp + avx/P2ElementwiseDivKGradAnnulusMap_apply_P2ElementwiseDivKGradAnnulusMap_macro_2D.cpp + avx/P2ElementwiseDivKGradAnnulusMap_computeInverseDiagonalOperatorValues_P2ElementwiseDivKGradAnnulusMap_macro_2D.cpp + avx/P2ElementwiseDivKGradIcosahedralShellMap_apply_P2ElementwiseDivKGradIcosahedralShellMap_macro_3D.cpp + avx/P2ElementwiseDivKGradIcosahedralShellMap_computeInverseDiagonalOperatorValues_P2ElementwiseDivKGradIcosahedralShellMap_macro_3D.cpp + avx/P2ElementwiseDivKGrad_apply_P2ElementwiseDivKGrad_macro_2D.cpp + avx/P2ElementwiseDivKGrad_apply_P2ElementwiseDivKGrad_macro_3D.cpp + avx/P2ElementwiseDivKGrad_computeInverseDiagonalOperatorValues_P2ElementwiseDivKGrad_macro_2D.cpp + avx/P2ElementwiseDivKGrad_computeInverseDiagonalOperatorValues_P2ElementwiseDivKGrad_macro_3D.cpp + noarch/P1ElementwiseDivKGrad_toMatrix_P1ElementwiseDivKGrad_macro_2D.cpp + noarch/P1ElementwiseDivKGrad_toMatrix_P1ElementwiseDivKGrad_macro_3D.cpp + noarch/P2ElementwiseDivKGradAnnulusMap_toMatrix_P2ElementwiseDivKGradAnnulusMap_macro_2D.cpp + noarch/P2ElementwiseDivKGradIcosahedralShellMap_toMatrix_P2ElementwiseDivKGradIcosahedralShellMap_macro_3D.cpp + noarch/P2ElementwiseDivKGrad_toMatrix_P2ElementwiseDivKGrad_macro_2D.cpp + noarch/P2ElementwiseDivKGrad_toMatrix_P2ElementwiseDivKGrad_macro_3D.cpp ) set_source_files_properties( - avx/P1ElementwiseDivKGrad_apply_macro_2D.cpp - avx/P1ElementwiseDivKGrad_apply_macro_3D.cpp - avx/P1ElementwiseDivKGrad_computeInverseDiagonalOperatorValues_macro_2D.cpp - avx/P1ElementwiseDivKGrad_computeInverseDiagonalOperatorValues_macro_3D.cpp - avx/P2ElementwiseDivKGradAnnulusMap_apply_macro_2D.cpp - avx/P2ElementwiseDivKGradAnnulusMap_computeInverseDiagonalOperatorValues_macro_2D.cpp - avx/P2ElementwiseDivKGradIcosahedralShellMap_apply_macro_3D.cpp - avx/P2ElementwiseDivKGradIcosahedralShellMap_computeInverseDiagonalOperatorValues_macro_3D.cpp - avx/P2ElementwiseDivKGrad_apply_macro_2D.cpp - avx/P2ElementwiseDivKGrad_apply_macro_3D.cpp - avx/P2ElementwiseDivKGrad_computeInverseDiagonalOperatorValues_macro_2D.cpp - avx/P2ElementwiseDivKGrad_computeInverseDiagonalOperatorValues_macro_3D.cpp + avx/P1ElementwiseDivKGrad_apply_P1ElementwiseDivKGrad_macro_2D.cpp + avx/P1ElementwiseDivKGrad_apply_P1ElementwiseDivKGrad_macro_3D.cpp + avx/P1ElementwiseDivKGrad_computeInverseDiagonalOperatorValues_P1ElementwiseDivKGrad_macro_2D.cpp + avx/P1ElementwiseDivKGrad_computeInverseDiagonalOperatorValues_P1ElementwiseDivKGrad_macro_3D.cpp + avx/P2ElementwiseDivKGradAnnulusMap_apply_P2ElementwiseDivKGradAnnulusMap_macro_2D.cpp + avx/P2ElementwiseDivKGradAnnulusMap_computeInverseDiagonalOperatorValues_P2ElementwiseDivKGradAnnulusMap_macro_2D.cpp + avx/P2ElementwiseDivKGradIcosahedralShellMap_apply_P2ElementwiseDivKGradIcosahedralShellMap_macro_3D.cpp + avx/P2ElementwiseDivKGradIcosahedralShellMap_computeInverseDiagonalOperatorValues_P2ElementwiseDivKGradIcosahedralShellMap_macro_3D.cpp + avx/P2ElementwiseDivKGrad_apply_P2ElementwiseDivKGrad_macro_2D.cpp + avx/P2ElementwiseDivKGrad_apply_P2ElementwiseDivKGrad_macro_3D.cpp + avx/P2ElementwiseDivKGrad_computeInverseDiagonalOperatorValues_P2ElementwiseDivKGrad_macro_2D.cpp + avx/P2ElementwiseDivKGrad_computeInverseDiagonalOperatorValues_P2ElementwiseDivKGrad_macro_3D.cpp PROPERTIES COMPILE_OPTIONS ${HYTEG_COMPILER_NATIVE_FLAGS} ) @@ -57,24 +57,24 @@ else() target_sources(opgen-div_k_grad PRIVATE - noarch/P1ElementwiseDivKGrad_apply_macro_2D.cpp - noarch/P1ElementwiseDivKGrad_apply_macro_3D.cpp - noarch/P1ElementwiseDivKGrad_computeInverseDiagonalOperatorValues_macro_2D.cpp - noarch/P1ElementwiseDivKGrad_computeInverseDiagonalOperatorValues_macro_3D.cpp - noarch/P1ElementwiseDivKGrad_toMatrix_macro_2D.cpp - noarch/P1ElementwiseDivKGrad_toMatrix_macro_3D.cpp - noarch/P2ElementwiseDivKGradAnnulusMap_apply_macro_2D.cpp - noarch/P2ElementwiseDivKGradAnnulusMap_computeInverseDiagonalOperatorValues_macro_2D.cpp - noarch/P2ElementwiseDivKGradAnnulusMap_toMatrix_macro_2D.cpp - noarch/P2ElementwiseDivKGradIcosahedralShellMap_apply_macro_3D.cpp - noarch/P2ElementwiseDivKGradIcosahedralShellMap_computeInverseDiagonalOperatorValues_macro_3D.cpp - noarch/P2ElementwiseDivKGradIcosahedralShellMap_toMatrix_macro_3D.cpp - noarch/P2ElementwiseDivKGrad_apply_macro_2D.cpp - noarch/P2ElementwiseDivKGrad_apply_macro_3D.cpp - noarch/P2ElementwiseDivKGrad_computeInverseDiagonalOperatorValues_macro_2D.cpp - noarch/P2ElementwiseDivKGrad_computeInverseDiagonalOperatorValues_macro_3D.cpp - noarch/P2ElementwiseDivKGrad_toMatrix_macro_2D.cpp - noarch/P2ElementwiseDivKGrad_toMatrix_macro_3D.cpp + noarch/P1ElementwiseDivKGrad_apply_P1ElementwiseDivKGrad_macro_2D.cpp + noarch/P1ElementwiseDivKGrad_apply_P1ElementwiseDivKGrad_macro_3D.cpp + noarch/P1ElementwiseDivKGrad_computeInverseDiagonalOperatorValues_P1ElementwiseDivKGrad_macro_2D.cpp + noarch/P1ElementwiseDivKGrad_computeInverseDiagonalOperatorValues_P1ElementwiseDivKGrad_macro_3D.cpp + noarch/P1ElementwiseDivKGrad_toMatrix_P1ElementwiseDivKGrad_macro_2D.cpp + noarch/P1ElementwiseDivKGrad_toMatrix_P1ElementwiseDivKGrad_macro_3D.cpp + noarch/P2ElementwiseDivKGradAnnulusMap_apply_P2ElementwiseDivKGradAnnulusMap_macro_2D.cpp + noarch/P2ElementwiseDivKGradAnnulusMap_computeInverseDiagonalOperatorValues_P2ElementwiseDivKGradAnnulusMap_macro_2D.cpp + noarch/P2ElementwiseDivKGradAnnulusMap_toMatrix_P2ElementwiseDivKGradAnnulusMap_macro_2D.cpp + noarch/P2ElementwiseDivKGradIcosahedralShellMap_apply_P2ElementwiseDivKGradIcosahedralShellMap_macro_3D.cpp + noarch/P2ElementwiseDivKGradIcosahedralShellMap_computeInverseDiagonalOperatorValues_P2ElementwiseDivKGradIcosahedralShellMap_macro_3D.cpp + noarch/P2ElementwiseDivKGradIcosahedralShellMap_toMatrix_P2ElementwiseDivKGradIcosahedralShellMap_macro_3D.cpp + noarch/P2ElementwiseDivKGrad_apply_P2ElementwiseDivKGrad_macro_2D.cpp + noarch/P2ElementwiseDivKGrad_apply_P2ElementwiseDivKGrad_macro_3D.cpp + noarch/P2ElementwiseDivKGrad_computeInverseDiagonalOperatorValues_P2ElementwiseDivKGrad_macro_2D.cpp + noarch/P2ElementwiseDivKGrad_computeInverseDiagonalOperatorValues_P2ElementwiseDivKGrad_macro_3D.cpp + noarch/P2ElementwiseDivKGrad_toMatrix_P2ElementwiseDivKGrad_macro_2D.cpp + noarch/P2ElementwiseDivKGrad_toMatrix_P2ElementwiseDivKGrad_macro_3D.cpp ) endif() diff --git a/operators/div_k_grad/P1ElementwiseDivKGrad.cpp b/operators/div_k_grad/P1ElementwiseDivKGrad.cpp index ee98f134518932c1c93c222b1f3383f7c7d6f411..a5dd1f51a40baddd346d1ee681b5ca7efb354230 100644 --- a/operators/div_k_grad/P1ElementwiseDivKGrad.cpp +++ b/operators/div_k_grad/P1ElementwiseDivKGrad.cpp @@ -133,7 +133,7 @@ void P1ElementwiseDivKGrad::apply( const P1Function< real_t >& src, this->timingTree_->start( "kernel" ); - apply_macro_3D( + apply_P1ElementwiseDivKGrad_macro_3D( _data_dst, _data_k, @@ -152,6 +152,7 @@ void P1ElementwiseDivKGrad::apply( const P1Function< real_t >& src, macro_vertex_coord_id_3comp2, micro_edges_per_macro_edge, micro_edges_per_macro_edge_float ); + this->timingTree_->stop( "kernel" ); } @@ -200,7 +201,7 @@ void P1ElementwiseDivKGrad::apply( const P1Function< real_t >& src, this->timingTree_->start( "kernel" ); - apply_macro_2D( + apply_P1ElementwiseDivKGrad_macro_2D( _data_dst, _data_k, @@ -213,6 +214,7 @@ void P1ElementwiseDivKGrad::apply( const P1Function< real_t >& src, macro_vertex_coord_id_2comp1, micro_edges_per_macro_edge, micro_edges_per_macro_edge_float ); + this->timingTree_->stop( "kernel" ); } @@ -276,7 +278,7 @@ void P1ElementwiseDivKGrad::toMatrix( const std::shared_ptr< SparseMatrixProxy > this->timingTree_->start( "kernel" ); - toMatrix_macro_3D( + toMatrix_P1ElementwiseDivKGrad_macro_3D( _data_dst, _data_k, @@ -296,6 +298,7 @@ void P1ElementwiseDivKGrad::toMatrix( const std::shared_ptr< SparseMatrixProxy > mat, micro_edges_per_macro_edge, micro_edges_per_macro_edge_float ); + this->timingTree_->stop( "kernel" ); } } @@ -325,7 +328,7 @@ void P1ElementwiseDivKGrad::toMatrix( const std::shared_ptr< SparseMatrixProxy > this->timingTree_->start( "kernel" ); - toMatrix_macro_2D( + toMatrix_P1ElementwiseDivKGrad_macro_2D( _data_dst, _data_k, @@ -339,6 +342,7 @@ void P1ElementwiseDivKGrad::toMatrix( const std::shared_ptr< SparseMatrixProxy > mat, micro_edges_per_macro_edge, micro_edges_per_macro_edge_float ); + this->timingTree_->stop( "kernel" ); } } @@ -390,7 +394,7 @@ void P1ElementwiseDivKGrad::computeInverseDiagonalOperatorValues() this->timingTree_->start( "kernel" ); - computeInverseDiagonalOperatorValues_macro_3D( + computeInverseDiagonalOperatorValues_P1ElementwiseDivKGrad_macro_3D( _data_invDiag_, _data_k, @@ -408,6 +412,7 @@ void P1ElementwiseDivKGrad::computeInverseDiagonalOperatorValues() macro_vertex_coord_id_3comp2, micro_edges_per_macro_edge, micro_edges_per_macro_edge_float ); + this->timingTree_->stop( "kernel" ); } @@ -447,7 +452,7 @@ void P1ElementwiseDivKGrad::computeInverseDiagonalOperatorValues() this->timingTree_->start( "kernel" ); - computeInverseDiagonalOperatorValues_macro_2D( + computeInverseDiagonalOperatorValues_P1ElementwiseDivKGrad_macro_2D( _data_invDiag_, _data_k, @@ -459,6 +464,7 @@ void P1ElementwiseDivKGrad::computeInverseDiagonalOperatorValues() macro_vertex_coord_id_2comp1, micro_edges_per_macro_edge, micro_edges_per_macro_edge_float ); + this->timingTree_->stop( "kernel" ); } diff --git a/operators/div_k_grad/P1ElementwiseDivKGrad.hpp b/operators/div_k_grad/P1ElementwiseDivKGrad.hpp index f1c5b366cba81067dd1d6d76ad7659989024dc7e..3862da356ed768442ebe71f93cae7de968863a6f 100644 --- a/operators/div_k_grad/P1ElementwiseDivKGrad.hpp +++ b/operators/div_k_grad/P1ElementwiseDivKGrad.hpp @@ -29,6 +29,7 @@ #include "core/DataTypes.h" #include "hyteg/LikwidWrapper.hpp" +#include "hyteg/boundary/BoundaryConditions.hpp" #include "hyteg/communication/Syncing.hpp" #include "hyteg/edgedofspace/EdgeDoFMacroCell.hpp" #include "hyteg/operators/Operator.hpp" @@ -36,6 +37,7 @@ #include "hyteg/primitivestorage/PrimitiveStorage.hpp" #include "hyteg/solvers/Smoothables.hpp" #include "hyteg/sparseassembly/SparseMatrixProxy.hpp" +#include "hyteg/types/types.hpp" #define FUNC_PREFIX @@ -82,126 +84,155 @@ class P1ElementwiseDivKGrad : public Operator< P1Function< real_t >, P1Function< protected: private: - /// Kernel type: apply + /// Integral: P1ElementwiseDivKGrad + /// - volume element: triangle, dim: 2, vertices: 3, spacedim: 2 + /// - kernel type: apply + /// - loop strategy: SAWTOOTH /// - quadrature rule: Dunavant 2 | points: 3, degree: 2 + /// - blending map: IdentityMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 95 102 12 0 0 0 0 1 - void apply_macro_2D( real_t* RESTRICT _data_dst, - real_t* RESTRICT _data_k, - real_t* RESTRICT _data_src, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float ) const; - /// Kernel type: apply + void apply_P1ElementwiseDivKGrad_macro_2D( real_t* RESTRICT _data_dst, + real_t* RESTRICT _data_k, + real_t* RESTRICT _data_src, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float ) const; + + /// Integral: P1ElementwiseDivKGrad + /// - volume element: tetrahedron, dim: 3, vertices: 4, spacedim: 3 + /// - kernel type: apply + /// - loop strategy: SAWTOOTH /// - quadrature rule: Hammer-Marlowe-Stroud 1 | points: 4, degree: 2 + /// - blending map: IdentityMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 277 272 36 0 0 0 0 1 - void apply_macro_3D( real_t* RESTRICT _data_dst, - real_t* RESTRICT _data_k, - real_t* RESTRICT _data_src, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_0comp2, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_1comp2, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - real_t macro_vertex_coord_id_2comp2, - real_t macro_vertex_coord_id_3comp0, - real_t macro_vertex_coord_id_3comp1, - real_t macro_vertex_coord_id_3comp2, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float ) const; - /// Kernel type: toMatrix + void apply_P1ElementwiseDivKGrad_macro_3D( real_t* RESTRICT _data_dst, + real_t* RESTRICT _data_k, + real_t* RESTRICT _data_src, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_0comp2, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_1comp2, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + real_t macro_vertex_coord_id_2comp2, + real_t macro_vertex_coord_id_3comp0, + real_t macro_vertex_coord_id_3comp1, + real_t macro_vertex_coord_id_3comp2, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float ) const; + + /// Integral: P1ElementwiseDivKGrad + /// - volume element: triangle, dim: 2, vertices: 3, spacedim: 2 + /// - kernel type: toMatrix + /// - loop strategy: SAWTOOTH /// - quadrature rule: Dunavant 2 | points: 3, degree: 2 + /// - blending map: IdentityMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 86 93 12 0 0 0 0 4 - void toMatrix_macro_2D( idx_t* RESTRICT _data_dst, - real_t* RESTRICT _data_k, - idx_t* RESTRICT _data_src, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - std::shared_ptr< SparseMatrixProxy > mat, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float ) const; - /// Kernel type: toMatrix + void toMatrix_P1ElementwiseDivKGrad_macro_2D( idx_t* RESTRICT _data_dst, + real_t* RESTRICT _data_k, + idx_t* RESTRICT _data_src, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + std::shared_ptr< SparseMatrixProxy > mat, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float ) const; + + /// Integral: P1ElementwiseDivKGrad + /// - volume element: tetrahedron, dim: 3, vertices: 4, spacedim: 3 + /// - kernel type: toMatrix + /// - loop strategy: SAWTOOTH /// - quadrature rule: Hammer-Marlowe-Stroud 1 | points: 4, degree: 2 + /// - blending map: IdentityMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 261 256 36 0 0 0 0 4 - void toMatrix_macro_3D( idx_t* RESTRICT _data_dst, - real_t* RESTRICT _data_k, - idx_t* RESTRICT _data_src, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_0comp2, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_1comp2, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - real_t macro_vertex_coord_id_2comp2, - real_t macro_vertex_coord_id_3comp0, - real_t macro_vertex_coord_id_3comp1, - real_t macro_vertex_coord_id_3comp2, - std::shared_ptr< SparseMatrixProxy > mat, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float ) const; - /// Kernel type: computeInverseDiagonalOperatorValues + void toMatrix_P1ElementwiseDivKGrad_macro_3D( idx_t* RESTRICT _data_dst, + real_t* RESTRICT _data_k, + idx_t* RESTRICT _data_src, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_0comp2, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_1comp2, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + real_t macro_vertex_coord_id_2comp2, + real_t macro_vertex_coord_id_3comp0, + real_t macro_vertex_coord_id_3comp1, + real_t macro_vertex_coord_id_3comp2, + std::shared_ptr< SparseMatrixProxy > mat, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float ) const; + + /// Integral: P1ElementwiseDivKGrad + /// - volume element: triangle, dim: 2, vertices: 3, spacedim: 2 + /// - kernel type: computeInverseDiagonalOperatorValues + /// - loop strategy: SAWTOOTH /// - quadrature rule: Dunavant 2 | points: 3, degree: 2 + /// - blending map: IdentityMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 71 66 12 0 0 0 0 1 - void computeInverseDiagonalOperatorValues_macro_2D( real_t* RESTRICT _data_invDiag_, - real_t* RESTRICT _data_k, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float ) const; - /// Kernel type: computeInverseDiagonalOperatorValues + void computeInverseDiagonalOperatorValues_P1ElementwiseDivKGrad_macro_2D( real_t* RESTRICT _data_invDiag_, + real_t* RESTRICT _data_k, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float ) const; + + /// Integral: P1ElementwiseDivKGrad + /// - volume element: tetrahedron, dim: 3, vertices: 4, spacedim: 3 + /// - kernel type: computeInverseDiagonalOperatorValues + /// - loop strategy: SAWTOOTH /// - quadrature rule: Hammer-Marlowe-Stroud 1 | points: 4, degree: 2 + /// - blending map: IdentityMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 193 160 36 0 0 0 0 1 - void computeInverseDiagonalOperatorValues_macro_3D( real_t* RESTRICT _data_invDiag_, - real_t* RESTRICT _data_k, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_0comp2, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_1comp2, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - real_t macro_vertex_coord_id_2comp2, - real_t macro_vertex_coord_id_3comp0, - real_t macro_vertex_coord_id_3comp1, - real_t macro_vertex_coord_id_3comp2, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float ) const; + void computeInverseDiagonalOperatorValues_P1ElementwiseDivKGrad_macro_3D( real_t* RESTRICT _data_invDiag_, + real_t* RESTRICT _data_k, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_0comp2, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_1comp2, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + real_t macro_vertex_coord_id_2comp2, + real_t macro_vertex_coord_id_3comp0, + real_t macro_vertex_coord_id_3comp1, + real_t macro_vertex_coord_id_3comp2, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float ) const; std::shared_ptr< P1Function< real_t > > invDiag_; P1Function< real_t > k; diff --git a/operators/div_k_grad/P2ElementwiseDivKGrad.cpp b/operators/div_k_grad/P2ElementwiseDivKGrad.cpp index 763096162730be80548205dcd204083a087978a3..9474445bdf4a84e4f8e4fba861bcab73f5c62d7e 100644 --- a/operators/div_k_grad/P2ElementwiseDivKGrad.cpp +++ b/operators/div_k_grad/P2ElementwiseDivKGrad.cpp @@ -137,7 +137,7 @@ void P2ElementwiseDivKGrad::apply( const P2Function< real_t >& src, this->timingTree_->start( "kernel" ); - apply_macro_3D( + apply_P2ElementwiseDivKGrad_macro_3D( _data_dstEdge, _data_dstVertex, @@ -159,6 +159,7 @@ void P2ElementwiseDivKGrad::apply( const P2Function< real_t >& src, macro_vertex_coord_id_3comp2, micro_edges_per_macro_edge, micro_edges_per_macro_edge_float ); + this->timingTree_->stop( "kernel" ); } @@ -228,7 +229,7 @@ void P2ElementwiseDivKGrad::apply( const P2Function< real_t >& src, this->timingTree_->start( "kernel" ); - apply_macro_2D( + apply_P2ElementwiseDivKGrad_macro_2D( _data_dstEdge, _data_dstVertex, @@ -244,6 +245,7 @@ void P2ElementwiseDivKGrad::apply( const P2Function< real_t >& src, macro_vertex_coord_id_2comp1, micro_edges_per_macro_edge, micro_edges_per_macro_edge_float ); + this->timingTree_->stop( "kernel" ); } @@ -314,7 +316,7 @@ void P2ElementwiseDivKGrad::toMatrix( const std::shared_ptr< SparseMatrixProxy > this->timingTree_->start( "kernel" ); - toMatrix_macro_3D( + toMatrix_P2ElementwiseDivKGrad_macro_3D( _data_dstEdge, _data_dstVertex, @@ -337,6 +339,7 @@ void P2ElementwiseDivKGrad::toMatrix( const std::shared_ptr< SparseMatrixProxy > mat, micro_edges_per_macro_edge, micro_edges_per_macro_edge_float ); + this->timingTree_->stop( "kernel" ); } } @@ -369,7 +372,7 @@ void P2ElementwiseDivKGrad::toMatrix( const std::shared_ptr< SparseMatrixProxy > this->timingTree_->start( "kernel" ); - toMatrix_macro_2D( + toMatrix_P2ElementwiseDivKGrad_macro_2D( _data_dstEdge, _data_dstVertex, @@ -386,6 +389,7 @@ void P2ElementwiseDivKGrad::toMatrix( const std::shared_ptr< SparseMatrixProxy > mat, micro_edges_per_macro_edge, micro_edges_per_macro_edge_float ); + this->timingTree_->stop( "kernel" ); } } @@ -440,7 +444,7 @@ void P2ElementwiseDivKGrad::computeInverseDiagonalOperatorValues() this->timingTree_->start( "kernel" ); - computeInverseDiagonalOperatorValues_macro_3D( + computeInverseDiagonalOperatorValues_P2ElementwiseDivKGrad_macro_3D( _data_invDiag_Edge, _data_invDiag_Vertex, @@ -460,6 +464,7 @@ void P2ElementwiseDivKGrad::computeInverseDiagonalOperatorValues() macro_vertex_coord_id_3comp2, micro_edges_per_macro_edge, micro_edges_per_macro_edge_float ); + this->timingTree_->stop( "kernel" ); } @@ -504,7 +509,7 @@ void P2ElementwiseDivKGrad::computeInverseDiagonalOperatorValues() this->timingTree_->start( "kernel" ); - computeInverseDiagonalOperatorValues_macro_2D( + computeInverseDiagonalOperatorValues_P2ElementwiseDivKGrad_macro_2D( _data_invDiag_Edge, _data_invDiag_Vertex, @@ -518,6 +523,7 @@ void P2ElementwiseDivKGrad::computeInverseDiagonalOperatorValues() macro_vertex_coord_id_2comp1, micro_edges_per_macro_edge, micro_edges_per_macro_edge_float ); + this->timingTree_->stop( "kernel" ); } diff --git a/operators/div_k_grad/P2ElementwiseDivKGrad.hpp b/operators/div_k_grad/P2ElementwiseDivKGrad.hpp index 8029975a21537403557f54f5cfe41579a95d440d..4dc8becf0d524fa0d67dfa2647e65d674f7ab90a 100644 --- a/operators/div_k_grad/P2ElementwiseDivKGrad.hpp +++ b/operators/div_k_grad/P2ElementwiseDivKGrad.hpp @@ -29,6 +29,7 @@ #include "core/DataTypes.h" #include "hyteg/LikwidWrapper.hpp" +#include "hyteg/boundary/BoundaryConditions.hpp" #include "hyteg/communication/Syncing.hpp" #include "hyteg/edgedofspace/EdgeDoFMacroCell.hpp" #include "hyteg/operators/Operator.hpp" @@ -36,6 +37,7 @@ #include "hyteg/primitivestorage/PrimitiveStorage.hpp" #include "hyteg/solvers/Smoothables.hpp" #include "hyteg/sparseassembly/SparseMatrixProxy.hpp" +#include "hyteg/types/types.hpp" #define FUNC_PREFIX @@ -82,142 +84,171 @@ class P2ElementwiseDivKGrad : public Operator< P2Function< real_t >, P2Function< protected: private: - /// Kernel type: apply + /// Integral: P2ElementwiseDivKGrad + /// - volume element: triangle, dim: 2, vertices: 3, spacedim: 2 + /// - kernel type: apply + /// - loop strategy: SAWTOOTH /// - quadrature rule: Dunavant 2 | points: 3, degree: 2 + /// - blending map: IdentityMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 290 378 12 0 0 0 0 1 - void apply_macro_2D( real_t* RESTRICT _data_dstEdge, - real_t* RESTRICT _data_dstVertex, - real_t* RESTRICT _data_kEdge, - real_t* RESTRICT _data_kVertex, - real_t* RESTRICT _data_srcEdge, - real_t* RESTRICT _data_srcVertex, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float ) const; - /// Kernel type: apply + void apply_P2ElementwiseDivKGrad_macro_2D( real_t* RESTRICT _data_dstEdge, + real_t* RESTRICT _data_dstVertex, + real_t* RESTRICT _data_kEdge, + real_t* RESTRICT _data_kVertex, + real_t* RESTRICT _data_srcEdge, + real_t* RESTRICT _data_srcVertex, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float ) const; + + /// Integral: P2ElementwiseDivKGrad + /// - volume element: tetrahedron, dim: 3, vertices: 4, spacedim: 3 + /// - kernel type: apply + /// - loop strategy: SAWTOOTH /// - quadrature rule: Hammer-Marlowe-Stroud 1 | points: 4, degree: 2 + /// - blending map: IdentityMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 1273 1640 36 0 0 0 0 1 - void apply_macro_3D( real_t* RESTRICT _data_dstEdge, - real_t* RESTRICT _data_dstVertex, - real_t* RESTRICT _data_kEdge, - real_t* RESTRICT _data_kVertex, - real_t* RESTRICT _data_srcEdge, - real_t* RESTRICT _data_srcVertex, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_0comp2, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_1comp2, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - real_t macro_vertex_coord_id_2comp2, - real_t macro_vertex_coord_id_3comp0, - real_t macro_vertex_coord_id_3comp1, - real_t macro_vertex_coord_id_3comp2, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float ) const; - /// Kernel type: toMatrix + void apply_P2ElementwiseDivKGrad_macro_3D( real_t* RESTRICT _data_dstEdge, + real_t* RESTRICT _data_dstVertex, + real_t* RESTRICT _data_kEdge, + real_t* RESTRICT _data_kVertex, + real_t* RESTRICT _data_srcEdge, + real_t* RESTRICT _data_srcVertex, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_0comp2, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_1comp2, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + real_t macro_vertex_coord_id_2comp2, + real_t macro_vertex_coord_id_3comp0, + real_t macro_vertex_coord_id_3comp1, + real_t macro_vertex_coord_id_3comp2, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float ) const; + + /// Integral: P2ElementwiseDivKGrad + /// - volume element: triangle, dim: 2, vertices: 3, spacedim: 2 + /// - kernel type: toMatrix + /// - loop strategy: SAWTOOTH /// - quadrature rule: Dunavant 2 | points: 3, degree: 2 + /// - blending map: IdentityMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 254 342 12 0 0 0 0 4 - void toMatrix_macro_2D( idx_t* RESTRICT _data_dstEdge, - idx_t* RESTRICT _data_dstVertex, - real_t* RESTRICT _data_kEdge, - real_t* RESTRICT _data_kVertex, - idx_t* RESTRICT _data_srcEdge, - idx_t* RESTRICT _data_srcVertex, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - std::shared_ptr< SparseMatrixProxy > mat, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float ) const; - /// Kernel type: toMatrix + void toMatrix_P2ElementwiseDivKGrad_macro_2D( idx_t* RESTRICT _data_dstEdge, + idx_t* RESTRICT _data_dstVertex, + real_t* RESTRICT _data_kEdge, + real_t* RESTRICT _data_kVertex, + idx_t* RESTRICT _data_srcEdge, + idx_t* RESTRICT _data_srcVertex, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + std::shared_ptr< SparseMatrixProxy > mat, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float ) const; + + /// Integral: P2ElementwiseDivKGrad + /// - volume element: tetrahedron, dim: 3, vertices: 4, spacedim: 3 + /// - kernel type: toMatrix + /// - loop strategy: SAWTOOTH /// - quadrature rule: Hammer-Marlowe-Stroud 1 | points: 4, degree: 2 + /// - blending map: IdentityMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 1173 1540 36 0 0 0 0 4 - void toMatrix_macro_3D( idx_t* RESTRICT _data_dstEdge, - idx_t* RESTRICT _data_dstVertex, - real_t* RESTRICT _data_kEdge, - real_t* RESTRICT _data_kVertex, - idx_t* RESTRICT _data_srcEdge, - idx_t* RESTRICT _data_srcVertex, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_0comp2, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_1comp2, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - real_t macro_vertex_coord_id_2comp2, - real_t macro_vertex_coord_id_3comp0, - real_t macro_vertex_coord_id_3comp1, - real_t macro_vertex_coord_id_3comp2, - std::shared_ptr< SparseMatrixProxy > mat, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float ) const; - /// Kernel type: computeInverseDiagonalOperatorValues + void toMatrix_P2ElementwiseDivKGrad_macro_3D( idx_t* RESTRICT _data_dstEdge, + idx_t* RESTRICT _data_dstVertex, + real_t* RESTRICT _data_kEdge, + real_t* RESTRICT _data_kVertex, + idx_t* RESTRICT _data_srcEdge, + idx_t* RESTRICT _data_srcVertex, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_0comp2, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_1comp2, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + real_t macro_vertex_coord_id_2comp2, + real_t macro_vertex_coord_id_3comp0, + real_t macro_vertex_coord_id_3comp1, + real_t macro_vertex_coord_id_3comp2, + std::shared_ptr< SparseMatrixProxy > mat, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float ) const; + + /// Integral: P2ElementwiseDivKGrad + /// - volume element: triangle, dim: 2, vertices: 3, spacedim: 2 + /// - kernel type: computeInverseDiagonalOperatorValues + /// - loop strategy: SAWTOOTH /// - quadrature rule: Dunavant 2 | points: 3, degree: 2 + /// - blending map: IdentityMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 170 195 12 0 0 0 0 1 - void computeInverseDiagonalOperatorValues_macro_2D( real_t* RESTRICT _data_invDiag_Edge, - real_t* RESTRICT _data_invDiag_Vertex, - real_t* RESTRICT _data_kEdge, - real_t* RESTRICT _data_kVertex, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float ) const; - /// Kernel type: computeInverseDiagonalOperatorValues + void computeInverseDiagonalOperatorValues_P2ElementwiseDivKGrad_macro_2D( real_t* RESTRICT _data_invDiag_Edge, + real_t* RESTRICT _data_invDiag_Vertex, + real_t* RESTRICT _data_kEdge, + real_t* RESTRICT _data_kVertex, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float ) const; + + /// Integral: P2ElementwiseDivKGrad + /// - volume element: tetrahedron, dim: 3, vertices: 4, spacedim: 3 + /// - kernel type: computeInverseDiagonalOperatorValues + /// - loop strategy: SAWTOOTH /// - quadrature rule: Hammer-Marlowe-Stroud 1 | points: 4, degree: 2 + /// - blending map: IdentityMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 523 676 36 0 0 0 0 1 - void computeInverseDiagonalOperatorValues_macro_3D( real_t* RESTRICT _data_invDiag_Edge, - real_t* RESTRICT _data_invDiag_Vertex, - real_t* RESTRICT _data_kEdge, - real_t* RESTRICT _data_kVertex, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_0comp2, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_1comp2, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - real_t macro_vertex_coord_id_2comp2, - real_t macro_vertex_coord_id_3comp0, - real_t macro_vertex_coord_id_3comp1, - real_t macro_vertex_coord_id_3comp2, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float ) const; + void computeInverseDiagonalOperatorValues_P2ElementwiseDivKGrad_macro_3D( real_t* RESTRICT _data_invDiag_Edge, + real_t* RESTRICT _data_invDiag_Vertex, + real_t* RESTRICT _data_kEdge, + real_t* RESTRICT _data_kVertex, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_0comp2, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_1comp2, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + real_t macro_vertex_coord_id_2comp2, + real_t macro_vertex_coord_id_3comp0, + real_t macro_vertex_coord_id_3comp1, + real_t macro_vertex_coord_id_3comp2, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float ) const; std::shared_ptr< P2Function< real_t > > invDiag_; P2Function< real_t > k; diff --git a/operators/div_k_grad/P2ElementwiseDivKGradAnnulusMap.cpp b/operators/div_k_grad/P2ElementwiseDivKGradAnnulusMap.cpp index 65d43fda5e863e92a38d5b2abcf56c93a2219d17..6804e03df759f2d7859821f3851972266212a20c 100644 --- a/operators/div_k_grad/P2ElementwiseDivKGradAnnulusMap.cpp +++ b/operators/div_k_grad/P2ElementwiseDivKGradAnnulusMap.cpp @@ -149,7 +149,7 @@ void P2ElementwiseDivKGradAnnulusMap::apply( const P2Function< real_t >& src, this->timingTree_->start( "kernel" ); - apply_macro_2D( + apply_P2ElementwiseDivKGradAnnulusMap_macro_2D( _data_dstEdge, _data_dstVertex, @@ -173,6 +173,7 @@ void P2ElementwiseDivKGradAnnulusMap::apply( const P2Function< real_t >& src, refVertex_1, thrVertex_0, thrVertex_1 ); + this->timingTree_->stop( "kernel" ); } @@ -256,7 +257,7 @@ void P2ElementwiseDivKGradAnnulusMap::toMatrix( const std::shared_ptr< SparseMat this->timingTree_->start( "kernel" ); - toMatrix_macro_2D( + toMatrix_P2ElementwiseDivKGradAnnulusMap_macro_2D( _data_dstEdge, _data_dstVertex, @@ -281,6 +282,7 @@ void P2ElementwiseDivKGradAnnulusMap::toMatrix( const std::shared_ptr< SparseMat refVertex_1, thrVertex_0, thrVertex_1 ); + this->timingTree_->stop( "kernel" ); } } @@ -349,7 +351,7 @@ void P2ElementwiseDivKGradAnnulusMap::computeInverseDiagonalOperatorValues() this->timingTree_->start( "kernel" ); - computeInverseDiagonalOperatorValues_macro_2D( + computeInverseDiagonalOperatorValues_P2ElementwiseDivKGradAnnulusMap_macro_2D( _data_invDiag_Edge, _data_invDiag_Vertex, @@ -371,6 +373,7 @@ void P2ElementwiseDivKGradAnnulusMap::computeInverseDiagonalOperatorValues() refVertex_1, thrVertex_0, thrVertex_1 ); + this->timingTree_->stop( "kernel" ); } diff --git a/operators/div_k_grad/P2ElementwiseDivKGradAnnulusMap.hpp b/operators/div_k_grad/P2ElementwiseDivKGradAnnulusMap.hpp index fed80068eabbbffadadb17a4ade1d61057a35954..511b005671f458ac0a8f0b886e7452a2de0f8e23 100644 --- a/operators/div_k_grad/P2ElementwiseDivKGradAnnulusMap.hpp +++ b/operators/div_k_grad/P2ElementwiseDivKGradAnnulusMap.hpp @@ -29,6 +29,7 @@ #include "core/DataTypes.h" #include "hyteg/LikwidWrapper.hpp" +#include "hyteg/boundary/BoundaryConditions.hpp" #include "hyteg/communication/Syncing.hpp" #include "hyteg/edgedofspace/EdgeDoFMacroCell.hpp" #include "hyteg/geometry/AnnulusMap.hpp" @@ -37,6 +38,7 @@ #include "hyteg/primitivestorage/PrimitiveStorage.hpp" #include "hyteg/solvers/Smoothables.hpp" #include "hyteg/sparseassembly/SparseMatrixProxy.hpp" +#include "hyteg/types/types.hpp" #define FUNC_PREFIX @@ -83,89 +85,103 @@ class P2ElementwiseDivKGradAnnulusMap : public Operator< P2Function< real_t >, P protected: private: - /// Kernel type: apply + /// Integral: P2ElementwiseDivKGradAnnulusMap + /// - volume element: triangle, dim: 2, vertices: 3, spacedim: 2 + /// - kernel type: apply + /// - loop strategy: SAWTOOTH /// - quadrature rule: Dunavant 3 | points: 4, degree: 3 + /// - blending map: AnnulusMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 668 1044 28 20 4 0 0 1 - void apply_macro_2D( real_t* RESTRICT _data_dstEdge, - real_t* RESTRICT _data_dstVertex, - real_t* RESTRICT _data_kEdge, - real_t* RESTRICT _data_kVertex, - real_t* RESTRICT _data_srcEdge, - real_t* RESTRICT _data_srcVertex, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float, - real_t radRayVertex, - real_t radRefVertex, - real_t rayVertex_0, - real_t rayVertex_1, - real_t refVertex_0, - real_t refVertex_1, - real_t thrVertex_0, - real_t thrVertex_1 ) const; - /// Kernel type: toMatrix + void apply_P2ElementwiseDivKGradAnnulusMap_macro_2D( real_t* RESTRICT _data_dstEdge, + real_t* RESTRICT _data_dstVertex, + real_t* RESTRICT _data_kEdge, + real_t* RESTRICT _data_kVertex, + real_t* RESTRICT _data_srcEdge, + real_t* RESTRICT _data_srcVertex, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float, + real_t radRayVertex, + real_t radRefVertex, + real_t rayVertex_0, + real_t rayVertex_1, + real_t refVertex_0, + real_t refVertex_1, + real_t thrVertex_0, + real_t thrVertex_1 ) const; + + /// Integral: P2ElementwiseDivKGradAnnulusMap + /// - volume element: triangle, dim: 2, vertices: 3, spacedim: 2 + /// - kernel type: toMatrix + /// - loop strategy: SAWTOOTH /// - quadrature rule: Dunavant 3 | points: 4, degree: 3 + /// - blending map: AnnulusMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 632 1008 28 20 4 0 0 4 - void toMatrix_macro_2D( idx_t* RESTRICT _data_dstEdge, - idx_t* RESTRICT _data_dstVertex, - real_t* RESTRICT _data_kEdge, - real_t* RESTRICT _data_kVertex, - idx_t* RESTRICT _data_srcEdge, - idx_t* RESTRICT _data_srcVertex, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - std::shared_ptr< SparseMatrixProxy > mat, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float, - real_t radRayVertex, - real_t radRefVertex, - real_t rayVertex_0, - real_t rayVertex_1, - real_t refVertex_0, - real_t refVertex_1, - real_t thrVertex_0, - real_t thrVertex_1 ) const; - /// Kernel type: computeInverseDiagonalOperatorValues + void toMatrix_P2ElementwiseDivKGradAnnulusMap_macro_2D( idx_t* RESTRICT _data_dstEdge, + idx_t* RESTRICT _data_dstVertex, + real_t* RESTRICT _data_kEdge, + real_t* RESTRICT _data_kVertex, + idx_t* RESTRICT _data_srcEdge, + idx_t* RESTRICT _data_srcVertex, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + std::shared_ptr< SparseMatrixProxy > mat, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float, + real_t radRayVertex, + real_t radRefVertex, + real_t rayVertex_0, + real_t rayVertex_1, + real_t refVertex_0, + real_t refVertex_1, + real_t thrVertex_0, + real_t thrVertex_1 ) const; + + /// Integral: P2ElementwiseDivKGradAnnulusMap + /// - volume element: triangle, dim: 2, vertices: 3, spacedim: 2 + /// - kernel type: computeInverseDiagonalOperatorValues + /// - loop strategy: SAWTOOTH /// - quadrature rule: Dunavant 3 | points: 4, degree: 3 + /// - blending map: AnnulusMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 518 828 28 20 4 0 0 1 - void computeInverseDiagonalOperatorValues_macro_2D( real_t* RESTRICT _data_invDiag_Edge, - real_t* RESTRICT _data_invDiag_Vertex, - real_t* RESTRICT _data_kEdge, - real_t* RESTRICT _data_kVertex, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float, - real_t radRayVertex, - real_t radRefVertex, - real_t rayVertex_0, - real_t rayVertex_1, - real_t refVertex_0, - real_t refVertex_1, - real_t thrVertex_0, - real_t thrVertex_1 ) const; + void computeInverseDiagonalOperatorValues_P2ElementwiseDivKGradAnnulusMap_macro_2D( real_t* RESTRICT _data_invDiag_Edge, + real_t* RESTRICT _data_invDiag_Vertex, + real_t* RESTRICT _data_kEdge, + real_t* RESTRICT _data_kVertex, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float, + real_t radRayVertex, + real_t radRefVertex, + real_t rayVertex_0, + real_t rayVertex_1, + real_t refVertex_0, + real_t refVertex_1, + real_t thrVertex_0, + real_t thrVertex_1 ) const; std::shared_ptr< P2Function< real_t > > invDiag_; P2Function< real_t > k; diff --git a/operators/div_k_grad/P2ElementwiseDivKGradIcosahedralShellMap.cpp b/operators/div_k_grad/P2ElementwiseDivKGradIcosahedralShellMap.cpp index 07cd64ae065fe49af7da56fc44c8b706bf59d4d4..aae2d630ef988cf811e07ed27185660d737ef0a4 100644 --- a/operators/div_k_grad/P2ElementwiseDivKGradIcosahedralShellMap.cpp +++ b/operators/div_k_grad/P2ElementwiseDivKGradIcosahedralShellMap.cpp @@ -154,7 +154,7 @@ void P2ElementwiseDivKGradIcosahedralShellMap::apply( const P2Function< real_t > this->timingTree_->start( "kernel" ); - apply_macro_3D( + apply_P2ElementwiseDivKGradIcosahedralShellMap_macro_3D( _data_dstEdge, _data_dstVertex, @@ -190,6 +190,7 @@ void P2ElementwiseDivKGradIcosahedralShellMap::apply( const P2Function< real_t > thrVertex_0, thrVertex_1, thrVertex_2 ); + this->timingTree_->stop( "kernel" ); } @@ -285,7 +286,7 @@ void P2ElementwiseDivKGradIcosahedralShellMap::toMatrix( const std::shared_ptr< this->timingTree_->start( "kernel" ); - toMatrix_macro_3D( + toMatrix_P2ElementwiseDivKGradIcosahedralShellMap_macro_3D( _data_dstEdge, _data_dstVertex, @@ -322,6 +323,7 @@ void P2ElementwiseDivKGradIcosahedralShellMap::toMatrix( const std::shared_ptr< thrVertex_0, thrVertex_1, thrVertex_2 ); + this->timingTree_->stop( "kernel" ); } } @@ -401,7 +403,7 @@ void P2ElementwiseDivKGradIcosahedralShellMap::computeInverseDiagonalOperatorVal this->timingTree_->start( "kernel" ); - computeInverseDiagonalOperatorValues_macro_3D( + computeInverseDiagonalOperatorValues_P2ElementwiseDivKGradIcosahedralShellMap_macro_3D( _data_invDiag_Edge, _data_invDiag_Vertex, @@ -435,6 +437,7 @@ void P2ElementwiseDivKGradIcosahedralShellMap::computeInverseDiagonalOperatorVal thrVertex_0, thrVertex_1, thrVertex_2 ); + this->timingTree_->stop( "kernel" ); } diff --git a/operators/div_k_grad/P2ElementwiseDivKGradIcosahedralShellMap.hpp b/operators/div_k_grad/P2ElementwiseDivKGradIcosahedralShellMap.hpp index ae3ca25c0e8f87664fce19281dd8b28c4fd1a251..81e949989cff3557db88699b865a1e05f43a0950 100644 --- a/operators/div_k_grad/P2ElementwiseDivKGradIcosahedralShellMap.hpp +++ b/operators/div_k_grad/P2ElementwiseDivKGradIcosahedralShellMap.hpp @@ -29,6 +29,7 @@ #include "core/DataTypes.h" #include "hyteg/LikwidWrapper.hpp" +#include "hyteg/boundary/BoundaryConditions.hpp" #include "hyteg/communication/Syncing.hpp" #include "hyteg/edgedofspace/EdgeDoFMacroCell.hpp" #include "hyteg/geometry/IcosahedralShellMap.hpp" @@ -37,6 +38,7 @@ #include "hyteg/primitivestorage/PrimitiveStorage.hpp" #include "hyteg/solvers/Smoothables.hpp" #include "hyteg/sparseassembly/SparseMatrixProxy.hpp" +#include "hyteg/types/types.hpp" #define FUNC_PREFIX @@ -83,125 +85,140 @@ class P2ElementwiseDivKGradIcosahedralShellMap : public Operator< P2Function< re protected: private: - /// Kernel type: apply + /// Integral: P2ElementwiseDivKGradIcosahedralShellMap + /// - volume element: tetrahedron, dim: 3, vertices: 4, spacedim: 3 + /// - kernel type: apply + /// - loop strategy: SAWTOOTH /// - quadrature rule: Hammer-Marlowe-Stroud 3 | points: 5, degree: 3 + /// - blending map: IcosahedralShellMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 2453 3892 66 15 5 0 0 1 - void apply_macro_3D( real_t* RESTRICT _data_dstEdge, - real_t* RESTRICT _data_dstVertex, - real_t* RESTRICT _data_kEdge, - real_t* RESTRICT _data_kVertex, - real_t* RESTRICT _data_srcEdge, - real_t* RESTRICT _data_srcVertex, - real_t forVertex_0, - real_t forVertex_1, - real_t forVertex_2, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_0comp2, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_1comp2, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - real_t macro_vertex_coord_id_2comp2, - real_t macro_vertex_coord_id_3comp0, - real_t macro_vertex_coord_id_3comp1, - real_t macro_vertex_coord_id_3comp2, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float, - real_t radRayVertex, - real_t radRefVertex, - real_t rayVertex_0, - real_t rayVertex_1, - real_t rayVertex_2, - real_t refVertex_0, - real_t refVertex_1, - real_t refVertex_2, - real_t thrVertex_0, - real_t thrVertex_1, - real_t thrVertex_2 ) const; - /// Kernel type: toMatrix + void apply_P2ElementwiseDivKGradIcosahedralShellMap_macro_3D( real_t* RESTRICT _data_dstEdge, + real_t* RESTRICT _data_dstVertex, + real_t* RESTRICT _data_kEdge, + real_t* RESTRICT _data_kVertex, + real_t* RESTRICT _data_srcEdge, + real_t* RESTRICT _data_srcVertex, + real_t forVertex_0, + real_t forVertex_1, + real_t forVertex_2, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_0comp2, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_1comp2, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + real_t macro_vertex_coord_id_2comp2, + real_t macro_vertex_coord_id_3comp0, + real_t macro_vertex_coord_id_3comp1, + real_t macro_vertex_coord_id_3comp2, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float, + real_t radRayVertex, + real_t radRefVertex, + real_t rayVertex_0, + real_t rayVertex_1, + real_t rayVertex_2, + real_t refVertex_0, + real_t refVertex_1, + real_t refVertex_2, + real_t thrVertex_0, + real_t thrVertex_1, + real_t thrVertex_2 ) const; + + /// Integral: P2ElementwiseDivKGradIcosahedralShellMap + /// - volume element: tetrahedron, dim: 3, vertices: 4, spacedim: 3 + /// - kernel type: toMatrix + /// - loop strategy: SAWTOOTH /// - quadrature rule: Hammer-Marlowe-Stroud 3 | points: 5, degree: 3 + /// - blending map: IcosahedralShellMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 2353 3792 66 15 5 0 0 4 - void toMatrix_macro_3D( idx_t* RESTRICT _data_dstEdge, - idx_t* RESTRICT _data_dstVertex, - real_t* RESTRICT _data_kEdge, - real_t* RESTRICT _data_kVertex, - idx_t* RESTRICT _data_srcEdge, - idx_t* RESTRICT _data_srcVertex, - real_t forVertex_0, - real_t forVertex_1, - real_t forVertex_2, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_0comp2, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_1comp2, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - real_t macro_vertex_coord_id_2comp2, - real_t macro_vertex_coord_id_3comp0, - real_t macro_vertex_coord_id_3comp1, - real_t macro_vertex_coord_id_3comp2, - std::shared_ptr< SparseMatrixProxy > mat, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float, - real_t radRayVertex, - real_t radRefVertex, - real_t rayVertex_0, - real_t rayVertex_1, - real_t rayVertex_2, - real_t refVertex_0, - real_t refVertex_1, - real_t refVertex_2, - real_t thrVertex_0, - real_t thrVertex_1, - real_t thrVertex_2 ) const; - /// Kernel type: computeInverseDiagonalOperatorValues + void toMatrix_P2ElementwiseDivKGradIcosahedralShellMap_macro_3D( idx_t* RESTRICT _data_dstEdge, + idx_t* RESTRICT _data_dstVertex, + real_t* RESTRICT _data_kEdge, + real_t* RESTRICT _data_kVertex, + idx_t* RESTRICT _data_srcEdge, + idx_t* RESTRICT _data_srcVertex, + real_t forVertex_0, + real_t forVertex_1, + real_t forVertex_2, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_0comp2, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_1comp2, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + real_t macro_vertex_coord_id_2comp2, + real_t macro_vertex_coord_id_3comp0, + real_t macro_vertex_coord_id_3comp1, + real_t macro_vertex_coord_id_3comp2, + std::shared_ptr< SparseMatrixProxy > mat, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float, + real_t radRayVertex, + real_t radRefVertex, + real_t rayVertex_0, + real_t rayVertex_1, + real_t rayVertex_2, + real_t refVertex_0, + real_t refVertex_1, + real_t refVertex_2, + real_t thrVertex_0, + real_t thrVertex_1, + real_t thrVertex_2 ) const; + + /// Integral: P2ElementwiseDivKGradIcosahedralShellMap + /// - volume element: tetrahedron, dim: 3, vertices: 4, spacedim: 3 + /// - kernel type: computeInverseDiagonalOperatorValues + /// - loop strategy: SAWTOOTH /// - quadrature rule: Hammer-Marlowe-Stroud 3 | points: 5, degree: 3 + /// - blending map: IcosahedralShellMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 1688 2892 66 15 5 0 0 1 - void computeInverseDiagonalOperatorValues_macro_3D( real_t* RESTRICT _data_invDiag_Edge, - real_t* RESTRICT _data_invDiag_Vertex, - real_t* RESTRICT _data_kEdge, - real_t* RESTRICT _data_kVertex, - real_t forVertex_0, - real_t forVertex_1, - real_t forVertex_2, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_0comp2, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_1comp2, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - real_t macro_vertex_coord_id_2comp2, - real_t macro_vertex_coord_id_3comp0, - real_t macro_vertex_coord_id_3comp1, - real_t macro_vertex_coord_id_3comp2, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float, - real_t radRayVertex, - real_t radRefVertex, - real_t rayVertex_0, - real_t rayVertex_1, - real_t rayVertex_2, - real_t refVertex_0, - real_t refVertex_1, - real_t refVertex_2, - real_t thrVertex_0, - real_t thrVertex_1, - real_t thrVertex_2 ) const; + void computeInverseDiagonalOperatorValues_P2ElementwiseDivKGradIcosahedralShellMap_macro_3D( + real_t* RESTRICT _data_invDiag_Edge, + real_t* RESTRICT _data_invDiag_Vertex, + real_t* RESTRICT _data_kEdge, + real_t* RESTRICT _data_kVertex, + real_t forVertex_0, + real_t forVertex_1, + real_t forVertex_2, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_0comp2, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_1comp2, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + real_t macro_vertex_coord_id_2comp2, + real_t macro_vertex_coord_id_3comp0, + real_t macro_vertex_coord_id_3comp1, + real_t macro_vertex_coord_id_3comp2, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float, + real_t radRayVertex, + real_t radRefVertex, + real_t rayVertex_0, + real_t rayVertex_1, + real_t rayVertex_2, + real_t refVertex_0, + real_t refVertex_1, + real_t refVertex_2, + real_t thrVertex_0, + real_t thrVertex_1, + real_t thrVertex_2 ) const; std::shared_ptr< P2Function< real_t > > invDiag_; P2Function< real_t > k; diff --git a/operators/div_k_grad/avx/P1ElementwiseDivKGrad_apply_macro_2D.cpp b/operators/div_k_grad/avx/P1ElementwiseDivKGrad_apply_P1ElementwiseDivKGrad_macro_2D.cpp similarity index 99% rename from operators/div_k_grad/avx/P1ElementwiseDivKGrad_apply_macro_2D.cpp rename to operators/div_k_grad/avx/P1ElementwiseDivKGrad_apply_P1ElementwiseDivKGrad_macro_2D.cpp index e9ee583698d9c2681e993152183cb55a896fae88..742b6a5df3d51a3e42eec7ac77c99562aa0e1662 100644 --- a/operators/div_k_grad/avx/P1ElementwiseDivKGrad_apply_macro_2D.cpp +++ b/operators/div_k_grad/avx/P1ElementwiseDivKGrad_apply_P1ElementwiseDivKGrad_macro_2D.cpp @@ -43,6 +43,10 @@ + + + + @@ -56,7 +60,7 @@ namespace hyteg { namespace operatorgeneration { -void P1ElementwiseDivKGrad::apply_macro_2D( real_t * RESTRICT _data_dst, real_t * RESTRICT _data_k, real_t * RESTRICT _data_src, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P1ElementwiseDivKGrad::apply_P1ElementwiseDivKGrad_macro_2D( real_t * RESTRICT _data_dst, real_t * RESTRICT _data_k, real_t * RESTRICT _data_src, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t _data_q_w [] = {0.16666666666666666, 0.16666666666666666, 0.16666666666666666}; diff --git a/operators/div_k_grad/avx/P1ElementwiseDivKGrad_apply_macro_3D.cpp b/operators/div_k_grad/avx/P1ElementwiseDivKGrad_apply_P1ElementwiseDivKGrad_macro_3D.cpp similarity index 99% rename from operators/div_k_grad/avx/P1ElementwiseDivKGrad_apply_macro_3D.cpp rename to operators/div_k_grad/avx/P1ElementwiseDivKGrad_apply_P1ElementwiseDivKGrad_macro_3D.cpp index 62884c29fb9c0a40781d49b38af73d68b5cddf82..075160629cb12ac82769368c9c321b63ecd74971 100644 --- a/operators/div_k_grad/avx/P1ElementwiseDivKGrad_apply_macro_3D.cpp +++ b/operators/div_k_grad/avx/P1ElementwiseDivKGrad_apply_P1ElementwiseDivKGrad_macro_3D.cpp @@ -43,6 +43,10 @@ + + + + @@ -56,7 +60,7 @@ namespace hyteg { namespace operatorgeneration { -void P1ElementwiseDivKGrad::apply_macro_3D( real_t * RESTRICT _data_dst, real_t * RESTRICT _data_k, real_t * RESTRICT _data_src, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P1ElementwiseDivKGrad::apply_P1ElementwiseDivKGrad_macro_3D( real_t * RESTRICT _data_dst, real_t * RESTRICT _data_k, real_t * RESTRICT _data_src, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t _data_q_w [] = {0.041666666666666657, 0.041666666666666657, 0.041666666666666657, 0.041666666666666657}; diff --git a/operators/div_k_grad/avx/P1ElementwiseDivKGrad_computeInverseDiagonalOperatorValues_macro_2D.cpp b/operators/div_k_grad/avx/P1ElementwiseDivKGrad_computeInverseDiagonalOperatorValues_P1ElementwiseDivKGrad_macro_2D.cpp similarity index 99% rename from operators/div_k_grad/avx/P1ElementwiseDivKGrad_computeInverseDiagonalOperatorValues_macro_2D.cpp rename to operators/div_k_grad/avx/P1ElementwiseDivKGrad_computeInverseDiagonalOperatorValues_P1ElementwiseDivKGrad_macro_2D.cpp index 6a72549fb313cfbbb283c4575c62d18344dfc7da..7e48f50533ee670163852877eaee0c74f12c8806 100644 --- a/operators/div_k_grad/avx/P1ElementwiseDivKGrad_computeInverseDiagonalOperatorValues_macro_2D.cpp +++ b/operators/div_k_grad/avx/P1ElementwiseDivKGrad_computeInverseDiagonalOperatorValues_P1ElementwiseDivKGrad_macro_2D.cpp @@ -43,6 +43,10 @@ + + + + @@ -56,7 +60,7 @@ namespace hyteg { namespace operatorgeneration { -void P1ElementwiseDivKGrad::computeInverseDiagonalOperatorValues_macro_2D( real_t * RESTRICT _data_invDiag_, real_t * RESTRICT _data_k, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P1ElementwiseDivKGrad::computeInverseDiagonalOperatorValues_P1ElementwiseDivKGrad_macro_2D( real_t * RESTRICT _data_invDiag_, real_t * RESTRICT _data_k, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t _data_q_w [] = {0.16666666666666666, 0.16666666666666666, 0.16666666666666666}; diff --git a/operators/div_k_grad/avx/P1ElementwiseDivKGrad_computeInverseDiagonalOperatorValues_macro_3D.cpp b/operators/div_k_grad/avx/P1ElementwiseDivKGrad_computeInverseDiagonalOperatorValues_P1ElementwiseDivKGrad_macro_3D.cpp similarity index 99% rename from operators/div_k_grad/avx/P1ElementwiseDivKGrad_computeInverseDiagonalOperatorValues_macro_3D.cpp rename to operators/div_k_grad/avx/P1ElementwiseDivKGrad_computeInverseDiagonalOperatorValues_P1ElementwiseDivKGrad_macro_3D.cpp index 32844194ba143530baec435e19e2098ae9159b55..a5a8fe889d2fdfc896f73b702e819bbf1ea6de19 100644 --- a/operators/div_k_grad/avx/P1ElementwiseDivKGrad_computeInverseDiagonalOperatorValues_macro_3D.cpp +++ b/operators/div_k_grad/avx/P1ElementwiseDivKGrad_computeInverseDiagonalOperatorValues_P1ElementwiseDivKGrad_macro_3D.cpp @@ -43,6 +43,10 @@ + + + + @@ -56,7 +60,7 @@ namespace hyteg { namespace operatorgeneration { -void P1ElementwiseDivKGrad::computeInverseDiagonalOperatorValues_macro_3D( real_t * RESTRICT _data_invDiag_, real_t * RESTRICT _data_k, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P1ElementwiseDivKGrad::computeInverseDiagonalOperatorValues_P1ElementwiseDivKGrad_macro_3D( real_t * RESTRICT _data_invDiag_, real_t * RESTRICT _data_k, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t _data_q_w [] = {0.041666666666666657, 0.041666666666666657, 0.041666666666666657, 0.041666666666666657}; diff --git a/operators/div_k_grad/avx/P2ElementwiseDivKGradAnnulusMap_apply_macro_2D.cpp b/operators/div_k_grad/avx/P2ElementwiseDivKGradAnnulusMap_apply_P2ElementwiseDivKGradAnnulusMap_macro_2D.cpp similarity index 99% rename from operators/div_k_grad/avx/P2ElementwiseDivKGradAnnulusMap_apply_macro_2D.cpp rename to operators/div_k_grad/avx/P2ElementwiseDivKGradAnnulusMap_apply_P2ElementwiseDivKGradAnnulusMap_macro_2D.cpp index 9f880a45273df164510c47fb054789ac52f63dd5..9b41394a1e3b4a9f13ff8df9df2fd29e1c561867 100644 --- a/operators/div_k_grad/avx/P2ElementwiseDivKGradAnnulusMap_apply_macro_2D.cpp +++ b/operators/div_k_grad/avx/P2ElementwiseDivKGradAnnulusMap_apply_P2ElementwiseDivKGradAnnulusMap_macro_2D.cpp @@ -45,6 +45,10 @@ + + + + @@ -58,7 +62,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseDivKGradAnnulusMap::apply_macro_2D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_kEdge, real_t * RESTRICT _data_kVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const +void P2ElementwiseDivKGradAnnulusMap::apply_P2ElementwiseDivKGradAnnulusMap_macro_2D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_kEdge, real_t * RESTRICT _data_kVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const { { const real_t _data_q_w [] = {-0.28125, 0.26041666666666669, 0.26041666666666669, 0.26041666666666669}; diff --git a/operators/div_k_grad/avx/P2ElementwiseDivKGradAnnulusMap_computeInverseDiagonalOperatorValues_macro_2D.cpp b/operators/div_k_grad/avx/P2ElementwiseDivKGradAnnulusMap_computeInverseDiagonalOperatorValues_P2ElementwiseDivKGradAnnulusMap_macro_2D.cpp similarity index 99% rename from operators/div_k_grad/avx/P2ElementwiseDivKGradAnnulusMap_computeInverseDiagonalOperatorValues_macro_2D.cpp rename to operators/div_k_grad/avx/P2ElementwiseDivKGradAnnulusMap_computeInverseDiagonalOperatorValues_P2ElementwiseDivKGradAnnulusMap_macro_2D.cpp index 7fb1e61d82c8e4baeb39d354610ea80d164180f1..c3180dc58dd7db747079f4a3e5f33c6d305623e3 100644 --- a/operators/div_k_grad/avx/P2ElementwiseDivKGradAnnulusMap_computeInverseDiagonalOperatorValues_macro_2D.cpp +++ b/operators/div_k_grad/avx/P2ElementwiseDivKGradAnnulusMap_computeInverseDiagonalOperatorValues_P2ElementwiseDivKGradAnnulusMap_macro_2D.cpp @@ -45,6 +45,10 @@ + + + + @@ -58,7 +62,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseDivKGradAnnulusMap::computeInverseDiagonalOperatorValues_macro_2D( real_t * RESTRICT _data_invDiag_Edge, real_t * RESTRICT _data_invDiag_Vertex, real_t * RESTRICT _data_kEdge, real_t * RESTRICT _data_kVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const +void P2ElementwiseDivKGradAnnulusMap::computeInverseDiagonalOperatorValues_P2ElementwiseDivKGradAnnulusMap_macro_2D( real_t * RESTRICT _data_invDiag_Edge, real_t * RESTRICT _data_invDiag_Vertex, real_t * RESTRICT _data_kEdge, real_t * RESTRICT _data_kVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const { { const real_t _data_q_w [] = {-0.28125, 0.26041666666666669, 0.26041666666666669, 0.26041666666666669}; diff --git a/operators/div_k_grad/avx/P2ElementwiseDivKGradIcosahedralShellMap_apply_macro_3D.cpp b/operators/div_k_grad/avx/P2ElementwiseDivKGradIcosahedralShellMap_apply_P2ElementwiseDivKGradIcosahedralShellMap_macro_3D.cpp similarity index 99% rename from operators/div_k_grad/avx/P2ElementwiseDivKGradIcosahedralShellMap_apply_macro_3D.cpp rename to operators/div_k_grad/avx/P2ElementwiseDivKGradIcosahedralShellMap_apply_P2ElementwiseDivKGradIcosahedralShellMap_macro_3D.cpp index 0a49d7ba6c1df6c06bd1be49228ca9e4e7138e7e..621b9144cda883b680f684f157dcb114f1700e72 100644 --- a/operators/div_k_grad/avx/P2ElementwiseDivKGradIcosahedralShellMap_apply_macro_3D.cpp +++ b/operators/div_k_grad/avx/P2ElementwiseDivKGradIcosahedralShellMap_apply_P2ElementwiseDivKGradIcosahedralShellMap_macro_3D.cpp @@ -45,6 +45,10 @@ + + + + @@ -58,7 +62,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseDivKGradIcosahedralShellMap::apply_macro_3D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_kEdge, real_t * RESTRICT _data_kVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const +void P2ElementwiseDivKGradIcosahedralShellMap::apply_P2ElementwiseDivKGradIcosahedralShellMap_macro_3D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_kEdge, real_t * RESTRICT _data_kVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const { { const real_t _data_q_w [] = {-0.1333333333333333, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983}; diff --git a/operators/div_k_grad/avx/P2ElementwiseDivKGradIcosahedralShellMap_computeInverseDiagonalOperatorValues_macro_3D.cpp b/operators/div_k_grad/avx/P2ElementwiseDivKGradIcosahedralShellMap_computeInverseDiagonalOperatorValues_P2ElementwiseDivKGradIcosahedralShellMap_macro_3D.cpp similarity index 99% rename from operators/div_k_grad/avx/P2ElementwiseDivKGradIcosahedralShellMap_computeInverseDiagonalOperatorValues_macro_3D.cpp rename to operators/div_k_grad/avx/P2ElementwiseDivKGradIcosahedralShellMap_computeInverseDiagonalOperatorValues_P2ElementwiseDivKGradIcosahedralShellMap_macro_3D.cpp index 27a84e720e3a63890926aa5c4ca763150c0bd164..9e074b2a5e5c10b22b843b0e1d81f8e43c01d66d 100644 --- a/operators/div_k_grad/avx/P2ElementwiseDivKGradIcosahedralShellMap_computeInverseDiagonalOperatorValues_macro_3D.cpp +++ b/operators/div_k_grad/avx/P2ElementwiseDivKGradIcosahedralShellMap_computeInverseDiagonalOperatorValues_P2ElementwiseDivKGradIcosahedralShellMap_macro_3D.cpp @@ -45,6 +45,10 @@ + + + + @@ -58,7 +62,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseDivKGradIcosahedralShellMap::computeInverseDiagonalOperatorValues_macro_3D( real_t * RESTRICT _data_invDiag_Edge, real_t * RESTRICT _data_invDiag_Vertex, real_t * RESTRICT _data_kEdge, real_t * RESTRICT _data_kVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const +void P2ElementwiseDivKGradIcosahedralShellMap::computeInverseDiagonalOperatorValues_P2ElementwiseDivKGradIcosahedralShellMap_macro_3D( real_t * RESTRICT _data_invDiag_Edge, real_t * RESTRICT _data_invDiag_Vertex, real_t * RESTRICT _data_kEdge, real_t * RESTRICT _data_kVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const { { const real_t _data_q_w [] = {-0.1333333333333333, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983}; diff --git a/operators/div_k_grad/avx/P2ElementwiseDivKGrad_apply_macro_2D.cpp b/operators/div_k_grad/avx/P2ElementwiseDivKGrad_apply_P2ElementwiseDivKGrad_macro_2D.cpp similarity index 99% rename from operators/div_k_grad/avx/P2ElementwiseDivKGrad_apply_macro_2D.cpp rename to operators/div_k_grad/avx/P2ElementwiseDivKGrad_apply_P2ElementwiseDivKGrad_macro_2D.cpp index 3d15cd0889d63df64ad6cf21f905a7d053a7c4fe..ae8283c9ad1a7de3613d6c48aeba9e2280db6e91 100644 --- a/operators/div_k_grad/avx/P2ElementwiseDivKGrad_apply_macro_2D.cpp +++ b/operators/div_k_grad/avx/P2ElementwiseDivKGrad_apply_P2ElementwiseDivKGrad_macro_2D.cpp @@ -43,6 +43,10 @@ + + + + @@ -56,7 +60,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseDivKGrad::apply_macro_2D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_kEdge, real_t * RESTRICT _data_kVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P2ElementwiseDivKGrad::apply_P2ElementwiseDivKGrad_macro_2D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_kEdge, real_t * RESTRICT _data_kVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t _data_q_w [] = {0.16666666666666666, 0.16666666666666666, 0.16666666666666666}; diff --git a/operators/div_k_grad/avx/P2ElementwiseDivKGrad_apply_macro_3D.cpp b/operators/div_k_grad/avx/P2ElementwiseDivKGrad_apply_P2ElementwiseDivKGrad_macro_3D.cpp similarity index 99% rename from operators/div_k_grad/avx/P2ElementwiseDivKGrad_apply_macro_3D.cpp rename to operators/div_k_grad/avx/P2ElementwiseDivKGrad_apply_P2ElementwiseDivKGrad_macro_3D.cpp index e0b46bc26b37e9a4150060bab70fa12ecc884482..c3b7e54a6a6b256c52f90fd3f115eca0ad9b2f0e 100644 --- a/operators/div_k_grad/avx/P2ElementwiseDivKGrad_apply_macro_3D.cpp +++ b/operators/div_k_grad/avx/P2ElementwiseDivKGrad_apply_P2ElementwiseDivKGrad_macro_3D.cpp @@ -43,6 +43,10 @@ + + + + @@ -56,7 +60,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseDivKGrad::apply_macro_3D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_kEdge, real_t * RESTRICT _data_kVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P2ElementwiseDivKGrad::apply_P2ElementwiseDivKGrad_macro_3D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_kEdge, real_t * RESTRICT _data_kVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t _data_q_w [] = {0.041666666666666657, 0.041666666666666657, 0.041666666666666657, 0.041666666666666657}; diff --git a/operators/div_k_grad/avx/P2ElementwiseDivKGrad_computeInverseDiagonalOperatorValues_macro_2D.cpp b/operators/div_k_grad/avx/P2ElementwiseDivKGrad_computeInverseDiagonalOperatorValues_P2ElementwiseDivKGrad_macro_2D.cpp similarity index 99% rename from operators/div_k_grad/avx/P2ElementwiseDivKGrad_computeInverseDiagonalOperatorValues_macro_2D.cpp rename to operators/div_k_grad/avx/P2ElementwiseDivKGrad_computeInverseDiagonalOperatorValues_P2ElementwiseDivKGrad_macro_2D.cpp index 0b753df574f6bc2d79bab72c33ab0db1c600cf79..241e57c52b283546ec3245f2399183536745f2ff 100644 --- a/operators/div_k_grad/avx/P2ElementwiseDivKGrad_computeInverseDiagonalOperatorValues_macro_2D.cpp +++ b/operators/div_k_grad/avx/P2ElementwiseDivKGrad_computeInverseDiagonalOperatorValues_P2ElementwiseDivKGrad_macro_2D.cpp @@ -43,6 +43,10 @@ + + + + @@ -56,7 +60,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseDivKGrad::computeInverseDiagonalOperatorValues_macro_2D( real_t * RESTRICT _data_invDiag_Edge, real_t * RESTRICT _data_invDiag_Vertex, real_t * RESTRICT _data_kEdge, real_t * RESTRICT _data_kVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P2ElementwiseDivKGrad::computeInverseDiagonalOperatorValues_P2ElementwiseDivKGrad_macro_2D( real_t * RESTRICT _data_invDiag_Edge, real_t * RESTRICT _data_invDiag_Vertex, real_t * RESTRICT _data_kEdge, real_t * RESTRICT _data_kVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t _data_q_w [] = {0.16666666666666666, 0.16666666666666666, 0.16666666666666666}; diff --git a/operators/div_k_grad/avx/P2ElementwiseDivKGrad_computeInverseDiagonalOperatorValues_macro_3D.cpp b/operators/div_k_grad/avx/P2ElementwiseDivKGrad_computeInverseDiagonalOperatorValues_P2ElementwiseDivKGrad_macro_3D.cpp similarity index 99% rename from operators/div_k_grad/avx/P2ElementwiseDivKGrad_computeInverseDiagonalOperatorValues_macro_3D.cpp rename to operators/div_k_grad/avx/P2ElementwiseDivKGrad_computeInverseDiagonalOperatorValues_P2ElementwiseDivKGrad_macro_3D.cpp index 3b9c298973c219c140c10f32b5c5982936668705..55e3fa2ab4ea2527f4aa22abc590f570b38b93a9 100644 --- a/operators/div_k_grad/avx/P2ElementwiseDivKGrad_computeInverseDiagonalOperatorValues_macro_3D.cpp +++ b/operators/div_k_grad/avx/P2ElementwiseDivKGrad_computeInverseDiagonalOperatorValues_P2ElementwiseDivKGrad_macro_3D.cpp @@ -43,6 +43,10 @@ + + + + @@ -56,7 +60,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseDivKGrad::computeInverseDiagonalOperatorValues_macro_3D( real_t * RESTRICT _data_invDiag_Edge, real_t * RESTRICT _data_invDiag_Vertex, real_t * RESTRICT _data_kEdge, real_t * RESTRICT _data_kVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P2ElementwiseDivKGrad::computeInverseDiagonalOperatorValues_P2ElementwiseDivKGrad_macro_3D( real_t * RESTRICT _data_invDiag_Edge, real_t * RESTRICT _data_invDiag_Vertex, real_t * RESTRICT _data_kEdge, real_t * RESTRICT _data_kVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t _data_q_w [] = {0.041666666666666657, 0.041666666666666657, 0.041666666666666657, 0.041666666666666657}; diff --git a/operators/div_k_grad/noarch/P1ElementwiseDivKGrad_apply_macro_2D.cpp b/operators/div_k_grad/noarch/P1ElementwiseDivKGrad_apply_P1ElementwiseDivKGrad_macro_2D.cpp similarity index 97% rename from operators/div_k_grad/noarch/P1ElementwiseDivKGrad_apply_macro_2D.cpp rename to operators/div_k_grad/noarch/P1ElementwiseDivKGrad_apply_P1ElementwiseDivKGrad_macro_2D.cpp index a20e98e0da20439f408c4bcc480776d880b939d3..08fdf639b810c45507c6cd5bafbfaf2da79e5b08 100644 --- a/operators/div_k_grad/noarch/P1ElementwiseDivKGrad_apply_macro_2D.cpp +++ b/operators/div_k_grad/noarch/P1ElementwiseDivKGrad_apply_P1ElementwiseDivKGrad_macro_2D.cpp @@ -43,6 +43,10 @@ + + + + @@ -56,7 +60,7 @@ namespace hyteg { namespace operatorgeneration { -void P1ElementwiseDivKGrad::apply_macro_2D( real_t * RESTRICT _data_dst, real_t * RESTRICT _data_k, real_t * RESTRICT _data_src, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P1ElementwiseDivKGrad::apply_P1ElementwiseDivKGrad_macro_2D( real_t * RESTRICT _data_dst, real_t * RESTRICT _data_k, real_t * RESTRICT _data_src, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t _data_q_w [] = {0.16666666666666666, 0.16666666666666666, 0.16666666666666666}; diff --git a/operators/div_k_grad/noarch/P1ElementwiseDivKGrad_apply_macro_3D.cpp b/operators/div_k_grad/noarch/P1ElementwiseDivKGrad_apply_P1ElementwiseDivKGrad_macro_3D.cpp similarity index 99% rename from operators/div_k_grad/noarch/P1ElementwiseDivKGrad_apply_macro_3D.cpp rename to operators/div_k_grad/noarch/P1ElementwiseDivKGrad_apply_P1ElementwiseDivKGrad_macro_3D.cpp index 248a21e370201415c3f41291c18573aec42c39d7..ebf6a755a93c829069c50b43c79abb449034cb63 100644 --- a/operators/div_k_grad/noarch/P1ElementwiseDivKGrad_apply_macro_3D.cpp +++ b/operators/div_k_grad/noarch/P1ElementwiseDivKGrad_apply_P1ElementwiseDivKGrad_macro_3D.cpp @@ -43,6 +43,10 @@ + + + + @@ -56,7 +60,7 @@ namespace hyteg { namespace operatorgeneration { -void P1ElementwiseDivKGrad::apply_macro_3D( real_t * RESTRICT _data_dst, real_t * RESTRICT _data_k, real_t * RESTRICT _data_src, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P1ElementwiseDivKGrad::apply_P1ElementwiseDivKGrad_macro_3D( real_t * RESTRICT _data_dst, real_t * RESTRICT _data_k, real_t * RESTRICT _data_src, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t _data_q_w [] = {0.041666666666666657, 0.041666666666666657, 0.041666666666666657, 0.041666666666666657}; diff --git a/operators/div_k_grad/noarch/P1ElementwiseDivKGrad_computeInverseDiagonalOperatorValues_macro_2D.cpp b/operators/div_k_grad/noarch/P1ElementwiseDivKGrad_computeInverseDiagonalOperatorValues_P1ElementwiseDivKGrad_macro_2D.cpp similarity index 97% rename from operators/div_k_grad/noarch/P1ElementwiseDivKGrad_computeInverseDiagonalOperatorValues_macro_2D.cpp rename to operators/div_k_grad/noarch/P1ElementwiseDivKGrad_computeInverseDiagonalOperatorValues_P1ElementwiseDivKGrad_macro_2D.cpp index 37f59b2ad095701027d57ab8db0d24b3255a6e8a..b1aea62373de30529af6d5ecf88dd440da80c18b 100644 --- a/operators/div_k_grad/noarch/P1ElementwiseDivKGrad_computeInverseDiagonalOperatorValues_macro_2D.cpp +++ b/operators/div_k_grad/noarch/P1ElementwiseDivKGrad_computeInverseDiagonalOperatorValues_P1ElementwiseDivKGrad_macro_2D.cpp @@ -43,6 +43,10 @@ + + + + @@ -56,7 +60,7 @@ namespace hyteg { namespace operatorgeneration { -void P1ElementwiseDivKGrad::computeInverseDiagonalOperatorValues_macro_2D( real_t * RESTRICT _data_invDiag_, real_t * RESTRICT _data_k, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P1ElementwiseDivKGrad::computeInverseDiagonalOperatorValues_P1ElementwiseDivKGrad_macro_2D( real_t * RESTRICT _data_invDiag_, real_t * RESTRICT _data_k, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t _data_q_w [] = {0.16666666666666666, 0.16666666666666666, 0.16666666666666666}; diff --git a/operators/div_k_grad/noarch/P1ElementwiseDivKGrad_computeInverseDiagonalOperatorValues_macro_3D.cpp b/operators/div_k_grad/noarch/P1ElementwiseDivKGrad_computeInverseDiagonalOperatorValues_P1ElementwiseDivKGrad_macro_3D.cpp similarity index 99% rename from operators/div_k_grad/noarch/P1ElementwiseDivKGrad_computeInverseDiagonalOperatorValues_macro_3D.cpp rename to operators/div_k_grad/noarch/P1ElementwiseDivKGrad_computeInverseDiagonalOperatorValues_P1ElementwiseDivKGrad_macro_3D.cpp index 262ee517827adbac14a601b849a714e57bcfee6e..76b3867c5c75d0f9cfd728e8317edd4da5c95b40 100644 --- a/operators/div_k_grad/noarch/P1ElementwiseDivKGrad_computeInverseDiagonalOperatorValues_macro_3D.cpp +++ b/operators/div_k_grad/noarch/P1ElementwiseDivKGrad_computeInverseDiagonalOperatorValues_P1ElementwiseDivKGrad_macro_3D.cpp @@ -43,6 +43,10 @@ + + + + @@ -56,7 +60,7 @@ namespace hyteg { namespace operatorgeneration { -void P1ElementwiseDivKGrad::computeInverseDiagonalOperatorValues_macro_3D( real_t * RESTRICT _data_invDiag_, real_t * RESTRICT _data_k, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P1ElementwiseDivKGrad::computeInverseDiagonalOperatorValues_P1ElementwiseDivKGrad_macro_3D( real_t * RESTRICT _data_invDiag_, real_t * RESTRICT _data_k, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t _data_q_w [] = {0.041666666666666657, 0.041666666666666657, 0.041666666666666657, 0.041666666666666657}; diff --git a/operators/div_k_grad/noarch/P1ElementwiseDivKGrad_toMatrix_macro_2D.cpp b/operators/div_k_grad/noarch/P1ElementwiseDivKGrad_toMatrix_P1ElementwiseDivKGrad_macro_2D.cpp similarity index 97% rename from operators/div_k_grad/noarch/P1ElementwiseDivKGrad_toMatrix_macro_2D.cpp rename to operators/div_k_grad/noarch/P1ElementwiseDivKGrad_toMatrix_P1ElementwiseDivKGrad_macro_2D.cpp index 93e49e99a3972d2dd31b07f665cc29c9efb37263..0258b90a65c5607608c9ef5d49e31a4b89af882c 100644 --- a/operators/div_k_grad/noarch/P1ElementwiseDivKGrad_toMatrix_macro_2D.cpp +++ b/operators/div_k_grad/noarch/P1ElementwiseDivKGrad_toMatrix_P1ElementwiseDivKGrad_macro_2D.cpp @@ -43,6 +43,10 @@ + + + + @@ -56,7 +60,7 @@ namespace hyteg { namespace operatorgeneration { -void P1ElementwiseDivKGrad::toMatrix_macro_2D( idx_t * RESTRICT _data_dst, real_t * RESTRICT _data_k, idx_t * RESTRICT _data_src, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P1ElementwiseDivKGrad::toMatrix_P1ElementwiseDivKGrad_macro_2D( idx_t * RESTRICT _data_dst, real_t * RESTRICT _data_k, idx_t * RESTRICT _data_src, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t _data_q_w [] = {0.16666666666666666, 0.16666666666666666, 0.16666666666666666}; diff --git a/operators/div_k_grad/noarch/P1ElementwiseDivKGrad_toMatrix_macro_3D.cpp b/operators/div_k_grad/noarch/P1ElementwiseDivKGrad_toMatrix_P1ElementwiseDivKGrad_macro_3D.cpp similarity index 99% rename from operators/div_k_grad/noarch/P1ElementwiseDivKGrad_toMatrix_macro_3D.cpp rename to operators/div_k_grad/noarch/P1ElementwiseDivKGrad_toMatrix_P1ElementwiseDivKGrad_macro_3D.cpp index 32fca0a6866a5f41a047a8f6837ead2e077d1436..b09d83541d50f3f6571cf736c344c1d93f67d8e1 100644 --- a/operators/div_k_grad/noarch/P1ElementwiseDivKGrad_toMatrix_macro_3D.cpp +++ b/operators/div_k_grad/noarch/P1ElementwiseDivKGrad_toMatrix_P1ElementwiseDivKGrad_macro_3D.cpp @@ -43,6 +43,10 @@ + + + + @@ -56,7 +60,7 @@ namespace hyteg { namespace operatorgeneration { -void P1ElementwiseDivKGrad::toMatrix_macro_3D( idx_t * RESTRICT _data_dst, real_t * RESTRICT _data_k, idx_t * RESTRICT _data_src, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P1ElementwiseDivKGrad::toMatrix_P1ElementwiseDivKGrad_macro_3D( idx_t * RESTRICT _data_dst, real_t * RESTRICT _data_k, idx_t * RESTRICT _data_src, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t _data_q_w [] = {0.041666666666666657, 0.041666666666666657, 0.041666666666666657, 0.041666666666666657}; diff --git a/operators/div_k_grad/noarch/P2ElementwiseDivKGradAnnulusMap_apply_macro_2D.cpp b/operators/div_k_grad/noarch/P2ElementwiseDivKGradAnnulusMap_apply_P2ElementwiseDivKGradAnnulusMap_macro_2D.cpp similarity index 98% rename from operators/div_k_grad/noarch/P2ElementwiseDivKGradAnnulusMap_apply_macro_2D.cpp rename to operators/div_k_grad/noarch/P2ElementwiseDivKGradAnnulusMap_apply_P2ElementwiseDivKGradAnnulusMap_macro_2D.cpp index 2474e6cc5b113931636179a1ff705fe379bdebb4..426222588a723bc4ae9c16fcb6948ebfb22c6153 100644 --- a/operators/div_k_grad/noarch/P2ElementwiseDivKGradAnnulusMap_apply_macro_2D.cpp +++ b/operators/div_k_grad/noarch/P2ElementwiseDivKGradAnnulusMap_apply_P2ElementwiseDivKGradAnnulusMap_macro_2D.cpp @@ -45,6 +45,10 @@ + + + + @@ -58,7 +62,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseDivKGradAnnulusMap::apply_macro_2D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_kEdge, real_t * RESTRICT _data_kVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const +void P2ElementwiseDivKGradAnnulusMap::apply_P2ElementwiseDivKGradAnnulusMap_macro_2D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_kEdge, real_t * RESTRICT _data_kVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const { { const real_t _data_q_w [] = {-0.28125, 0.26041666666666669, 0.26041666666666669, 0.26041666666666669}; diff --git a/operators/div_k_grad/noarch/P2ElementwiseDivKGradAnnulusMap_computeInverseDiagonalOperatorValues_macro_2D.cpp b/operators/div_k_grad/noarch/P2ElementwiseDivKGradAnnulusMap_computeInverseDiagonalOperatorValues_P2ElementwiseDivKGradAnnulusMap_macro_2D.cpp similarity index 98% rename from operators/div_k_grad/noarch/P2ElementwiseDivKGradAnnulusMap_computeInverseDiagonalOperatorValues_macro_2D.cpp rename to operators/div_k_grad/noarch/P2ElementwiseDivKGradAnnulusMap_computeInverseDiagonalOperatorValues_P2ElementwiseDivKGradAnnulusMap_macro_2D.cpp index b4fc63ccd96c18c3bcd29d42473a361e0747101d..756d25bd9f55bfc6c60cb35da4e38e1888da74fb 100644 --- a/operators/div_k_grad/noarch/P2ElementwiseDivKGradAnnulusMap_computeInverseDiagonalOperatorValues_macro_2D.cpp +++ b/operators/div_k_grad/noarch/P2ElementwiseDivKGradAnnulusMap_computeInverseDiagonalOperatorValues_P2ElementwiseDivKGradAnnulusMap_macro_2D.cpp @@ -45,6 +45,10 @@ + + + + @@ -58,7 +62,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseDivKGradAnnulusMap::computeInverseDiagonalOperatorValues_macro_2D( real_t * RESTRICT _data_invDiag_Edge, real_t * RESTRICT _data_invDiag_Vertex, real_t * RESTRICT _data_kEdge, real_t * RESTRICT _data_kVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const +void P2ElementwiseDivKGradAnnulusMap::computeInverseDiagonalOperatorValues_P2ElementwiseDivKGradAnnulusMap_macro_2D( real_t * RESTRICT _data_invDiag_Edge, real_t * RESTRICT _data_invDiag_Vertex, real_t * RESTRICT _data_kEdge, real_t * RESTRICT _data_kVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const { { const real_t _data_q_w [] = {-0.28125, 0.26041666666666669, 0.26041666666666669, 0.26041666666666669}; diff --git a/operators/div_k_grad/noarch/P2ElementwiseDivKGradAnnulusMap_toMatrix_macro_2D.cpp b/operators/div_k_grad/noarch/P2ElementwiseDivKGradAnnulusMap_toMatrix_P2ElementwiseDivKGradAnnulusMap_macro_2D.cpp similarity index 98% rename from operators/div_k_grad/noarch/P2ElementwiseDivKGradAnnulusMap_toMatrix_macro_2D.cpp rename to operators/div_k_grad/noarch/P2ElementwiseDivKGradAnnulusMap_toMatrix_P2ElementwiseDivKGradAnnulusMap_macro_2D.cpp index eb9f7a4108ab4147e341e6901d1c7b5051a46948..68c6f0150e69b284a18a2e582f2826f38031134b 100644 --- a/operators/div_k_grad/noarch/P2ElementwiseDivKGradAnnulusMap_toMatrix_macro_2D.cpp +++ b/operators/div_k_grad/noarch/P2ElementwiseDivKGradAnnulusMap_toMatrix_P2ElementwiseDivKGradAnnulusMap_macro_2D.cpp @@ -45,6 +45,10 @@ + + + + @@ -58,7 +62,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseDivKGradAnnulusMap::toMatrix_macro_2D( idx_t * RESTRICT _data_dstEdge, idx_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_kEdge, real_t * RESTRICT _data_kVertex, idx_t * RESTRICT _data_srcEdge, idx_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const +void P2ElementwiseDivKGradAnnulusMap::toMatrix_P2ElementwiseDivKGradAnnulusMap_macro_2D( idx_t * RESTRICT _data_dstEdge, idx_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_kEdge, real_t * RESTRICT _data_kVertex, idx_t * RESTRICT _data_srcEdge, idx_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const { { const real_t _data_q_w [] = {-0.28125, 0.26041666666666669, 0.26041666666666669, 0.26041666666666669}; diff --git a/operators/div_k_grad/noarch/P2ElementwiseDivKGradIcosahedralShellMap_apply_macro_3D.cpp b/operators/div_k_grad/noarch/P2ElementwiseDivKGradIcosahedralShellMap_apply_P2ElementwiseDivKGradIcosahedralShellMap_macro_3D.cpp similarity index 99% rename from operators/div_k_grad/noarch/P2ElementwiseDivKGradIcosahedralShellMap_apply_macro_3D.cpp rename to operators/div_k_grad/noarch/P2ElementwiseDivKGradIcosahedralShellMap_apply_P2ElementwiseDivKGradIcosahedralShellMap_macro_3D.cpp index 3e6876d2841b53055dc85b8e2f080cd2ef72ec44..7ef705bededd4381a8b4c1dd84acfae6dbc2deda 100644 --- a/operators/div_k_grad/noarch/P2ElementwiseDivKGradIcosahedralShellMap_apply_macro_3D.cpp +++ b/operators/div_k_grad/noarch/P2ElementwiseDivKGradIcosahedralShellMap_apply_P2ElementwiseDivKGradIcosahedralShellMap_macro_3D.cpp @@ -45,6 +45,10 @@ + + + + @@ -58,7 +62,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseDivKGradIcosahedralShellMap::apply_macro_3D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_kEdge, real_t * RESTRICT _data_kVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const +void P2ElementwiseDivKGradIcosahedralShellMap::apply_P2ElementwiseDivKGradIcosahedralShellMap_macro_3D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_kEdge, real_t * RESTRICT _data_kVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const { { const real_t _data_q_w [] = {-0.1333333333333333, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983}; diff --git a/operators/div_k_grad/noarch/P2ElementwiseDivKGradIcosahedralShellMap_computeInverseDiagonalOperatorValues_macro_3D.cpp b/operators/div_k_grad/noarch/P2ElementwiseDivKGradIcosahedralShellMap_computeInverseDiagonalOperatorValues_P2ElementwiseDivKGradIcosahedralShellMap_macro_3D.cpp similarity index 99% rename from operators/div_k_grad/noarch/P2ElementwiseDivKGradIcosahedralShellMap_computeInverseDiagonalOperatorValues_macro_3D.cpp rename to operators/div_k_grad/noarch/P2ElementwiseDivKGradIcosahedralShellMap_computeInverseDiagonalOperatorValues_P2ElementwiseDivKGradIcosahedralShellMap_macro_3D.cpp index 2ac58fd866a7f66533994ddb8e7d0886be15c1d6..1e24196d93e58dcddcd38bce92e6f92df939a4ab 100644 --- a/operators/div_k_grad/noarch/P2ElementwiseDivKGradIcosahedralShellMap_computeInverseDiagonalOperatorValues_macro_3D.cpp +++ b/operators/div_k_grad/noarch/P2ElementwiseDivKGradIcosahedralShellMap_computeInverseDiagonalOperatorValues_P2ElementwiseDivKGradIcosahedralShellMap_macro_3D.cpp @@ -45,6 +45,10 @@ + + + + @@ -58,7 +62,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseDivKGradIcosahedralShellMap::computeInverseDiagonalOperatorValues_macro_3D( real_t * RESTRICT _data_invDiag_Edge, real_t * RESTRICT _data_invDiag_Vertex, real_t * RESTRICT _data_kEdge, real_t * RESTRICT _data_kVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const +void P2ElementwiseDivKGradIcosahedralShellMap::computeInverseDiagonalOperatorValues_P2ElementwiseDivKGradIcosahedralShellMap_macro_3D( real_t * RESTRICT _data_invDiag_Edge, real_t * RESTRICT _data_invDiag_Vertex, real_t * RESTRICT _data_kEdge, real_t * RESTRICT _data_kVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const { { const real_t _data_q_w [] = {-0.1333333333333333, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983}; diff --git a/operators/div_k_grad/noarch/P2ElementwiseDivKGradIcosahedralShellMap_toMatrix_macro_3D.cpp b/operators/div_k_grad/noarch/P2ElementwiseDivKGradIcosahedralShellMap_toMatrix_P2ElementwiseDivKGradIcosahedralShellMap_macro_3D.cpp similarity index 99% rename from operators/div_k_grad/noarch/P2ElementwiseDivKGradIcosahedralShellMap_toMatrix_macro_3D.cpp rename to operators/div_k_grad/noarch/P2ElementwiseDivKGradIcosahedralShellMap_toMatrix_P2ElementwiseDivKGradIcosahedralShellMap_macro_3D.cpp index 8dc72e26f66594a9af801a5be1bb8ac7ca1554d4..04fced8a4cddfb84384955811ad47227affff6ff 100644 --- a/operators/div_k_grad/noarch/P2ElementwiseDivKGradIcosahedralShellMap_toMatrix_macro_3D.cpp +++ b/operators/div_k_grad/noarch/P2ElementwiseDivKGradIcosahedralShellMap_toMatrix_P2ElementwiseDivKGradIcosahedralShellMap_macro_3D.cpp @@ -45,6 +45,10 @@ + + + + @@ -58,7 +62,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseDivKGradIcosahedralShellMap::toMatrix_macro_3D( idx_t * RESTRICT _data_dstEdge, idx_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_kEdge, real_t * RESTRICT _data_kVertex, idx_t * RESTRICT _data_srcEdge, idx_t * RESTRICT _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const +void P2ElementwiseDivKGradIcosahedralShellMap::toMatrix_P2ElementwiseDivKGradIcosahedralShellMap_macro_3D( idx_t * RESTRICT _data_dstEdge, idx_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_kEdge, real_t * RESTRICT _data_kVertex, idx_t * RESTRICT _data_srcEdge, idx_t * RESTRICT _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const { { const real_t _data_q_w [] = {-0.1333333333333333, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983}; diff --git a/operators/div_k_grad/noarch/P2ElementwiseDivKGrad_apply_macro_2D.cpp b/operators/div_k_grad/noarch/P2ElementwiseDivKGrad_apply_P2ElementwiseDivKGrad_macro_2D.cpp similarity index 98% rename from operators/div_k_grad/noarch/P2ElementwiseDivKGrad_apply_macro_2D.cpp rename to operators/div_k_grad/noarch/P2ElementwiseDivKGrad_apply_P2ElementwiseDivKGrad_macro_2D.cpp index 135d9eff84289c73ab7652d45043963042de8cae..17c7e3afb6de8952afe51a019e306a17165b19cf 100644 --- a/operators/div_k_grad/noarch/P2ElementwiseDivKGrad_apply_macro_2D.cpp +++ b/operators/div_k_grad/noarch/P2ElementwiseDivKGrad_apply_P2ElementwiseDivKGrad_macro_2D.cpp @@ -43,6 +43,10 @@ + + + + @@ -56,7 +60,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseDivKGrad::apply_macro_2D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_kEdge, real_t * RESTRICT _data_kVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P2ElementwiseDivKGrad::apply_P2ElementwiseDivKGrad_macro_2D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_kEdge, real_t * RESTRICT _data_kVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t _data_q_w [] = {0.16666666666666666, 0.16666666666666666, 0.16666666666666666}; diff --git a/operators/div_k_grad/noarch/P2ElementwiseDivKGrad_apply_macro_3D.cpp b/operators/div_k_grad/noarch/P2ElementwiseDivKGrad_apply_P2ElementwiseDivKGrad_macro_3D.cpp similarity index 99% rename from operators/div_k_grad/noarch/P2ElementwiseDivKGrad_apply_macro_3D.cpp rename to operators/div_k_grad/noarch/P2ElementwiseDivKGrad_apply_P2ElementwiseDivKGrad_macro_3D.cpp index b04e3118f0481982e94326a777e11c890490c947..3b8272b2302b43da22b5ba40c44f67b18fafba3f 100644 --- a/operators/div_k_grad/noarch/P2ElementwiseDivKGrad_apply_macro_3D.cpp +++ b/operators/div_k_grad/noarch/P2ElementwiseDivKGrad_apply_P2ElementwiseDivKGrad_macro_3D.cpp @@ -43,6 +43,10 @@ + + + + @@ -56,7 +60,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseDivKGrad::apply_macro_3D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_kEdge, real_t * RESTRICT _data_kVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P2ElementwiseDivKGrad::apply_P2ElementwiseDivKGrad_macro_3D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_kEdge, real_t * RESTRICT _data_kVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t _data_q_w [] = {0.041666666666666657, 0.041666666666666657, 0.041666666666666657, 0.041666666666666657}; diff --git a/operators/div_k_grad/noarch/P2ElementwiseDivKGrad_computeInverseDiagonalOperatorValues_macro_2D.cpp b/operators/div_k_grad/noarch/P2ElementwiseDivKGrad_computeInverseDiagonalOperatorValues_P2ElementwiseDivKGrad_macro_2D.cpp similarity index 98% rename from operators/div_k_grad/noarch/P2ElementwiseDivKGrad_computeInverseDiagonalOperatorValues_macro_2D.cpp rename to operators/div_k_grad/noarch/P2ElementwiseDivKGrad_computeInverseDiagonalOperatorValues_P2ElementwiseDivKGrad_macro_2D.cpp index dace6244acb2f6d3767e7d816188d53c99d83882..f2767976a80dcc1725b4c8fcf81e439ac599a2de 100644 --- a/operators/div_k_grad/noarch/P2ElementwiseDivKGrad_computeInverseDiagonalOperatorValues_macro_2D.cpp +++ b/operators/div_k_grad/noarch/P2ElementwiseDivKGrad_computeInverseDiagonalOperatorValues_P2ElementwiseDivKGrad_macro_2D.cpp @@ -43,6 +43,10 @@ + + + + @@ -56,7 +60,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseDivKGrad::computeInverseDiagonalOperatorValues_macro_2D( real_t * RESTRICT _data_invDiag_Edge, real_t * RESTRICT _data_invDiag_Vertex, real_t * RESTRICT _data_kEdge, real_t * RESTRICT _data_kVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P2ElementwiseDivKGrad::computeInverseDiagonalOperatorValues_P2ElementwiseDivKGrad_macro_2D( real_t * RESTRICT _data_invDiag_Edge, real_t * RESTRICT _data_invDiag_Vertex, real_t * RESTRICT _data_kEdge, real_t * RESTRICT _data_kVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t _data_q_w [] = {0.16666666666666666, 0.16666666666666666, 0.16666666666666666}; diff --git a/operators/div_k_grad/noarch/P2ElementwiseDivKGrad_computeInverseDiagonalOperatorValues_macro_3D.cpp b/operators/div_k_grad/noarch/P2ElementwiseDivKGrad_computeInverseDiagonalOperatorValues_P2ElementwiseDivKGrad_macro_3D.cpp similarity index 99% rename from operators/div_k_grad/noarch/P2ElementwiseDivKGrad_computeInverseDiagonalOperatorValues_macro_3D.cpp rename to operators/div_k_grad/noarch/P2ElementwiseDivKGrad_computeInverseDiagonalOperatorValues_P2ElementwiseDivKGrad_macro_3D.cpp index d4ee433c7deefb9db25651397a44008748051474..f2ccc2b2039e3da374517f02efecb8600b872d00 100644 --- a/operators/div_k_grad/noarch/P2ElementwiseDivKGrad_computeInverseDiagonalOperatorValues_macro_3D.cpp +++ b/operators/div_k_grad/noarch/P2ElementwiseDivKGrad_computeInverseDiagonalOperatorValues_P2ElementwiseDivKGrad_macro_3D.cpp @@ -43,6 +43,10 @@ + + + + @@ -56,7 +60,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseDivKGrad::computeInverseDiagonalOperatorValues_macro_3D( real_t * RESTRICT _data_invDiag_Edge, real_t * RESTRICT _data_invDiag_Vertex, real_t * RESTRICT _data_kEdge, real_t * RESTRICT _data_kVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P2ElementwiseDivKGrad::computeInverseDiagonalOperatorValues_P2ElementwiseDivKGrad_macro_3D( real_t * RESTRICT _data_invDiag_Edge, real_t * RESTRICT _data_invDiag_Vertex, real_t * RESTRICT _data_kEdge, real_t * RESTRICT _data_kVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t _data_q_w [] = {0.041666666666666657, 0.041666666666666657, 0.041666666666666657, 0.041666666666666657}; diff --git a/operators/div_k_grad/noarch/P2ElementwiseDivKGrad_toMatrix_macro_2D.cpp b/operators/div_k_grad/noarch/P2ElementwiseDivKGrad_toMatrix_P2ElementwiseDivKGrad_macro_2D.cpp similarity index 98% rename from operators/div_k_grad/noarch/P2ElementwiseDivKGrad_toMatrix_macro_2D.cpp rename to operators/div_k_grad/noarch/P2ElementwiseDivKGrad_toMatrix_P2ElementwiseDivKGrad_macro_2D.cpp index d19cb169678e498258f0fc24a815cb73b9e69efb..16cf14e5d1dac841da27e4f49f187f222a2a9389 100644 --- a/operators/div_k_grad/noarch/P2ElementwiseDivKGrad_toMatrix_macro_2D.cpp +++ b/operators/div_k_grad/noarch/P2ElementwiseDivKGrad_toMatrix_P2ElementwiseDivKGrad_macro_2D.cpp @@ -43,6 +43,10 @@ + + + + @@ -56,7 +60,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseDivKGrad::toMatrix_macro_2D( idx_t * RESTRICT _data_dstEdge, idx_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_kEdge, real_t * RESTRICT _data_kVertex, idx_t * RESTRICT _data_srcEdge, idx_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P2ElementwiseDivKGrad::toMatrix_P2ElementwiseDivKGrad_macro_2D( idx_t * RESTRICT _data_dstEdge, idx_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_kEdge, real_t * RESTRICT _data_kVertex, idx_t * RESTRICT _data_srcEdge, idx_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t _data_q_w [] = {0.16666666666666666, 0.16666666666666666, 0.16666666666666666}; diff --git a/operators/div_k_grad/noarch/P2ElementwiseDivKGrad_toMatrix_macro_3D.cpp b/operators/div_k_grad/noarch/P2ElementwiseDivKGrad_toMatrix_P2ElementwiseDivKGrad_macro_3D.cpp similarity index 99% rename from operators/div_k_grad/noarch/P2ElementwiseDivKGrad_toMatrix_macro_3D.cpp rename to operators/div_k_grad/noarch/P2ElementwiseDivKGrad_toMatrix_P2ElementwiseDivKGrad_macro_3D.cpp index 280d81063a44ff7611631c245d30c75b4c6382c5..a9ef3882e1ee3778c0156dca6e2ffffb8b915bfc 100644 --- a/operators/div_k_grad/noarch/P2ElementwiseDivKGrad_toMatrix_macro_3D.cpp +++ b/operators/div_k_grad/noarch/P2ElementwiseDivKGrad_toMatrix_P2ElementwiseDivKGrad_macro_3D.cpp @@ -43,6 +43,10 @@ + + + + @@ -56,7 +60,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseDivKGrad::toMatrix_macro_3D( idx_t * RESTRICT _data_dstEdge, idx_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_kEdge, real_t * RESTRICT _data_kVertex, idx_t * RESTRICT _data_srcEdge, idx_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P2ElementwiseDivKGrad::toMatrix_P2ElementwiseDivKGrad_macro_3D( idx_t * RESTRICT _data_dstEdge, idx_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_kEdge, real_t * RESTRICT _data_kVertex, idx_t * RESTRICT _data_srcEdge, idx_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t _data_q_w [] = {0.041666666666666657, 0.041666666666666657, 0.041666666666666657, 0.041666666666666657}; diff --git a/operators/divergence/CMakeLists.txt b/operators/divergence/CMakeLists.txt index a3729803e8768c36970de4d38330fbb416bda1d9..dec538e7ba68bfd22bc9e1d40ffb975115f051e7 100644 --- a/operators/divergence/CMakeLists.txt +++ b/operators/divergence/CMakeLists.txt @@ -21,40 +21,40 @@ add_library( opgen-divergence if(HYTEG_BUILD_WITH_AVX AND WALBERLA_DOUBLE_ACCURACY) target_sources(opgen-divergence PRIVATE - avx/P2ToP1ElementwiseDivergenceAnnulusMap_0_0_apply_macro_2D.cpp - avx/P2ToP1ElementwiseDivergenceAnnulusMap_0_1_apply_macro_2D.cpp - avx/P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_0_apply_macro_3D.cpp - avx/P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_1_apply_macro_3D.cpp - avx/P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_2_apply_macro_3D.cpp - avx/P2ToP1ElementwiseDivergence_0_0_apply_macro_2D.cpp - avx/P2ToP1ElementwiseDivergence_0_0_apply_macro_3D.cpp - avx/P2ToP1ElementwiseDivergence_0_1_apply_macro_2D.cpp - avx/P2ToP1ElementwiseDivergence_0_1_apply_macro_3D.cpp - avx/P2ToP1ElementwiseDivergence_0_2_apply_macro_3D.cpp - noarch/P2ToP1ElementwiseDivergenceAnnulusMap_0_0_toMatrix_macro_2D.cpp - noarch/P2ToP1ElementwiseDivergenceAnnulusMap_0_1_toMatrix_macro_2D.cpp - noarch/P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_0_toMatrix_macro_3D.cpp - noarch/P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_1_toMatrix_macro_3D.cpp - noarch/P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_2_toMatrix_macro_3D.cpp - noarch/P2ToP1ElementwiseDivergence_0_0_toMatrix_macro_2D.cpp - noarch/P2ToP1ElementwiseDivergence_0_0_toMatrix_macro_3D.cpp - noarch/P2ToP1ElementwiseDivergence_0_1_toMatrix_macro_2D.cpp - noarch/P2ToP1ElementwiseDivergence_0_1_toMatrix_macro_3D.cpp - noarch/P2ToP1ElementwiseDivergence_0_2_toMatrix_macro_3D.cpp + avx/P2ToP1ElementwiseDivergenceAnnulusMap_0_0_apply_P2ToP1ElementwiseDivergenceAnnulusMap_0_0_macro_2D.cpp + avx/P2ToP1ElementwiseDivergenceAnnulusMap_0_1_apply_P2ToP1ElementwiseDivergenceAnnulusMap_0_1_macro_2D.cpp + avx/P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_0_apply_P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_0_macro_3D.cpp + avx/P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_1_apply_P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_1_macro_3D.cpp + avx/P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_2_apply_P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_2_macro_3D.cpp + avx/P2ToP1ElementwiseDivergence_0_0_apply_P2ToP1ElementwiseDivergence_0_0_macro_2D.cpp + avx/P2ToP1ElementwiseDivergence_0_0_apply_P2ToP1ElementwiseDivergence_0_0_macro_3D.cpp + avx/P2ToP1ElementwiseDivergence_0_1_apply_P2ToP1ElementwiseDivergence_0_1_macro_2D.cpp + avx/P2ToP1ElementwiseDivergence_0_1_apply_P2ToP1ElementwiseDivergence_0_1_macro_3D.cpp + avx/P2ToP1ElementwiseDivergence_0_2_apply_P2ToP1ElementwiseDivergence_0_2_macro_3D.cpp + noarch/P2ToP1ElementwiseDivergenceAnnulusMap_0_0_toMatrix_P2ToP1ElementwiseDivergenceAnnulusMap_0_0_macro_2D.cpp + noarch/P2ToP1ElementwiseDivergenceAnnulusMap_0_1_toMatrix_P2ToP1ElementwiseDivergenceAnnulusMap_0_1_macro_2D.cpp + noarch/P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_0_toMatrix_P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_0_macro_3D.cpp + noarch/P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_1_toMatrix_P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_1_macro_3D.cpp + noarch/P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_2_toMatrix_P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_2_macro_3D.cpp + noarch/P2ToP1ElementwiseDivergence_0_0_toMatrix_P2ToP1ElementwiseDivergence_0_0_macro_2D.cpp + noarch/P2ToP1ElementwiseDivergence_0_0_toMatrix_P2ToP1ElementwiseDivergence_0_0_macro_3D.cpp + noarch/P2ToP1ElementwiseDivergence_0_1_toMatrix_P2ToP1ElementwiseDivergence_0_1_macro_2D.cpp + noarch/P2ToP1ElementwiseDivergence_0_1_toMatrix_P2ToP1ElementwiseDivergence_0_1_macro_3D.cpp + noarch/P2ToP1ElementwiseDivergence_0_2_toMatrix_P2ToP1ElementwiseDivergence_0_2_macro_3D.cpp ) set_source_files_properties( - avx/P2ToP1ElementwiseDivergenceAnnulusMap_0_0_apply_macro_2D.cpp - avx/P2ToP1ElementwiseDivergenceAnnulusMap_0_1_apply_macro_2D.cpp - avx/P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_0_apply_macro_3D.cpp - avx/P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_1_apply_macro_3D.cpp - avx/P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_2_apply_macro_3D.cpp - avx/P2ToP1ElementwiseDivergence_0_0_apply_macro_2D.cpp - avx/P2ToP1ElementwiseDivergence_0_0_apply_macro_3D.cpp - avx/P2ToP1ElementwiseDivergence_0_1_apply_macro_2D.cpp - avx/P2ToP1ElementwiseDivergence_0_1_apply_macro_3D.cpp - avx/P2ToP1ElementwiseDivergence_0_2_apply_macro_3D.cpp + avx/P2ToP1ElementwiseDivergenceAnnulusMap_0_0_apply_P2ToP1ElementwiseDivergenceAnnulusMap_0_0_macro_2D.cpp + avx/P2ToP1ElementwiseDivergenceAnnulusMap_0_1_apply_P2ToP1ElementwiseDivergenceAnnulusMap_0_1_macro_2D.cpp + avx/P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_0_apply_P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_0_macro_3D.cpp + avx/P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_1_apply_P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_1_macro_3D.cpp + avx/P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_2_apply_P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_2_macro_3D.cpp + avx/P2ToP1ElementwiseDivergence_0_0_apply_P2ToP1ElementwiseDivergence_0_0_macro_2D.cpp + avx/P2ToP1ElementwiseDivergence_0_0_apply_P2ToP1ElementwiseDivergence_0_0_macro_3D.cpp + avx/P2ToP1ElementwiseDivergence_0_1_apply_P2ToP1ElementwiseDivergence_0_1_macro_2D.cpp + avx/P2ToP1ElementwiseDivergence_0_1_apply_P2ToP1ElementwiseDivergence_0_1_macro_3D.cpp + avx/P2ToP1ElementwiseDivergence_0_2_apply_P2ToP1ElementwiseDivergence_0_2_macro_3D.cpp PROPERTIES COMPILE_OPTIONS ${HYTEG_COMPILER_NATIVE_FLAGS} ) @@ -65,26 +65,26 @@ else() target_sources(opgen-divergence PRIVATE - noarch/P2ToP1ElementwiseDivergenceAnnulusMap_0_0_apply_macro_2D.cpp - noarch/P2ToP1ElementwiseDivergenceAnnulusMap_0_0_toMatrix_macro_2D.cpp - noarch/P2ToP1ElementwiseDivergenceAnnulusMap_0_1_apply_macro_2D.cpp - noarch/P2ToP1ElementwiseDivergenceAnnulusMap_0_1_toMatrix_macro_2D.cpp - noarch/P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_0_apply_macro_3D.cpp - noarch/P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_0_toMatrix_macro_3D.cpp - noarch/P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_1_apply_macro_3D.cpp - noarch/P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_1_toMatrix_macro_3D.cpp - noarch/P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_2_apply_macro_3D.cpp - noarch/P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_2_toMatrix_macro_3D.cpp - noarch/P2ToP1ElementwiseDivergence_0_0_apply_macro_2D.cpp - noarch/P2ToP1ElementwiseDivergence_0_0_apply_macro_3D.cpp - noarch/P2ToP1ElementwiseDivergence_0_0_toMatrix_macro_2D.cpp - noarch/P2ToP1ElementwiseDivergence_0_0_toMatrix_macro_3D.cpp - noarch/P2ToP1ElementwiseDivergence_0_1_apply_macro_2D.cpp - noarch/P2ToP1ElementwiseDivergence_0_1_apply_macro_3D.cpp - noarch/P2ToP1ElementwiseDivergence_0_1_toMatrix_macro_2D.cpp - noarch/P2ToP1ElementwiseDivergence_0_1_toMatrix_macro_3D.cpp - noarch/P2ToP1ElementwiseDivergence_0_2_apply_macro_3D.cpp - noarch/P2ToP1ElementwiseDivergence_0_2_toMatrix_macro_3D.cpp + noarch/P2ToP1ElementwiseDivergenceAnnulusMap_0_0_apply_P2ToP1ElementwiseDivergenceAnnulusMap_0_0_macro_2D.cpp + noarch/P2ToP1ElementwiseDivergenceAnnulusMap_0_0_toMatrix_P2ToP1ElementwiseDivergenceAnnulusMap_0_0_macro_2D.cpp + noarch/P2ToP1ElementwiseDivergenceAnnulusMap_0_1_apply_P2ToP1ElementwiseDivergenceAnnulusMap_0_1_macro_2D.cpp + noarch/P2ToP1ElementwiseDivergenceAnnulusMap_0_1_toMatrix_P2ToP1ElementwiseDivergenceAnnulusMap_0_1_macro_2D.cpp + noarch/P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_0_apply_P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_0_macro_3D.cpp + noarch/P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_0_toMatrix_P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_0_macro_3D.cpp + noarch/P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_1_apply_P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_1_macro_3D.cpp + noarch/P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_1_toMatrix_P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_1_macro_3D.cpp + noarch/P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_2_apply_P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_2_macro_3D.cpp + noarch/P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_2_toMatrix_P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_2_macro_3D.cpp + noarch/P2ToP1ElementwiseDivergence_0_0_apply_P2ToP1ElementwiseDivergence_0_0_macro_2D.cpp + noarch/P2ToP1ElementwiseDivergence_0_0_apply_P2ToP1ElementwiseDivergence_0_0_macro_3D.cpp + noarch/P2ToP1ElementwiseDivergence_0_0_toMatrix_P2ToP1ElementwiseDivergence_0_0_macro_2D.cpp + noarch/P2ToP1ElementwiseDivergence_0_0_toMatrix_P2ToP1ElementwiseDivergence_0_0_macro_3D.cpp + noarch/P2ToP1ElementwiseDivergence_0_1_apply_P2ToP1ElementwiseDivergence_0_1_macro_2D.cpp + noarch/P2ToP1ElementwiseDivergence_0_1_apply_P2ToP1ElementwiseDivergence_0_1_macro_3D.cpp + noarch/P2ToP1ElementwiseDivergence_0_1_toMatrix_P2ToP1ElementwiseDivergence_0_1_macro_2D.cpp + noarch/P2ToP1ElementwiseDivergence_0_1_toMatrix_P2ToP1ElementwiseDivergence_0_1_macro_3D.cpp + noarch/P2ToP1ElementwiseDivergence_0_2_apply_P2ToP1ElementwiseDivergence_0_2_macro_3D.cpp + noarch/P2ToP1ElementwiseDivergence_0_2_toMatrix_P2ToP1ElementwiseDivergence_0_2_macro_3D.cpp ) endif() diff --git a/operators/divergence/P2ToP1ElementwiseDivergenceAnnulusMap_0_0.cpp b/operators/divergence/P2ToP1ElementwiseDivergenceAnnulusMap_0_0.cpp index a3480614dffb4647e3b4795729cd255aec45551e..4279d509aab974f958e2cd74d13a8f3c8dea769e 100644 --- a/operators/divergence/P2ToP1ElementwiseDivergenceAnnulusMap_0_0.cpp +++ b/operators/divergence/P2ToP1ElementwiseDivergenceAnnulusMap_0_0.cpp @@ -133,7 +133,7 @@ void P2ToP1ElementwiseDivergenceAnnulusMap_0_0::apply( const P2Function< real_t this->timingTree_->start( "kernel" ); - apply_macro_2D( + apply_P2ToP1ElementwiseDivergenceAnnulusMap_0_0_macro_2D( _data_dst, _data_srcEdge, @@ -154,6 +154,7 @@ void P2ToP1ElementwiseDivergenceAnnulusMap_0_0::apply( const P2Function< real_t refVertex_1, thrVertex_0, thrVertex_1 ); + this->timingTree_->stop( "kernel" ); } @@ -228,7 +229,7 @@ void P2ToP1ElementwiseDivergenceAnnulusMap_0_0::toMatrix( const std::shared_ptr< this->timingTree_->start( "kernel" ); - toMatrix_macro_2D( + toMatrix_P2ToP1ElementwiseDivergenceAnnulusMap_0_0_macro_2D( _data_dst, _data_srcEdge, @@ -250,6 +251,7 @@ void P2ToP1ElementwiseDivergenceAnnulusMap_0_0::toMatrix( const std::shared_ptr< refVertex_1, thrVertex_0, thrVertex_1 ); + this->timingTree_->stop( "kernel" ); } } diff --git a/operators/divergence/P2ToP1ElementwiseDivergenceAnnulusMap_0_0.hpp b/operators/divergence/P2ToP1ElementwiseDivergenceAnnulusMap_0_0.hpp index 8480f840ce60aab3885922f51d57fcddbb25d90f..99cb2ed6e07be254883c257e69d08cc7f569a10c 100644 --- a/operators/divergence/P2ToP1ElementwiseDivergenceAnnulusMap_0_0.hpp +++ b/operators/divergence/P2ToP1ElementwiseDivergenceAnnulusMap_0_0.hpp @@ -29,6 +29,7 @@ #include "core/DataTypes.h" #include "hyteg/LikwidWrapper.hpp" +#include "hyteg/boundary/BoundaryConditions.hpp" #include "hyteg/communication/Syncing.hpp" #include "hyteg/edgedofspace/EdgeDoFMacroCell.hpp" #include "hyteg/geometry/AnnulusMap.hpp" @@ -37,6 +38,7 @@ #include "hyteg/p2functionspace/P2Function.hpp" #include "hyteg/primitivestorage/PrimitiveStorage.hpp" #include "hyteg/sparseassembly/SparseMatrixProxy.hpp" +#include "hyteg/types/types.hpp" #define FUNC_PREFIX @@ -77,57 +79,66 @@ class P2ToP1ElementwiseDivergenceAnnulusMap_0_0 : public Operator< P2Function< r protected: private: - /// Kernel type: apply + /// Integral: P2ToP1ElementwiseDivergenceAnnulusMap_0_0 + /// - volume element: triangle, dim: 2, vertices: 3, spacedim: 2 + /// - kernel type: apply + /// - loop strategy: SAWTOOTH /// - quadrature rule: Dunavant 2 | points: 3, degree: 2 + /// - blending map: AnnulusMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 220 318 17 12 3 0 0 1 - void apply_macro_2D( real_t* RESTRICT _data_dst, - real_t* RESTRICT _data_srcEdge, - real_t* RESTRICT _data_srcVertex, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float, - real_t radRayVertex, - real_t radRefVertex, - real_t rayVertex_0, - real_t rayVertex_1, - real_t refVertex_0, - real_t refVertex_1, - real_t thrVertex_0, - real_t thrVertex_1 ) const; - /// Kernel type: toMatrix + void apply_P2ToP1ElementwiseDivergenceAnnulusMap_0_0_macro_2D( real_t* RESTRICT _data_dst, + real_t* RESTRICT _data_srcEdge, + real_t* RESTRICT _data_srcVertex, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float, + real_t radRayVertex, + real_t radRefVertex, + real_t rayVertex_0, + real_t rayVertex_1, + real_t refVertex_0, + real_t refVertex_1, + real_t thrVertex_0, + real_t thrVertex_1 ) const; + + /// Integral: P2ToP1ElementwiseDivergenceAnnulusMap_0_0 + /// - volume element: triangle, dim: 2, vertices: 3, spacedim: 2 + /// - kernel type: toMatrix + /// - loop strategy: SAWTOOTH /// - quadrature rule: Dunavant 2 | points: 3, degree: 2 + /// - blending map: AnnulusMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 202 300 17 12 3 0 0 4 - void toMatrix_macro_2D( idx_t* RESTRICT _data_dst, - idx_t* RESTRICT _data_srcEdge, - idx_t* RESTRICT _data_srcVertex, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - std::shared_ptr< SparseMatrixProxy > mat, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float, - real_t radRayVertex, - real_t radRefVertex, - real_t rayVertex_0, - real_t rayVertex_1, - real_t refVertex_0, - real_t refVertex_1, - real_t thrVertex_0, - real_t thrVertex_1 ) const; + void toMatrix_P2ToP1ElementwiseDivergenceAnnulusMap_0_0_macro_2D( idx_t* RESTRICT _data_dst, + idx_t* RESTRICT _data_srcEdge, + idx_t* RESTRICT _data_srcVertex, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + std::shared_ptr< SparseMatrixProxy > mat, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float, + real_t radRayVertex, + real_t radRefVertex, + real_t rayVertex_0, + real_t rayVertex_1, + real_t refVertex_0, + real_t refVertex_1, + real_t thrVertex_0, + real_t thrVertex_1 ) const; }; } // namespace operatorgeneration diff --git a/operators/divergence/P2ToP1ElementwiseDivergenceAnnulusMap_0_1.cpp b/operators/divergence/P2ToP1ElementwiseDivergenceAnnulusMap_0_1.cpp index 682f2f25ff7e7835fcda7b5110553571edee5ffc..f63530f1c2013011f5816d93c713d8df0fb2eab8 100644 --- a/operators/divergence/P2ToP1ElementwiseDivergenceAnnulusMap_0_1.cpp +++ b/operators/divergence/P2ToP1ElementwiseDivergenceAnnulusMap_0_1.cpp @@ -133,7 +133,7 @@ void P2ToP1ElementwiseDivergenceAnnulusMap_0_1::apply( const P2Function< real_t this->timingTree_->start( "kernel" ); - apply_macro_2D( + apply_P2ToP1ElementwiseDivergenceAnnulusMap_0_1_macro_2D( _data_dst, _data_srcEdge, @@ -154,6 +154,7 @@ void P2ToP1ElementwiseDivergenceAnnulusMap_0_1::apply( const P2Function< real_t refVertex_1, thrVertex_0, thrVertex_1 ); + this->timingTree_->stop( "kernel" ); } @@ -228,7 +229,7 @@ void P2ToP1ElementwiseDivergenceAnnulusMap_0_1::toMatrix( const std::shared_ptr< this->timingTree_->start( "kernel" ); - toMatrix_macro_2D( + toMatrix_P2ToP1ElementwiseDivergenceAnnulusMap_0_1_macro_2D( _data_dst, _data_srcEdge, @@ -250,6 +251,7 @@ void P2ToP1ElementwiseDivergenceAnnulusMap_0_1::toMatrix( const std::shared_ptr< refVertex_1, thrVertex_0, thrVertex_1 ); + this->timingTree_->stop( "kernel" ); } } diff --git a/operators/divergence/P2ToP1ElementwiseDivergenceAnnulusMap_0_1.hpp b/operators/divergence/P2ToP1ElementwiseDivergenceAnnulusMap_0_1.hpp index da1d51b4de35bc3c88a38652c05165e4f14bf336..d9659d2090d8d7a42183282153a5ffdfb14f3ffa 100644 --- a/operators/divergence/P2ToP1ElementwiseDivergenceAnnulusMap_0_1.hpp +++ b/operators/divergence/P2ToP1ElementwiseDivergenceAnnulusMap_0_1.hpp @@ -29,6 +29,7 @@ #include "core/DataTypes.h" #include "hyteg/LikwidWrapper.hpp" +#include "hyteg/boundary/BoundaryConditions.hpp" #include "hyteg/communication/Syncing.hpp" #include "hyteg/edgedofspace/EdgeDoFMacroCell.hpp" #include "hyteg/geometry/AnnulusMap.hpp" @@ -37,6 +38,7 @@ #include "hyteg/p2functionspace/P2Function.hpp" #include "hyteg/primitivestorage/PrimitiveStorage.hpp" #include "hyteg/sparseassembly/SparseMatrixProxy.hpp" +#include "hyteg/types/types.hpp" #define FUNC_PREFIX @@ -77,57 +79,66 @@ class P2ToP1ElementwiseDivergenceAnnulusMap_0_1 : public Operator< P2Function< r protected: private: - /// Kernel type: apply + /// Integral: P2ToP1ElementwiseDivergenceAnnulusMap_0_1 + /// - volume element: triangle, dim: 2, vertices: 3, spacedim: 2 + /// - kernel type: apply + /// - loop strategy: SAWTOOTH /// - quadrature rule: Dunavant 2 | points: 3, degree: 2 + /// - blending map: AnnulusMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 220 318 17 12 3 0 0 1 - void apply_macro_2D( real_t* RESTRICT _data_dst, - real_t* RESTRICT _data_srcEdge, - real_t* RESTRICT _data_srcVertex, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float, - real_t radRayVertex, - real_t radRefVertex, - real_t rayVertex_0, - real_t rayVertex_1, - real_t refVertex_0, - real_t refVertex_1, - real_t thrVertex_0, - real_t thrVertex_1 ) const; - /// Kernel type: toMatrix + void apply_P2ToP1ElementwiseDivergenceAnnulusMap_0_1_macro_2D( real_t* RESTRICT _data_dst, + real_t* RESTRICT _data_srcEdge, + real_t* RESTRICT _data_srcVertex, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float, + real_t radRayVertex, + real_t radRefVertex, + real_t rayVertex_0, + real_t rayVertex_1, + real_t refVertex_0, + real_t refVertex_1, + real_t thrVertex_0, + real_t thrVertex_1 ) const; + + /// Integral: P2ToP1ElementwiseDivergenceAnnulusMap_0_1 + /// - volume element: triangle, dim: 2, vertices: 3, spacedim: 2 + /// - kernel type: toMatrix + /// - loop strategy: SAWTOOTH /// - quadrature rule: Dunavant 2 | points: 3, degree: 2 + /// - blending map: AnnulusMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 202 300 17 12 3 0 0 4 - void toMatrix_macro_2D( idx_t* RESTRICT _data_dst, - idx_t* RESTRICT _data_srcEdge, - idx_t* RESTRICT _data_srcVertex, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - std::shared_ptr< SparseMatrixProxy > mat, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float, - real_t radRayVertex, - real_t radRefVertex, - real_t rayVertex_0, - real_t rayVertex_1, - real_t refVertex_0, - real_t refVertex_1, - real_t thrVertex_0, - real_t thrVertex_1 ) const; + void toMatrix_P2ToP1ElementwiseDivergenceAnnulusMap_0_1_macro_2D( idx_t* RESTRICT _data_dst, + idx_t* RESTRICT _data_srcEdge, + idx_t* RESTRICT _data_srcVertex, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + std::shared_ptr< SparseMatrixProxy > mat, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float, + real_t radRayVertex, + real_t radRefVertex, + real_t rayVertex_0, + real_t rayVertex_1, + real_t refVertex_0, + real_t refVertex_1, + real_t thrVertex_0, + real_t thrVertex_1 ) const; }; } // namespace operatorgeneration diff --git a/operators/divergence/P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_0.cpp b/operators/divergence/P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_0.cpp index 9b612ab41f00271d54171c2307ff387461bcddd2..6597cfad20c2df8ce25ad6ee62d44c276e2ecd09 100644 --- a/operators/divergence/P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_0.cpp +++ b/operators/divergence/P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_0.cpp @@ -145,7 +145,7 @@ void P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_0::apply( const P2Function this->timingTree_->start( "kernel" ); - apply_macro_3D( + apply_P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_0_macro_3D( _data_dst, _data_srcEdge, @@ -178,6 +178,7 @@ void P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_0::apply( const P2Function thrVertex_0, thrVertex_1, thrVertex_2 ); + this->timingTree_->stop( "kernel" ); } @@ -261,7 +262,7 @@ void P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_0::toMatrix( const std::sh this->timingTree_->start( "kernel" ); - toMatrix_macro_3D( + toMatrix_P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_0_macro_3D( _data_dst, _data_srcEdge, @@ -295,6 +296,7 @@ void P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_0::toMatrix( const std::sh thrVertex_0, thrVertex_1, thrVertex_2 ); + this->timingTree_->stop( "kernel" ); } } diff --git a/operators/divergence/P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_0.hpp b/operators/divergence/P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_0.hpp index 271f6e6e39c34ebd9a6a630f2ed02f704fd4242a..58026a0dc03e59cdd99444fc89907aad09d3a95f 100644 --- a/operators/divergence/P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_0.hpp +++ b/operators/divergence/P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_0.hpp @@ -29,6 +29,7 @@ #include "core/DataTypes.h" #include "hyteg/LikwidWrapper.hpp" +#include "hyteg/boundary/BoundaryConditions.hpp" #include "hyteg/communication/Syncing.hpp" #include "hyteg/edgedofspace/EdgeDoFMacroCell.hpp" #include "hyteg/geometry/IcosahedralShellMap.hpp" @@ -37,6 +38,7 @@ #include "hyteg/p2functionspace/P2Function.hpp" #include "hyteg/primitivestorage/PrimitiveStorage.hpp" #include "hyteg/sparseassembly/SparseMatrixProxy.hpp" +#include "hyteg/types/types.hpp" #define FUNC_PREFIX @@ -77,81 +79,90 @@ class P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_0 : public Operator< P2Fu protected: private: - /// Kernel type: apply + /// Integral: P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_0 + /// - volume element: tetrahedron, dim: 3, vertices: 4, spacedim: 3 + /// - kernel type: apply + /// - loop strategy: SAWTOOTH /// - quadrature rule: Hammer-Marlowe-Stroud 1 | points: 4, degree: 2 + /// - blending map: IcosahedralShellMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 540 755 46 4 4 0 0 1 - void apply_macro_3D( real_t* RESTRICT _data_dst, - real_t* RESTRICT _data_srcEdge, - real_t* RESTRICT _data_srcVertex, - real_t forVertex_0, - real_t forVertex_1, - real_t forVertex_2, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_0comp2, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_1comp2, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - real_t macro_vertex_coord_id_2comp2, - real_t macro_vertex_coord_id_3comp0, - real_t macro_vertex_coord_id_3comp1, - real_t macro_vertex_coord_id_3comp2, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float, - real_t radRayVertex, - real_t radRefVertex, - real_t rayVertex_0, - real_t rayVertex_1, - real_t rayVertex_2, - real_t refVertex_0, - real_t refVertex_1, - real_t refVertex_2, - real_t thrVertex_0, - real_t thrVertex_1, - real_t thrVertex_2 ) const; - /// Kernel type: toMatrix + void apply_P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_0_macro_3D( real_t* RESTRICT _data_dst, + real_t* RESTRICT _data_srcEdge, + real_t* RESTRICT _data_srcVertex, + real_t forVertex_0, + real_t forVertex_1, + real_t forVertex_2, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_0comp2, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_1comp2, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + real_t macro_vertex_coord_id_2comp2, + real_t macro_vertex_coord_id_3comp0, + real_t macro_vertex_coord_id_3comp1, + real_t macro_vertex_coord_id_3comp2, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float, + real_t radRayVertex, + real_t radRefVertex, + real_t rayVertex_0, + real_t rayVertex_1, + real_t rayVertex_2, + real_t refVertex_0, + real_t refVertex_1, + real_t refVertex_2, + real_t thrVertex_0, + real_t thrVertex_1, + real_t thrVertex_2 ) const; + + /// Integral: P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_0 + /// - volume element: tetrahedron, dim: 3, vertices: 4, spacedim: 3 + /// - kernel type: toMatrix + /// - loop strategy: SAWTOOTH /// - quadrature rule: Hammer-Marlowe-Stroud 1 | points: 4, degree: 2 + /// - blending map: IcosahedralShellMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 500 715 46 4 4 0 0 4 - void toMatrix_macro_3D( idx_t* RESTRICT _data_dst, - idx_t* RESTRICT _data_srcEdge, - idx_t* RESTRICT _data_srcVertex, - real_t forVertex_0, - real_t forVertex_1, - real_t forVertex_2, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_0comp2, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_1comp2, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - real_t macro_vertex_coord_id_2comp2, - real_t macro_vertex_coord_id_3comp0, - real_t macro_vertex_coord_id_3comp1, - real_t macro_vertex_coord_id_3comp2, - std::shared_ptr< SparseMatrixProxy > mat, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float, - real_t radRayVertex, - real_t radRefVertex, - real_t rayVertex_0, - real_t rayVertex_1, - real_t rayVertex_2, - real_t refVertex_0, - real_t refVertex_1, - real_t refVertex_2, - real_t thrVertex_0, - real_t thrVertex_1, - real_t thrVertex_2 ) const; + void toMatrix_P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_0_macro_3D( idx_t* RESTRICT _data_dst, + idx_t* RESTRICT _data_srcEdge, + idx_t* RESTRICT _data_srcVertex, + real_t forVertex_0, + real_t forVertex_1, + real_t forVertex_2, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_0comp2, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_1comp2, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + real_t macro_vertex_coord_id_2comp2, + real_t macro_vertex_coord_id_3comp0, + real_t macro_vertex_coord_id_3comp1, + real_t macro_vertex_coord_id_3comp2, + std::shared_ptr< SparseMatrixProxy > mat, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float, + real_t radRayVertex, + real_t radRefVertex, + real_t rayVertex_0, + real_t rayVertex_1, + real_t rayVertex_2, + real_t refVertex_0, + real_t refVertex_1, + real_t refVertex_2, + real_t thrVertex_0, + real_t thrVertex_1, + real_t thrVertex_2 ) const; }; } // namespace operatorgeneration diff --git a/operators/divergence/P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_1.cpp b/operators/divergence/P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_1.cpp index 9759b2b7cf21e6de5bd59d378b7897f6607424c9..b7a33b38bbf42884045feeddd1e72a599952e4b1 100644 --- a/operators/divergence/P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_1.cpp +++ b/operators/divergence/P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_1.cpp @@ -145,7 +145,7 @@ void P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_1::apply( const P2Function this->timingTree_->start( "kernel" ); - apply_macro_3D( + apply_P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_1_macro_3D( _data_dst, _data_srcEdge, @@ -178,6 +178,7 @@ void P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_1::apply( const P2Function thrVertex_0, thrVertex_1, thrVertex_2 ); + this->timingTree_->stop( "kernel" ); } @@ -261,7 +262,7 @@ void P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_1::toMatrix( const std::sh this->timingTree_->start( "kernel" ); - toMatrix_macro_3D( + toMatrix_P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_1_macro_3D( _data_dst, _data_srcEdge, @@ -295,6 +296,7 @@ void P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_1::toMatrix( const std::sh thrVertex_0, thrVertex_1, thrVertex_2 ); + this->timingTree_->stop( "kernel" ); } } diff --git a/operators/divergence/P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_1.hpp b/operators/divergence/P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_1.hpp index c4c588ca7a6b431fce313e9c71905412ce80893e..b04b089152b4c9a59660d0848f952bc816c1b85c 100644 --- a/operators/divergence/P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_1.hpp +++ b/operators/divergence/P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_1.hpp @@ -29,6 +29,7 @@ #include "core/DataTypes.h" #include "hyteg/LikwidWrapper.hpp" +#include "hyteg/boundary/BoundaryConditions.hpp" #include "hyteg/communication/Syncing.hpp" #include "hyteg/edgedofspace/EdgeDoFMacroCell.hpp" #include "hyteg/geometry/IcosahedralShellMap.hpp" @@ -37,6 +38,7 @@ #include "hyteg/p2functionspace/P2Function.hpp" #include "hyteg/primitivestorage/PrimitiveStorage.hpp" #include "hyteg/sparseassembly/SparseMatrixProxy.hpp" +#include "hyteg/types/types.hpp" #define FUNC_PREFIX @@ -77,81 +79,90 @@ class P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_1 : public Operator< P2Fu protected: private: - /// Kernel type: apply + /// Integral: P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_1 + /// - volume element: tetrahedron, dim: 3, vertices: 4, spacedim: 3 + /// - kernel type: apply + /// - loop strategy: SAWTOOTH /// - quadrature rule: Hammer-Marlowe-Stroud 1 | points: 4, degree: 2 + /// - blending map: IcosahedralShellMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 540 755 46 4 4 0 0 1 - void apply_macro_3D( real_t* RESTRICT _data_dst, - real_t* RESTRICT _data_srcEdge, - real_t* RESTRICT _data_srcVertex, - real_t forVertex_0, - real_t forVertex_1, - real_t forVertex_2, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_0comp2, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_1comp2, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - real_t macro_vertex_coord_id_2comp2, - real_t macro_vertex_coord_id_3comp0, - real_t macro_vertex_coord_id_3comp1, - real_t macro_vertex_coord_id_3comp2, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float, - real_t radRayVertex, - real_t radRefVertex, - real_t rayVertex_0, - real_t rayVertex_1, - real_t rayVertex_2, - real_t refVertex_0, - real_t refVertex_1, - real_t refVertex_2, - real_t thrVertex_0, - real_t thrVertex_1, - real_t thrVertex_2 ) const; - /// Kernel type: toMatrix + void apply_P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_1_macro_3D( real_t* RESTRICT _data_dst, + real_t* RESTRICT _data_srcEdge, + real_t* RESTRICT _data_srcVertex, + real_t forVertex_0, + real_t forVertex_1, + real_t forVertex_2, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_0comp2, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_1comp2, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + real_t macro_vertex_coord_id_2comp2, + real_t macro_vertex_coord_id_3comp0, + real_t macro_vertex_coord_id_3comp1, + real_t macro_vertex_coord_id_3comp2, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float, + real_t radRayVertex, + real_t radRefVertex, + real_t rayVertex_0, + real_t rayVertex_1, + real_t rayVertex_2, + real_t refVertex_0, + real_t refVertex_1, + real_t refVertex_2, + real_t thrVertex_0, + real_t thrVertex_1, + real_t thrVertex_2 ) const; + + /// Integral: P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_1 + /// - volume element: tetrahedron, dim: 3, vertices: 4, spacedim: 3 + /// - kernel type: toMatrix + /// - loop strategy: SAWTOOTH /// - quadrature rule: Hammer-Marlowe-Stroud 1 | points: 4, degree: 2 + /// - blending map: IcosahedralShellMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 500 715 46 4 4 0 0 4 - void toMatrix_macro_3D( idx_t* RESTRICT _data_dst, - idx_t* RESTRICT _data_srcEdge, - idx_t* RESTRICT _data_srcVertex, - real_t forVertex_0, - real_t forVertex_1, - real_t forVertex_2, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_0comp2, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_1comp2, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - real_t macro_vertex_coord_id_2comp2, - real_t macro_vertex_coord_id_3comp0, - real_t macro_vertex_coord_id_3comp1, - real_t macro_vertex_coord_id_3comp2, - std::shared_ptr< SparseMatrixProxy > mat, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float, - real_t radRayVertex, - real_t radRefVertex, - real_t rayVertex_0, - real_t rayVertex_1, - real_t rayVertex_2, - real_t refVertex_0, - real_t refVertex_1, - real_t refVertex_2, - real_t thrVertex_0, - real_t thrVertex_1, - real_t thrVertex_2 ) const; + void toMatrix_P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_1_macro_3D( idx_t* RESTRICT _data_dst, + idx_t* RESTRICT _data_srcEdge, + idx_t* RESTRICT _data_srcVertex, + real_t forVertex_0, + real_t forVertex_1, + real_t forVertex_2, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_0comp2, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_1comp2, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + real_t macro_vertex_coord_id_2comp2, + real_t macro_vertex_coord_id_3comp0, + real_t macro_vertex_coord_id_3comp1, + real_t macro_vertex_coord_id_3comp2, + std::shared_ptr< SparseMatrixProxy > mat, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float, + real_t radRayVertex, + real_t radRefVertex, + real_t rayVertex_0, + real_t rayVertex_1, + real_t rayVertex_2, + real_t refVertex_0, + real_t refVertex_1, + real_t refVertex_2, + real_t thrVertex_0, + real_t thrVertex_1, + real_t thrVertex_2 ) const; }; } // namespace operatorgeneration diff --git a/operators/divergence/P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_2.cpp b/operators/divergence/P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_2.cpp index 7099d22ea5188633c0bbf6d86aaec42ebda091b8..f92a94c78715a0fcd84866dd2b08409ad681897f 100644 --- a/operators/divergence/P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_2.cpp +++ b/operators/divergence/P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_2.cpp @@ -145,7 +145,7 @@ void P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_2::apply( const P2Function this->timingTree_->start( "kernel" ); - apply_macro_3D( + apply_P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_2_macro_3D( _data_dst, _data_srcEdge, @@ -178,6 +178,7 @@ void P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_2::apply( const P2Function thrVertex_0, thrVertex_1, thrVertex_2 ); + this->timingTree_->stop( "kernel" ); } @@ -261,7 +262,7 @@ void P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_2::toMatrix( const std::sh this->timingTree_->start( "kernel" ); - toMatrix_macro_3D( + toMatrix_P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_2_macro_3D( _data_dst, _data_srcEdge, @@ -295,6 +296,7 @@ void P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_2::toMatrix( const std::sh thrVertex_0, thrVertex_1, thrVertex_2 ); + this->timingTree_->stop( "kernel" ); } } diff --git a/operators/divergence/P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_2.hpp b/operators/divergence/P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_2.hpp index abb88eb565b6116031521c132bbb3e119318a956..d0df6fe8cef0700f9f2d26a57b32c0613fce7673 100644 --- a/operators/divergence/P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_2.hpp +++ b/operators/divergence/P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_2.hpp @@ -29,6 +29,7 @@ #include "core/DataTypes.h" #include "hyteg/LikwidWrapper.hpp" +#include "hyteg/boundary/BoundaryConditions.hpp" #include "hyteg/communication/Syncing.hpp" #include "hyteg/edgedofspace/EdgeDoFMacroCell.hpp" #include "hyteg/geometry/IcosahedralShellMap.hpp" @@ -37,6 +38,7 @@ #include "hyteg/p2functionspace/P2Function.hpp" #include "hyteg/primitivestorage/PrimitiveStorage.hpp" #include "hyteg/sparseassembly/SparseMatrixProxy.hpp" +#include "hyteg/types/types.hpp" #define FUNC_PREFIX @@ -77,81 +79,90 @@ class P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_2 : public Operator< P2Fu protected: private: - /// Kernel type: apply + /// Integral: P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_2 + /// - volume element: tetrahedron, dim: 3, vertices: 4, spacedim: 3 + /// - kernel type: apply + /// - loop strategy: SAWTOOTH /// - quadrature rule: Hammer-Marlowe-Stroud 1 | points: 4, degree: 2 + /// - blending map: IcosahedralShellMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 540 755 46 4 4 0 0 1 - void apply_macro_3D( real_t* RESTRICT _data_dst, - real_t* RESTRICT _data_srcEdge, - real_t* RESTRICT _data_srcVertex, - real_t forVertex_0, - real_t forVertex_1, - real_t forVertex_2, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_0comp2, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_1comp2, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - real_t macro_vertex_coord_id_2comp2, - real_t macro_vertex_coord_id_3comp0, - real_t macro_vertex_coord_id_3comp1, - real_t macro_vertex_coord_id_3comp2, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float, - real_t radRayVertex, - real_t radRefVertex, - real_t rayVertex_0, - real_t rayVertex_1, - real_t rayVertex_2, - real_t refVertex_0, - real_t refVertex_1, - real_t refVertex_2, - real_t thrVertex_0, - real_t thrVertex_1, - real_t thrVertex_2 ) const; - /// Kernel type: toMatrix + void apply_P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_2_macro_3D( real_t* RESTRICT _data_dst, + real_t* RESTRICT _data_srcEdge, + real_t* RESTRICT _data_srcVertex, + real_t forVertex_0, + real_t forVertex_1, + real_t forVertex_2, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_0comp2, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_1comp2, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + real_t macro_vertex_coord_id_2comp2, + real_t macro_vertex_coord_id_3comp0, + real_t macro_vertex_coord_id_3comp1, + real_t macro_vertex_coord_id_3comp2, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float, + real_t radRayVertex, + real_t radRefVertex, + real_t rayVertex_0, + real_t rayVertex_1, + real_t rayVertex_2, + real_t refVertex_0, + real_t refVertex_1, + real_t refVertex_2, + real_t thrVertex_0, + real_t thrVertex_1, + real_t thrVertex_2 ) const; + + /// Integral: P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_2 + /// - volume element: tetrahedron, dim: 3, vertices: 4, spacedim: 3 + /// - kernel type: toMatrix + /// - loop strategy: SAWTOOTH /// - quadrature rule: Hammer-Marlowe-Stroud 1 | points: 4, degree: 2 + /// - blending map: IcosahedralShellMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 500 715 46 4 4 0 0 4 - void toMatrix_macro_3D( idx_t* RESTRICT _data_dst, - idx_t* RESTRICT _data_srcEdge, - idx_t* RESTRICT _data_srcVertex, - real_t forVertex_0, - real_t forVertex_1, - real_t forVertex_2, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_0comp2, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_1comp2, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - real_t macro_vertex_coord_id_2comp2, - real_t macro_vertex_coord_id_3comp0, - real_t macro_vertex_coord_id_3comp1, - real_t macro_vertex_coord_id_3comp2, - std::shared_ptr< SparseMatrixProxy > mat, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float, - real_t radRayVertex, - real_t radRefVertex, - real_t rayVertex_0, - real_t rayVertex_1, - real_t rayVertex_2, - real_t refVertex_0, - real_t refVertex_1, - real_t refVertex_2, - real_t thrVertex_0, - real_t thrVertex_1, - real_t thrVertex_2 ) const; + void toMatrix_P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_2_macro_3D( idx_t* RESTRICT _data_dst, + idx_t* RESTRICT _data_srcEdge, + idx_t* RESTRICT _data_srcVertex, + real_t forVertex_0, + real_t forVertex_1, + real_t forVertex_2, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_0comp2, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_1comp2, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + real_t macro_vertex_coord_id_2comp2, + real_t macro_vertex_coord_id_3comp0, + real_t macro_vertex_coord_id_3comp1, + real_t macro_vertex_coord_id_3comp2, + std::shared_ptr< SparseMatrixProxy > mat, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float, + real_t radRayVertex, + real_t radRefVertex, + real_t rayVertex_0, + real_t rayVertex_1, + real_t rayVertex_2, + real_t refVertex_0, + real_t refVertex_1, + real_t refVertex_2, + real_t thrVertex_0, + real_t thrVertex_1, + real_t thrVertex_2 ) const; }; } // namespace operatorgeneration diff --git a/operators/divergence/P2ToP1ElementwiseDivergence_0_0.cpp b/operators/divergence/P2ToP1ElementwiseDivergence_0_0.cpp index 4b0f4130fa782ed00bb97bb89cb44634d7b4e6d2..e03a5a1af20333a4ea050453d39c611b88b128aa 100644 --- a/operators/divergence/P2ToP1ElementwiseDivergence_0_0.cpp +++ b/operators/divergence/P2ToP1ElementwiseDivergence_0_0.cpp @@ -127,7 +127,7 @@ void P2ToP1ElementwiseDivergence_0_0::apply( const P2Function< real_t >& src, this->timingTree_->start( "kernel" ); - apply_macro_3D( + apply_P2ToP1ElementwiseDivergence_0_0_macro_3D( _data_dst, _data_srcEdge, @@ -146,6 +146,7 @@ void P2ToP1ElementwiseDivergence_0_0::apply( const P2Function< real_t >& src, macro_vertex_coord_id_3comp2, micro_edges_per_macro_edge, micro_edges_per_macro_edge_float ); + this->timingTree_->stop( "kernel" ); } @@ -194,7 +195,7 @@ void P2ToP1ElementwiseDivergence_0_0::apply( const P2Function< real_t >& src, this->timingTree_->start( "kernel" ); - apply_macro_2D( + apply_P2ToP1ElementwiseDivergence_0_0_macro_2D( _data_dst, _data_srcEdge, @@ -207,6 +208,7 @@ void P2ToP1ElementwiseDivergence_0_0::apply( const P2Function< real_t >& src, macro_vertex_coord_id_2comp1, micro_edges_per_macro_edge, micro_edges_per_macro_edge_float ); + this->timingTree_->stop( "kernel" ); } @@ -268,7 +270,7 @@ void P2ToP1ElementwiseDivergence_0_0::toMatrix( const std::shared_ptr< SparseMat this->timingTree_->start( "kernel" ); - toMatrix_macro_3D( + toMatrix_P2ToP1ElementwiseDivergence_0_0_macro_3D( _data_dst, _data_srcEdge, @@ -288,6 +290,7 @@ void P2ToP1ElementwiseDivergence_0_0::toMatrix( const std::shared_ptr< SparseMat mat, micro_edges_per_macro_edge, micro_edges_per_macro_edge_float ); + this->timingTree_->stop( "kernel" ); } } @@ -317,7 +320,7 @@ void P2ToP1ElementwiseDivergence_0_0::toMatrix( const std::shared_ptr< SparseMat this->timingTree_->start( "kernel" ); - toMatrix_macro_2D( + toMatrix_P2ToP1ElementwiseDivergence_0_0_macro_2D( _data_dst, _data_srcEdge, @@ -331,6 +334,7 @@ void P2ToP1ElementwiseDivergence_0_0::toMatrix( const std::shared_ptr< SparseMat mat, micro_edges_per_macro_edge, micro_edges_per_macro_edge_float ); + this->timingTree_->stop( "kernel" ); } } diff --git a/operators/divergence/P2ToP1ElementwiseDivergence_0_0.hpp b/operators/divergence/P2ToP1ElementwiseDivergence_0_0.hpp index 653ee1b0a460a215c11002a8f568c16f6b5e2234..8657dc1717787b0bfe05f0c79b21a2ae80be8bbc 100644 --- a/operators/divergence/P2ToP1ElementwiseDivergence_0_0.hpp +++ b/operators/divergence/P2ToP1ElementwiseDivergence_0_0.hpp @@ -29,6 +29,7 @@ #include "core/DataTypes.h" #include "hyteg/LikwidWrapper.hpp" +#include "hyteg/boundary/BoundaryConditions.hpp" #include "hyteg/communication/Syncing.hpp" #include "hyteg/edgedofspace/EdgeDoFMacroCell.hpp" #include "hyteg/operators/Operator.hpp" @@ -36,6 +37,7 @@ #include "hyteg/p2functionspace/P2Function.hpp" #include "hyteg/primitivestorage/PrimitiveStorage.hpp" #include "hyteg/sparseassembly/SparseMatrixProxy.hpp" +#include "hyteg/types/types.hpp" #define FUNC_PREFIX @@ -74,88 +76,107 @@ class P2ToP1ElementwiseDivergence_0_0 : public Operator< P2Function< real_t >, P protected: private: - /// Kernel type: apply + /// Integral: P2ToP1ElementwiseDivergence_0_0 + /// - volume element: triangle, dim: 2, vertices: 3, spacedim: 2 + /// - kernel type: apply + /// - loop strategy: CUBES /// - quadrature rule: Dunavant 2 | points: 3, degree: 2 + /// - blending map: IdentityMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 116 132 12 0 0 0 0 1 - void apply_macro_2D( real_t* RESTRICT _data_dst, - real_t* RESTRICT _data_srcEdge, - real_t* RESTRICT _data_srcVertex, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float ) const; - /// Kernel type: apply + void apply_P2ToP1ElementwiseDivergence_0_0_macro_2D( real_t* RESTRICT _data_dst, + real_t* RESTRICT _data_srcEdge, + real_t* RESTRICT _data_srcVertex, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float ) const; + + /// Integral: P2ToP1ElementwiseDivergence_0_0 + /// - volume element: tetrahedron, dim: 3, vertices: 4, spacedim: 3 + /// - kernel type: apply + /// - loop strategy: CUBES /// - quadrature rule: Hammer-Marlowe-Stroud 1 | points: 4, degree: 2 + /// - blending map: IdentityMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 345 352 36 0 0 0 0 1 - void apply_macro_3D( real_t* RESTRICT _data_dst, - real_t* RESTRICT _data_srcEdge, - real_t* RESTRICT _data_srcVertex, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_0comp2, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_1comp2, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - real_t macro_vertex_coord_id_2comp2, - real_t macro_vertex_coord_id_3comp0, - real_t macro_vertex_coord_id_3comp1, - real_t macro_vertex_coord_id_3comp2, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float ) const; - /// Kernel type: toMatrix + void apply_P2ToP1ElementwiseDivergence_0_0_macro_3D( real_t* RESTRICT _data_dst, + real_t* RESTRICT _data_srcEdge, + real_t* RESTRICT _data_srcVertex, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_0comp2, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_1comp2, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + real_t macro_vertex_coord_id_2comp2, + real_t macro_vertex_coord_id_3comp0, + real_t macro_vertex_coord_id_3comp1, + real_t macro_vertex_coord_id_3comp2, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float ) const; + + /// Integral: P2ToP1ElementwiseDivergence_0_0 + /// - volume element: triangle, dim: 2, vertices: 3, spacedim: 2 + /// - kernel type: toMatrix + /// - loop strategy: CUBES /// - quadrature rule: Dunavant 2 | points: 3, degree: 2 + /// - blending map: IdentityMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 98 114 12 0 0 0 0 4 - void toMatrix_macro_2D( idx_t* RESTRICT _data_dst, - idx_t* RESTRICT _data_srcEdge, - idx_t* RESTRICT _data_srcVertex, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - std::shared_ptr< SparseMatrixProxy > mat, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float ) const; - /// Kernel type: toMatrix + void toMatrix_P2ToP1ElementwiseDivergence_0_0_macro_2D( idx_t* RESTRICT _data_dst, + idx_t* RESTRICT _data_srcEdge, + idx_t* RESTRICT _data_srcVertex, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + std::shared_ptr< SparseMatrixProxy > mat, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float ) const; + + /// Integral: P2ToP1ElementwiseDivergence_0_0 + /// - volume element: tetrahedron, dim: 3, vertices: 4, spacedim: 3 + /// - kernel type: toMatrix + /// - loop strategy: CUBES /// - quadrature rule: Hammer-Marlowe-Stroud 1 | points: 4, degree: 2 + /// - blending map: IdentityMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 305 312 36 0 0 0 0 4 - void toMatrix_macro_3D( idx_t* RESTRICT _data_dst, - idx_t* RESTRICT _data_srcEdge, - idx_t* RESTRICT _data_srcVertex, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_0comp2, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_1comp2, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - real_t macro_vertex_coord_id_2comp2, - real_t macro_vertex_coord_id_3comp0, - real_t macro_vertex_coord_id_3comp1, - real_t macro_vertex_coord_id_3comp2, - std::shared_ptr< SparseMatrixProxy > mat, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float ) const; + void toMatrix_P2ToP1ElementwiseDivergence_0_0_macro_3D( idx_t* RESTRICT _data_dst, + idx_t* RESTRICT _data_srcEdge, + idx_t* RESTRICT _data_srcVertex, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_0comp2, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_1comp2, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + real_t macro_vertex_coord_id_2comp2, + real_t macro_vertex_coord_id_3comp0, + real_t macro_vertex_coord_id_3comp1, + real_t macro_vertex_coord_id_3comp2, + std::shared_ptr< SparseMatrixProxy > mat, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float ) const; }; } // namespace operatorgeneration diff --git a/operators/divergence/P2ToP1ElementwiseDivergence_0_1.cpp b/operators/divergence/P2ToP1ElementwiseDivergence_0_1.cpp index e0bc14a7fc64f247a54048dd67987119e72e3f13..25f0678d6eb2fc992e203dbe11a6f3c69fcf42d8 100644 --- a/operators/divergence/P2ToP1ElementwiseDivergence_0_1.cpp +++ b/operators/divergence/P2ToP1ElementwiseDivergence_0_1.cpp @@ -127,7 +127,7 @@ void P2ToP1ElementwiseDivergence_0_1::apply( const P2Function< real_t >& src, this->timingTree_->start( "kernel" ); - apply_macro_3D( + apply_P2ToP1ElementwiseDivergence_0_1_macro_3D( _data_dst, _data_srcEdge, @@ -146,6 +146,7 @@ void P2ToP1ElementwiseDivergence_0_1::apply( const P2Function< real_t >& src, macro_vertex_coord_id_3comp2, micro_edges_per_macro_edge, micro_edges_per_macro_edge_float ); + this->timingTree_->stop( "kernel" ); } @@ -194,7 +195,7 @@ void P2ToP1ElementwiseDivergence_0_1::apply( const P2Function< real_t >& src, this->timingTree_->start( "kernel" ); - apply_macro_2D( + apply_P2ToP1ElementwiseDivergence_0_1_macro_2D( _data_dst, _data_srcEdge, @@ -207,6 +208,7 @@ void P2ToP1ElementwiseDivergence_0_1::apply( const P2Function< real_t >& src, macro_vertex_coord_id_2comp1, micro_edges_per_macro_edge, micro_edges_per_macro_edge_float ); + this->timingTree_->stop( "kernel" ); } @@ -268,7 +270,7 @@ void P2ToP1ElementwiseDivergence_0_1::toMatrix( const std::shared_ptr< SparseMat this->timingTree_->start( "kernel" ); - toMatrix_macro_3D( + toMatrix_P2ToP1ElementwiseDivergence_0_1_macro_3D( _data_dst, _data_srcEdge, @@ -288,6 +290,7 @@ void P2ToP1ElementwiseDivergence_0_1::toMatrix( const std::shared_ptr< SparseMat mat, micro_edges_per_macro_edge, micro_edges_per_macro_edge_float ); + this->timingTree_->stop( "kernel" ); } } @@ -317,7 +320,7 @@ void P2ToP1ElementwiseDivergence_0_1::toMatrix( const std::shared_ptr< SparseMat this->timingTree_->start( "kernel" ); - toMatrix_macro_2D( + toMatrix_P2ToP1ElementwiseDivergence_0_1_macro_2D( _data_dst, _data_srcEdge, @@ -331,6 +334,7 @@ void P2ToP1ElementwiseDivergence_0_1::toMatrix( const std::shared_ptr< SparseMat mat, micro_edges_per_macro_edge, micro_edges_per_macro_edge_float ); + this->timingTree_->stop( "kernel" ); } } diff --git a/operators/divergence/P2ToP1ElementwiseDivergence_0_1.hpp b/operators/divergence/P2ToP1ElementwiseDivergence_0_1.hpp index 58716343bfad8765363442dedaa9c0459e9276ac..389d7638d76c701e5fa2c4a7dad089c30a715a71 100644 --- a/operators/divergence/P2ToP1ElementwiseDivergence_0_1.hpp +++ b/operators/divergence/P2ToP1ElementwiseDivergence_0_1.hpp @@ -29,6 +29,7 @@ #include "core/DataTypes.h" #include "hyteg/LikwidWrapper.hpp" +#include "hyteg/boundary/BoundaryConditions.hpp" #include "hyteg/communication/Syncing.hpp" #include "hyteg/edgedofspace/EdgeDoFMacroCell.hpp" #include "hyteg/operators/Operator.hpp" @@ -36,6 +37,7 @@ #include "hyteg/p2functionspace/P2Function.hpp" #include "hyteg/primitivestorage/PrimitiveStorage.hpp" #include "hyteg/sparseassembly/SparseMatrixProxy.hpp" +#include "hyteg/types/types.hpp" #define FUNC_PREFIX @@ -74,88 +76,107 @@ class P2ToP1ElementwiseDivergence_0_1 : public Operator< P2Function< real_t >, P protected: private: - /// Kernel type: apply + /// Integral: P2ToP1ElementwiseDivergence_0_1 + /// - volume element: triangle, dim: 2, vertices: 3, spacedim: 2 + /// - kernel type: apply + /// - loop strategy: CUBES /// - quadrature rule: Dunavant 2 | points: 3, degree: 2 + /// - blending map: IdentityMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 116 132 12 0 0 0 0 1 - void apply_macro_2D( real_t* RESTRICT _data_dst, - real_t* RESTRICT _data_srcEdge, - real_t* RESTRICT _data_srcVertex, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float ) const; - /// Kernel type: apply + void apply_P2ToP1ElementwiseDivergence_0_1_macro_2D( real_t* RESTRICT _data_dst, + real_t* RESTRICT _data_srcEdge, + real_t* RESTRICT _data_srcVertex, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float ) const; + + /// Integral: P2ToP1ElementwiseDivergence_0_1 + /// - volume element: tetrahedron, dim: 3, vertices: 4, spacedim: 3 + /// - kernel type: apply + /// - loop strategy: CUBES /// - quadrature rule: Hammer-Marlowe-Stroud 1 | points: 4, degree: 2 + /// - blending map: IdentityMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 345 352 36 0 0 0 0 1 - void apply_macro_3D( real_t* RESTRICT _data_dst, - real_t* RESTRICT _data_srcEdge, - real_t* RESTRICT _data_srcVertex, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_0comp2, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_1comp2, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - real_t macro_vertex_coord_id_2comp2, - real_t macro_vertex_coord_id_3comp0, - real_t macro_vertex_coord_id_3comp1, - real_t macro_vertex_coord_id_3comp2, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float ) const; - /// Kernel type: toMatrix + void apply_P2ToP1ElementwiseDivergence_0_1_macro_3D( real_t* RESTRICT _data_dst, + real_t* RESTRICT _data_srcEdge, + real_t* RESTRICT _data_srcVertex, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_0comp2, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_1comp2, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + real_t macro_vertex_coord_id_2comp2, + real_t macro_vertex_coord_id_3comp0, + real_t macro_vertex_coord_id_3comp1, + real_t macro_vertex_coord_id_3comp2, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float ) const; + + /// Integral: P2ToP1ElementwiseDivergence_0_1 + /// - volume element: triangle, dim: 2, vertices: 3, spacedim: 2 + /// - kernel type: toMatrix + /// - loop strategy: CUBES /// - quadrature rule: Dunavant 2 | points: 3, degree: 2 + /// - blending map: IdentityMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 98 114 12 0 0 0 0 4 - void toMatrix_macro_2D( idx_t* RESTRICT _data_dst, - idx_t* RESTRICT _data_srcEdge, - idx_t* RESTRICT _data_srcVertex, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - std::shared_ptr< SparseMatrixProxy > mat, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float ) const; - /// Kernel type: toMatrix + void toMatrix_P2ToP1ElementwiseDivergence_0_1_macro_2D( idx_t* RESTRICT _data_dst, + idx_t* RESTRICT _data_srcEdge, + idx_t* RESTRICT _data_srcVertex, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + std::shared_ptr< SparseMatrixProxy > mat, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float ) const; + + /// Integral: P2ToP1ElementwiseDivergence_0_1 + /// - volume element: tetrahedron, dim: 3, vertices: 4, spacedim: 3 + /// - kernel type: toMatrix + /// - loop strategy: CUBES /// - quadrature rule: Hammer-Marlowe-Stroud 1 | points: 4, degree: 2 + /// - blending map: IdentityMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 305 312 36 0 0 0 0 4 - void toMatrix_macro_3D( idx_t* RESTRICT _data_dst, - idx_t* RESTRICT _data_srcEdge, - idx_t* RESTRICT _data_srcVertex, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_0comp2, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_1comp2, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - real_t macro_vertex_coord_id_2comp2, - real_t macro_vertex_coord_id_3comp0, - real_t macro_vertex_coord_id_3comp1, - real_t macro_vertex_coord_id_3comp2, - std::shared_ptr< SparseMatrixProxy > mat, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float ) const; + void toMatrix_P2ToP1ElementwiseDivergence_0_1_macro_3D( idx_t* RESTRICT _data_dst, + idx_t* RESTRICT _data_srcEdge, + idx_t* RESTRICT _data_srcVertex, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_0comp2, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_1comp2, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + real_t macro_vertex_coord_id_2comp2, + real_t macro_vertex_coord_id_3comp0, + real_t macro_vertex_coord_id_3comp1, + real_t macro_vertex_coord_id_3comp2, + std::shared_ptr< SparseMatrixProxy > mat, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float ) const; }; } // namespace operatorgeneration diff --git a/operators/divergence/P2ToP1ElementwiseDivergence_0_2.cpp b/operators/divergence/P2ToP1ElementwiseDivergence_0_2.cpp index 920dcae6cf90ca8d9a4b7653e6e9749042f89954..5a88c93024b1636923b49a78edd347fbf1280f98 100644 --- a/operators/divergence/P2ToP1ElementwiseDivergence_0_2.cpp +++ b/operators/divergence/P2ToP1ElementwiseDivergence_0_2.cpp @@ -127,7 +127,7 @@ void P2ToP1ElementwiseDivergence_0_2::apply( const P2Function< real_t >& src, this->timingTree_->start( "kernel" ); - apply_macro_3D( + apply_P2ToP1ElementwiseDivergence_0_2_macro_3D( _data_dst, _data_srcEdge, @@ -146,6 +146,7 @@ void P2ToP1ElementwiseDivergence_0_2::apply( const P2Function< real_t >& src, macro_vertex_coord_id_3comp2, micro_edges_per_macro_edge, micro_edges_per_macro_edge_float ); + this->timingTree_->stop( "kernel" ); } @@ -212,7 +213,7 @@ void P2ToP1ElementwiseDivergence_0_2::toMatrix( const std::shared_ptr< SparseMat this->timingTree_->start( "kernel" ); - toMatrix_macro_3D( + toMatrix_P2ToP1ElementwiseDivergence_0_2_macro_3D( _data_dst, _data_srcEdge, @@ -232,6 +233,7 @@ void P2ToP1ElementwiseDivergence_0_2::toMatrix( const std::shared_ptr< SparseMat mat, micro_edges_per_macro_edge, micro_edges_per_macro_edge_float ); + this->timingTree_->stop( "kernel" ); } } diff --git a/operators/divergence/P2ToP1ElementwiseDivergence_0_2.hpp b/operators/divergence/P2ToP1ElementwiseDivergence_0_2.hpp index ebec476b3470390fa6f35ac450dfd80d6ebae00e..01a9a941a7f6a5ec801bed3c2a7dbbb9b471cc19 100644 --- a/operators/divergence/P2ToP1ElementwiseDivergence_0_2.hpp +++ b/operators/divergence/P2ToP1ElementwiseDivergence_0_2.hpp @@ -29,6 +29,7 @@ #include "core/DataTypes.h" #include "hyteg/LikwidWrapper.hpp" +#include "hyteg/boundary/BoundaryConditions.hpp" #include "hyteg/communication/Syncing.hpp" #include "hyteg/edgedofspace/EdgeDoFMacroCell.hpp" #include "hyteg/operators/Operator.hpp" @@ -36,6 +37,7 @@ #include "hyteg/p2functionspace/P2Function.hpp" #include "hyteg/primitivestorage/PrimitiveStorage.hpp" #include "hyteg/sparseassembly/SparseMatrixProxy.hpp" +#include "hyteg/types/types.hpp" #define FUNC_PREFIX @@ -74,53 +76,62 @@ class P2ToP1ElementwiseDivergence_0_2 : public Operator< P2Function< real_t >, P protected: private: - /// Kernel type: apply + /// Integral: P2ToP1ElementwiseDivergence_0_2 + /// - volume element: tetrahedron, dim: 3, vertices: 4, spacedim: 3 + /// - kernel type: apply + /// - loop strategy: CUBES /// - quadrature rule: Hammer-Marlowe-Stroud 1 | points: 4, degree: 2 + /// - blending map: IdentityMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 345 352 36 0 0 0 0 1 - void apply_macro_3D( real_t* RESTRICT _data_dst, - real_t* RESTRICT _data_srcEdge, - real_t* RESTRICT _data_srcVertex, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_0comp2, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_1comp2, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - real_t macro_vertex_coord_id_2comp2, - real_t macro_vertex_coord_id_3comp0, - real_t macro_vertex_coord_id_3comp1, - real_t macro_vertex_coord_id_3comp2, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float ) const; - /// Kernel type: toMatrix + void apply_P2ToP1ElementwiseDivergence_0_2_macro_3D( real_t* RESTRICT _data_dst, + real_t* RESTRICT _data_srcEdge, + real_t* RESTRICT _data_srcVertex, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_0comp2, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_1comp2, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + real_t macro_vertex_coord_id_2comp2, + real_t macro_vertex_coord_id_3comp0, + real_t macro_vertex_coord_id_3comp1, + real_t macro_vertex_coord_id_3comp2, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float ) const; + + /// Integral: P2ToP1ElementwiseDivergence_0_2 + /// - volume element: tetrahedron, dim: 3, vertices: 4, spacedim: 3 + /// - kernel type: toMatrix + /// - loop strategy: CUBES /// - quadrature rule: Hammer-Marlowe-Stroud 1 | points: 4, degree: 2 + /// - blending map: IdentityMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 305 312 36 0 0 0 0 4 - void toMatrix_macro_3D( idx_t* RESTRICT _data_dst, - idx_t* RESTRICT _data_srcEdge, - idx_t* RESTRICT _data_srcVertex, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_0comp2, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_1comp2, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - real_t macro_vertex_coord_id_2comp2, - real_t macro_vertex_coord_id_3comp0, - real_t macro_vertex_coord_id_3comp1, - real_t macro_vertex_coord_id_3comp2, - std::shared_ptr< SparseMatrixProxy > mat, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float ) const; + void toMatrix_P2ToP1ElementwiseDivergence_0_2_macro_3D( idx_t* RESTRICT _data_dst, + idx_t* RESTRICT _data_srcEdge, + idx_t* RESTRICT _data_srcVertex, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_0comp2, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_1comp2, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + real_t macro_vertex_coord_id_2comp2, + real_t macro_vertex_coord_id_3comp0, + real_t macro_vertex_coord_id_3comp1, + real_t macro_vertex_coord_id_3comp2, + std::shared_ptr< SparseMatrixProxy > mat, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float ) const; }; } // namespace operatorgeneration diff --git a/operators/divergence/avx/P2ToP1ElementwiseDivergenceAnnulusMap_0_0_apply_macro_2D.cpp b/operators/divergence/avx/P2ToP1ElementwiseDivergenceAnnulusMap_0_0_apply_P2ToP1ElementwiseDivergenceAnnulusMap_0_0_macro_2D.cpp similarity index 99% rename from operators/divergence/avx/P2ToP1ElementwiseDivergenceAnnulusMap_0_0_apply_macro_2D.cpp rename to operators/divergence/avx/P2ToP1ElementwiseDivergenceAnnulusMap_0_0_apply_P2ToP1ElementwiseDivergenceAnnulusMap_0_0_macro_2D.cpp index bd473d03d8931cbd11852bea70c44eacc5240216..47c44040ad0af5e7eed7ae83df4f0bca468c7712 100644 --- a/operators/divergence/avx/P2ToP1ElementwiseDivergenceAnnulusMap_0_0_apply_macro_2D.cpp +++ b/operators/divergence/avx/P2ToP1ElementwiseDivergenceAnnulusMap_0_0_apply_P2ToP1ElementwiseDivergenceAnnulusMap_0_0_macro_2D.cpp @@ -45,6 +45,10 @@ + + + + @@ -58,7 +62,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ToP1ElementwiseDivergenceAnnulusMap_0_0::apply_macro_2D( real_t * RESTRICT _data_dst, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const +void P2ToP1ElementwiseDivergenceAnnulusMap_0_0::apply_P2ToP1ElementwiseDivergenceAnnulusMap_0_0_macro_2D( real_t * RESTRICT _data_dst, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const { { const real_t tmp_coords_jac_0_GRAY = 1.0 / (micro_edges_per_macro_edge_float)*1.0; diff --git a/operators/divergence/avx/P2ToP1ElementwiseDivergenceAnnulusMap_0_1_apply_macro_2D.cpp b/operators/divergence/avx/P2ToP1ElementwiseDivergenceAnnulusMap_0_1_apply_P2ToP1ElementwiseDivergenceAnnulusMap_0_1_macro_2D.cpp similarity index 99% rename from operators/divergence/avx/P2ToP1ElementwiseDivergenceAnnulusMap_0_1_apply_macro_2D.cpp rename to operators/divergence/avx/P2ToP1ElementwiseDivergenceAnnulusMap_0_1_apply_P2ToP1ElementwiseDivergenceAnnulusMap_0_1_macro_2D.cpp index 2ca6bcaebf96329b70088100e65e8b07aa7e409c..157bb143d18cb9d9bd1ec3c83c86b2f30f446b70 100644 --- a/operators/divergence/avx/P2ToP1ElementwiseDivergenceAnnulusMap_0_1_apply_macro_2D.cpp +++ b/operators/divergence/avx/P2ToP1ElementwiseDivergenceAnnulusMap_0_1_apply_P2ToP1ElementwiseDivergenceAnnulusMap_0_1_macro_2D.cpp @@ -45,6 +45,10 @@ + + + + @@ -58,7 +62,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ToP1ElementwiseDivergenceAnnulusMap_0_1::apply_macro_2D( real_t * RESTRICT _data_dst, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const +void P2ToP1ElementwiseDivergenceAnnulusMap_0_1::apply_P2ToP1ElementwiseDivergenceAnnulusMap_0_1_macro_2D( real_t * RESTRICT _data_dst, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const { { const real_t tmp_coords_jac_0_GRAY = 1.0 / (micro_edges_per_macro_edge_float)*1.0; diff --git a/operators/divergence/avx/P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_0_apply_macro_3D.cpp b/operators/divergence/avx/P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_0_apply_P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_0_macro_3D.cpp similarity index 99% rename from operators/divergence/avx/P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_0_apply_macro_3D.cpp rename to operators/divergence/avx/P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_0_apply_P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_0_macro_3D.cpp index 96df5b0a4aafadd3393ddbd1cb3fba1bb1c129ac..4324487f39c9c0b27bd61822d0a8fea21fe93d82 100644 --- a/operators/divergence/avx/P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_0_apply_macro_3D.cpp +++ b/operators/divergence/avx/P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_0_apply_P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_0_macro_3D.cpp @@ -45,6 +45,10 @@ + + + + @@ -58,7 +62,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_0::apply_macro_3D( real_t * RESTRICT _data_dst, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const +void P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_0::apply_P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_0_macro_3D( real_t * RESTRICT _data_dst, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const { { const real_t tmp_coords_jac_0_WHITE_UP = 1.0 / (micro_edges_per_macro_edge_float)*1.0; diff --git a/operators/divergence/avx/P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_1_apply_macro_3D.cpp b/operators/divergence/avx/P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_1_apply_P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_1_macro_3D.cpp similarity index 99% rename from operators/divergence/avx/P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_1_apply_macro_3D.cpp rename to operators/divergence/avx/P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_1_apply_P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_1_macro_3D.cpp index ea48849b48b69852eb381ce8bb5f219aba50f883..fff3e8c5410f55e5bf9acf2d7f36b60a20a457e6 100644 --- a/operators/divergence/avx/P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_1_apply_macro_3D.cpp +++ b/operators/divergence/avx/P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_1_apply_P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_1_macro_3D.cpp @@ -45,6 +45,10 @@ + + + + @@ -58,7 +62,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_1::apply_macro_3D( real_t * RESTRICT _data_dst, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const +void P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_1::apply_P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_1_macro_3D( real_t * RESTRICT _data_dst, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const { { const real_t tmp_coords_jac_0_WHITE_UP = 1.0 / (micro_edges_per_macro_edge_float)*1.0; diff --git a/operators/divergence/avx/P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_2_apply_macro_3D.cpp b/operators/divergence/avx/P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_2_apply_P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_2_macro_3D.cpp similarity index 99% rename from operators/divergence/avx/P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_2_apply_macro_3D.cpp rename to operators/divergence/avx/P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_2_apply_P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_2_macro_3D.cpp index 334eca5cc0c99266f556403c98ac85c812c07030..605405527d2cd30a0cf5fc8d18b53b791a4182e6 100644 --- a/operators/divergence/avx/P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_2_apply_macro_3D.cpp +++ b/operators/divergence/avx/P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_2_apply_P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_2_macro_3D.cpp @@ -45,6 +45,10 @@ + + + + @@ -58,7 +62,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_2::apply_macro_3D( real_t * RESTRICT _data_dst, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const +void P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_2::apply_P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_2_macro_3D( real_t * RESTRICT _data_dst, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const { { const real_t tmp_coords_jac_0_WHITE_UP = 1.0 / (micro_edges_per_macro_edge_float)*1.0; diff --git a/operators/divergence/avx/P2ToP1ElementwiseDivergence_0_0_apply_macro_2D.cpp b/operators/divergence/avx/P2ToP1ElementwiseDivergence_0_0_apply_P2ToP1ElementwiseDivergence_0_0_macro_2D.cpp similarity index 99% rename from operators/divergence/avx/P2ToP1ElementwiseDivergence_0_0_apply_macro_2D.cpp rename to operators/divergence/avx/P2ToP1ElementwiseDivergence_0_0_apply_P2ToP1ElementwiseDivergence_0_0_macro_2D.cpp index 9b6ca0f6b307b42fb9f76504037587a324f4a4ae..f0ee48b0d74a6a94c2ff093cc8ca772c38d3c94d 100644 --- a/operators/divergence/avx/P2ToP1ElementwiseDivergence_0_0_apply_macro_2D.cpp +++ b/operators/divergence/avx/P2ToP1ElementwiseDivergence_0_0_apply_P2ToP1ElementwiseDivergence_0_0_macro_2D.cpp @@ -43,6 +43,10 @@ + + + + @@ -56,7 +60,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ToP1ElementwiseDivergence_0_0::apply_macro_2D( real_t * RESTRICT _data_dst, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P2ToP1ElementwiseDivergence_0_0::apply_P2ToP1ElementwiseDivergence_0_0_macro_2D( real_t * RESTRICT _data_dst, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t tmp_coords_jac_0_BLUE = 1.0 / (micro_edges_per_macro_edge_float)*1.0; diff --git a/operators/divergence/avx/P2ToP1ElementwiseDivergence_0_0_apply_macro_3D.cpp b/operators/divergence/avx/P2ToP1ElementwiseDivergence_0_0_apply_P2ToP1ElementwiseDivergence_0_0_macro_3D.cpp similarity index 99% rename from operators/divergence/avx/P2ToP1ElementwiseDivergence_0_0_apply_macro_3D.cpp rename to operators/divergence/avx/P2ToP1ElementwiseDivergence_0_0_apply_P2ToP1ElementwiseDivergence_0_0_macro_3D.cpp index 94e211f509d52dc59ad8eb2cfdd58e79e2b9c646..7b2391e3b19fdd8c5ade642427bd553f5cc6f1d5 100644 --- a/operators/divergence/avx/P2ToP1ElementwiseDivergence_0_0_apply_macro_3D.cpp +++ b/operators/divergence/avx/P2ToP1ElementwiseDivergence_0_0_apply_P2ToP1ElementwiseDivergence_0_0_macro_3D.cpp @@ -43,6 +43,10 @@ + + + + @@ -56,7 +60,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ToP1ElementwiseDivergence_0_0::apply_macro_3D( real_t * RESTRICT _data_dst, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P2ToP1ElementwiseDivergence_0_0::apply_P2ToP1ElementwiseDivergence_0_0_macro_3D( real_t * RESTRICT _data_dst, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t tmp_coords_jac_0_GREEN_DOWN = 1.0 / (micro_edges_per_macro_edge_float)*1.0; diff --git a/operators/divergence/avx/P2ToP1ElementwiseDivergence_0_1_apply_macro_2D.cpp b/operators/divergence/avx/P2ToP1ElementwiseDivergence_0_1_apply_P2ToP1ElementwiseDivergence_0_1_macro_2D.cpp similarity index 99% rename from operators/divergence/avx/P2ToP1ElementwiseDivergence_0_1_apply_macro_2D.cpp rename to operators/divergence/avx/P2ToP1ElementwiseDivergence_0_1_apply_P2ToP1ElementwiseDivergence_0_1_macro_2D.cpp index bcc2e8f2ce2ee835b693c686800993e75b529a3d..9bed3fde27faec31ee61f48069949af990b5b379 100644 --- a/operators/divergence/avx/P2ToP1ElementwiseDivergence_0_1_apply_macro_2D.cpp +++ b/operators/divergence/avx/P2ToP1ElementwiseDivergence_0_1_apply_P2ToP1ElementwiseDivergence_0_1_macro_2D.cpp @@ -43,6 +43,10 @@ + + + + @@ -56,7 +60,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ToP1ElementwiseDivergence_0_1::apply_macro_2D( real_t * RESTRICT _data_dst, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P2ToP1ElementwiseDivergence_0_1::apply_P2ToP1ElementwiseDivergence_0_1_macro_2D( real_t * RESTRICT _data_dst, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t tmp_coords_jac_0_BLUE = 1.0 / (micro_edges_per_macro_edge_float)*1.0; diff --git a/operators/divergence/avx/P2ToP1ElementwiseDivergence_0_1_apply_macro_3D.cpp b/operators/divergence/avx/P2ToP1ElementwiseDivergence_0_1_apply_P2ToP1ElementwiseDivergence_0_1_macro_3D.cpp similarity index 99% rename from operators/divergence/avx/P2ToP1ElementwiseDivergence_0_1_apply_macro_3D.cpp rename to operators/divergence/avx/P2ToP1ElementwiseDivergence_0_1_apply_P2ToP1ElementwiseDivergence_0_1_macro_3D.cpp index 4dcd581566c1fd7c4a8744f8a7c312749a5cb1a2..219f179129fa105c112a38e786878448ea12b781 100644 --- a/operators/divergence/avx/P2ToP1ElementwiseDivergence_0_1_apply_macro_3D.cpp +++ b/operators/divergence/avx/P2ToP1ElementwiseDivergence_0_1_apply_P2ToP1ElementwiseDivergence_0_1_macro_3D.cpp @@ -43,6 +43,10 @@ + + + + @@ -56,7 +60,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ToP1ElementwiseDivergence_0_1::apply_macro_3D( real_t * RESTRICT _data_dst, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P2ToP1ElementwiseDivergence_0_1::apply_P2ToP1ElementwiseDivergence_0_1_macro_3D( real_t * RESTRICT _data_dst, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t tmp_coords_jac_0_GREEN_DOWN = 1.0 / (micro_edges_per_macro_edge_float)*1.0; diff --git a/operators/divergence/avx/P2ToP1ElementwiseDivergence_0_2_apply_macro_3D.cpp b/operators/divergence/avx/P2ToP1ElementwiseDivergence_0_2_apply_P2ToP1ElementwiseDivergence_0_2_macro_3D.cpp similarity index 99% rename from operators/divergence/avx/P2ToP1ElementwiseDivergence_0_2_apply_macro_3D.cpp rename to operators/divergence/avx/P2ToP1ElementwiseDivergence_0_2_apply_P2ToP1ElementwiseDivergence_0_2_macro_3D.cpp index 9a37b8db52c202abf7940bd15de15b45fd9352df..abbde6d1e0ff32f5d2ed47b32d29d19dc776554f 100644 --- a/operators/divergence/avx/P2ToP1ElementwiseDivergence_0_2_apply_macro_3D.cpp +++ b/operators/divergence/avx/P2ToP1ElementwiseDivergence_0_2_apply_P2ToP1ElementwiseDivergence_0_2_macro_3D.cpp @@ -43,6 +43,10 @@ + + + + @@ -56,7 +60,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ToP1ElementwiseDivergence_0_2::apply_macro_3D( real_t * RESTRICT _data_dst, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P2ToP1ElementwiseDivergence_0_2::apply_P2ToP1ElementwiseDivergence_0_2_macro_3D( real_t * RESTRICT _data_dst, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t tmp_coords_jac_0_GREEN_DOWN = 1.0 / (micro_edges_per_macro_edge_float)*1.0; diff --git a/operators/divergence/noarch/P2ToP1ElementwiseDivergenceAnnulusMap_0_0_apply_macro_2D.cpp b/operators/divergence/noarch/P2ToP1ElementwiseDivergenceAnnulusMap_0_0_apply_P2ToP1ElementwiseDivergenceAnnulusMap_0_0_macro_2D.cpp similarity index 98% rename from operators/divergence/noarch/P2ToP1ElementwiseDivergenceAnnulusMap_0_0_apply_macro_2D.cpp rename to operators/divergence/noarch/P2ToP1ElementwiseDivergenceAnnulusMap_0_0_apply_P2ToP1ElementwiseDivergenceAnnulusMap_0_0_macro_2D.cpp index f0fc0ab334be4a48931aed6dcf0fefb6726e2137..e42425837a3b5c84fb13e1da4155b898bbc4bf8c 100644 --- a/operators/divergence/noarch/P2ToP1ElementwiseDivergenceAnnulusMap_0_0_apply_macro_2D.cpp +++ b/operators/divergence/noarch/P2ToP1ElementwiseDivergenceAnnulusMap_0_0_apply_P2ToP1ElementwiseDivergenceAnnulusMap_0_0_macro_2D.cpp @@ -45,6 +45,10 @@ + + + + @@ -58,7 +62,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ToP1ElementwiseDivergenceAnnulusMap_0_0::apply_macro_2D( real_t * RESTRICT _data_dst, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const +void P2ToP1ElementwiseDivergenceAnnulusMap_0_0::apply_P2ToP1ElementwiseDivergenceAnnulusMap_0_0_macro_2D( real_t * RESTRICT _data_dst, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const { { const real_t tmp_coords_jac_0_GRAY = 1.0 / (micro_edges_per_macro_edge_float)*1.0; diff --git a/operators/divergence/noarch/P2ToP1ElementwiseDivergenceAnnulusMap_0_0_toMatrix_macro_2D.cpp b/operators/divergence/noarch/P2ToP1ElementwiseDivergenceAnnulusMap_0_0_toMatrix_P2ToP1ElementwiseDivergenceAnnulusMap_0_0_macro_2D.cpp similarity index 98% rename from operators/divergence/noarch/P2ToP1ElementwiseDivergenceAnnulusMap_0_0_toMatrix_macro_2D.cpp rename to operators/divergence/noarch/P2ToP1ElementwiseDivergenceAnnulusMap_0_0_toMatrix_P2ToP1ElementwiseDivergenceAnnulusMap_0_0_macro_2D.cpp index 3985fe5aea56e611b2c621723534a5f610ad8bdd..107518f464c508712c96bb88fb27db860baf4017 100644 --- a/operators/divergence/noarch/P2ToP1ElementwiseDivergenceAnnulusMap_0_0_toMatrix_macro_2D.cpp +++ b/operators/divergence/noarch/P2ToP1ElementwiseDivergenceAnnulusMap_0_0_toMatrix_P2ToP1ElementwiseDivergenceAnnulusMap_0_0_macro_2D.cpp @@ -45,6 +45,10 @@ + + + + @@ -58,7 +62,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ToP1ElementwiseDivergenceAnnulusMap_0_0::toMatrix_macro_2D( idx_t * RESTRICT _data_dst, idx_t * RESTRICT _data_srcEdge, idx_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const +void P2ToP1ElementwiseDivergenceAnnulusMap_0_0::toMatrix_P2ToP1ElementwiseDivergenceAnnulusMap_0_0_macro_2D( idx_t * RESTRICT _data_dst, idx_t * RESTRICT _data_srcEdge, idx_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const { { const real_t tmp_coords_jac_0_GRAY = 1.0 / (micro_edges_per_macro_edge_float)*1.0; diff --git a/operators/divergence/noarch/P2ToP1ElementwiseDivergenceAnnulusMap_0_1_apply_macro_2D.cpp b/operators/divergence/noarch/P2ToP1ElementwiseDivergenceAnnulusMap_0_1_apply_P2ToP1ElementwiseDivergenceAnnulusMap_0_1_macro_2D.cpp similarity index 98% rename from operators/divergence/noarch/P2ToP1ElementwiseDivergenceAnnulusMap_0_1_apply_macro_2D.cpp rename to operators/divergence/noarch/P2ToP1ElementwiseDivergenceAnnulusMap_0_1_apply_P2ToP1ElementwiseDivergenceAnnulusMap_0_1_macro_2D.cpp index 2622ca338d25c9680d185f814e46aca75a2de2d7..ccd65bb8483556163b8e6c53cee934c207939aba 100644 --- a/operators/divergence/noarch/P2ToP1ElementwiseDivergenceAnnulusMap_0_1_apply_macro_2D.cpp +++ b/operators/divergence/noarch/P2ToP1ElementwiseDivergenceAnnulusMap_0_1_apply_P2ToP1ElementwiseDivergenceAnnulusMap_0_1_macro_2D.cpp @@ -45,6 +45,10 @@ + + + + @@ -58,7 +62,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ToP1ElementwiseDivergenceAnnulusMap_0_1::apply_macro_2D( real_t * RESTRICT _data_dst, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const +void P2ToP1ElementwiseDivergenceAnnulusMap_0_1::apply_P2ToP1ElementwiseDivergenceAnnulusMap_0_1_macro_2D( real_t * RESTRICT _data_dst, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const { { const real_t tmp_coords_jac_0_GRAY = 1.0 / (micro_edges_per_macro_edge_float)*1.0; diff --git a/operators/divergence/noarch/P2ToP1ElementwiseDivergenceAnnulusMap_0_1_toMatrix_macro_2D.cpp b/operators/divergence/noarch/P2ToP1ElementwiseDivergenceAnnulusMap_0_1_toMatrix_P2ToP1ElementwiseDivergenceAnnulusMap_0_1_macro_2D.cpp similarity index 98% rename from operators/divergence/noarch/P2ToP1ElementwiseDivergenceAnnulusMap_0_1_toMatrix_macro_2D.cpp rename to operators/divergence/noarch/P2ToP1ElementwiseDivergenceAnnulusMap_0_1_toMatrix_P2ToP1ElementwiseDivergenceAnnulusMap_0_1_macro_2D.cpp index 1304883ddd8683183c92b1ee85fb7ebd1dcd29c1..72c271f79c9f6d592fa1809417801c731236e86b 100644 --- a/operators/divergence/noarch/P2ToP1ElementwiseDivergenceAnnulusMap_0_1_toMatrix_macro_2D.cpp +++ b/operators/divergence/noarch/P2ToP1ElementwiseDivergenceAnnulusMap_0_1_toMatrix_P2ToP1ElementwiseDivergenceAnnulusMap_0_1_macro_2D.cpp @@ -45,6 +45,10 @@ + + + + @@ -58,7 +62,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ToP1ElementwiseDivergenceAnnulusMap_0_1::toMatrix_macro_2D( idx_t * RESTRICT _data_dst, idx_t * RESTRICT _data_srcEdge, idx_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const +void P2ToP1ElementwiseDivergenceAnnulusMap_0_1::toMatrix_P2ToP1ElementwiseDivergenceAnnulusMap_0_1_macro_2D( idx_t * RESTRICT _data_dst, idx_t * RESTRICT _data_srcEdge, idx_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const { { const real_t tmp_coords_jac_0_GRAY = 1.0 / (micro_edges_per_macro_edge_float)*1.0; diff --git a/operators/divergence/noarch/P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_0_apply_macro_3D.cpp b/operators/divergence/noarch/P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_0_apply_P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_0_macro_3D.cpp similarity index 99% rename from operators/divergence/noarch/P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_0_apply_macro_3D.cpp rename to operators/divergence/noarch/P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_0_apply_P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_0_macro_3D.cpp index 82535c450795355a9372180c17b505902682f3d6..c6e331b62a46fea15f7a125394dbdeef3efe46ca 100644 --- a/operators/divergence/noarch/P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_0_apply_macro_3D.cpp +++ b/operators/divergence/noarch/P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_0_apply_P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_0_macro_3D.cpp @@ -45,6 +45,10 @@ + + + + @@ -58,7 +62,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_0::apply_macro_3D( real_t * RESTRICT _data_dst, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const +void P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_0::apply_P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_0_macro_3D( real_t * RESTRICT _data_dst, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const { { const real_t tmp_coords_jac_0_WHITE_UP = 1.0 / (micro_edges_per_macro_edge_float)*1.0; diff --git a/operators/divergence/noarch/P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_0_toMatrix_macro_3D.cpp b/operators/divergence/noarch/P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_0_toMatrix_P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_0_macro_3D.cpp similarity index 99% rename from operators/divergence/noarch/P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_0_toMatrix_macro_3D.cpp rename to operators/divergence/noarch/P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_0_toMatrix_P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_0_macro_3D.cpp index a2d064fb0e4df4dacd7c3a984bbf39a7c6cf3935..20197c4c3020e2f800e6091220d8ce5c2d682f3e 100644 --- a/operators/divergence/noarch/P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_0_toMatrix_macro_3D.cpp +++ b/operators/divergence/noarch/P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_0_toMatrix_P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_0_macro_3D.cpp @@ -45,6 +45,10 @@ + + + + @@ -58,7 +62,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_0::toMatrix_macro_3D( idx_t * RESTRICT _data_dst, idx_t * RESTRICT _data_srcEdge, idx_t * RESTRICT _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const +void P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_0::toMatrix_P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_0_macro_3D( idx_t * RESTRICT _data_dst, idx_t * RESTRICT _data_srcEdge, idx_t * RESTRICT _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const { { const real_t tmp_coords_jac_0_WHITE_UP = 1.0 / (micro_edges_per_macro_edge_float)*1.0; diff --git a/operators/divergence/noarch/P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_1_apply_macro_3D.cpp b/operators/divergence/noarch/P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_1_apply_P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_1_macro_3D.cpp similarity index 99% rename from operators/divergence/noarch/P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_1_apply_macro_3D.cpp rename to operators/divergence/noarch/P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_1_apply_P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_1_macro_3D.cpp index ebb2f49c307c144a230aefda7afe57393c9a8efd..ee1c6e1c80beb4abd65bd52f99585c5cc2ba9f9b 100644 --- a/operators/divergence/noarch/P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_1_apply_macro_3D.cpp +++ b/operators/divergence/noarch/P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_1_apply_P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_1_macro_3D.cpp @@ -45,6 +45,10 @@ + + + + @@ -58,7 +62,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_1::apply_macro_3D( real_t * RESTRICT _data_dst, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const +void P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_1::apply_P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_1_macro_3D( real_t * RESTRICT _data_dst, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const { { const real_t tmp_coords_jac_0_WHITE_UP = 1.0 / (micro_edges_per_macro_edge_float)*1.0; diff --git a/operators/divergence/noarch/P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_1_toMatrix_macro_3D.cpp b/operators/divergence/noarch/P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_1_toMatrix_P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_1_macro_3D.cpp similarity index 99% rename from operators/divergence/noarch/P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_1_toMatrix_macro_3D.cpp rename to operators/divergence/noarch/P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_1_toMatrix_P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_1_macro_3D.cpp index 396cc539b63d5f2e09d703c36ceffa300190c2e9..c54ae33ff3d05194d3e8a925d12c810a43145a40 100644 --- a/operators/divergence/noarch/P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_1_toMatrix_macro_3D.cpp +++ b/operators/divergence/noarch/P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_1_toMatrix_P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_1_macro_3D.cpp @@ -45,6 +45,10 @@ + + + + @@ -58,7 +62,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_1::toMatrix_macro_3D( idx_t * RESTRICT _data_dst, idx_t * RESTRICT _data_srcEdge, idx_t * RESTRICT _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const +void P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_1::toMatrix_P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_1_macro_3D( idx_t * RESTRICT _data_dst, idx_t * RESTRICT _data_srcEdge, idx_t * RESTRICT _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const { { const real_t tmp_coords_jac_0_WHITE_UP = 1.0 / (micro_edges_per_macro_edge_float)*1.0; diff --git a/operators/divergence/noarch/P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_2_apply_macro_3D.cpp b/operators/divergence/noarch/P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_2_apply_P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_2_macro_3D.cpp similarity index 99% rename from operators/divergence/noarch/P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_2_apply_macro_3D.cpp rename to operators/divergence/noarch/P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_2_apply_P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_2_macro_3D.cpp index 2442c50d3dd48c0b74b44865b0c4bcc552a6a67a..0a08c166868c3f45a57d2268778ea47ec7d0af19 100644 --- a/operators/divergence/noarch/P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_2_apply_macro_3D.cpp +++ b/operators/divergence/noarch/P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_2_apply_P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_2_macro_3D.cpp @@ -45,6 +45,10 @@ + + + + @@ -58,7 +62,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_2::apply_macro_3D( real_t * RESTRICT _data_dst, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const +void P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_2::apply_P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_2_macro_3D( real_t * RESTRICT _data_dst, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const { { const real_t tmp_coords_jac_0_WHITE_UP = 1.0 / (micro_edges_per_macro_edge_float)*1.0; diff --git a/operators/divergence/noarch/P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_2_toMatrix_macro_3D.cpp b/operators/divergence/noarch/P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_2_toMatrix_P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_2_macro_3D.cpp similarity index 99% rename from operators/divergence/noarch/P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_2_toMatrix_macro_3D.cpp rename to operators/divergence/noarch/P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_2_toMatrix_P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_2_macro_3D.cpp index 3fba24ce52eb7c6c9b6d22e71f8f0955e06bea47..f140d92f3ef45b717681c5dac860250cc131776c 100644 --- a/operators/divergence/noarch/P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_2_toMatrix_macro_3D.cpp +++ b/operators/divergence/noarch/P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_2_toMatrix_P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_2_macro_3D.cpp @@ -45,6 +45,10 @@ + + + + @@ -58,7 +62,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_2::toMatrix_macro_3D( idx_t * RESTRICT _data_dst, idx_t * RESTRICT _data_srcEdge, idx_t * RESTRICT _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const +void P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_2::toMatrix_P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_2_macro_3D( idx_t * RESTRICT _data_dst, idx_t * RESTRICT _data_srcEdge, idx_t * RESTRICT _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const { { const real_t tmp_coords_jac_0_WHITE_UP = 1.0 / (micro_edges_per_macro_edge_float)*1.0; diff --git a/operators/divergence/noarch/P2ToP1ElementwiseDivergence_0_0_apply_macro_2D.cpp b/operators/divergence/noarch/P2ToP1ElementwiseDivergence_0_0_apply_P2ToP1ElementwiseDivergence_0_0_macro_2D.cpp similarity index 98% rename from operators/divergence/noarch/P2ToP1ElementwiseDivergence_0_0_apply_macro_2D.cpp rename to operators/divergence/noarch/P2ToP1ElementwiseDivergence_0_0_apply_P2ToP1ElementwiseDivergence_0_0_macro_2D.cpp index 0be5acbdf9a705225d919d8375c08001c178765a..2f4fbfbde061455ef962c192dabbe8e88c2b5f2e 100644 --- a/operators/divergence/noarch/P2ToP1ElementwiseDivergence_0_0_apply_macro_2D.cpp +++ b/operators/divergence/noarch/P2ToP1ElementwiseDivergence_0_0_apply_P2ToP1ElementwiseDivergence_0_0_macro_2D.cpp @@ -43,6 +43,10 @@ + + + + @@ -56,7 +60,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ToP1ElementwiseDivergence_0_0::apply_macro_2D( real_t * RESTRICT _data_dst, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P2ToP1ElementwiseDivergence_0_0::apply_P2ToP1ElementwiseDivergence_0_0_macro_2D( real_t * RESTRICT _data_dst, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t tmp_coords_jac_0_BLUE = 1.0 / (micro_edges_per_macro_edge_float)*1.0; diff --git a/operators/divergence/noarch/P2ToP1ElementwiseDivergence_0_0_apply_macro_3D.cpp b/operators/divergence/noarch/P2ToP1ElementwiseDivergence_0_0_apply_P2ToP1ElementwiseDivergence_0_0_macro_3D.cpp similarity index 99% rename from operators/divergence/noarch/P2ToP1ElementwiseDivergence_0_0_apply_macro_3D.cpp rename to operators/divergence/noarch/P2ToP1ElementwiseDivergence_0_0_apply_P2ToP1ElementwiseDivergence_0_0_macro_3D.cpp index b64342868ef25ca3c479571abfb140fccf464484..acc9c0d1fcb0ffd11846f90e169ddf2b4336dd76 100644 --- a/operators/divergence/noarch/P2ToP1ElementwiseDivergence_0_0_apply_macro_3D.cpp +++ b/operators/divergence/noarch/P2ToP1ElementwiseDivergence_0_0_apply_P2ToP1ElementwiseDivergence_0_0_macro_3D.cpp @@ -43,6 +43,10 @@ + + + + @@ -56,7 +60,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ToP1ElementwiseDivergence_0_0::apply_macro_3D( real_t * RESTRICT _data_dst, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P2ToP1ElementwiseDivergence_0_0::apply_P2ToP1ElementwiseDivergence_0_0_macro_3D( real_t * RESTRICT _data_dst, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t tmp_coords_jac_0_GREEN_DOWN = 1.0 / (micro_edges_per_macro_edge_float)*1.0; diff --git a/operators/divergence/noarch/P2ToP1ElementwiseDivergence_0_0_toMatrix_macro_2D.cpp b/operators/divergence/noarch/P2ToP1ElementwiseDivergence_0_0_toMatrix_P2ToP1ElementwiseDivergence_0_0_macro_2D.cpp similarity index 98% rename from operators/divergence/noarch/P2ToP1ElementwiseDivergence_0_0_toMatrix_macro_2D.cpp rename to operators/divergence/noarch/P2ToP1ElementwiseDivergence_0_0_toMatrix_P2ToP1ElementwiseDivergence_0_0_macro_2D.cpp index a1694a9e308ee4897c05adebc6404965d0a62cb5..600e403c6f5db9b87563b5f4d557adce93a1e091 100644 --- a/operators/divergence/noarch/P2ToP1ElementwiseDivergence_0_0_toMatrix_macro_2D.cpp +++ b/operators/divergence/noarch/P2ToP1ElementwiseDivergence_0_0_toMatrix_P2ToP1ElementwiseDivergence_0_0_macro_2D.cpp @@ -43,6 +43,10 @@ + + + + @@ -56,7 +60,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ToP1ElementwiseDivergence_0_0::toMatrix_macro_2D( idx_t * RESTRICT _data_dst, idx_t * RESTRICT _data_srcEdge, idx_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P2ToP1ElementwiseDivergence_0_0::toMatrix_P2ToP1ElementwiseDivergence_0_0_macro_2D( idx_t * RESTRICT _data_dst, idx_t * RESTRICT _data_srcEdge, idx_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t tmp_coords_jac_0_BLUE = 1.0 / (micro_edges_per_macro_edge_float)*1.0; diff --git a/operators/divergence/noarch/P2ToP1ElementwiseDivergence_0_0_toMatrix_macro_3D.cpp b/operators/divergence/noarch/P2ToP1ElementwiseDivergence_0_0_toMatrix_P2ToP1ElementwiseDivergence_0_0_macro_3D.cpp similarity index 99% rename from operators/divergence/noarch/P2ToP1ElementwiseDivergence_0_0_toMatrix_macro_3D.cpp rename to operators/divergence/noarch/P2ToP1ElementwiseDivergence_0_0_toMatrix_P2ToP1ElementwiseDivergence_0_0_macro_3D.cpp index 71006702f16d05c43fa4b7ad759e121c1798a1bb..73af29568199db3227ba8be0f395e1e349b2d2d7 100644 --- a/operators/divergence/noarch/P2ToP1ElementwiseDivergence_0_0_toMatrix_macro_3D.cpp +++ b/operators/divergence/noarch/P2ToP1ElementwiseDivergence_0_0_toMatrix_P2ToP1ElementwiseDivergence_0_0_macro_3D.cpp @@ -43,6 +43,10 @@ + + + + @@ -56,7 +60,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ToP1ElementwiseDivergence_0_0::toMatrix_macro_3D( idx_t * RESTRICT _data_dst, idx_t * RESTRICT _data_srcEdge, idx_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P2ToP1ElementwiseDivergence_0_0::toMatrix_P2ToP1ElementwiseDivergence_0_0_macro_3D( idx_t * RESTRICT _data_dst, idx_t * RESTRICT _data_srcEdge, idx_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t tmp_coords_jac_0_GREEN_DOWN = 1.0 / (micro_edges_per_macro_edge_float)*1.0; diff --git a/operators/divergence/noarch/P2ToP1ElementwiseDivergence_0_1_apply_macro_2D.cpp b/operators/divergence/noarch/P2ToP1ElementwiseDivergence_0_1_apply_P2ToP1ElementwiseDivergence_0_1_macro_2D.cpp similarity index 98% rename from operators/divergence/noarch/P2ToP1ElementwiseDivergence_0_1_apply_macro_2D.cpp rename to operators/divergence/noarch/P2ToP1ElementwiseDivergence_0_1_apply_P2ToP1ElementwiseDivergence_0_1_macro_2D.cpp index 2057cab6b415f2d7d94f3dfb64542e75b8b39ae1..4f36c2a3c6d6faaf6c23fc936eacea6cc195d469 100644 --- a/operators/divergence/noarch/P2ToP1ElementwiseDivergence_0_1_apply_macro_2D.cpp +++ b/operators/divergence/noarch/P2ToP1ElementwiseDivergence_0_1_apply_P2ToP1ElementwiseDivergence_0_1_macro_2D.cpp @@ -43,6 +43,10 @@ + + + + @@ -56,7 +60,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ToP1ElementwiseDivergence_0_1::apply_macro_2D( real_t * RESTRICT _data_dst, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P2ToP1ElementwiseDivergence_0_1::apply_P2ToP1ElementwiseDivergence_0_1_macro_2D( real_t * RESTRICT _data_dst, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t tmp_coords_jac_0_BLUE = 1.0 / (micro_edges_per_macro_edge_float)*1.0; diff --git a/operators/divergence/noarch/P2ToP1ElementwiseDivergence_0_1_apply_macro_3D.cpp b/operators/divergence/noarch/P2ToP1ElementwiseDivergence_0_1_apply_P2ToP1ElementwiseDivergence_0_1_macro_3D.cpp similarity index 99% rename from operators/divergence/noarch/P2ToP1ElementwiseDivergence_0_1_apply_macro_3D.cpp rename to operators/divergence/noarch/P2ToP1ElementwiseDivergence_0_1_apply_P2ToP1ElementwiseDivergence_0_1_macro_3D.cpp index ad720e9c7e1091884e685c73b21f84f27bfcd64e..d957a1ed2f9e7f0ee0c2de678586332bf6740b27 100644 --- a/operators/divergence/noarch/P2ToP1ElementwiseDivergence_0_1_apply_macro_3D.cpp +++ b/operators/divergence/noarch/P2ToP1ElementwiseDivergence_0_1_apply_P2ToP1ElementwiseDivergence_0_1_macro_3D.cpp @@ -43,6 +43,10 @@ + + + + @@ -56,7 +60,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ToP1ElementwiseDivergence_0_1::apply_macro_3D( real_t * RESTRICT _data_dst, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P2ToP1ElementwiseDivergence_0_1::apply_P2ToP1ElementwiseDivergence_0_1_macro_3D( real_t * RESTRICT _data_dst, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t tmp_coords_jac_0_GREEN_DOWN = 1.0 / (micro_edges_per_macro_edge_float)*1.0; diff --git a/operators/divergence/noarch/P2ToP1ElementwiseDivergence_0_1_toMatrix_macro_2D.cpp b/operators/divergence/noarch/P2ToP1ElementwiseDivergence_0_1_toMatrix_P2ToP1ElementwiseDivergence_0_1_macro_2D.cpp similarity index 98% rename from operators/divergence/noarch/P2ToP1ElementwiseDivergence_0_1_toMatrix_macro_2D.cpp rename to operators/divergence/noarch/P2ToP1ElementwiseDivergence_0_1_toMatrix_P2ToP1ElementwiseDivergence_0_1_macro_2D.cpp index a390a7626d5eb2c796d352e27b4d3eb4c0080de1..bbd6556c11a29237e3afbadb67d823cc7d4efeb2 100644 --- a/operators/divergence/noarch/P2ToP1ElementwiseDivergence_0_1_toMatrix_macro_2D.cpp +++ b/operators/divergence/noarch/P2ToP1ElementwiseDivergence_0_1_toMatrix_P2ToP1ElementwiseDivergence_0_1_macro_2D.cpp @@ -43,6 +43,10 @@ + + + + @@ -56,7 +60,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ToP1ElementwiseDivergence_0_1::toMatrix_macro_2D( idx_t * RESTRICT _data_dst, idx_t * RESTRICT _data_srcEdge, idx_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P2ToP1ElementwiseDivergence_0_1::toMatrix_P2ToP1ElementwiseDivergence_0_1_macro_2D( idx_t * RESTRICT _data_dst, idx_t * RESTRICT _data_srcEdge, idx_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t tmp_coords_jac_0_BLUE = 1.0 / (micro_edges_per_macro_edge_float)*1.0; diff --git a/operators/divergence/noarch/P2ToP1ElementwiseDivergence_0_1_toMatrix_macro_3D.cpp b/operators/divergence/noarch/P2ToP1ElementwiseDivergence_0_1_toMatrix_P2ToP1ElementwiseDivergence_0_1_macro_3D.cpp similarity index 99% rename from operators/divergence/noarch/P2ToP1ElementwiseDivergence_0_1_toMatrix_macro_3D.cpp rename to operators/divergence/noarch/P2ToP1ElementwiseDivergence_0_1_toMatrix_P2ToP1ElementwiseDivergence_0_1_macro_3D.cpp index 3ca38cd92589a24086d5e7ea024826996d86f182..8d8310e2f3d3bc923140364945124ff782853d05 100644 --- a/operators/divergence/noarch/P2ToP1ElementwiseDivergence_0_1_toMatrix_macro_3D.cpp +++ b/operators/divergence/noarch/P2ToP1ElementwiseDivergence_0_1_toMatrix_P2ToP1ElementwiseDivergence_0_1_macro_3D.cpp @@ -43,6 +43,10 @@ + + + + @@ -56,7 +60,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ToP1ElementwiseDivergence_0_1::toMatrix_macro_3D( idx_t * RESTRICT _data_dst, idx_t * RESTRICT _data_srcEdge, idx_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P2ToP1ElementwiseDivergence_0_1::toMatrix_P2ToP1ElementwiseDivergence_0_1_macro_3D( idx_t * RESTRICT _data_dst, idx_t * RESTRICT _data_srcEdge, idx_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t tmp_coords_jac_0_GREEN_DOWN = 1.0 / (micro_edges_per_macro_edge_float)*1.0; diff --git a/operators/divergence/noarch/P2ToP1ElementwiseDivergence_0_2_apply_macro_3D.cpp b/operators/divergence/noarch/P2ToP1ElementwiseDivergence_0_2_apply_P2ToP1ElementwiseDivergence_0_2_macro_3D.cpp similarity index 99% rename from operators/divergence/noarch/P2ToP1ElementwiseDivergence_0_2_apply_macro_3D.cpp rename to operators/divergence/noarch/P2ToP1ElementwiseDivergence_0_2_apply_P2ToP1ElementwiseDivergence_0_2_macro_3D.cpp index 390f8c5793d3817ec3a87f33ce44c67d49060264..e11280ecd2e92a303648e2262bf3ef564f046727 100644 --- a/operators/divergence/noarch/P2ToP1ElementwiseDivergence_0_2_apply_macro_3D.cpp +++ b/operators/divergence/noarch/P2ToP1ElementwiseDivergence_0_2_apply_P2ToP1ElementwiseDivergence_0_2_macro_3D.cpp @@ -43,6 +43,10 @@ + + + + @@ -56,7 +60,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ToP1ElementwiseDivergence_0_2::apply_macro_3D( real_t * RESTRICT _data_dst, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P2ToP1ElementwiseDivergence_0_2::apply_P2ToP1ElementwiseDivergence_0_2_macro_3D( real_t * RESTRICT _data_dst, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t tmp_coords_jac_0_GREEN_DOWN = 1.0 / (micro_edges_per_macro_edge_float)*1.0; diff --git a/operators/divergence/noarch/P2ToP1ElementwiseDivergence_0_2_toMatrix_macro_3D.cpp b/operators/divergence/noarch/P2ToP1ElementwiseDivergence_0_2_toMatrix_P2ToP1ElementwiseDivergence_0_2_macro_3D.cpp similarity index 99% rename from operators/divergence/noarch/P2ToP1ElementwiseDivergence_0_2_toMatrix_macro_3D.cpp rename to operators/divergence/noarch/P2ToP1ElementwiseDivergence_0_2_toMatrix_P2ToP1ElementwiseDivergence_0_2_macro_3D.cpp index 094ca101fc9d065b5a9e97a8014ccb10bce710a3..391888d747c518d7826af06f4d361becb8b3abb5 100644 --- a/operators/divergence/noarch/P2ToP1ElementwiseDivergence_0_2_toMatrix_macro_3D.cpp +++ b/operators/divergence/noarch/P2ToP1ElementwiseDivergence_0_2_toMatrix_P2ToP1ElementwiseDivergence_0_2_macro_3D.cpp @@ -43,6 +43,10 @@ + + + + @@ -56,7 +60,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ToP1ElementwiseDivergence_0_2::toMatrix_macro_3D( idx_t * RESTRICT _data_dst, idx_t * RESTRICT _data_srcEdge, idx_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P2ToP1ElementwiseDivergence_0_2::toMatrix_P2ToP1ElementwiseDivergence_0_2_macro_3D( idx_t * RESTRICT _data_dst, idx_t * RESTRICT _data_srcEdge, idx_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t tmp_coords_jac_0_GREEN_DOWN = 1.0 / (micro_edges_per_macro_edge_float)*1.0; diff --git a/operators/epsilon/CMakeLists.txt b/operators/epsilon/CMakeLists.txt index ed34aa1e02b4a3d6b6d00a381711014e8e9b4213..9e6dfb2331504240e852bdbaf79e84e596851140 100644 --- a/operators/epsilon/CMakeLists.txt +++ b/operators/epsilon/CMakeLists.txt @@ -49,108 +49,108 @@ add_library( opgen-epsilon if(HYTEG_BUILD_WITH_AVX AND WALBERLA_DOUBLE_ACCURACY) target_sources(opgen-epsilon PRIVATE - avx/P2ElementwiseEpsilonAnnulusMap_0_0_apply_macro_2D.cpp - avx/P2ElementwiseEpsilonAnnulusMap_0_0_computeInverseDiagonalOperatorValues_macro_2D.cpp - avx/P2ElementwiseEpsilonAnnulusMap_0_1_apply_macro_2D.cpp - avx/P2ElementwiseEpsilonAnnulusMap_1_0_apply_macro_2D.cpp - avx/P2ElementwiseEpsilonAnnulusMap_1_1_apply_macro_2D.cpp - avx/P2ElementwiseEpsilonAnnulusMap_1_1_computeInverseDiagonalOperatorValues_macro_2D.cpp - avx/P2ElementwiseEpsilonIcosahedralShellMap_0_0_apply_macro_3D.cpp - avx/P2ElementwiseEpsilonIcosahedralShellMap_0_0_computeInverseDiagonalOperatorValues_macro_3D.cpp - avx/P2ElementwiseEpsilonIcosahedralShellMap_0_1_apply_macro_3D.cpp - avx/P2ElementwiseEpsilonIcosahedralShellMap_0_2_apply_macro_3D.cpp - avx/P2ElementwiseEpsilonIcosahedralShellMap_1_0_apply_macro_3D.cpp - avx/P2ElementwiseEpsilonIcosahedralShellMap_1_1_apply_macro_3D.cpp - avx/P2ElementwiseEpsilonIcosahedralShellMap_1_1_computeInverseDiagonalOperatorValues_macro_3D.cpp - avx/P2ElementwiseEpsilonIcosahedralShellMap_1_2_apply_macro_3D.cpp - avx/P2ElementwiseEpsilonIcosahedralShellMap_2_0_apply_macro_3D.cpp - avx/P2ElementwiseEpsilonIcosahedralShellMap_2_1_apply_macro_3D.cpp - avx/P2ElementwiseEpsilonIcosahedralShellMap_2_2_apply_macro_3D.cpp - avx/P2ElementwiseEpsilonIcosahedralShellMap_2_2_computeInverseDiagonalOperatorValues_macro_3D.cpp - avx/P2ElementwiseEpsilon_0_0_apply_macro_2D.cpp - avx/P2ElementwiseEpsilon_0_0_apply_macro_3D.cpp - avx/P2ElementwiseEpsilon_0_0_computeInverseDiagonalOperatorValues_macro_2D.cpp - avx/P2ElementwiseEpsilon_0_0_computeInverseDiagonalOperatorValues_macro_3D.cpp - avx/P2ElementwiseEpsilon_0_1_apply_macro_2D.cpp - avx/P2ElementwiseEpsilon_0_1_apply_macro_3D.cpp - avx/P2ElementwiseEpsilon_0_2_apply_macro_3D.cpp - avx/P2ElementwiseEpsilon_1_0_apply_macro_2D.cpp - avx/P2ElementwiseEpsilon_1_0_apply_macro_3D.cpp - avx/P2ElementwiseEpsilon_1_1_apply_macro_2D.cpp - avx/P2ElementwiseEpsilon_1_1_apply_macro_3D.cpp - avx/P2ElementwiseEpsilon_1_1_computeInverseDiagonalOperatorValues_macro_2D.cpp - avx/P2ElementwiseEpsilon_1_1_computeInverseDiagonalOperatorValues_macro_3D.cpp - avx/P2ElementwiseEpsilon_1_2_apply_macro_3D.cpp - avx/P2ElementwiseEpsilon_2_0_apply_macro_3D.cpp - avx/P2ElementwiseEpsilon_2_1_apply_macro_3D.cpp - avx/P2ElementwiseEpsilon_2_2_apply_macro_3D.cpp - avx/P2ElementwiseEpsilon_2_2_computeInverseDiagonalOperatorValues_macro_3D.cpp - noarch/P2ElementwiseEpsilonAnnulusMap_0_0_toMatrix_macro_2D.cpp - noarch/P2ElementwiseEpsilonAnnulusMap_0_1_toMatrix_macro_2D.cpp - noarch/P2ElementwiseEpsilonAnnulusMap_1_0_toMatrix_macro_2D.cpp - noarch/P2ElementwiseEpsilonAnnulusMap_1_1_toMatrix_macro_2D.cpp - noarch/P2ElementwiseEpsilonIcosahedralShellMap_0_0_toMatrix_macro_3D.cpp - noarch/P2ElementwiseEpsilonIcosahedralShellMap_0_1_toMatrix_macro_3D.cpp - noarch/P2ElementwiseEpsilonIcosahedralShellMap_0_2_toMatrix_macro_3D.cpp - noarch/P2ElementwiseEpsilonIcosahedralShellMap_1_0_toMatrix_macro_3D.cpp - noarch/P2ElementwiseEpsilonIcosahedralShellMap_1_1_toMatrix_macro_3D.cpp - noarch/P2ElementwiseEpsilonIcosahedralShellMap_1_2_toMatrix_macro_3D.cpp - noarch/P2ElementwiseEpsilonIcosahedralShellMap_2_0_toMatrix_macro_3D.cpp - noarch/P2ElementwiseEpsilonIcosahedralShellMap_2_1_toMatrix_macro_3D.cpp - noarch/P2ElementwiseEpsilonIcosahedralShellMap_2_2_toMatrix_macro_3D.cpp - noarch/P2ElementwiseEpsilon_0_0_toMatrix_macro_2D.cpp - noarch/P2ElementwiseEpsilon_0_0_toMatrix_macro_3D.cpp - noarch/P2ElementwiseEpsilon_0_1_toMatrix_macro_2D.cpp - noarch/P2ElementwiseEpsilon_0_1_toMatrix_macro_3D.cpp - noarch/P2ElementwiseEpsilon_0_2_toMatrix_macro_3D.cpp - noarch/P2ElementwiseEpsilon_1_0_toMatrix_macro_2D.cpp - noarch/P2ElementwiseEpsilon_1_0_toMatrix_macro_3D.cpp - noarch/P2ElementwiseEpsilon_1_1_toMatrix_macro_2D.cpp - noarch/P2ElementwiseEpsilon_1_1_toMatrix_macro_3D.cpp - noarch/P2ElementwiseEpsilon_1_2_toMatrix_macro_3D.cpp - noarch/P2ElementwiseEpsilon_2_0_toMatrix_macro_3D.cpp - noarch/P2ElementwiseEpsilon_2_1_toMatrix_macro_3D.cpp - noarch/P2ElementwiseEpsilon_2_2_toMatrix_macro_3D.cpp + avx/P2ElementwiseEpsilonAnnulusMap_0_0_apply_P2ElementwiseEpsilonAnnulusMap_0_0_macro_2D.cpp + avx/P2ElementwiseEpsilonAnnulusMap_0_0_computeInverseDiagonalOperatorValues_P2ElementwiseEpsilonAnnulusMap_0_0_macro_2D.cpp + avx/P2ElementwiseEpsilonAnnulusMap_0_1_apply_P2ElementwiseEpsilonAnnulusMap_0_1_macro_2D.cpp + avx/P2ElementwiseEpsilonAnnulusMap_1_0_apply_P2ElementwiseEpsilonAnnulusMap_1_0_macro_2D.cpp + avx/P2ElementwiseEpsilonAnnulusMap_1_1_apply_P2ElementwiseEpsilonAnnulusMap_1_1_macro_2D.cpp + avx/P2ElementwiseEpsilonAnnulusMap_1_1_computeInverseDiagonalOperatorValues_P2ElementwiseEpsilonAnnulusMap_1_1_macro_2D.cpp + avx/P2ElementwiseEpsilonIcosahedralShellMap_0_0_apply_P2ElementwiseEpsilonIcosahedralShellMap_0_0_macro_3D.cpp + avx/P2ElementwiseEpsilonIcosahedralShellMap_0_0_computeInverseDiagonalOperatorValues_P2ElementwiseEpsilonIcosahedralShellMap_0_0_macro_3D.cpp + avx/P2ElementwiseEpsilonIcosahedralShellMap_0_1_apply_P2ElementwiseEpsilonIcosahedralShellMap_0_1_macro_3D.cpp + avx/P2ElementwiseEpsilonIcosahedralShellMap_0_2_apply_P2ElementwiseEpsilonIcosahedralShellMap_0_2_macro_3D.cpp + avx/P2ElementwiseEpsilonIcosahedralShellMap_1_0_apply_P2ElementwiseEpsilonIcosahedralShellMap_1_0_macro_3D.cpp + avx/P2ElementwiseEpsilonIcosahedralShellMap_1_1_apply_P2ElementwiseEpsilonIcosahedralShellMap_1_1_macro_3D.cpp + avx/P2ElementwiseEpsilonIcosahedralShellMap_1_1_computeInverseDiagonalOperatorValues_P2ElementwiseEpsilonIcosahedralShellMap_1_1_macro_3D.cpp + avx/P2ElementwiseEpsilonIcosahedralShellMap_1_2_apply_P2ElementwiseEpsilonIcosahedralShellMap_1_2_macro_3D.cpp + avx/P2ElementwiseEpsilonIcosahedralShellMap_2_0_apply_P2ElementwiseEpsilonIcosahedralShellMap_2_0_macro_3D.cpp + avx/P2ElementwiseEpsilonIcosahedralShellMap_2_1_apply_P2ElementwiseEpsilonIcosahedralShellMap_2_1_macro_3D.cpp + avx/P2ElementwiseEpsilonIcosahedralShellMap_2_2_apply_P2ElementwiseEpsilonIcosahedralShellMap_2_2_macro_3D.cpp + avx/P2ElementwiseEpsilonIcosahedralShellMap_2_2_computeInverseDiagonalOperatorValues_P2ElementwiseEpsilonIcosahedralShellMap_2_2_macro_3D.cpp + avx/P2ElementwiseEpsilon_0_0_apply_P2ElementwiseEpsilon_0_0_macro_2D.cpp + avx/P2ElementwiseEpsilon_0_0_apply_P2ElementwiseEpsilon_0_0_macro_3D.cpp + avx/P2ElementwiseEpsilon_0_0_computeInverseDiagonalOperatorValues_P2ElementwiseEpsilon_0_0_macro_2D.cpp + avx/P2ElementwiseEpsilon_0_0_computeInverseDiagonalOperatorValues_P2ElementwiseEpsilon_0_0_macro_3D.cpp + avx/P2ElementwiseEpsilon_0_1_apply_P2ElementwiseEpsilon_0_1_macro_2D.cpp + avx/P2ElementwiseEpsilon_0_1_apply_P2ElementwiseEpsilon_0_1_macro_3D.cpp + avx/P2ElementwiseEpsilon_0_2_apply_P2ElementwiseEpsilon_0_2_macro_3D.cpp + avx/P2ElementwiseEpsilon_1_0_apply_P2ElementwiseEpsilon_1_0_macro_2D.cpp + avx/P2ElementwiseEpsilon_1_0_apply_P2ElementwiseEpsilon_1_0_macro_3D.cpp + avx/P2ElementwiseEpsilon_1_1_apply_P2ElementwiseEpsilon_1_1_macro_2D.cpp + avx/P2ElementwiseEpsilon_1_1_apply_P2ElementwiseEpsilon_1_1_macro_3D.cpp + avx/P2ElementwiseEpsilon_1_1_computeInverseDiagonalOperatorValues_P2ElementwiseEpsilon_1_1_macro_2D.cpp + avx/P2ElementwiseEpsilon_1_1_computeInverseDiagonalOperatorValues_P2ElementwiseEpsilon_1_1_macro_3D.cpp + avx/P2ElementwiseEpsilon_1_2_apply_P2ElementwiseEpsilon_1_2_macro_3D.cpp + avx/P2ElementwiseEpsilon_2_0_apply_P2ElementwiseEpsilon_2_0_macro_3D.cpp + avx/P2ElementwiseEpsilon_2_1_apply_P2ElementwiseEpsilon_2_1_macro_3D.cpp + avx/P2ElementwiseEpsilon_2_2_apply_P2ElementwiseEpsilon_2_2_macro_3D.cpp + avx/P2ElementwiseEpsilon_2_2_computeInverseDiagonalOperatorValues_P2ElementwiseEpsilon_2_2_macro_3D.cpp + noarch/P2ElementwiseEpsilonAnnulusMap_0_0_toMatrix_P2ElementwiseEpsilonAnnulusMap_0_0_macro_2D.cpp + noarch/P2ElementwiseEpsilonAnnulusMap_0_1_toMatrix_P2ElementwiseEpsilonAnnulusMap_0_1_macro_2D.cpp + noarch/P2ElementwiseEpsilonAnnulusMap_1_0_toMatrix_P2ElementwiseEpsilonAnnulusMap_1_0_macro_2D.cpp + noarch/P2ElementwiseEpsilonAnnulusMap_1_1_toMatrix_P2ElementwiseEpsilonAnnulusMap_1_1_macro_2D.cpp + noarch/P2ElementwiseEpsilonIcosahedralShellMap_0_0_toMatrix_P2ElementwiseEpsilonIcosahedralShellMap_0_0_macro_3D.cpp + noarch/P2ElementwiseEpsilonIcosahedralShellMap_0_1_toMatrix_P2ElementwiseEpsilonIcosahedralShellMap_0_1_macro_3D.cpp + noarch/P2ElementwiseEpsilonIcosahedralShellMap_0_2_toMatrix_P2ElementwiseEpsilonIcosahedralShellMap_0_2_macro_3D.cpp + noarch/P2ElementwiseEpsilonIcosahedralShellMap_1_0_toMatrix_P2ElementwiseEpsilonIcosahedralShellMap_1_0_macro_3D.cpp + noarch/P2ElementwiseEpsilonIcosahedralShellMap_1_1_toMatrix_P2ElementwiseEpsilonIcosahedralShellMap_1_1_macro_3D.cpp + noarch/P2ElementwiseEpsilonIcosahedralShellMap_1_2_toMatrix_P2ElementwiseEpsilonIcosahedralShellMap_1_2_macro_3D.cpp + noarch/P2ElementwiseEpsilonIcosahedralShellMap_2_0_toMatrix_P2ElementwiseEpsilonIcosahedralShellMap_2_0_macro_3D.cpp + noarch/P2ElementwiseEpsilonIcosahedralShellMap_2_1_toMatrix_P2ElementwiseEpsilonIcosahedralShellMap_2_1_macro_3D.cpp + noarch/P2ElementwiseEpsilonIcosahedralShellMap_2_2_toMatrix_P2ElementwiseEpsilonIcosahedralShellMap_2_2_macro_3D.cpp + noarch/P2ElementwiseEpsilon_0_0_toMatrix_P2ElementwiseEpsilon_0_0_macro_2D.cpp + noarch/P2ElementwiseEpsilon_0_0_toMatrix_P2ElementwiseEpsilon_0_0_macro_3D.cpp + noarch/P2ElementwiseEpsilon_0_1_toMatrix_P2ElementwiseEpsilon_0_1_macro_2D.cpp + noarch/P2ElementwiseEpsilon_0_1_toMatrix_P2ElementwiseEpsilon_0_1_macro_3D.cpp + noarch/P2ElementwiseEpsilon_0_2_toMatrix_P2ElementwiseEpsilon_0_2_macro_3D.cpp + noarch/P2ElementwiseEpsilon_1_0_toMatrix_P2ElementwiseEpsilon_1_0_macro_2D.cpp + noarch/P2ElementwiseEpsilon_1_0_toMatrix_P2ElementwiseEpsilon_1_0_macro_3D.cpp + noarch/P2ElementwiseEpsilon_1_1_toMatrix_P2ElementwiseEpsilon_1_1_macro_2D.cpp + noarch/P2ElementwiseEpsilon_1_1_toMatrix_P2ElementwiseEpsilon_1_1_macro_3D.cpp + noarch/P2ElementwiseEpsilon_1_2_toMatrix_P2ElementwiseEpsilon_1_2_macro_3D.cpp + noarch/P2ElementwiseEpsilon_2_0_toMatrix_P2ElementwiseEpsilon_2_0_macro_3D.cpp + noarch/P2ElementwiseEpsilon_2_1_toMatrix_P2ElementwiseEpsilon_2_1_macro_3D.cpp + noarch/P2ElementwiseEpsilon_2_2_toMatrix_P2ElementwiseEpsilon_2_2_macro_3D.cpp ) set_source_files_properties( - avx/P2ElementwiseEpsilonAnnulusMap_0_0_apply_macro_2D.cpp - avx/P2ElementwiseEpsilonAnnulusMap_0_0_computeInverseDiagonalOperatorValues_macro_2D.cpp - avx/P2ElementwiseEpsilonAnnulusMap_0_1_apply_macro_2D.cpp - avx/P2ElementwiseEpsilonAnnulusMap_1_0_apply_macro_2D.cpp - avx/P2ElementwiseEpsilonAnnulusMap_1_1_apply_macro_2D.cpp - avx/P2ElementwiseEpsilonAnnulusMap_1_1_computeInverseDiagonalOperatorValues_macro_2D.cpp - avx/P2ElementwiseEpsilonIcosahedralShellMap_0_0_apply_macro_3D.cpp - avx/P2ElementwiseEpsilonIcosahedralShellMap_0_0_computeInverseDiagonalOperatorValues_macro_3D.cpp - avx/P2ElementwiseEpsilonIcosahedralShellMap_0_1_apply_macro_3D.cpp - avx/P2ElementwiseEpsilonIcosahedralShellMap_0_2_apply_macro_3D.cpp - avx/P2ElementwiseEpsilonIcosahedralShellMap_1_0_apply_macro_3D.cpp - avx/P2ElementwiseEpsilonIcosahedralShellMap_1_1_apply_macro_3D.cpp - avx/P2ElementwiseEpsilonIcosahedralShellMap_1_1_computeInverseDiagonalOperatorValues_macro_3D.cpp - avx/P2ElementwiseEpsilonIcosahedralShellMap_1_2_apply_macro_3D.cpp - avx/P2ElementwiseEpsilonIcosahedralShellMap_2_0_apply_macro_3D.cpp - avx/P2ElementwiseEpsilonIcosahedralShellMap_2_1_apply_macro_3D.cpp - avx/P2ElementwiseEpsilonIcosahedralShellMap_2_2_apply_macro_3D.cpp - avx/P2ElementwiseEpsilonIcosahedralShellMap_2_2_computeInverseDiagonalOperatorValues_macro_3D.cpp - avx/P2ElementwiseEpsilon_0_0_apply_macro_2D.cpp - avx/P2ElementwiseEpsilon_0_0_apply_macro_3D.cpp - avx/P2ElementwiseEpsilon_0_0_computeInverseDiagonalOperatorValues_macro_2D.cpp - avx/P2ElementwiseEpsilon_0_0_computeInverseDiagonalOperatorValues_macro_3D.cpp - avx/P2ElementwiseEpsilon_0_1_apply_macro_2D.cpp - avx/P2ElementwiseEpsilon_0_1_apply_macro_3D.cpp - avx/P2ElementwiseEpsilon_0_2_apply_macro_3D.cpp - avx/P2ElementwiseEpsilon_1_0_apply_macro_2D.cpp - avx/P2ElementwiseEpsilon_1_0_apply_macro_3D.cpp - avx/P2ElementwiseEpsilon_1_1_apply_macro_2D.cpp - avx/P2ElementwiseEpsilon_1_1_apply_macro_3D.cpp - avx/P2ElementwiseEpsilon_1_1_computeInverseDiagonalOperatorValues_macro_2D.cpp - avx/P2ElementwiseEpsilon_1_1_computeInverseDiagonalOperatorValues_macro_3D.cpp - avx/P2ElementwiseEpsilon_1_2_apply_macro_3D.cpp - avx/P2ElementwiseEpsilon_2_0_apply_macro_3D.cpp - avx/P2ElementwiseEpsilon_2_1_apply_macro_3D.cpp - avx/P2ElementwiseEpsilon_2_2_apply_macro_3D.cpp - avx/P2ElementwiseEpsilon_2_2_computeInverseDiagonalOperatorValues_macro_3D.cpp + avx/P2ElementwiseEpsilonAnnulusMap_0_0_apply_P2ElementwiseEpsilonAnnulusMap_0_0_macro_2D.cpp + avx/P2ElementwiseEpsilonAnnulusMap_0_0_computeInverseDiagonalOperatorValues_P2ElementwiseEpsilonAnnulusMap_0_0_macro_2D.cpp + avx/P2ElementwiseEpsilonAnnulusMap_0_1_apply_P2ElementwiseEpsilonAnnulusMap_0_1_macro_2D.cpp + avx/P2ElementwiseEpsilonAnnulusMap_1_0_apply_P2ElementwiseEpsilonAnnulusMap_1_0_macro_2D.cpp + avx/P2ElementwiseEpsilonAnnulusMap_1_1_apply_P2ElementwiseEpsilonAnnulusMap_1_1_macro_2D.cpp + avx/P2ElementwiseEpsilonAnnulusMap_1_1_computeInverseDiagonalOperatorValues_P2ElementwiseEpsilonAnnulusMap_1_1_macro_2D.cpp + avx/P2ElementwiseEpsilonIcosahedralShellMap_0_0_apply_P2ElementwiseEpsilonIcosahedralShellMap_0_0_macro_3D.cpp + avx/P2ElementwiseEpsilonIcosahedralShellMap_0_0_computeInverseDiagonalOperatorValues_P2ElementwiseEpsilonIcosahedralShellMap_0_0_macro_3D.cpp + avx/P2ElementwiseEpsilonIcosahedralShellMap_0_1_apply_P2ElementwiseEpsilonIcosahedralShellMap_0_1_macro_3D.cpp + avx/P2ElementwiseEpsilonIcosahedralShellMap_0_2_apply_P2ElementwiseEpsilonIcosahedralShellMap_0_2_macro_3D.cpp + avx/P2ElementwiseEpsilonIcosahedralShellMap_1_0_apply_P2ElementwiseEpsilonIcosahedralShellMap_1_0_macro_3D.cpp + avx/P2ElementwiseEpsilonIcosahedralShellMap_1_1_apply_P2ElementwiseEpsilonIcosahedralShellMap_1_1_macro_3D.cpp + avx/P2ElementwiseEpsilonIcosahedralShellMap_1_1_computeInverseDiagonalOperatorValues_P2ElementwiseEpsilonIcosahedralShellMap_1_1_macro_3D.cpp + avx/P2ElementwiseEpsilonIcosahedralShellMap_1_2_apply_P2ElementwiseEpsilonIcosahedralShellMap_1_2_macro_3D.cpp + avx/P2ElementwiseEpsilonIcosahedralShellMap_2_0_apply_P2ElementwiseEpsilonIcosahedralShellMap_2_0_macro_3D.cpp + avx/P2ElementwiseEpsilonIcosahedralShellMap_2_1_apply_P2ElementwiseEpsilonIcosahedralShellMap_2_1_macro_3D.cpp + avx/P2ElementwiseEpsilonIcosahedralShellMap_2_2_apply_P2ElementwiseEpsilonIcosahedralShellMap_2_2_macro_3D.cpp + avx/P2ElementwiseEpsilonIcosahedralShellMap_2_2_computeInverseDiagonalOperatorValues_P2ElementwiseEpsilonIcosahedralShellMap_2_2_macro_3D.cpp + avx/P2ElementwiseEpsilon_0_0_apply_P2ElementwiseEpsilon_0_0_macro_2D.cpp + avx/P2ElementwiseEpsilon_0_0_apply_P2ElementwiseEpsilon_0_0_macro_3D.cpp + avx/P2ElementwiseEpsilon_0_0_computeInverseDiagonalOperatorValues_P2ElementwiseEpsilon_0_0_macro_2D.cpp + avx/P2ElementwiseEpsilon_0_0_computeInverseDiagonalOperatorValues_P2ElementwiseEpsilon_0_0_macro_3D.cpp + avx/P2ElementwiseEpsilon_0_1_apply_P2ElementwiseEpsilon_0_1_macro_2D.cpp + avx/P2ElementwiseEpsilon_0_1_apply_P2ElementwiseEpsilon_0_1_macro_3D.cpp + avx/P2ElementwiseEpsilon_0_2_apply_P2ElementwiseEpsilon_0_2_macro_3D.cpp + avx/P2ElementwiseEpsilon_1_0_apply_P2ElementwiseEpsilon_1_0_macro_2D.cpp + avx/P2ElementwiseEpsilon_1_0_apply_P2ElementwiseEpsilon_1_0_macro_3D.cpp + avx/P2ElementwiseEpsilon_1_1_apply_P2ElementwiseEpsilon_1_1_macro_2D.cpp + avx/P2ElementwiseEpsilon_1_1_apply_P2ElementwiseEpsilon_1_1_macro_3D.cpp + avx/P2ElementwiseEpsilon_1_1_computeInverseDiagonalOperatorValues_P2ElementwiseEpsilon_1_1_macro_2D.cpp + avx/P2ElementwiseEpsilon_1_1_computeInverseDiagonalOperatorValues_P2ElementwiseEpsilon_1_1_macro_3D.cpp + avx/P2ElementwiseEpsilon_1_2_apply_P2ElementwiseEpsilon_1_2_macro_3D.cpp + avx/P2ElementwiseEpsilon_2_0_apply_P2ElementwiseEpsilon_2_0_macro_3D.cpp + avx/P2ElementwiseEpsilon_2_1_apply_P2ElementwiseEpsilon_2_1_macro_3D.cpp + avx/P2ElementwiseEpsilon_2_2_apply_P2ElementwiseEpsilon_2_2_macro_3D.cpp + avx/P2ElementwiseEpsilon_2_2_computeInverseDiagonalOperatorValues_P2ElementwiseEpsilon_2_2_macro_3D.cpp PROPERTIES COMPILE_OPTIONS ${HYTEG_COMPILER_NATIVE_FLAGS} ) @@ -161,68 +161,68 @@ else() target_sources(opgen-epsilon PRIVATE - noarch/P2ElementwiseEpsilonAnnulusMap_0_0_apply_macro_2D.cpp - noarch/P2ElementwiseEpsilonAnnulusMap_0_0_computeInverseDiagonalOperatorValues_macro_2D.cpp - noarch/P2ElementwiseEpsilonAnnulusMap_0_0_toMatrix_macro_2D.cpp - noarch/P2ElementwiseEpsilonAnnulusMap_0_1_apply_macro_2D.cpp - noarch/P2ElementwiseEpsilonAnnulusMap_0_1_toMatrix_macro_2D.cpp - noarch/P2ElementwiseEpsilonAnnulusMap_1_0_apply_macro_2D.cpp - noarch/P2ElementwiseEpsilonAnnulusMap_1_0_toMatrix_macro_2D.cpp - noarch/P2ElementwiseEpsilonAnnulusMap_1_1_apply_macro_2D.cpp - noarch/P2ElementwiseEpsilonAnnulusMap_1_1_computeInverseDiagonalOperatorValues_macro_2D.cpp - noarch/P2ElementwiseEpsilonAnnulusMap_1_1_toMatrix_macro_2D.cpp - noarch/P2ElementwiseEpsilonIcosahedralShellMap_0_0_apply_macro_3D.cpp - noarch/P2ElementwiseEpsilonIcosahedralShellMap_0_0_computeInverseDiagonalOperatorValues_macro_3D.cpp - noarch/P2ElementwiseEpsilonIcosahedralShellMap_0_0_toMatrix_macro_3D.cpp - noarch/P2ElementwiseEpsilonIcosahedralShellMap_0_1_apply_macro_3D.cpp - noarch/P2ElementwiseEpsilonIcosahedralShellMap_0_1_toMatrix_macro_3D.cpp - noarch/P2ElementwiseEpsilonIcosahedralShellMap_0_2_apply_macro_3D.cpp - noarch/P2ElementwiseEpsilonIcosahedralShellMap_0_2_toMatrix_macro_3D.cpp - noarch/P2ElementwiseEpsilonIcosahedralShellMap_1_0_apply_macro_3D.cpp - noarch/P2ElementwiseEpsilonIcosahedralShellMap_1_0_toMatrix_macro_3D.cpp - noarch/P2ElementwiseEpsilonIcosahedralShellMap_1_1_apply_macro_3D.cpp - noarch/P2ElementwiseEpsilonIcosahedralShellMap_1_1_computeInverseDiagonalOperatorValues_macro_3D.cpp - noarch/P2ElementwiseEpsilonIcosahedralShellMap_1_1_toMatrix_macro_3D.cpp - noarch/P2ElementwiseEpsilonIcosahedralShellMap_1_2_apply_macro_3D.cpp - noarch/P2ElementwiseEpsilonIcosahedralShellMap_1_2_toMatrix_macro_3D.cpp - noarch/P2ElementwiseEpsilonIcosahedralShellMap_2_0_apply_macro_3D.cpp - noarch/P2ElementwiseEpsilonIcosahedralShellMap_2_0_toMatrix_macro_3D.cpp - noarch/P2ElementwiseEpsilonIcosahedralShellMap_2_1_apply_macro_3D.cpp - noarch/P2ElementwiseEpsilonIcosahedralShellMap_2_1_toMatrix_macro_3D.cpp - noarch/P2ElementwiseEpsilonIcosahedralShellMap_2_2_apply_macro_3D.cpp - noarch/P2ElementwiseEpsilonIcosahedralShellMap_2_2_computeInverseDiagonalOperatorValues_macro_3D.cpp - noarch/P2ElementwiseEpsilonIcosahedralShellMap_2_2_toMatrix_macro_3D.cpp - noarch/P2ElementwiseEpsilon_0_0_apply_macro_2D.cpp - noarch/P2ElementwiseEpsilon_0_0_apply_macro_3D.cpp - noarch/P2ElementwiseEpsilon_0_0_computeInverseDiagonalOperatorValues_macro_2D.cpp - noarch/P2ElementwiseEpsilon_0_0_computeInverseDiagonalOperatorValues_macro_3D.cpp - noarch/P2ElementwiseEpsilon_0_0_toMatrix_macro_2D.cpp - noarch/P2ElementwiseEpsilon_0_0_toMatrix_macro_3D.cpp - noarch/P2ElementwiseEpsilon_0_1_apply_macro_2D.cpp - noarch/P2ElementwiseEpsilon_0_1_apply_macro_3D.cpp - noarch/P2ElementwiseEpsilon_0_1_toMatrix_macro_2D.cpp - noarch/P2ElementwiseEpsilon_0_1_toMatrix_macro_3D.cpp - noarch/P2ElementwiseEpsilon_0_2_apply_macro_3D.cpp - noarch/P2ElementwiseEpsilon_0_2_toMatrix_macro_3D.cpp - noarch/P2ElementwiseEpsilon_1_0_apply_macro_2D.cpp - noarch/P2ElementwiseEpsilon_1_0_apply_macro_3D.cpp - noarch/P2ElementwiseEpsilon_1_0_toMatrix_macro_2D.cpp - noarch/P2ElementwiseEpsilon_1_0_toMatrix_macro_3D.cpp - noarch/P2ElementwiseEpsilon_1_1_apply_macro_2D.cpp - noarch/P2ElementwiseEpsilon_1_1_apply_macro_3D.cpp - noarch/P2ElementwiseEpsilon_1_1_computeInverseDiagonalOperatorValues_macro_2D.cpp - noarch/P2ElementwiseEpsilon_1_1_computeInverseDiagonalOperatorValues_macro_3D.cpp - noarch/P2ElementwiseEpsilon_1_1_toMatrix_macro_2D.cpp - noarch/P2ElementwiseEpsilon_1_1_toMatrix_macro_3D.cpp - noarch/P2ElementwiseEpsilon_1_2_apply_macro_3D.cpp - noarch/P2ElementwiseEpsilon_1_2_toMatrix_macro_3D.cpp - noarch/P2ElementwiseEpsilon_2_0_apply_macro_3D.cpp - noarch/P2ElementwiseEpsilon_2_0_toMatrix_macro_3D.cpp - noarch/P2ElementwiseEpsilon_2_1_apply_macro_3D.cpp - noarch/P2ElementwiseEpsilon_2_1_toMatrix_macro_3D.cpp - noarch/P2ElementwiseEpsilon_2_2_apply_macro_3D.cpp - noarch/P2ElementwiseEpsilon_2_2_computeInverseDiagonalOperatorValues_macro_3D.cpp - noarch/P2ElementwiseEpsilon_2_2_toMatrix_macro_3D.cpp + noarch/P2ElementwiseEpsilonAnnulusMap_0_0_apply_P2ElementwiseEpsilonAnnulusMap_0_0_macro_2D.cpp + noarch/P2ElementwiseEpsilonAnnulusMap_0_0_computeInverseDiagonalOperatorValues_P2ElementwiseEpsilonAnnulusMap_0_0_macro_2D.cpp + noarch/P2ElementwiseEpsilonAnnulusMap_0_0_toMatrix_P2ElementwiseEpsilonAnnulusMap_0_0_macro_2D.cpp + noarch/P2ElementwiseEpsilonAnnulusMap_0_1_apply_P2ElementwiseEpsilonAnnulusMap_0_1_macro_2D.cpp + noarch/P2ElementwiseEpsilonAnnulusMap_0_1_toMatrix_P2ElementwiseEpsilonAnnulusMap_0_1_macro_2D.cpp + noarch/P2ElementwiseEpsilonAnnulusMap_1_0_apply_P2ElementwiseEpsilonAnnulusMap_1_0_macro_2D.cpp + noarch/P2ElementwiseEpsilonAnnulusMap_1_0_toMatrix_P2ElementwiseEpsilonAnnulusMap_1_0_macro_2D.cpp + noarch/P2ElementwiseEpsilonAnnulusMap_1_1_apply_P2ElementwiseEpsilonAnnulusMap_1_1_macro_2D.cpp + noarch/P2ElementwiseEpsilonAnnulusMap_1_1_computeInverseDiagonalOperatorValues_P2ElementwiseEpsilonAnnulusMap_1_1_macro_2D.cpp + noarch/P2ElementwiseEpsilonAnnulusMap_1_1_toMatrix_P2ElementwiseEpsilonAnnulusMap_1_1_macro_2D.cpp + noarch/P2ElementwiseEpsilonIcosahedralShellMap_0_0_apply_P2ElementwiseEpsilonIcosahedralShellMap_0_0_macro_3D.cpp + noarch/P2ElementwiseEpsilonIcosahedralShellMap_0_0_computeInverseDiagonalOperatorValues_P2ElementwiseEpsilonIcosahedralShellMap_0_0_macro_3D.cpp + noarch/P2ElementwiseEpsilonIcosahedralShellMap_0_0_toMatrix_P2ElementwiseEpsilonIcosahedralShellMap_0_0_macro_3D.cpp + noarch/P2ElementwiseEpsilonIcosahedralShellMap_0_1_apply_P2ElementwiseEpsilonIcosahedralShellMap_0_1_macro_3D.cpp + noarch/P2ElementwiseEpsilonIcosahedralShellMap_0_1_toMatrix_P2ElementwiseEpsilonIcosahedralShellMap_0_1_macro_3D.cpp + noarch/P2ElementwiseEpsilonIcosahedralShellMap_0_2_apply_P2ElementwiseEpsilonIcosahedralShellMap_0_2_macro_3D.cpp + noarch/P2ElementwiseEpsilonIcosahedralShellMap_0_2_toMatrix_P2ElementwiseEpsilonIcosahedralShellMap_0_2_macro_3D.cpp + noarch/P2ElementwiseEpsilonIcosahedralShellMap_1_0_apply_P2ElementwiseEpsilonIcosahedralShellMap_1_0_macro_3D.cpp + noarch/P2ElementwiseEpsilonIcosahedralShellMap_1_0_toMatrix_P2ElementwiseEpsilonIcosahedralShellMap_1_0_macro_3D.cpp + noarch/P2ElementwiseEpsilonIcosahedralShellMap_1_1_apply_P2ElementwiseEpsilonIcosahedralShellMap_1_1_macro_3D.cpp + noarch/P2ElementwiseEpsilonIcosahedralShellMap_1_1_computeInverseDiagonalOperatorValues_P2ElementwiseEpsilonIcosahedralShellMap_1_1_macro_3D.cpp + noarch/P2ElementwiseEpsilonIcosahedralShellMap_1_1_toMatrix_P2ElementwiseEpsilonIcosahedralShellMap_1_1_macro_3D.cpp + noarch/P2ElementwiseEpsilonIcosahedralShellMap_1_2_apply_P2ElementwiseEpsilonIcosahedralShellMap_1_2_macro_3D.cpp + noarch/P2ElementwiseEpsilonIcosahedralShellMap_1_2_toMatrix_P2ElementwiseEpsilonIcosahedralShellMap_1_2_macro_3D.cpp + noarch/P2ElementwiseEpsilonIcosahedralShellMap_2_0_apply_P2ElementwiseEpsilonIcosahedralShellMap_2_0_macro_3D.cpp + noarch/P2ElementwiseEpsilonIcosahedralShellMap_2_0_toMatrix_P2ElementwiseEpsilonIcosahedralShellMap_2_0_macro_3D.cpp + noarch/P2ElementwiseEpsilonIcosahedralShellMap_2_1_apply_P2ElementwiseEpsilonIcosahedralShellMap_2_1_macro_3D.cpp + noarch/P2ElementwiseEpsilonIcosahedralShellMap_2_1_toMatrix_P2ElementwiseEpsilonIcosahedralShellMap_2_1_macro_3D.cpp + noarch/P2ElementwiseEpsilonIcosahedralShellMap_2_2_apply_P2ElementwiseEpsilonIcosahedralShellMap_2_2_macro_3D.cpp + noarch/P2ElementwiseEpsilonIcosahedralShellMap_2_2_computeInverseDiagonalOperatorValues_P2ElementwiseEpsilonIcosahedralShellMap_2_2_macro_3D.cpp + noarch/P2ElementwiseEpsilonIcosahedralShellMap_2_2_toMatrix_P2ElementwiseEpsilonIcosahedralShellMap_2_2_macro_3D.cpp + noarch/P2ElementwiseEpsilon_0_0_apply_P2ElementwiseEpsilon_0_0_macro_2D.cpp + noarch/P2ElementwiseEpsilon_0_0_apply_P2ElementwiseEpsilon_0_0_macro_3D.cpp + noarch/P2ElementwiseEpsilon_0_0_computeInverseDiagonalOperatorValues_P2ElementwiseEpsilon_0_0_macro_2D.cpp + noarch/P2ElementwiseEpsilon_0_0_computeInverseDiagonalOperatorValues_P2ElementwiseEpsilon_0_0_macro_3D.cpp + noarch/P2ElementwiseEpsilon_0_0_toMatrix_P2ElementwiseEpsilon_0_0_macro_2D.cpp + noarch/P2ElementwiseEpsilon_0_0_toMatrix_P2ElementwiseEpsilon_0_0_macro_3D.cpp + noarch/P2ElementwiseEpsilon_0_1_apply_P2ElementwiseEpsilon_0_1_macro_2D.cpp + noarch/P2ElementwiseEpsilon_0_1_apply_P2ElementwiseEpsilon_0_1_macro_3D.cpp + noarch/P2ElementwiseEpsilon_0_1_toMatrix_P2ElementwiseEpsilon_0_1_macro_2D.cpp + noarch/P2ElementwiseEpsilon_0_1_toMatrix_P2ElementwiseEpsilon_0_1_macro_3D.cpp + noarch/P2ElementwiseEpsilon_0_2_apply_P2ElementwiseEpsilon_0_2_macro_3D.cpp + noarch/P2ElementwiseEpsilon_0_2_toMatrix_P2ElementwiseEpsilon_0_2_macro_3D.cpp + noarch/P2ElementwiseEpsilon_1_0_apply_P2ElementwiseEpsilon_1_0_macro_2D.cpp + noarch/P2ElementwiseEpsilon_1_0_apply_P2ElementwiseEpsilon_1_0_macro_3D.cpp + noarch/P2ElementwiseEpsilon_1_0_toMatrix_P2ElementwiseEpsilon_1_0_macro_2D.cpp + noarch/P2ElementwiseEpsilon_1_0_toMatrix_P2ElementwiseEpsilon_1_0_macro_3D.cpp + noarch/P2ElementwiseEpsilon_1_1_apply_P2ElementwiseEpsilon_1_1_macro_2D.cpp + noarch/P2ElementwiseEpsilon_1_1_apply_P2ElementwiseEpsilon_1_1_macro_3D.cpp + noarch/P2ElementwiseEpsilon_1_1_computeInverseDiagonalOperatorValues_P2ElementwiseEpsilon_1_1_macro_2D.cpp + noarch/P2ElementwiseEpsilon_1_1_computeInverseDiagonalOperatorValues_P2ElementwiseEpsilon_1_1_macro_3D.cpp + noarch/P2ElementwiseEpsilon_1_1_toMatrix_P2ElementwiseEpsilon_1_1_macro_2D.cpp + noarch/P2ElementwiseEpsilon_1_1_toMatrix_P2ElementwiseEpsilon_1_1_macro_3D.cpp + noarch/P2ElementwiseEpsilon_1_2_apply_P2ElementwiseEpsilon_1_2_macro_3D.cpp + noarch/P2ElementwiseEpsilon_1_2_toMatrix_P2ElementwiseEpsilon_1_2_macro_3D.cpp + noarch/P2ElementwiseEpsilon_2_0_apply_P2ElementwiseEpsilon_2_0_macro_3D.cpp + noarch/P2ElementwiseEpsilon_2_0_toMatrix_P2ElementwiseEpsilon_2_0_macro_3D.cpp + noarch/P2ElementwiseEpsilon_2_1_apply_P2ElementwiseEpsilon_2_1_macro_3D.cpp + noarch/P2ElementwiseEpsilon_2_1_toMatrix_P2ElementwiseEpsilon_2_1_macro_3D.cpp + noarch/P2ElementwiseEpsilon_2_2_apply_P2ElementwiseEpsilon_2_2_macro_3D.cpp + noarch/P2ElementwiseEpsilon_2_2_computeInverseDiagonalOperatorValues_P2ElementwiseEpsilon_2_2_macro_3D.cpp + noarch/P2ElementwiseEpsilon_2_2_toMatrix_P2ElementwiseEpsilon_2_2_macro_3D.cpp ) endif() diff --git a/operators/epsilon/P2ElementwiseEpsilonAnnulusMap_0_0.cpp b/operators/epsilon/P2ElementwiseEpsilonAnnulusMap_0_0.cpp index 192163745676002f03ac660f0369542d4e55d395..c957ab96327c97ff0241187e310f9bcccdedaa8e 100644 --- a/operators/epsilon/P2ElementwiseEpsilonAnnulusMap_0_0.cpp +++ b/operators/epsilon/P2ElementwiseEpsilonAnnulusMap_0_0.cpp @@ -149,7 +149,7 @@ void P2ElementwiseEpsilonAnnulusMap_0_0::apply( const P2Function< real_t >& src, this->timingTree_->start( "kernel" ); - apply_macro_2D( + apply_P2ElementwiseEpsilonAnnulusMap_0_0_macro_2D( _data_dstEdge, _data_dstVertex, @@ -173,6 +173,7 @@ void P2ElementwiseEpsilonAnnulusMap_0_0::apply( const P2Function< real_t >& src, refVertex_1, thrVertex_0, thrVertex_1 ); + this->timingTree_->stop( "kernel" ); } @@ -256,7 +257,7 @@ void P2ElementwiseEpsilonAnnulusMap_0_0::toMatrix( const std::shared_ptr< Sparse this->timingTree_->start( "kernel" ); - toMatrix_macro_2D( + toMatrix_P2ElementwiseEpsilonAnnulusMap_0_0_macro_2D( _data_dstEdge, _data_dstVertex, @@ -281,6 +282,7 @@ void P2ElementwiseEpsilonAnnulusMap_0_0::toMatrix( const std::shared_ptr< Sparse refVertex_1, thrVertex_0, thrVertex_1 ); + this->timingTree_->stop( "kernel" ); } } @@ -349,7 +351,7 @@ void P2ElementwiseEpsilonAnnulusMap_0_0::computeInverseDiagonalOperatorValues() this->timingTree_->start( "kernel" ); - computeInverseDiagonalOperatorValues_macro_2D( + computeInverseDiagonalOperatorValues_P2ElementwiseEpsilonAnnulusMap_0_0_macro_2D( _data_invDiag_Edge, _data_invDiag_Vertex, @@ -371,6 +373,7 @@ void P2ElementwiseEpsilonAnnulusMap_0_0::computeInverseDiagonalOperatorValues() refVertex_1, thrVertex_0, thrVertex_1 ); + this->timingTree_->stop( "kernel" ); } diff --git a/operators/epsilon/P2ElementwiseEpsilonAnnulusMap_0_0.hpp b/operators/epsilon/P2ElementwiseEpsilonAnnulusMap_0_0.hpp index 8dc175a87fea2a3e9b3f668504802db253805108..81612f446afd12b776bb123e17d0af139760a188 100644 --- a/operators/epsilon/P2ElementwiseEpsilonAnnulusMap_0_0.hpp +++ b/operators/epsilon/P2ElementwiseEpsilonAnnulusMap_0_0.hpp @@ -29,6 +29,7 @@ #include "core/DataTypes.h" #include "hyteg/LikwidWrapper.hpp" +#include "hyteg/boundary/BoundaryConditions.hpp" #include "hyteg/communication/Syncing.hpp" #include "hyteg/edgedofspace/EdgeDoFMacroCell.hpp" #include "hyteg/geometry/AnnulusMap.hpp" @@ -37,6 +38,7 @@ #include "hyteg/primitivestorage/PrimitiveStorage.hpp" #include "hyteg/solvers/Smoothables.hpp" #include "hyteg/sparseassembly/SparseMatrixProxy.hpp" +#include "hyteg/types/types.hpp" #define FUNC_PREFIX @@ -89,89 +91,103 @@ class P2ElementwiseEpsilonAnnulusMap_0_0 : public Operator< P2Function< real_t > protected: private: - /// Kernel type: apply + /// Integral: P2ElementwiseEpsilonAnnulusMap_0_0 + /// - volume element: triangle, dim: 2, vertices: 3, spacedim: 2 + /// - kernel type: apply + /// - loop strategy: SAWTOOTH /// - quadrature rule: Dunavant 3 | points: 4, degree: 3 + /// - blending map: AnnulusMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 756 1132 28 20 4 0 0 1 - void apply_macro_2D( real_t* RESTRICT _data_dstEdge, - real_t* RESTRICT _data_dstVertex, - real_t* RESTRICT _data_muEdge, - real_t* RESTRICT _data_muVertex, - real_t* RESTRICT _data_srcEdge, - real_t* RESTRICT _data_srcVertex, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float, - real_t radRayVertex, - real_t radRefVertex, - real_t rayVertex_0, - real_t rayVertex_1, - real_t refVertex_0, - real_t refVertex_1, - real_t thrVertex_0, - real_t thrVertex_1 ) const; - /// Kernel type: toMatrix + void apply_P2ElementwiseEpsilonAnnulusMap_0_0_macro_2D( real_t* RESTRICT _data_dstEdge, + real_t* RESTRICT _data_dstVertex, + real_t* RESTRICT _data_muEdge, + real_t* RESTRICT _data_muVertex, + real_t* RESTRICT _data_srcEdge, + real_t* RESTRICT _data_srcVertex, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float, + real_t radRayVertex, + real_t radRefVertex, + real_t rayVertex_0, + real_t rayVertex_1, + real_t refVertex_0, + real_t refVertex_1, + real_t thrVertex_0, + real_t thrVertex_1 ) const; + + /// Integral: P2ElementwiseEpsilonAnnulusMap_0_0 + /// - volume element: triangle, dim: 2, vertices: 3, spacedim: 2 + /// - kernel type: toMatrix + /// - loop strategy: SAWTOOTH /// - quadrature rule: Dunavant 3 | points: 4, degree: 3 + /// - blending map: AnnulusMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 720 1096 28 20 4 0 0 4 - void toMatrix_macro_2D( idx_t* RESTRICT _data_dstEdge, - idx_t* RESTRICT _data_dstVertex, - real_t* RESTRICT _data_muEdge, - real_t* RESTRICT _data_muVertex, - idx_t* RESTRICT _data_srcEdge, - idx_t* RESTRICT _data_srcVertex, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - std::shared_ptr< SparseMatrixProxy > mat, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float, - real_t radRayVertex, - real_t radRefVertex, - real_t rayVertex_0, - real_t rayVertex_1, - real_t refVertex_0, - real_t refVertex_1, - real_t thrVertex_0, - real_t thrVertex_1 ) const; - /// Kernel type: computeInverseDiagonalOperatorValues + void toMatrix_P2ElementwiseEpsilonAnnulusMap_0_0_macro_2D( idx_t* RESTRICT _data_dstEdge, + idx_t* RESTRICT _data_dstVertex, + real_t* RESTRICT _data_muEdge, + real_t* RESTRICT _data_muVertex, + idx_t* RESTRICT _data_srcEdge, + idx_t* RESTRICT _data_srcVertex, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + std::shared_ptr< SparseMatrixProxy > mat, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float, + real_t radRayVertex, + real_t radRefVertex, + real_t rayVertex_0, + real_t rayVertex_1, + real_t refVertex_0, + real_t refVertex_1, + real_t thrVertex_0, + real_t thrVertex_1 ) const; + + /// Integral: P2ElementwiseEpsilonAnnulusMap_0_0 + /// - volume element: triangle, dim: 2, vertices: 3, spacedim: 2 + /// - kernel type: computeInverseDiagonalOperatorValues + /// - loop strategy: SAWTOOTH /// - quadrature rule: Dunavant 3 | points: 4, degree: 3 + /// - blending map: AnnulusMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 546 916 28 20 4 0 0 1 - void computeInverseDiagonalOperatorValues_macro_2D( real_t* RESTRICT _data_invDiag_Edge, - real_t* RESTRICT _data_invDiag_Vertex, - real_t* RESTRICT _data_muEdge, - real_t* RESTRICT _data_muVertex, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float, - real_t radRayVertex, - real_t radRefVertex, - real_t rayVertex_0, - real_t rayVertex_1, - real_t refVertex_0, - real_t refVertex_1, - real_t thrVertex_0, - real_t thrVertex_1 ) const; + void computeInverseDiagonalOperatorValues_P2ElementwiseEpsilonAnnulusMap_0_0_macro_2D( real_t* RESTRICT _data_invDiag_Edge, + real_t* RESTRICT _data_invDiag_Vertex, + real_t* RESTRICT _data_muEdge, + real_t* RESTRICT _data_muVertex, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float, + real_t radRayVertex, + real_t radRefVertex, + real_t rayVertex_0, + real_t rayVertex_1, + real_t refVertex_0, + real_t refVertex_1, + real_t thrVertex_0, + real_t thrVertex_1 ) const; std::shared_ptr< P2Function< real_t > > invDiag_; P2Function< real_t > mu; diff --git a/operators/epsilon/P2ElementwiseEpsilonAnnulusMap_0_1.cpp b/operators/epsilon/P2ElementwiseEpsilonAnnulusMap_0_1.cpp index 64e79f28d61bef52cbc9ba7870689031acdd34bd..f95365a7f55479bc60f1c1cc09afa49d71a03df2 100644 --- a/operators/epsilon/P2ElementwiseEpsilonAnnulusMap_0_1.cpp +++ b/operators/epsilon/P2ElementwiseEpsilonAnnulusMap_0_1.cpp @@ -149,7 +149,7 @@ void P2ElementwiseEpsilonAnnulusMap_0_1::apply( const P2Function< real_t >& src, this->timingTree_->start( "kernel" ); - apply_macro_2D( + apply_P2ElementwiseEpsilonAnnulusMap_0_1_macro_2D( _data_dstEdge, _data_dstVertex, @@ -173,6 +173,7 @@ void P2ElementwiseEpsilonAnnulusMap_0_1::apply( const P2Function< real_t >& src, refVertex_1, thrVertex_0, thrVertex_1 ); + this->timingTree_->stop( "kernel" ); } @@ -256,7 +257,7 @@ void P2ElementwiseEpsilonAnnulusMap_0_1::toMatrix( const std::shared_ptr< Sparse this->timingTree_->start( "kernel" ); - toMatrix_macro_2D( + toMatrix_P2ElementwiseEpsilonAnnulusMap_0_1_macro_2D( _data_dstEdge, _data_dstVertex, @@ -281,6 +282,7 @@ void P2ElementwiseEpsilonAnnulusMap_0_1::toMatrix( const std::shared_ptr< Sparse refVertex_1, thrVertex_0, thrVertex_1 ); + this->timingTree_->stop( "kernel" ); } } diff --git a/operators/epsilon/P2ElementwiseEpsilonAnnulusMap_0_1.hpp b/operators/epsilon/P2ElementwiseEpsilonAnnulusMap_0_1.hpp index a14a2551188e6221ccb526d359da6e2edfda4a7c..a702fc802d846b0f72be004124b69ef4c329f107 100644 --- a/operators/epsilon/P2ElementwiseEpsilonAnnulusMap_0_1.hpp +++ b/operators/epsilon/P2ElementwiseEpsilonAnnulusMap_0_1.hpp @@ -29,6 +29,7 @@ #include "core/DataTypes.h" #include "hyteg/LikwidWrapper.hpp" +#include "hyteg/boundary/BoundaryConditions.hpp" #include "hyteg/communication/Syncing.hpp" #include "hyteg/edgedofspace/EdgeDoFMacroCell.hpp" #include "hyteg/geometry/AnnulusMap.hpp" @@ -36,6 +37,7 @@ #include "hyteg/p2functionspace/P2Function.hpp" #include "hyteg/primitivestorage/PrimitiveStorage.hpp" #include "hyteg/sparseassembly/SparseMatrixProxy.hpp" +#include "hyteg/types/types.hpp" #define FUNC_PREFIX @@ -83,63 +85,72 @@ class P2ElementwiseEpsilonAnnulusMap_0_1 : public Operator< P2Function< real_t > protected: private: - /// Kernel type: apply + /// Integral: P2ElementwiseEpsilonAnnulusMap_0_1 + /// - volume element: triangle, dim: 2, vertices: 3, spacedim: 2 + /// - kernel type: apply + /// - loop strategy: SAWTOOTH /// - quadrature rule: Dunavant 3 | points: 4, degree: 3 + /// - blending map: AnnulusMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 936 1192 28 20 4 0 0 1 - void apply_macro_2D( real_t* RESTRICT _data_dstEdge, - real_t* RESTRICT _data_dstVertex, - real_t* RESTRICT _data_muEdge, - real_t* RESTRICT _data_muVertex, - real_t* RESTRICT _data_srcEdge, - real_t* RESTRICT _data_srcVertex, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float, - real_t radRayVertex, - real_t radRefVertex, - real_t rayVertex_0, - real_t rayVertex_1, - real_t refVertex_0, - real_t refVertex_1, - real_t thrVertex_0, - real_t thrVertex_1 ) const; - /// Kernel type: toMatrix + void apply_P2ElementwiseEpsilonAnnulusMap_0_1_macro_2D( real_t* RESTRICT _data_dstEdge, + real_t* RESTRICT _data_dstVertex, + real_t* RESTRICT _data_muEdge, + real_t* RESTRICT _data_muVertex, + real_t* RESTRICT _data_srcEdge, + real_t* RESTRICT _data_srcVertex, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float, + real_t radRayVertex, + real_t radRefVertex, + real_t rayVertex_0, + real_t rayVertex_1, + real_t refVertex_0, + real_t refVertex_1, + real_t thrVertex_0, + real_t thrVertex_1 ) const; + + /// Integral: P2ElementwiseEpsilonAnnulusMap_0_1 + /// - volume element: triangle, dim: 2, vertices: 3, spacedim: 2 + /// - kernel type: toMatrix + /// - loop strategy: SAWTOOTH /// - quadrature rule: Dunavant 3 | points: 4, degree: 3 + /// - blending map: AnnulusMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 900 1156 28 20 4 0 0 4 - void toMatrix_macro_2D( idx_t* RESTRICT _data_dstEdge, - idx_t* RESTRICT _data_dstVertex, - real_t* RESTRICT _data_muEdge, - real_t* RESTRICT _data_muVertex, - idx_t* RESTRICT _data_srcEdge, - idx_t* RESTRICT _data_srcVertex, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - std::shared_ptr< SparseMatrixProxy > mat, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float, - real_t radRayVertex, - real_t radRefVertex, - real_t rayVertex_0, - real_t rayVertex_1, - real_t refVertex_0, - real_t refVertex_1, - real_t thrVertex_0, - real_t thrVertex_1 ) const; + void toMatrix_P2ElementwiseEpsilonAnnulusMap_0_1_macro_2D( idx_t* RESTRICT _data_dstEdge, + idx_t* RESTRICT _data_dstVertex, + real_t* RESTRICT _data_muEdge, + real_t* RESTRICT _data_muVertex, + idx_t* RESTRICT _data_srcEdge, + idx_t* RESTRICT _data_srcVertex, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + std::shared_ptr< SparseMatrixProxy > mat, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float, + real_t radRayVertex, + real_t radRefVertex, + real_t rayVertex_0, + real_t rayVertex_1, + real_t refVertex_0, + real_t refVertex_1, + real_t thrVertex_0, + real_t thrVertex_1 ) const; P2Function< real_t > mu; }; diff --git a/operators/epsilon/P2ElementwiseEpsilonAnnulusMap_1_0.cpp b/operators/epsilon/P2ElementwiseEpsilonAnnulusMap_1_0.cpp index 1dc47728ab95a0ed85407d3de25c7fc2cb2b5a9f..edd35d7bf3220f7f00234865978242ad7ff631e6 100644 --- a/operators/epsilon/P2ElementwiseEpsilonAnnulusMap_1_0.cpp +++ b/operators/epsilon/P2ElementwiseEpsilonAnnulusMap_1_0.cpp @@ -149,7 +149,7 @@ void P2ElementwiseEpsilonAnnulusMap_1_0::apply( const P2Function< real_t >& src, this->timingTree_->start( "kernel" ); - apply_macro_2D( + apply_P2ElementwiseEpsilonAnnulusMap_1_0_macro_2D( _data_dstEdge, _data_dstVertex, @@ -173,6 +173,7 @@ void P2ElementwiseEpsilonAnnulusMap_1_0::apply( const P2Function< real_t >& src, refVertex_1, thrVertex_0, thrVertex_1 ); + this->timingTree_->stop( "kernel" ); } @@ -256,7 +257,7 @@ void P2ElementwiseEpsilonAnnulusMap_1_0::toMatrix( const std::shared_ptr< Sparse this->timingTree_->start( "kernel" ); - toMatrix_macro_2D( + toMatrix_P2ElementwiseEpsilonAnnulusMap_1_0_macro_2D( _data_dstEdge, _data_dstVertex, @@ -281,6 +282,7 @@ void P2ElementwiseEpsilonAnnulusMap_1_0::toMatrix( const std::shared_ptr< Sparse refVertex_1, thrVertex_0, thrVertex_1 ); + this->timingTree_->stop( "kernel" ); } } diff --git a/operators/epsilon/P2ElementwiseEpsilonAnnulusMap_1_0.hpp b/operators/epsilon/P2ElementwiseEpsilonAnnulusMap_1_0.hpp index eba130ce75aab029b229851b9c019a805cdb917d..4d137d85375867d81ce87dd2bf5d81bd3441d3e8 100644 --- a/operators/epsilon/P2ElementwiseEpsilonAnnulusMap_1_0.hpp +++ b/operators/epsilon/P2ElementwiseEpsilonAnnulusMap_1_0.hpp @@ -29,6 +29,7 @@ #include "core/DataTypes.h" #include "hyteg/LikwidWrapper.hpp" +#include "hyteg/boundary/BoundaryConditions.hpp" #include "hyteg/communication/Syncing.hpp" #include "hyteg/edgedofspace/EdgeDoFMacroCell.hpp" #include "hyteg/geometry/AnnulusMap.hpp" @@ -36,6 +37,7 @@ #include "hyteg/p2functionspace/P2Function.hpp" #include "hyteg/primitivestorage/PrimitiveStorage.hpp" #include "hyteg/sparseassembly/SparseMatrixProxy.hpp" +#include "hyteg/types/types.hpp" #define FUNC_PREFIX @@ -83,63 +85,72 @@ class P2ElementwiseEpsilonAnnulusMap_1_0 : public Operator< P2Function< real_t > protected: private: - /// Kernel type: apply + /// Integral: P2ElementwiseEpsilonAnnulusMap_1_0 + /// - volume element: triangle, dim: 2, vertices: 3, spacedim: 2 + /// - kernel type: apply + /// - loop strategy: SAWTOOTH /// - quadrature rule: Dunavant 3 | points: 4, degree: 3 + /// - blending map: AnnulusMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 936 1192 28 20 4 0 0 1 - void apply_macro_2D( real_t* RESTRICT _data_dstEdge, - real_t* RESTRICT _data_dstVertex, - real_t* RESTRICT _data_muEdge, - real_t* RESTRICT _data_muVertex, - real_t* RESTRICT _data_srcEdge, - real_t* RESTRICT _data_srcVertex, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float, - real_t radRayVertex, - real_t radRefVertex, - real_t rayVertex_0, - real_t rayVertex_1, - real_t refVertex_0, - real_t refVertex_1, - real_t thrVertex_0, - real_t thrVertex_1 ) const; - /// Kernel type: toMatrix + void apply_P2ElementwiseEpsilonAnnulusMap_1_0_macro_2D( real_t* RESTRICT _data_dstEdge, + real_t* RESTRICT _data_dstVertex, + real_t* RESTRICT _data_muEdge, + real_t* RESTRICT _data_muVertex, + real_t* RESTRICT _data_srcEdge, + real_t* RESTRICT _data_srcVertex, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float, + real_t radRayVertex, + real_t radRefVertex, + real_t rayVertex_0, + real_t rayVertex_1, + real_t refVertex_0, + real_t refVertex_1, + real_t thrVertex_0, + real_t thrVertex_1 ) const; + + /// Integral: P2ElementwiseEpsilonAnnulusMap_1_0 + /// - volume element: triangle, dim: 2, vertices: 3, spacedim: 2 + /// - kernel type: toMatrix + /// - loop strategy: SAWTOOTH /// - quadrature rule: Dunavant 3 | points: 4, degree: 3 + /// - blending map: AnnulusMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 900 1156 28 20 4 0 0 4 - void toMatrix_macro_2D( idx_t* RESTRICT _data_dstEdge, - idx_t* RESTRICT _data_dstVertex, - real_t* RESTRICT _data_muEdge, - real_t* RESTRICT _data_muVertex, - idx_t* RESTRICT _data_srcEdge, - idx_t* RESTRICT _data_srcVertex, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - std::shared_ptr< SparseMatrixProxy > mat, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float, - real_t radRayVertex, - real_t radRefVertex, - real_t rayVertex_0, - real_t rayVertex_1, - real_t refVertex_0, - real_t refVertex_1, - real_t thrVertex_0, - real_t thrVertex_1 ) const; + void toMatrix_P2ElementwiseEpsilonAnnulusMap_1_0_macro_2D( idx_t* RESTRICT _data_dstEdge, + idx_t* RESTRICT _data_dstVertex, + real_t* RESTRICT _data_muEdge, + real_t* RESTRICT _data_muVertex, + idx_t* RESTRICT _data_srcEdge, + idx_t* RESTRICT _data_srcVertex, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + std::shared_ptr< SparseMatrixProxy > mat, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float, + real_t radRayVertex, + real_t radRefVertex, + real_t rayVertex_0, + real_t rayVertex_1, + real_t refVertex_0, + real_t refVertex_1, + real_t thrVertex_0, + real_t thrVertex_1 ) const; P2Function< real_t > mu; }; diff --git a/operators/epsilon/P2ElementwiseEpsilonAnnulusMap_1_1.cpp b/operators/epsilon/P2ElementwiseEpsilonAnnulusMap_1_1.cpp index e42063e853b628f308ac7ab84e3fbc01e18dabd3..3a421100cb90eb4f68a8336175538b687c64f674 100644 --- a/operators/epsilon/P2ElementwiseEpsilonAnnulusMap_1_1.cpp +++ b/operators/epsilon/P2ElementwiseEpsilonAnnulusMap_1_1.cpp @@ -149,7 +149,7 @@ void P2ElementwiseEpsilonAnnulusMap_1_1::apply( const P2Function< real_t >& src, this->timingTree_->start( "kernel" ); - apply_macro_2D( + apply_P2ElementwiseEpsilonAnnulusMap_1_1_macro_2D( _data_dstEdge, _data_dstVertex, @@ -173,6 +173,7 @@ void P2ElementwiseEpsilonAnnulusMap_1_1::apply( const P2Function< real_t >& src, refVertex_1, thrVertex_0, thrVertex_1 ); + this->timingTree_->stop( "kernel" ); } @@ -256,7 +257,7 @@ void P2ElementwiseEpsilonAnnulusMap_1_1::toMatrix( const std::shared_ptr< Sparse this->timingTree_->start( "kernel" ); - toMatrix_macro_2D( + toMatrix_P2ElementwiseEpsilonAnnulusMap_1_1_macro_2D( _data_dstEdge, _data_dstVertex, @@ -281,6 +282,7 @@ void P2ElementwiseEpsilonAnnulusMap_1_1::toMatrix( const std::shared_ptr< Sparse refVertex_1, thrVertex_0, thrVertex_1 ); + this->timingTree_->stop( "kernel" ); } } @@ -349,7 +351,7 @@ void P2ElementwiseEpsilonAnnulusMap_1_1::computeInverseDiagonalOperatorValues() this->timingTree_->start( "kernel" ); - computeInverseDiagonalOperatorValues_macro_2D( + computeInverseDiagonalOperatorValues_P2ElementwiseEpsilonAnnulusMap_1_1_macro_2D( _data_invDiag_Edge, _data_invDiag_Vertex, @@ -371,6 +373,7 @@ void P2ElementwiseEpsilonAnnulusMap_1_1::computeInverseDiagonalOperatorValues() refVertex_1, thrVertex_0, thrVertex_1 ); + this->timingTree_->stop( "kernel" ); } diff --git a/operators/epsilon/P2ElementwiseEpsilonAnnulusMap_1_1.hpp b/operators/epsilon/P2ElementwiseEpsilonAnnulusMap_1_1.hpp index 8233c00ee8c953aea41a1e3210233a3c4b8067c8..8c85f61090ca7408a0846cafc5b006c1cec9e527 100644 --- a/operators/epsilon/P2ElementwiseEpsilonAnnulusMap_1_1.hpp +++ b/operators/epsilon/P2ElementwiseEpsilonAnnulusMap_1_1.hpp @@ -29,6 +29,7 @@ #include "core/DataTypes.h" #include "hyteg/LikwidWrapper.hpp" +#include "hyteg/boundary/BoundaryConditions.hpp" #include "hyteg/communication/Syncing.hpp" #include "hyteg/edgedofspace/EdgeDoFMacroCell.hpp" #include "hyteg/geometry/AnnulusMap.hpp" @@ -37,6 +38,7 @@ #include "hyteg/primitivestorage/PrimitiveStorage.hpp" #include "hyteg/solvers/Smoothables.hpp" #include "hyteg/sparseassembly/SparseMatrixProxy.hpp" +#include "hyteg/types/types.hpp" #define FUNC_PREFIX @@ -89,89 +91,103 @@ class P2ElementwiseEpsilonAnnulusMap_1_1 : public Operator< P2Function< real_t > protected: private: - /// Kernel type: apply + /// Integral: P2ElementwiseEpsilonAnnulusMap_1_1 + /// - volume element: triangle, dim: 2, vertices: 3, spacedim: 2 + /// - kernel type: apply + /// - loop strategy: SAWTOOTH /// - quadrature rule: Dunavant 3 | points: 4, degree: 3 + /// - blending map: AnnulusMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 756 1132 28 20 4 0 0 1 - void apply_macro_2D( real_t* RESTRICT _data_dstEdge, - real_t* RESTRICT _data_dstVertex, - real_t* RESTRICT _data_muEdge, - real_t* RESTRICT _data_muVertex, - real_t* RESTRICT _data_srcEdge, - real_t* RESTRICT _data_srcVertex, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float, - real_t radRayVertex, - real_t radRefVertex, - real_t rayVertex_0, - real_t rayVertex_1, - real_t refVertex_0, - real_t refVertex_1, - real_t thrVertex_0, - real_t thrVertex_1 ) const; - /// Kernel type: toMatrix + void apply_P2ElementwiseEpsilonAnnulusMap_1_1_macro_2D( real_t* RESTRICT _data_dstEdge, + real_t* RESTRICT _data_dstVertex, + real_t* RESTRICT _data_muEdge, + real_t* RESTRICT _data_muVertex, + real_t* RESTRICT _data_srcEdge, + real_t* RESTRICT _data_srcVertex, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float, + real_t radRayVertex, + real_t radRefVertex, + real_t rayVertex_0, + real_t rayVertex_1, + real_t refVertex_0, + real_t refVertex_1, + real_t thrVertex_0, + real_t thrVertex_1 ) const; + + /// Integral: P2ElementwiseEpsilonAnnulusMap_1_1 + /// - volume element: triangle, dim: 2, vertices: 3, spacedim: 2 + /// - kernel type: toMatrix + /// - loop strategy: SAWTOOTH /// - quadrature rule: Dunavant 3 | points: 4, degree: 3 + /// - blending map: AnnulusMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 720 1096 28 20 4 0 0 4 - void toMatrix_macro_2D( idx_t* RESTRICT _data_dstEdge, - idx_t* RESTRICT _data_dstVertex, - real_t* RESTRICT _data_muEdge, - real_t* RESTRICT _data_muVertex, - idx_t* RESTRICT _data_srcEdge, - idx_t* RESTRICT _data_srcVertex, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - std::shared_ptr< SparseMatrixProxy > mat, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float, - real_t radRayVertex, - real_t radRefVertex, - real_t rayVertex_0, - real_t rayVertex_1, - real_t refVertex_0, - real_t refVertex_1, - real_t thrVertex_0, - real_t thrVertex_1 ) const; - /// Kernel type: computeInverseDiagonalOperatorValues + void toMatrix_P2ElementwiseEpsilonAnnulusMap_1_1_macro_2D( idx_t* RESTRICT _data_dstEdge, + idx_t* RESTRICT _data_dstVertex, + real_t* RESTRICT _data_muEdge, + real_t* RESTRICT _data_muVertex, + idx_t* RESTRICT _data_srcEdge, + idx_t* RESTRICT _data_srcVertex, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + std::shared_ptr< SparseMatrixProxy > mat, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float, + real_t radRayVertex, + real_t radRefVertex, + real_t rayVertex_0, + real_t rayVertex_1, + real_t refVertex_0, + real_t refVertex_1, + real_t thrVertex_0, + real_t thrVertex_1 ) const; + + /// Integral: P2ElementwiseEpsilonAnnulusMap_1_1 + /// - volume element: triangle, dim: 2, vertices: 3, spacedim: 2 + /// - kernel type: computeInverseDiagonalOperatorValues + /// - loop strategy: SAWTOOTH /// - quadrature rule: Dunavant 3 | points: 4, degree: 3 + /// - blending map: AnnulusMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 546 916 28 20 4 0 0 1 - void computeInverseDiagonalOperatorValues_macro_2D( real_t* RESTRICT _data_invDiag_Edge, - real_t* RESTRICT _data_invDiag_Vertex, - real_t* RESTRICT _data_muEdge, - real_t* RESTRICT _data_muVertex, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float, - real_t radRayVertex, - real_t radRefVertex, - real_t rayVertex_0, - real_t rayVertex_1, - real_t refVertex_0, - real_t refVertex_1, - real_t thrVertex_0, - real_t thrVertex_1 ) const; + void computeInverseDiagonalOperatorValues_P2ElementwiseEpsilonAnnulusMap_1_1_macro_2D( real_t* RESTRICT _data_invDiag_Edge, + real_t* RESTRICT _data_invDiag_Vertex, + real_t* RESTRICT _data_muEdge, + real_t* RESTRICT _data_muVertex, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float, + real_t radRayVertex, + real_t radRefVertex, + real_t rayVertex_0, + real_t rayVertex_1, + real_t refVertex_0, + real_t refVertex_1, + real_t thrVertex_0, + real_t thrVertex_1 ) const; std::shared_ptr< P2Function< real_t > > invDiag_; P2Function< real_t > mu; diff --git a/operators/epsilon/P2ElementwiseEpsilonIcosahedralShellMap_0_0.cpp b/operators/epsilon/P2ElementwiseEpsilonIcosahedralShellMap_0_0.cpp index a65fdf3f137fd2a91eafd8f07d2f31f6209526b3..ad2f1db1328c1c4c0a204311c7ca2f472ae8e1ad 100644 --- a/operators/epsilon/P2ElementwiseEpsilonIcosahedralShellMap_0_0.cpp +++ b/operators/epsilon/P2ElementwiseEpsilonIcosahedralShellMap_0_0.cpp @@ -154,7 +154,7 @@ void P2ElementwiseEpsilonIcosahedralShellMap_0_0::apply( const P2Function< real_ this->timingTree_->start( "kernel" ); - apply_macro_3D( + apply_P2ElementwiseEpsilonIcosahedralShellMap_0_0_macro_3D( _data_dstEdge, _data_dstVertex, @@ -190,6 +190,7 @@ void P2ElementwiseEpsilonIcosahedralShellMap_0_0::apply( const P2Function< real_ thrVertex_0, thrVertex_1, thrVertex_2 ); + this->timingTree_->stop( "kernel" ); } @@ -285,7 +286,7 @@ void P2ElementwiseEpsilonIcosahedralShellMap_0_0::toMatrix( const std::shared_pt this->timingTree_->start( "kernel" ); - toMatrix_macro_3D( + toMatrix_P2ElementwiseEpsilonIcosahedralShellMap_0_0_macro_3D( _data_dstEdge, _data_dstVertex, @@ -322,6 +323,7 @@ void P2ElementwiseEpsilonIcosahedralShellMap_0_0::toMatrix( const std::shared_pt thrVertex_0, thrVertex_1, thrVertex_2 ); + this->timingTree_->stop( "kernel" ); } } @@ -401,7 +403,7 @@ void P2ElementwiseEpsilonIcosahedralShellMap_0_0::computeInverseDiagonalOperator this->timingTree_->start( "kernel" ); - computeInverseDiagonalOperatorValues_macro_3D( + computeInverseDiagonalOperatorValues_P2ElementwiseEpsilonIcosahedralShellMap_0_0_macro_3D( _data_invDiag_Edge, _data_invDiag_Vertex, @@ -435,6 +437,7 @@ void P2ElementwiseEpsilonIcosahedralShellMap_0_0::computeInverseDiagonalOperator thrVertex_0, thrVertex_1, thrVertex_2 ); + this->timingTree_->stop( "kernel" ); } diff --git a/operators/epsilon/P2ElementwiseEpsilonIcosahedralShellMap_0_0.hpp b/operators/epsilon/P2ElementwiseEpsilonIcosahedralShellMap_0_0.hpp index d7ff98d640afc6796dc908169554d3d20179cf78..28b2a4068b05bfa915b6913f7943f39080a3372e 100644 --- a/operators/epsilon/P2ElementwiseEpsilonIcosahedralShellMap_0_0.hpp +++ b/operators/epsilon/P2ElementwiseEpsilonIcosahedralShellMap_0_0.hpp @@ -29,6 +29,7 @@ #include "core/DataTypes.h" #include "hyteg/LikwidWrapper.hpp" +#include "hyteg/boundary/BoundaryConditions.hpp" #include "hyteg/communication/Syncing.hpp" #include "hyteg/edgedofspace/EdgeDoFMacroCell.hpp" #include "hyteg/geometry/IcosahedralShellMap.hpp" @@ -37,6 +38,7 @@ #include "hyteg/primitivestorage/PrimitiveStorage.hpp" #include "hyteg/solvers/Smoothables.hpp" #include "hyteg/sparseassembly/SparseMatrixProxy.hpp" +#include "hyteg/types/types.hpp" #define FUNC_PREFIX @@ -89,125 +91,140 @@ class P2ElementwiseEpsilonIcosahedralShellMap_0_0 : public Operator< P2Function< protected: private: - /// Kernel type: apply + /// Integral: P2ElementwiseEpsilonIcosahedralShellMap_0_0 + /// - volume element: tetrahedron, dim: 3, vertices: 4, spacedim: 3 + /// - kernel type: apply + /// - loop strategy: SAWTOOTH /// - quadrature rule: Hammer-Marlowe-Stroud 3 | points: 5, degree: 3 + /// - blending map: IcosahedralShellMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 3038 4397 66 15 5 0 0 1 - void apply_macro_3D( real_t* RESTRICT _data_dstEdge, - real_t* RESTRICT _data_dstVertex, - real_t* RESTRICT _data_muEdge, - real_t* RESTRICT _data_muVertex, - real_t* RESTRICT _data_srcEdge, - real_t* RESTRICT _data_srcVertex, - real_t forVertex_0, - real_t forVertex_1, - real_t forVertex_2, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_0comp2, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_1comp2, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - real_t macro_vertex_coord_id_2comp2, - real_t macro_vertex_coord_id_3comp0, - real_t macro_vertex_coord_id_3comp1, - real_t macro_vertex_coord_id_3comp2, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float, - real_t radRayVertex, - real_t radRefVertex, - real_t rayVertex_0, - real_t rayVertex_1, - real_t rayVertex_2, - real_t refVertex_0, - real_t refVertex_1, - real_t refVertex_2, - real_t thrVertex_0, - real_t thrVertex_1, - real_t thrVertex_2 ) const; - /// Kernel type: toMatrix + void apply_P2ElementwiseEpsilonIcosahedralShellMap_0_0_macro_3D( real_t* RESTRICT _data_dstEdge, + real_t* RESTRICT _data_dstVertex, + real_t* RESTRICT _data_muEdge, + real_t* RESTRICT _data_muVertex, + real_t* RESTRICT _data_srcEdge, + real_t* RESTRICT _data_srcVertex, + real_t forVertex_0, + real_t forVertex_1, + real_t forVertex_2, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_0comp2, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_1comp2, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + real_t macro_vertex_coord_id_2comp2, + real_t macro_vertex_coord_id_3comp0, + real_t macro_vertex_coord_id_3comp1, + real_t macro_vertex_coord_id_3comp2, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float, + real_t radRayVertex, + real_t radRefVertex, + real_t rayVertex_0, + real_t rayVertex_1, + real_t rayVertex_2, + real_t refVertex_0, + real_t refVertex_1, + real_t refVertex_2, + real_t thrVertex_0, + real_t thrVertex_1, + real_t thrVertex_2 ) const; + + /// Integral: P2ElementwiseEpsilonIcosahedralShellMap_0_0 + /// - volume element: tetrahedron, dim: 3, vertices: 4, spacedim: 3 + /// - kernel type: toMatrix + /// - loop strategy: SAWTOOTH /// - quadrature rule: Hammer-Marlowe-Stroud 3 | points: 5, degree: 3 + /// - blending map: IcosahedralShellMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 2938 4297 66 15 5 0 0 4 - void toMatrix_macro_3D( idx_t* RESTRICT _data_dstEdge, - idx_t* RESTRICT _data_dstVertex, - real_t* RESTRICT _data_muEdge, - real_t* RESTRICT _data_muVertex, - idx_t* RESTRICT _data_srcEdge, - idx_t* RESTRICT _data_srcVertex, - real_t forVertex_0, - real_t forVertex_1, - real_t forVertex_2, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_0comp2, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_1comp2, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - real_t macro_vertex_coord_id_2comp2, - real_t macro_vertex_coord_id_3comp0, - real_t macro_vertex_coord_id_3comp1, - real_t macro_vertex_coord_id_3comp2, - std::shared_ptr< SparseMatrixProxy > mat, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float, - real_t radRayVertex, - real_t radRefVertex, - real_t rayVertex_0, - real_t rayVertex_1, - real_t rayVertex_2, - real_t refVertex_0, - real_t refVertex_1, - real_t refVertex_2, - real_t thrVertex_0, - real_t thrVertex_1, - real_t thrVertex_2 ) const; - /// Kernel type: computeInverseDiagonalOperatorValues + void toMatrix_P2ElementwiseEpsilonIcosahedralShellMap_0_0_macro_3D( idx_t* RESTRICT _data_dstEdge, + idx_t* RESTRICT _data_dstVertex, + real_t* RESTRICT _data_muEdge, + real_t* RESTRICT _data_muVertex, + idx_t* RESTRICT _data_srcEdge, + idx_t* RESTRICT _data_srcVertex, + real_t forVertex_0, + real_t forVertex_1, + real_t forVertex_2, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_0comp2, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_1comp2, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + real_t macro_vertex_coord_id_2comp2, + real_t macro_vertex_coord_id_3comp0, + real_t macro_vertex_coord_id_3comp1, + real_t macro_vertex_coord_id_3comp2, + std::shared_ptr< SparseMatrixProxy > mat, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float, + real_t radRayVertex, + real_t radRefVertex, + real_t rayVertex_0, + real_t rayVertex_1, + real_t rayVertex_2, + real_t refVertex_0, + real_t refVertex_1, + real_t refVertex_2, + real_t thrVertex_0, + real_t thrVertex_1, + real_t thrVertex_2 ) const; + + /// Integral: P2ElementwiseEpsilonIcosahedralShellMap_0_0 + /// - volume element: tetrahedron, dim: 3, vertices: 4, spacedim: 3 + /// - kernel type: computeInverseDiagonalOperatorValues + /// - loop strategy: SAWTOOTH /// - quadrature rule: Hammer-Marlowe-Stroud 3 | points: 5, degree: 3 + /// - blending map: IcosahedralShellMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 2048 3397 66 15 5 0 0 1 - void computeInverseDiagonalOperatorValues_macro_3D( real_t* RESTRICT _data_invDiag_Edge, - real_t* RESTRICT _data_invDiag_Vertex, - real_t* RESTRICT _data_muEdge, - real_t* RESTRICT _data_muVertex, - real_t forVertex_0, - real_t forVertex_1, - real_t forVertex_2, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_0comp2, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_1comp2, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - real_t macro_vertex_coord_id_2comp2, - real_t macro_vertex_coord_id_3comp0, - real_t macro_vertex_coord_id_3comp1, - real_t macro_vertex_coord_id_3comp2, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float, - real_t radRayVertex, - real_t radRefVertex, - real_t rayVertex_0, - real_t rayVertex_1, - real_t rayVertex_2, - real_t refVertex_0, - real_t refVertex_1, - real_t refVertex_2, - real_t thrVertex_0, - real_t thrVertex_1, - real_t thrVertex_2 ) const; + void computeInverseDiagonalOperatorValues_P2ElementwiseEpsilonIcosahedralShellMap_0_0_macro_3D( + real_t* RESTRICT _data_invDiag_Edge, + real_t* RESTRICT _data_invDiag_Vertex, + real_t* RESTRICT _data_muEdge, + real_t* RESTRICT _data_muVertex, + real_t forVertex_0, + real_t forVertex_1, + real_t forVertex_2, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_0comp2, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_1comp2, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + real_t macro_vertex_coord_id_2comp2, + real_t macro_vertex_coord_id_3comp0, + real_t macro_vertex_coord_id_3comp1, + real_t macro_vertex_coord_id_3comp2, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float, + real_t radRayVertex, + real_t radRefVertex, + real_t rayVertex_0, + real_t rayVertex_1, + real_t rayVertex_2, + real_t refVertex_0, + real_t refVertex_1, + real_t refVertex_2, + real_t thrVertex_0, + real_t thrVertex_1, + real_t thrVertex_2 ) const; std::shared_ptr< P2Function< real_t > > invDiag_; P2Function< real_t > mu; diff --git a/operators/epsilon/P2ElementwiseEpsilonIcosahedralShellMap_0_1.cpp b/operators/epsilon/P2ElementwiseEpsilonIcosahedralShellMap_0_1.cpp index 94573669f12a911c4a992b1f0669646aba14a540..bb2a2f7284896085cdf4a8d72964bf337d229f80 100644 --- a/operators/epsilon/P2ElementwiseEpsilonIcosahedralShellMap_0_1.cpp +++ b/operators/epsilon/P2ElementwiseEpsilonIcosahedralShellMap_0_1.cpp @@ -154,7 +154,7 @@ void P2ElementwiseEpsilonIcosahedralShellMap_0_1::apply( const P2Function< real_ this->timingTree_->start( "kernel" ); - apply_macro_3D( + apply_P2ElementwiseEpsilonIcosahedralShellMap_0_1_macro_3D( _data_dstEdge, _data_dstVertex, @@ -190,6 +190,7 @@ void P2ElementwiseEpsilonIcosahedralShellMap_0_1::apply( const P2Function< real_ thrVertex_0, thrVertex_1, thrVertex_2 ); + this->timingTree_->stop( "kernel" ); } @@ -285,7 +286,7 @@ void P2ElementwiseEpsilonIcosahedralShellMap_0_1::toMatrix( const std::shared_pt this->timingTree_->start( "kernel" ); - toMatrix_macro_3D( + toMatrix_P2ElementwiseEpsilonIcosahedralShellMap_0_1_macro_3D( _data_dstEdge, _data_dstVertex, @@ -322,6 +323,7 @@ void P2ElementwiseEpsilonIcosahedralShellMap_0_1::toMatrix( const std::shared_pt thrVertex_0, thrVertex_1, thrVertex_2 ); + this->timingTree_->stop( "kernel" ); } } diff --git a/operators/epsilon/P2ElementwiseEpsilonIcosahedralShellMap_0_1.hpp b/operators/epsilon/P2ElementwiseEpsilonIcosahedralShellMap_0_1.hpp index 9dae9aad00237e3e813dd3f807c639774e9b683f..f477f5d32c79c658b2c483d89c3bea32fc65ecc8 100644 --- a/operators/epsilon/P2ElementwiseEpsilonIcosahedralShellMap_0_1.hpp +++ b/operators/epsilon/P2ElementwiseEpsilonIcosahedralShellMap_0_1.hpp @@ -29,6 +29,7 @@ #include "core/DataTypes.h" #include "hyteg/LikwidWrapper.hpp" +#include "hyteg/boundary/BoundaryConditions.hpp" #include "hyteg/communication/Syncing.hpp" #include "hyteg/edgedofspace/EdgeDoFMacroCell.hpp" #include "hyteg/geometry/IcosahedralShellMap.hpp" @@ -36,6 +37,7 @@ #include "hyteg/p2functionspace/P2Function.hpp" #include "hyteg/primitivestorage/PrimitiveStorage.hpp" #include "hyteg/sparseassembly/SparseMatrixProxy.hpp" +#include "hyteg/types/types.hpp" #define FUNC_PREFIX @@ -83,87 +85,96 @@ class P2ElementwiseEpsilonIcosahedralShellMap_0_1 : public Operator< P2Function< protected: private: - /// Kernel type: apply + /// Integral: P2ElementwiseEpsilonIcosahedralShellMap_0_1 + /// - volume element: tetrahedron, dim: 3, vertices: 4, spacedim: 3 + /// - kernel type: apply + /// - loop strategy: SAWTOOTH /// - quadrature rule: Hammer-Marlowe-Stroud 3 | points: 5, degree: 3 + /// - blending map: IcosahedralShellMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 3583 4397 66 15 5 0 0 1 - void apply_macro_3D( real_t* RESTRICT _data_dstEdge, - real_t* RESTRICT _data_dstVertex, - real_t* RESTRICT _data_muEdge, - real_t* RESTRICT _data_muVertex, - real_t* RESTRICT _data_srcEdge, - real_t* RESTRICT _data_srcVertex, - real_t forVertex_0, - real_t forVertex_1, - real_t forVertex_2, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_0comp2, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_1comp2, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - real_t macro_vertex_coord_id_2comp2, - real_t macro_vertex_coord_id_3comp0, - real_t macro_vertex_coord_id_3comp1, - real_t macro_vertex_coord_id_3comp2, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float, - real_t radRayVertex, - real_t radRefVertex, - real_t rayVertex_0, - real_t rayVertex_1, - real_t rayVertex_2, - real_t refVertex_0, - real_t refVertex_1, - real_t refVertex_2, - real_t thrVertex_0, - real_t thrVertex_1, - real_t thrVertex_2 ) const; - /// Kernel type: toMatrix + void apply_P2ElementwiseEpsilonIcosahedralShellMap_0_1_macro_3D( real_t* RESTRICT _data_dstEdge, + real_t* RESTRICT _data_dstVertex, + real_t* RESTRICT _data_muEdge, + real_t* RESTRICT _data_muVertex, + real_t* RESTRICT _data_srcEdge, + real_t* RESTRICT _data_srcVertex, + real_t forVertex_0, + real_t forVertex_1, + real_t forVertex_2, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_0comp2, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_1comp2, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + real_t macro_vertex_coord_id_2comp2, + real_t macro_vertex_coord_id_3comp0, + real_t macro_vertex_coord_id_3comp1, + real_t macro_vertex_coord_id_3comp2, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float, + real_t radRayVertex, + real_t radRefVertex, + real_t rayVertex_0, + real_t rayVertex_1, + real_t rayVertex_2, + real_t refVertex_0, + real_t refVertex_1, + real_t refVertex_2, + real_t thrVertex_0, + real_t thrVertex_1, + real_t thrVertex_2 ) const; + + /// Integral: P2ElementwiseEpsilonIcosahedralShellMap_0_1 + /// - volume element: tetrahedron, dim: 3, vertices: 4, spacedim: 3 + /// - kernel type: toMatrix + /// - loop strategy: SAWTOOTH /// - quadrature rule: Hammer-Marlowe-Stroud 3 | points: 5, degree: 3 + /// - blending map: IcosahedralShellMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 3483 4297 66 15 5 0 0 4 - void toMatrix_macro_3D( idx_t* RESTRICT _data_dstEdge, - idx_t* RESTRICT _data_dstVertex, - real_t* RESTRICT _data_muEdge, - real_t* RESTRICT _data_muVertex, - idx_t* RESTRICT _data_srcEdge, - idx_t* RESTRICT _data_srcVertex, - real_t forVertex_0, - real_t forVertex_1, - real_t forVertex_2, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_0comp2, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_1comp2, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - real_t macro_vertex_coord_id_2comp2, - real_t macro_vertex_coord_id_3comp0, - real_t macro_vertex_coord_id_3comp1, - real_t macro_vertex_coord_id_3comp2, - std::shared_ptr< SparseMatrixProxy > mat, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float, - real_t radRayVertex, - real_t radRefVertex, - real_t rayVertex_0, - real_t rayVertex_1, - real_t rayVertex_2, - real_t refVertex_0, - real_t refVertex_1, - real_t refVertex_2, - real_t thrVertex_0, - real_t thrVertex_1, - real_t thrVertex_2 ) const; + void toMatrix_P2ElementwiseEpsilonIcosahedralShellMap_0_1_macro_3D( idx_t* RESTRICT _data_dstEdge, + idx_t* RESTRICT _data_dstVertex, + real_t* RESTRICT _data_muEdge, + real_t* RESTRICT _data_muVertex, + idx_t* RESTRICT _data_srcEdge, + idx_t* RESTRICT _data_srcVertex, + real_t forVertex_0, + real_t forVertex_1, + real_t forVertex_2, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_0comp2, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_1comp2, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + real_t macro_vertex_coord_id_2comp2, + real_t macro_vertex_coord_id_3comp0, + real_t macro_vertex_coord_id_3comp1, + real_t macro_vertex_coord_id_3comp2, + std::shared_ptr< SparseMatrixProxy > mat, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float, + real_t radRayVertex, + real_t radRefVertex, + real_t rayVertex_0, + real_t rayVertex_1, + real_t rayVertex_2, + real_t refVertex_0, + real_t refVertex_1, + real_t refVertex_2, + real_t thrVertex_0, + real_t thrVertex_1, + real_t thrVertex_2 ) const; P2Function< real_t > mu; }; diff --git a/operators/epsilon/P2ElementwiseEpsilonIcosahedralShellMap_0_2.cpp b/operators/epsilon/P2ElementwiseEpsilonIcosahedralShellMap_0_2.cpp index bd6716eebdf58a697dcbea9a2bc292c8e5a59533..2bcf6f1f8883b9f03fc96c9fdfed909f95e8a5db 100644 --- a/operators/epsilon/P2ElementwiseEpsilonIcosahedralShellMap_0_2.cpp +++ b/operators/epsilon/P2ElementwiseEpsilonIcosahedralShellMap_0_2.cpp @@ -154,7 +154,7 @@ void P2ElementwiseEpsilonIcosahedralShellMap_0_2::apply( const P2Function< real_ this->timingTree_->start( "kernel" ); - apply_macro_3D( + apply_P2ElementwiseEpsilonIcosahedralShellMap_0_2_macro_3D( _data_dstEdge, _data_dstVertex, @@ -190,6 +190,7 @@ void P2ElementwiseEpsilonIcosahedralShellMap_0_2::apply( const P2Function< real_ thrVertex_0, thrVertex_1, thrVertex_2 ); + this->timingTree_->stop( "kernel" ); } @@ -285,7 +286,7 @@ void P2ElementwiseEpsilonIcosahedralShellMap_0_2::toMatrix( const std::shared_pt this->timingTree_->start( "kernel" ); - toMatrix_macro_3D( + toMatrix_P2ElementwiseEpsilonIcosahedralShellMap_0_2_macro_3D( _data_dstEdge, _data_dstVertex, @@ -322,6 +323,7 @@ void P2ElementwiseEpsilonIcosahedralShellMap_0_2::toMatrix( const std::shared_pt thrVertex_0, thrVertex_1, thrVertex_2 ); + this->timingTree_->stop( "kernel" ); } } diff --git a/operators/epsilon/P2ElementwiseEpsilonIcosahedralShellMap_0_2.hpp b/operators/epsilon/P2ElementwiseEpsilonIcosahedralShellMap_0_2.hpp index 91701d41688b6c92e884f6d7bdf8bc018a5f409a..269b0394f402d0744050acc76651759129b7dd7f 100644 --- a/operators/epsilon/P2ElementwiseEpsilonIcosahedralShellMap_0_2.hpp +++ b/operators/epsilon/P2ElementwiseEpsilonIcosahedralShellMap_0_2.hpp @@ -29,6 +29,7 @@ #include "core/DataTypes.h" #include "hyteg/LikwidWrapper.hpp" +#include "hyteg/boundary/BoundaryConditions.hpp" #include "hyteg/communication/Syncing.hpp" #include "hyteg/edgedofspace/EdgeDoFMacroCell.hpp" #include "hyteg/geometry/IcosahedralShellMap.hpp" @@ -36,6 +37,7 @@ #include "hyteg/p2functionspace/P2Function.hpp" #include "hyteg/primitivestorage/PrimitiveStorage.hpp" #include "hyteg/sparseassembly/SparseMatrixProxy.hpp" +#include "hyteg/types/types.hpp" #define FUNC_PREFIX @@ -83,87 +85,96 @@ class P2ElementwiseEpsilonIcosahedralShellMap_0_2 : public Operator< P2Function< protected: private: - /// Kernel type: apply + /// Integral: P2ElementwiseEpsilonIcosahedralShellMap_0_2 + /// - volume element: tetrahedron, dim: 3, vertices: 4, spacedim: 3 + /// - kernel type: apply + /// - loop strategy: SAWTOOTH /// - quadrature rule: Hammer-Marlowe-Stroud 3 | points: 5, degree: 3 + /// - blending map: IcosahedralShellMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 3583 4397 66 15 5 0 0 1 - void apply_macro_3D( real_t* RESTRICT _data_dstEdge, - real_t* RESTRICT _data_dstVertex, - real_t* RESTRICT _data_muEdge, - real_t* RESTRICT _data_muVertex, - real_t* RESTRICT _data_srcEdge, - real_t* RESTRICT _data_srcVertex, - real_t forVertex_0, - real_t forVertex_1, - real_t forVertex_2, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_0comp2, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_1comp2, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - real_t macro_vertex_coord_id_2comp2, - real_t macro_vertex_coord_id_3comp0, - real_t macro_vertex_coord_id_3comp1, - real_t macro_vertex_coord_id_3comp2, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float, - real_t radRayVertex, - real_t radRefVertex, - real_t rayVertex_0, - real_t rayVertex_1, - real_t rayVertex_2, - real_t refVertex_0, - real_t refVertex_1, - real_t refVertex_2, - real_t thrVertex_0, - real_t thrVertex_1, - real_t thrVertex_2 ) const; - /// Kernel type: toMatrix + void apply_P2ElementwiseEpsilonIcosahedralShellMap_0_2_macro_3D( real_t* RESTRICT _data_dstEdge, + real_t* RESTRICT _data_dstVertex, + real_t* RESTRICT _data_muEdge, + real_t* RESTRICT _data_muVertex, + real_t* RESTRICT _data_srcEdge, + real_t* RESTRICT _data_srcVertex, + real_t forVertex_0, + real_t forVertex_1, + real_t forVertex_2, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_0comp2, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_1comp2, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + real_t macro_vertex_coord_id_2comp2, + real_t macro_vertex_coord_id_3comp0, + real_t macro_vertex_coord_id_3comp1, + real_t macro_vertex_coord_id_3comp2, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float, + real_t radRayVertex, + real_t radRefVertex, + real_t rayVertex_0, + real_t rayVertex_1, + real_t rayVertex_2, + real_t refVertex_0, + real_t refVertex_1, + real_t refVertex_2, + real_t thrVertex_0, + real_t thrVertex_1, + real_t thrVertex_2 ) const; + + /// Integral: P2ElementwiseEpsilonIcosahedralShellMap_0_2 + /// - volume element: tetrahedron, dim: 3, vertices: 4, spacedim: 3 + /// - kernel type: toMatrix + /// - loop strategy: SAWTOOTH /// - quadrature rule: Hammer-Marlowe-Stroud 3 | points: 5, degree: 3 + /// - blending map: IcosahedralShellMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 3483 4297 66 15 5 0 0 4 - void toMatrix_macro_3D( idx_t* RESTRICT _data_dstEdge, - idx_t* RESTRICT _data_dstVertex, - real_t* RESTRICT _data_muEdge, - real_t* RESTRICT _data_muVertex, - idx_t* RESTRICT _data_srcEdge, - idx_t* RESTRICT _data_srcVertex, - real_t forVertex_0, - real_t forVertex_1, - real_t forVertex_2, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_0comp2, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_1comp2, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - real_t macro_vertex_coord_id_2comp2, - real_t macro_vertex_coord_id_3comp0, - real_t macro_vertex_coord_id_3comp1, - real_t macro_vertex_coord_id_3comp2, - std::shared_ptr< SparseMatrixProxy > mat, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float, - real_t radRayVertex, - real_t radRefVertex, - real_t rayVertex_0, - real_t rayVertex_1, - real_t rayVertex_2, - real_t refVertex_0, - real_t refVertex_1, - real_t refVertex_2, - real_t thrVertex_0, - real_t thrVertex_1, - real_t thrVertex_2 ) const; + void toMatrix_P2ElementwiseEpsilonIcosahedralShellMap_0_2_macro_3D( idx_t* RESTRICT _data_dstEdge, + idx_t* RESTRICT _data_dstVertex, + real_t* RESTRICT _data_muEdge, + real_t* RESTRICT _data_muVertex, + idx_t* RESTRICT _data_srcEdge, + idx_t* RESTRICT _data_srcVertex, + real_t forVertex_0, + real_t forVertex_1, + real_t forVertex_2, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_0comp2, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_1comp2, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + real_t macro_vertex_coord_id_2comp2, + real_t macro_vertex_coord_id_3comp0, + real_t macro_vertex_coord_id_3comp1, + real_t macro_vertex_coord_id_3comp2, + std::shared_ptr< SparseMatrixProxy > mat, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float, + real_t radRayVertex, + real_t radRefVertex, + real_t rayVertex_0, + real_t rayVertex_1, + real_t rayVertex_2, + real_t refVertex_0, + real_t refVertex_1, + real_t refVertex_2, + real_t thrVertex_0, + real_t thrVertex_1, + real_t thrVertex_2 ) const; P2Function< real_t > mu; }; diff --git a/operators/epsilon/P2ElementwiseEpsilonIcosahedralShellMap_1_0.cpp b/operators/epsilon/P2ElementwiseEpsilonIcosahedralShellMap_1_0.cpp index f4acec7b82b513e817002c5b2a3a77ed30719ad2..0851bfd7b8a62b5bd5963a75387c1f8457fd01ef 100644 --- a/operators/epsilon/P2ElementwiseEpsilonIcosahedralShellMap_1_0.cpp +++ b/operators/epsilon/P2ElementwiseEpsilonIcosahedralShellMap_1_0.cpp @@ -154,7 +154,7 @@ void P2ElementwiseEpsilonIcosahedralShellMap_1_0::apply( const P2Function< real_ this->timingTree_->start( "kernel" ); - apply_macro_3D( + apply_P2ElementwiseEpsilonIcosahedralShellMap_1_0_macro_3D( _data_dstEdge, _data_dstVertex, @@ -190,6 +190,7 @@ void P2ElementwiseEpsilonIcosahedralShellMap_1_0::apply( const P2Function< real_ thrVertex_0, thrVertex_1, thrVertex_2 ); + this->timingTree_->stop( "kernel" ); } @@ -285,7 +286,7 @@ void P2ElementwiseEpsilonIcosahedralShellMap_1_0::toMatrix( const std::shared_pt this->timingTree_->start( "kernel" ); - toMatrix_macro_3D( + toMatrix_P2ElementwiseEpsilonIcosahedralShellMap_1_0_macro_3D( _data_dstEdge, _data_dstVertex, @@ -322,6 +323,7 @@ void P2ElementwiseEpsilonIcosahedralShellMap_1_0::toMatrix( const std::shared_pt thrVertex_0, thrVertex_1, thrVertex_2 ); + this->timingTree_->stop( "kernel" ); } } diff --git a/operators/epsilon/P2ElementwiseEpsilonIcosahedralShellMap_1_0.hpp b/operators/epsilon/P2ElementwiseEpsilonIcosahedralShellMap_1_0.hpp index 832bb4d7d6874657a2fae8be56801981300dffaf..26e41a15e7687c9ec04e2c33405dd484b1ae13ad 100644 --- a/operators/epsilon/P2ElementwiseEpsilonIcosahedralShellMap_1_0.hpp +++ b/operators/epsilon/P2ElementwiseEpsilonIcosahedralShellMap_1_0.hpp @@ -29,6 +29,7 @@ #include "core/DataTypes.h" #include "hyteg/LikwidWrapper.hpp" +#include "hyteg/boundary/BoundaryConditions.hpp" #include "hyteg/communication/Syncing.hpp" #include "hyteg/edgedofspace/EdgeDoFMacroCell.hpp" #include "hyteg/geometry/IcosahedralShellMap.hpp" @@ -36,6 +37,7 @@ #include "hyteg/p2functionspace/P2Function.hpp" #include "hyteg/primitivestorage/PrimitiveStorage.hpp" #include "hyteg/sparseassembly/SparseMatrixProxy.hpp" +#include "hyteg/types/types.hpp" #define FUNC_PREFIX @@ -83,87 +85,96 @@ class P2ElementwiseEpsilonIcosahedralShellMap_1_0 : public Operator< P2Function< protected: private: - /// Kernel type: apply + /// Integral: P2ElementwiseEpsilonIcosahedralShellMap_1_0 + /// - volume element: tetrahedron, dim: 3, vertices: 4, spacedim: 3 + /// - kernel type: apply + /// - loop strategy: SAWTOOTH /// - quadrature rule: Hammer-Marlowe-Stroud 3 | points: 5, degree: 3 + /// - blending map: IcosahedralShellMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 3583 4397 66 15 5 0 0 1 - void apply_macro_3D( real_t* RESTRICT _data_dstEdge, - real_t* RESTRICT _data_dstVertex, - real_t* RESTRICT _data_muEdge, - real_t* RESTRICT _data_muVertex, - real_t* RESTRICT _data_srcEdge, - real_t* RESTRICT _data_srcVertex, - real_t forVertex_0, - real_t forVertex_1, - real_t forVertex_2, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_0comp2, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_1comp2, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - real_t macro_vertex_coord_id_2comp2, - real_t macro_vertex_coord_id_3comp0, - real_t macro_vertex_coord_id_3comp1, - real_t macro_vertex_coord_id_3comp2, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float, - real_t radRayVertex, - real_t radRefVertex, - real_t rayVertex_0, - real_t rayVertex_1, - real_t rayVertex_2, - real_t refVertex_0, - real_t refVertex_1, - real_t refVertex_2, - real_t thrVertex_0, - real_t thrVertex_1, - real_t thrVertex_2 ) const; - /// Kernel type: toMatrix + void apply_P2ElementwiseEpsilonIcosahedralShellMap_1_0_macro_3D( real_t* RESTRICT _data_dstEdge, + real_t* RESTRICT _data_dstVertex, + real_t* RESTRICT _data_muEdge, + real_t* RESTRICT _data_muVertex, + real_t* RESTRICT _data_srcEdge, + real_t* RESTRICT _data_srcVertex, + real_t forVertex_0, + real_t forVertex_1, + real_t forVertex_2, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_0comp2, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_1comp2, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + real_t macro_vertex_coord_id_2comp2, + real_t macro_vertex_coord_id_3comp0, + real_t macro_vertex_coord_id_3comp1, + real_t macro_vertex_coord_id_3comp2, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float, + real_t radRayVertex, + real_t radRefVertex, + real_t rayVertex_0, + real_t rayVertex_1, + real_t rayVertex_2, + real_t refVertex_0, + real_t refVertex_1, + real_t refVertex_2, + real_t thrVertex_0, + real_t thrVertex_1, + real_t thrVertex_2 ) const; + + /// Integral: P2ElementwiseEpsilonIcosahedralShellMap_1_0 + /// - volume element: tetrahedron, dim: 3, vertices: 4, spacedim: 3 + /// - kernel type: toMatrix + /// - loop strategy: SAWTOOTH /// - quadrature rule: Hammer-Marlowe-Stroud 3 | points: 5, degree: 3 + /// - blending map: IcosahedralShellMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 3483 4297 66 15 5 0 0 4 - void toMatrix_macro_3D( idx_t* RESTRICT _data_dstEdge, - idx_t* RESTRICT _data_dstVertex, - real_t* RESTRICT _data_muEdge, - real_t* RESTRICT _data_muVertex, - idx_t* RESTRICT _data_srcEdge, - idx_t* RESTRICT _data_srcVertex, - real_t forVertex_0, - real_t forVertex_1, - real_t forVertex_2, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_0comp2, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_1comp2, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - real_t macro_vertex_coord_id_2comp2, - real_t macro_vertex_coord_id_3comp0, - real_t macro_vertex_coord_id_3comp1, - real_t macro_vertex_coord_id_3comp2, - std::shared_ptr< SparseMatrixProxy > mat, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float, - real_t radRayVertex, - real_t radRefVertex, - real_t rayVertex_0, - real_t rayVertex_1, - real_t rayVertex_2, - real_t refVertex_0, - real_t refVertex_1, - real_t refVertex_2, - real_t thrVertex_0, - real_t thrVertex_1, - real_t thrVertex_2 ) const; + void toMatrix_P2ElementwiseEpsilonIcosahedralShellMap_1_0_macro_3D( idx_t* RESTRICT _data_dstEdge, + idx_t* RESTRICT _data_dstVertex, + real_t* RESTRICT _data_muEdge, + real_t* RESTRICT _data_muVertex, + idx_t* RESTRICT _data_srcEdge, + idx_t* RESTRICT _data_srcVertex, + real_t forVertex_0, + real_t forVertex_1, + real_t forVertex_2, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_0comp2, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_1comp2, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + real_t macro_vertex_coord_id_2comp2, + real_t macro_vertex_coord_id_3comp0, + real_t macro_vertex_coord_id_3comp1, + real_t macro_vertex_coord_id_3comp2, + std::shared_ptr< SparseMatrixProxy > mat, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float, + real_t radRayVertex, + real_t radRefVertex, + real_t rayVertex_0, + real_t rayVertex_1, + real_t rayVertex_2, + real_t refVertex_0, + real_t refVertex_1, + real_t refVertex_2, + real_t thrVertex_0, + real_t thrVertex_1, + real_t thrVertex_2 ) const; P2Function< real_t > mu; }; diff --git a/operators/epsilon/P2ElementwiseEpsilonIcosahedralShellMap_1_1.cpp b/operators/epsilon/P2ElementwiseEpsilonIcosahedralShellMap_1_1.cpp index 39237dc7da86bd38330140d6177217ea824dcaba..03369a4cef831ce7a953d6c48e97a93aa50be0ed 100644 --- a/operators/epsilon/P2ElementwiseEpsilonIcosahedralShellMap_1_1.cpp +++ b/operators/epsilon/P2ElementwiseEpsilonIcosahedralShellMap_1_1.cpp @@ -154,7 +154,7 @@ void P2ElementwiseEpsilonIcosahedralShellMap_1_1::apply( const P2Function< real_ this->timingTree_->start( "kernel" ); - apply_macro_3D( + apply_P2ElementwiseEpsilonIcosahedralShellMap_1_1_macro_3D( _data_dstEdge, _data_dstVertex, @@ -190,6 +190,7 @@ void P2ElementwiseEpsilonIcosahedralShellMap_1_1::apply( const P2Function< real_ thrVertex_0, thrVertex_1, thrVertex_2 ); + this->timingTree_->stop( "kernel" ); } @@ -285,7 +286,7 @@ void P2ElementwiseEpsilonIcosahedralShellMap_1_1::toMatrix( const std::shared_pt this->timingTree_->start( "kernel" ); - toMatrix_macro_3D( + toMatrix_P2ElementwiseEpsilonIcosahedralShellMap_1_1_macro_3D( _data_dstEdge, _data_dstVertex, @@ -322,6 +323,7 @@ void P2ElementwiseEpsilonIcosahedralShellMap_1_1::toMatrix( const std::shared_pt thrVertex_0, thrVertex_1, thrVertex_2 ); + this->timingTree_->stop( "kernel" ); } } @@ -401,7 +403,7 @@ void P2ElementwiseEpsilonIcosahedralShellMap_1_1::computeInverseDiagonalOperator this->timingTree_->start( "kernel" ); - computeInverseDiagonalOperatorValues_macro_3D( + computeInverseDiagonalOperatorValues_P2ElementwiseEpsilonIcosahedralShellMap_1_1_macro_3D( _data_invDiag_Edge, _data_invDiag_Vertex, @@ -435,6 +437,7 @@ void P2ElementwiseEpsilonIcosahedralShellMap_1_1::computeInverseDiagonalOperator thrVertex_0, thrVertex_1, thrVertex_2 ); + this->timingTree_->stop( "kernel" ); } diff --git a/operators/epsilon/P2ElementwiseEpsilonIcosahedralShellMap_1_1.hpp b/operators/epsilon/P2ElementwiseEpsilonIcosahedralShellMap_1_1.hpp index a22e0b9804ef905305d4840013a379b1f7013ef9..9311dbc3683979467f1b0458759cb5c8cbe08068 100644 --- a/operators/epsilon/P2ElementwiseEpsilonIcosahedralShellMap_1_1.hpp +++ b/operators/epsilon/P2ElementwiseEpsilonIcosahedralShellMap_1_1.hpp @@ -29,6 +29,7 @@ #include "core/DataTypes.h" #include "hyteg/LikwidWrapper.hpp" +#include "hyteg/boundary/BoundaryConditions.hpp" #include "hyteg/communication/Syncing.hpp" #include "hyteg/edgedofspace/EdgeDoFMacroCell.hpp" #include "hyteg/geometry/IcosahedralShellMap.hpp" @@ -37,6 +38,7 @@ #include "hyteg/primitivestorage/PrimitiveStorage.hpp" #include "hyteg/solvers/Smoothables.hpp" #include "hyteg/sparseassembly/SparseMatrixProxy.hpp" +#include "hyteg/types/types.hpp" #define FUNC_PREFIX @@ -89,125 +91,140 @@ class P2ElementwiseEpsilonIcosahedralShellMap_1_1 : public Operator< P2Function< protected: private: - /// Kernel type: apply + /// Integral: P2ElementwiseEpsilonIcosahedralShellMap_1_1 + /// - volume element: tetrahedron, dim: 3, vertices: 4, spacedim: 3 + /// - kernel type: apply + /// - loop strategy: SAWTOOTH /// - quadrature rule: Hammer-Marlowe-Stroud 3 | points: 5, degree: 3 + /// - blending map: IcosahedralShellMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 3038 4397 66 15 5 0 0 1 - void apply_macro_3D( real_t* RESTRICT _data_dstEdge, - real_t* RESTRICT _data_dstVertex, - real_t* RESTRICT _data_muEdge, - real_t* RESTRICT _data_muVertex, - real_t* RESTRICT _data_srcEdge, - real_t* RESTRICT _data_srcVertex, - real_t forVertex_0, - real_t forVertex_1, - real_t forVertex_2, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_0comp2, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_1comp2, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - real_t macro_vertex_coord_id_2comp2, - real_t macro_vertex_coord_id_3comp0, - real_t macro_vertex_coord_id_3comp1, - real_t macro_vertex_coord_id_3comp2, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float, - real_t radRayVertex, - real_t radRefVertex, - real_t rayVertex_0, - real_t rayVertex_1, - real_t rayVertex_2, - real_t refVertex_0, - real_t refVertex_1, - real_t refVertex_2, - real_t thrVertex_0, - real_t thrVertex_1, - real_t thrVertex_2 ) const; - /// Kernel type: toMatrix + void apply_P2ElementwiseEpsilonIcosahedralShellMap_1_1_macro_3D( real_t* RESTRICT _data_dstEdge, + real_t* RESTRICT _data_dstVertex, + real_t* RESTRICT _data_muEdge, + real_t* RESTRICT _data_muVertex, + real_t* RESTRICT _data_srcEdge, + real_t* RESTRICT _data_srcVertex, + real_t forVertex_0, + real_t forVertex_1, + real_t forVertex_2, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_0comp2, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_1comp2, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + real_t macro_vertex_coord_id_2comp2, + real_t macro_vertex_coord_id_3comp0, + real_t macro_vertex_coord_id_3comp1, + real_t macro_vertex_coord_id_3comp2, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float, + real_t radRayVertex, + real_t radRefVertex, + real_t rayVertex_0, + real_t rayVertex_1, + real_t rayVertex_2, + real_t refVertex_0, + real_t refVertex_1, + real_t refVertex_2, + real_t thrVertex_0, + real_t thrVertex_1, + real_t thrVertex_2 ) const; + + /// Integral: P2ElementwiseEpsilonIcosahedralShellMap_1_1 + /// - volume element: tetrahedron, dim: 3, vertices: 4, spacedim: 3 + /// - kernel type: toMatrix + /// - loop strategy: SAWTOOTH /// - quadrature rule: Hammer-Marlowe-Stroud 3 | points: 5, degree: 3 + /// - blending map: IcosahedralShellMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 2938 4297 66 15 5 0 0 4 - void toMatrix_macro_3D( idx_t* RESTRICT _data_dstEdge, - idx_t* RESTRICT _data_dstVertex, - real_t* RESTRICT _data_muEdge, - real_t* RESTRICT _data_muVertex, - idx_t* RESTRICT _data_srcEdge, - idx_t* RESTRICT _data_srcVertex, - real_t forVertex_0, - real_t forVertex_1, - real_t forVertex_2, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_0comp2, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_1comp2, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - real_t macro_vertex_coord_id_2comp2, - real_t macro_vertex_coord_id_3comp0, - real_t macro_vertex_coord_id_3comp1, - real_t macro_vertex_coord_id_3comp2, - std::shared_ptr< SparseMatrixProxy > mat, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float, - real_t radRayVertex, - real_t radRefVertex, - real_t rayVertex_0, - real_t rayVertex_1, - real_t rayVertex_2, - real_t refVertex_0, - real_t refVertex_1, - real_t refVertex_2, - real_t thrVertex_0, - real_t thrVertex_1, - real_t thrVertex_2 ) const; - /// Kernel type: computeInverseDiagonalOperatorValues + void toMatrix_P2ElementwiseEpsilonIcosahedralShellMap_1_1_macro_3D( idx_t* RESTRICT _data_dstEdge, + idx_t* RESTRICT _data_dstVertex, + real_t* RESTRICT _data_muEdge, + real_t* RESTRICT _data_muVertex, + idx_t* RESTRICT _data_srcEdge, + idx_t* RESTRICT _data_srcVertex, + real_t forVertex_0, + real_t forVertex_1, + real_t forVertex_2, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_0comp2, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_1comp2, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + real_t macro_vertex_coord_id_2comp2, + real_t macro_vertex_coord_id_3comp0, + real_t macro_vertex_coord_id_3comp1, + real_t macro_vertex_coord_id_3comp2, + std::shared_ptr< SparseMatrixProxy > mat, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float, + real_t radRayVertex, + real_t radRefVertex, + real_t rayVertex_0, + real_t rayVertex_1, + real_t rayVertex_2, + real_t refVertex_0, + real_t refVertex_1, + real_t refVertex_2, + real_t thrVertex_0, + real_t thrVertex_1, + real_t thrVertex_2 ) const; + + /// Integral: P2ElementwiseEpsilonIcosahedralShellMap_1_1 + /// - volume element: tetrahedron, dim: 3, vertices: 4, spacedim: 3 + /// - kernel type: computeInverseDiagonalOperatorValues + /// - loop strategy: SAWTOOTH /// - quadrature rule: Hammer-Marlowe-Stroud 3 | points: 5, degree: 3 + /// - blending map: IcosahedralShellMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 2048 3397 66 15 5 0 0 1 - void computeInverseDiagonalOperatorValues_macro_3D( real_t* RESTRICT _data_invDiag_Edge, - real_t* RESTRICT _data_invDiag_Vertex, - real_t* RESTRICT _data_muEdge, - real_t* RESTRICT _data_muVertex, - real_t forVertex_0, - real_t forVertex_1, - real_t forVertex_2, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_0comp2, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_1comp2, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - real_t macro_vertex_coord_id_2comp2, - real_t macro_vertex_coord_id_3comp0, - real_t macro_vertex_coord_id_3comp1, - real_t macro_vertex_coord_id_3comp2, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float, - real_t radRayVertex, - real_t radRefVertex, - real_t rayVertex_0, - real_t rayVertex_1, - real_t rayVertex_2, - real_t refVertex_0, - real_t refVertex_1, - real_t refVertex_2, - real_t thrVertex_0, - real_t thrVertex_1, - real_t thrVertex_2 ) const; + void computeInverseDiagonalOperatorValues_P2ElementwiseEpsilonIcosahedralShellMap_1_1_macro_3D( + real_t* RESTRICT _data_invDiag_Edge, + real_t* RESTRICT _data_invDiag_Vertex, + real_t* RESTRICT _data_muEdge, + real_t* RESTRICT _data_muVertex, + real_t forVertex_0, + real_t forVertex_1, + real_t forVertex_2, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_0comp2, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_1comp2, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + real_t macro_vertex_coord_id_2comp2, + real_t macro_vertex_coord_id_3comp0, + real_t macro_vertex_coord_id_3comp1, + real_t macro_vertex_coord_id_3comp2, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float, + real_t radRayVertex, + real_t radRefVertex, + real_t rayVertex_0, + real_t rayVertex_1, + real_t rayVertex_2, + real_t refVertex_0, + real_t refVertex_1, + real_t refVertex_2, + real_t thrVertex_0, + real_t thrVertex_1, + real_t thrVertex_2 ) const; std::shared_ptr< P2Function< real_t > > invDiag_; P2Function< real_t > mu; diff --git a/operators/epsilon/P2ElementwiseEpsilonIcosahedralShellMap_1_2.cpp b/operators/epsilon/P2ElementwiseEpsilonIcosahedralShellMap_1_2.cpp index 4605790efbcd7e758fed1a1c25198f0a798e57d7..6b147ca6d3bb0b92013ff04b00cd573b0277e531 100644 --- a/operators/epsilon/P2ElementwiseEpsilonIcosahedralShellMap_1_2.cpp +++ b/operators/epsilon/P2ElementwiseEpsilonIcosahedralShellMap_1_2.cpp @@ -154,7 +154,7 @@ void P2ElementwiseEpsilonIcosahedralShellMap_1_2::apply( const P2Function< real_ this->timingTree_->start( "kernel" ); - apply_macro_3D( + apply_P2ElementwiseEpsilonIcosahedralShellMap_1_2_macro_3D( _data_dstEdge, _data_dstVertex, @@ -190,6 +190,7 @@ void P2ElementwiseEpsilonIcosahedralShellMap_1_2::apply( const P2Function< real_ thrVertex_0, thrVertex_1, thrVertex_2 ); + this->timingTree_->stop( "kernel" ); } @@ -285,7 +286,7 @@ void P2ElementwiseEpsilonIcosahedralShellMap_1_2::toMatrix( const std::shared_pt this->timingTree_->start( "kernel" ); - toMatrix_macro_3D( + toMatrix_P2ElementwiseEpsilonIcosahedralShellMap_1_2_macro_3D( _data_dstEdge, _data_dstVertex, @@ -322,6 +323,7 @@ void P2ElementwiseEpsilonIcosahedralShellMap_1_2::toMatrix( const std::shared_pt thrVertex_0, thrVertex_1, thrVertex_2 ); + this->timingTree_->stop( "kernel" ); } } diff --git a/operators/epsilon/P2ElementwiseEpsilonIcosahedralShellMap_1_2.hpp b/operators/epsilon/P2ElementwiseEpsilonIcosahedralShellMap_1_2.hpp index 5592492cfc7827246b02d90d4d0ffffa8cd0892a..9e94c2498288449e237a163c85938a4d23df6001 100644 --- a/operators/epsilon/P2ElementwiseEpsilonIcosahedralShellMap_1_2.hpp +++ b/operators/epsilon/P2ElementwiseEpsilonIcosahedralShellMap_1_2.hpp @@ -29,6 +29,7 @@ #include "core/DataTypes.h" #include "hyteg/LikwidWrapper.hpp" +#include "hyteg/boundary/BoundaryConditions.hpp" #include "hyteg/communication/Syncing.hpp" #include "hyteg/edgedofspace/EdgeDoFMacroCell.hpp" #include "hyteg/geometry/IcosahedralShellMap.hpp" @@ -36,6 +37,7 @@ #include "hyteg/p2functionspace/P2Function.hpp" #include "hyteg/primitivestorage/PrimitiveStorage.hpp" #include "hyteg/sparseassembly/SparseMatrixProxy.hpp" +#include "hyteg/types/types.hpp" #define FUNC_PREFIX @@ -83,87 +85,96 @@ class P2ElementwiseEpsilonIcosahedralShellMap_1_2 : public Operator< P2Function< protected: private: - /// Kernel type: apply + /// Integral: P2ElementwiseEpsilonIcosahedralShellMap_1_2 + /// - volume element: tetrahedron, dim: 3, vertices: 4, spacedim: 3 + /// - kernel type: apply + /// - loop strategy: SAWTOOTH /// - quadrature rule: Hammer-Marlowe-Stroud 3 | points: 5, degree: 3 + /// - blending map: IcosahedralShellMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 3583 4397 66 15 5 0 0 1 - void apply_macro_3D( real_t* RESTRICT _data_dstEdge, - real_t* RESTRICT _data_dstVertex, - real_t* RESTRICT _data_muEdge, - real_t* RESTRICT _data_muVertex, - real_t* RESTRICT _data_srcEdge, - real_t* RESTRICT _data_srcVertex, - real_t forVertex_0, - real_t forVertex_1, - real_t forVertex_2, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_0comp2, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_1comp2, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - real_t macro_vertex_coord_id_2comp2, - real_t macro_vertex_coord_id_3comp0, - real_t macro_vertex_coord_id_3comp1, - real_t macro_vertex_coord_id_3comp2, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float, - real_t radRayVertex, - real_t radRefVertex, - real_t rayVertex_0, - real_t rayVertex_1, - real_t rayVertex_2, - real_t refVertex_0, - real_t refVertex_1, - real_t refVertex_2, - real_t thrVertex_0, - real_t thrVertex_1, - real_t thrVertex_2 ) const; - /// Kernel type: toMatrix + void apply_P2ElementwiseEpsilonIcosahedralShellMap_1_2_macro_3D( real_t* RESTRICT _data_dstEdge, + real_t* RESTRICT _data_dstVertex, + real_t* RESTRICT _data_muEdge, + real_t* RESTRICT _data_muVertex, + real_t* RESTRICT _data_srcEdge, + real_t* RESTRICT _data_srcVertex, + real_t forVertex_0, + real_t forVertex_1, + real_t forVertex_2, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_0comp2, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_1comp2, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + real_t macro_vertex_coord_id_2comp2, + real_t macro_vertex_coord_id_3comp0, + real_t macro_vertex_coord_id_3comp1, + real_t macro_vertex_coord_id_3comp2, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float, + real_t radRayVertex, + real_t radRefVertex, + real_t rayVertex_0, + real_t rayVertex_1, + real_t rayVertex_2, + real_t refVertex_0, + real_t refVertex_1, + real_t refVertex_2, + real_t thrVertex_0, + real_t thrVertex_1, + real_t thrVertex_2 ) const; + + /// Integral: P2ElementwiseEpsilonIcosahedralShellMap_1_2 + /// - volume element: tetrahedron, dim: 3, vertices: 4, spacedim: 3 + /// - kernel type: toMatrix + /// - loop strategy: SAWTOOTH /// - quadrature rule: Hammer-Marlowe-Stroud 3 | points: 5, degree: 3 + /// - blending map: IcosahedralShellMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 3483 4297 66 15 5 0 0 4 - void toMatrix_macro_3D( idx_t* RESTRICT _data_dstEdge, - idx_t* RESTRICT _data_dstVertex, - real_t* RESTRICT _data_muEdge, - real_t* RESTRICT _data_muVertex, - idx_t* RESTRICT _data_srcEdge, - idx_t* RESTRICT _data_srcVertex, - real_t forVertex_0, - real_t forVertex_1, - real_t forVertex_2, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_0comp2, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_1comp2, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - real_t macro_vertex_coord_id_2comp2, - real_t macro_vertex_coord_id_3comp0, - real_t macro_vertex_coord_id_3comp1, - real_t macro_vertex_coord_id_3comp2, - std::shared_ptr< SparseMatrixProxy > mat, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float, - real_t radRayVertex, - real_t radRefVertex, - real_t rayVertex_0, - real_t rayVertex_1, - real_t rayVertex_2, - real_t refVertex_0, - real_t refVertex_1, - real_t refVertex_2, - real_t thrVertex_0, - real_t thrVertex_1, - real_t thrVertex_2 ) const; + void toMatrix_P2ElementwiseEpsilonIcosahedralShellMap_1_2_macro_3D( idx_t* RESTRICT _data_dstEdge, + idx_t* RESTRICT _data_dstVertex, + real_t* RESTRICT _data_muEdge, + real_t* RESTRICT _data_muVertex, + idx_t* RESTRICT _data_srcEdge, + idx_t* RESTRICT _data_srcVertex, + real_t forVertex_0, + real_t forVertex_1, + real_t forVertex_2, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_0comp2, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_1comp2, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + real_t macro_vertex_coord_id_2comp2, + real_t macro_vertex_coord_id_3comp0, + real_t macro_vertex_coord_id_3comp1, + real_t macro_vertex_coord_id_3comp2, + std::shared_ptr< SparseMatrixProxy > mat, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float, + real_t radRayVertex, + real_t radRefVertex, + real_t rayVertex_0, + real_t rayVertex_1, + real_t rayVertex_2, + real_t refVertex_0, + real_t refVertex_1, + real_t refVertex_2, + real_t thrVertex_0, + real_t thrVertex_1, + real_t thrVertex_2 ) const; P2Function< real_t > mu; }; diff --git a/operators/epsilon/P2ElementwiseEpsilonIcosahedralShellMap_2_0.cpp b/operators/epsilon/P2ElementwiseEpsilonIcosahedralShellMap_2_0.cpp index e065d63397a2b925aed851c1aaa57769e2b07be0..27352945a84b21ff4a1e8d910791349c84476390 100644 --- a/operators/epsilon/P2ElementwiseEpsilonIcosahedralShellMap_2_0.cpp +++ b/operators/epsilon/P2ElementwiseEpsilonIcosahedralShellMap_2_0.cpp @@ -154,7 +154,7 @@ void P2ElementwiseEpsilonIcosahedralShellMap_2_0::apply( const P2Function< real_ this->timingTree_->start( "kernel" ); - apply_macro_3D( + apply_P2ElementwiseEpsilonIcosahedralShellMap_2_0_macro_3D( _data_dstEdge, _data_dstVertex, @@ -190,6 +190,7 @@ void P2ElementwiseEpsilonIcosahedralShellMap_2_0::apply( const P2Function< real_ thrVertex_0, thrVertex_1, thrVertex_2 ); + this->timingTree_->stop( "kernel" ); } @@ -285,7 +286,7 @@ void P2ElementwiseEpsilonIcosahedralShellMap_2_0::toMatrix( const std::shared_pt this->timingTree_->start( "kernel" ); - toMatrix_macro_3D( + toMatrix_P2ElementwiseEpsilonIcosahedralShellMap_2_0_macro_3D( _data_dstEdge, _data_dstVertex, @@ -322,6 +323,7 @@ void P2ElementwiseEpsilonIcosahedralShellMap_2_0::toMatrix( const std::shared_pt thrVertex_0, thrVertex_1, thrVertex_2 ); + this->timingTree_->stop( "kernel" ); } } diff --git a/operators/epsilon/P2ElementwiseEpsilonIcosahedralShellMap_2_0.hpp b/operators/epsilon/P2ElementwiseEpsilonIcosahedralShellMap_2_0.hpp index 8ae0e66c609c40b5e9c463bcb6741e2054e19312..a20203109632c7e8159fb08ac905e31534e28ec2 100644 --- a/operators/epsilon/P2ElementwiseEpsilonIcosahedralShellMap_2_0.hpp +++ b/operators/epsilon/P2ElementwiseEpsilonIcosahedralShellMap_2_0.hpp @@ -29,6 +29,7 @@ #include "core/DataTypes.h" #include "hyteg/LikwidWrapper.hpp" +#include "hyteg/boundary/BoundaryConditions.hpp" #include "hyteg/communication/Syncing.hpp" #include "hyteg/edgedofspace/EdgeDoFMacroCell.hpp" #include "hyteg/geometry/IcosahedralShellMap.hpp" @@ -36,6 +37,7 @@ #include "hyteg/p2functionspace/P2Function.hpp" #include "hyteg/primitivestorage/PrimitiveStorage.hpp" #include "hyteg/sparseassembly/SparseMatrixProxy.hpp" +#include "hyteg/types/types.hpp" #define FUNC_PREFIX @@ -83,87 +85,96 @@ class P2ElementwiseEpsilonIcosahedralShellMap_2_0 : public Operator< P2Function< protected: private: - /// Kernel type: apply + /// Integral: P2ElementwiseEpsilonIcosahedralShellMap_2_0 + /// - volume element: tetrahedron, dim: 3, vertices: 4, spacedim: 3 + /// - kernel type: apply + /// - loop strategy: SAWTOOTH /// - quadrature rule: Hammer-Marlowe-Stroud 3 | points: 5, degree: 3 + /// - blending map: IcosahedralShellMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 3583 4397 66 15 5 0 0 1 - void apply_macro_3D( real_t* RESTRICT _data_dstEdge, - real_t* RESTRICT _data_dstVertex, - real_t* RESTRICT _data_muEdge, - real_t* RESTRICT _data_muVertex, - real_t* RESTRICT _data_srcEdge, - real_t* RESTRICT _data_srcVertex, - real_t forVertex_0, - real_t forVertex_1, - real_t forVertex_2, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_0comp2, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_1comp2, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - real_t macro_vertex_coord_id_2comp2, - real_t macro_vertex_coord_id_3comp0, - real_t macro_vertex_coord_id_3comp1, - real_t macro_vertex_coord_id_3comp2, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float, - real_t radRayVertex, - real_t radRefVertex, - real_t rayVertex_0, - real_t rayVertex_1, - real_t rayVertex_2, - real_t refVertex_0, - real_t refVertex_1, - real_t refVertex_2, - real_t thrVertex_0, - real_t thrVertex_1, - real_t thrVertex_2 ) const; - /// Kernel type: toMatrix + void apply_P2ElementwiseEpsilonIcosahedralShellMap_2_0_macro_3D( real_t* RESTRICT _data_dstEdge, + real_t* RESTRICT _data_dstVertex, + real_t* RESTRICT _data_muEdge, + real_t* RESTRICT _data_muVertex, + real_t* RESTRICT _data_srcEdge, + real_t* RESTRICT _data_srcVertex, + real_t forVertex_0, + real_t forVertex_1, + real_t forVertex_2, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_0comp2, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_1comp2, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + real_t macro_vertex_coord_id_2comp2, + real_t macro_vertex_coord_id_3comp0, + real_t macro_vertex_coord_id_3comp1, + real_t macro_vertex_coord_id_3comp2, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float, + real_t radRayVertex, + real_t radRefVertex, + real_t rayVertex_0, + real_t rayVertex_1, + real_t rayVertex_2, + real_t refVertex_0, + real_t refVertex_1, + real_t refVertex_2, + real_t thrVertex_0, + real_t thrVertex_1, + real_t thrVertex_2 ) const; + + /// Integral: P2ElementwiseEpsilonIcosahedralShellMap_2_0 + /// - volume element: tetrahedron, dim: 3, vertices: 4, spacedim: 3 + /// - kernel type: toMatrix + /// - loop strategy: SAWTOOTH /// - quadrature rule: Hammer-Marlowe-Stroud 3 | points: 5, degree: 3 + /// - blending map: IcosahedralShellMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 3483 4297 66 15 5 0 0 4 - void toMatrix_macro_3D( idx_t* RESTRICT _data_dstEdge, - idx_t* RESTRICT _data_dstVertex, - real_t* RESTRICT _data_muEdge, - real_t* RESTRICT _data_muVertex, - idx_t* RESTRICT _data_srcEdge, - idx_t* RESTRICT _data_srcVertex, - real_t forVertex_0, - real_t forVertex_1, - real_t forVertex_2, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_0comp2, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_1comp2, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - real_t macro_vertex_coord_id_2comp2, - real_t macro_vertex_coord_id_3comp0, - real_t macro_vertex_coord_id_3comp1, - real_t macro_vertex_coord_id_3comp2, - std::shared_ptr< SparseMatrixProxy > mat, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float, - real_t radRayVertex, - real_t radRefVertex, - real_t rayVertex_0, - real_t rayVertex_1, - real_t rayVertex_2, - real_t refVertex_0, - real_t refVertex_1, - real_t refVertex_2, - real_t thrVertex_0, - real_t thrVertex_1, - real_t thrVertex_2 ) const; + void toMatrix_P2ElementwiseEpsilonIcosahedralShellMap_2_0_macro_3D( idx_t* RESTRICT _data_dstEdge, + idx_t* RESTRICT _data_dstVertex, + real_t* RESTRICT _data_muEdge, + real_t* RESTRICT _data_muVertex, + idx_t* RESTRICT _data_srcEdge, + idx_t* RESTRICT _data_srcVertex, + real_t forVertex_0, + real_t forVertex_1, + real_t forVertex_2, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_0comp2, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_1comp2, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + real_t macro_vertex_coord_id_2comp2, + real_t macro_vertex_coord_id_3comp0, + real_t macro_vertex_coord_id_3comp1, + real_t macro_vertex_coord_id_3comp2, + std::shared_ptr< SparseMatrixProxy > mat, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float, + real_t radRayVertex, + real_t radRefVertex, + real_t rayVertex_0, + real_t rayVertex_1, + real_t rayVertex_2, + real_t refVertex_0, + real_t refVertex_1, + real_t refVertex_2, + real_t thrVertex_0, + real_t thrVertex_1, + real_t thrVertex_2 ) const; P2Function< real_t > mu; }; diff --git a/operators/epsilon/P2ElementwiseEpsilonIcosahedralShellMap_2_1.cpp b/operators/epsilon/P2ElementwiseEpsilonIcosahedralShellMap_2_1.cpp index f5cc8cd0e0a8a5d751ff8ce9458584ac1bc8473c..92a66ed3852ae36233c4beef4e060dc6af518d83 100644 --- a/operators/epsilon/P2ElementwiseEpsilonIcosahedralShellMap_2_1.cpp +++ b/operators/epsilon/P2ElementwiseEpsilonIcosahedralShellMap_2_1.cpp @@ -154,7 +154,7 @@ void P2ElementwiseEpsilonIcosahedralShellMap_2_1::apply( const P2Function< real_ this->timingTree_->start( "kernel" ); - apply_macro_3D( + apply_P2ElementwiseEpsilonIcosahedralShellMap_2_1_macro_3D( _data_dstEdge, _data_dstVertex, @@ -190,6 +190,7 @@ void P2ElementwiseEpsilonIcosahedralShellMap_2_1::apply( const P2Function< real_ thrVertex_0, thrVertex_1, thrVertex_2 ); + this->timingTree_->stop( "kernel" ); } @@ -285,7 +286,7 @@ void P2ElementwiseEpsilonIcosahedralShellMap_2_1::toMatrix( const std::shared_pt this->timingTree_->start( "kernel" ); - toMatrix_macro_3D( + toMatrix_P2ElementwiseEpsilonIcosahedralShellMap_2_1_macro_3D( _data_dstEdge, _data_dstVertex, @@ -322,6 +323,7 @@ void P2ElementwiseEpsilonIcosahedralShellMap_2_1::toMatrix( const std::shared_pt thrVertex_0, thrVertex_1, thrVertex_2 ); + this->timingTree_->stop( "kernel" ); } } diff --git a/operators/epsilon/P2ElementwiseEpsilonIcosahedralShellMap_2_1.hpp b/operators/epsilon/P2ElementwiseEpsilonIcosahedralShellMap_2_1.hpp index 324284a67b9dd1f2ba9547749f52e9ff38abc530..db912ccf801be538c9d81e017a0851783f8f8dd1 100644 --- a/operators/epsilon/P2ElementwiseEpsilonIcosahedralShellMap_2_1.hpp +++ b/operators/epsilon/P2ElementwiseEpsilonIcosahedralShellMap_2_1.hpp @@ -29,6 +29,7 @@ #include "core/DataTypes.h" #include "hyteg/LikwidWrapper.hpp" +#include "hyteg/boundary/BoundaryConditions.hpp" #include "hyteg/communication/Syncing.hpp" #include "hyteg/edgedofspace/EdgeDoFMacroCell.hpp" #include "hyteg/geometry/IcosahedralShellMap.hpp" @@ -36,6 +37,7 @@ #include "hyteg/p2functionspace/P2Function.hpp" #include "hyteg/primitivestorage/PrimitiveStorage.hpp" #include "hyteg/sparseassembly/SparseMatrixProxy.hpp" +#include "hyteg/types/types.hpp" #define FUNC_PREFIX @@ -83,87 +85,96 @@ class P2ElementwiseEpsilonIcosahedralShellMap_2_1 : public Operator< P2Function< protected: private: - /// Kernel type: apply + /// Integral: P2ElementwiseEpsilonIcosahedralShellMap_2_1 + /// - volume element: tetrahedron, dim: 3, vertices: 4, spacedim: 3 + /// - kernel type: apply + /// - loop strategy: SAWTOOTH /// - quadrature rule: Hammer-Marlowe-Stroud 3 | points: 5, degree: 3 + /// - blending map: IcosahedralShellMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 3583 4397 66 15 5 0 0 1 - void apply_macro_3D( real_t* RESTRICT _data_dstEdge, - real_t* RESTRICT _data_dstVertex, - real_t* RESTRICT _data_muEdge, - real_t* RESTRICT _data_muVertex, - real_t* RESTRICT _data_srcEdge, - real_t* RESTRICT _data_srcVertex, - real_t forVertex_0, - real_t forVertex_1, - real_t forVertex_2, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_0comp2, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_1comp2, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - real_t macro_vertex_coord_id_2comp2, - real_t macro_vertex_coord_id_3comp0, - real_t macro_vertex_coord_id_3comp1, - real_t macro_vertex_coord_id_3comp2, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float, - real_t radRayVertex, - real_t radRefVertex, - real_t rayVertex_0, - real_t rayVertex_1, - real_t rayVertex_2, - real_t refVertex_0, - real_t refVertex_1, - real_t refVertex_2, - real_t thrVertex_0, - real_t thrVertex_1, - real_t thrVertex_2 ) const; - /// Kernel type: toMatrix + void apply_P2ElementwiseEpsilonIcosahedralShellMap_2_1_macro_3D( real_t* RESTRICT _data_dstEdge, + real_t* RESTRICT _data_dstVertex, + real_t* RESTRICT _data_muEdge, + real_t* RESTRICT _data_muVertex, + real_t* RESTRICT _data_srcEdge, + real_t* RESTRICT _data_srcVertex, + real_t forVertex_0, + real_t forVertex_1, + real_t forVertex_2, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_0comp2, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_1comp2, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + real_t macro_vertex_coord_id_2comp2, + real_t macro_vertex_coord_id_3comp0, + real_t macro_vertex_coord_id_3comp1, + real_t macro_vertex_coord_id_3comp2, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float, + real_t radRayVertex, + real_t radRefVertex, + real_t rayVertex_0, + real_t rayVertex_1, + real_t rayVertex_2, + real_t refVertex_0, + real_t refVertex_1, + real_t refVertex_2, + real_t thrVertex_0, + real_t thrVertex_1, + real_t thrVertex_2 ) const; + + /// Integral: P2ElementwiseEpsilonIcosahedralShellMap_2_1 + /// - volume element: tetrahedron, dim: 3, vertices: 4, spacedim: 3 + /// - kernel type: toMatrix + /// - loop strategy: SAWTOOTH /// - quadrature rule: Hammer-Marlowe-Stroud 3 | points: 5, degree: 3 + /// - blending map: IcosahedralShellMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 3483 4297 66 15 5 0 0 4 - void toMatrix_macro_3D( idx_t* RESTRICT _data_dstEdge, - idx_t* RESTRICT _data_dstVertex, - real_t* RESTRICT _data_muEdge, - real_t* RESTRICT _data_muVertex, - idx_t* RESTRICT _data_srcEdge, - idx_t* RESTRICT _data_srcVertex, - real_t forVertex_0, - real_t forVertex_1, - real_t forVertex_2, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_0comp2, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_1comp2, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - real_t macro_vertex_coord_id_2comp2, - real_t macro_vertex_coord_id_3comp0, - real_t macro_vertex_coord_id_3comp1, - real_t macro_vertex_coord_id_3comp2, - std::shared_ptr< SparseMatrixProxy > mat, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float, - real_t radRayVertex, - real_t radRefVertex, - real_t rayVertex_0, - real_t rayVertex_1, - real_t rayVertex_2, - real_t refVertex_0, - real_t refVertex_1, - real_t refVertex_2, - real_t thrVertex_0, - real_t thrVertex_1, - real_t thrVertex_2 ) const; + void toMatrix_P2ElementwiseEpsilonIcosahedralShellMap_2_1_macro_3D( idx_t* RESTRICT _data_dstEdge, + idx_t* RESTRICT _data_dstVertex, + real_t* RESTRICT _data_muEdge, + real_t* RESTRICT _data_muVertex, + idx_t* RESTRICT _data_srcEdge, + idx_t* RESTRICT _data_srcVertex, + real_t forVertex_0, + real_t forVertex_1, + real_t forVertex_2, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_0comp2, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_1comp2, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + real_t macro_vertex_coord_id_2comp2, + real_t macro_vertex_coord_id_3comp0, + real_t macro_vertex_coord_id_3comp1, + real_t macro_vertex_coord_id_3comp2, + std::shared_ptr< SparseMatrixProxy > mat, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float, + real_t radRayVertex, + real_t radRefVertex, + real_t rayVertex_0, + real_t rayVertex_1, + real_t rayVertex_2, + real_t refVertex_0, + real_t refVertex_1, + real_t refVertex_2, + real_t thrVertex_0, + real_t thrVertex_1, + real_t thrVertex_2 ) const; P2Function< real_t > mu; }; diff --git a/operators/epsilon/P2ElementwiseEpsilonIcosahedralShellMap_2_2.cpp b/operators/epsilon/P2ElementwiseEpsilonIcosahedralShellMap_2_2.cpp index 18938aa215acc3dbc3d4dec39cacf482b32c5d3f..8052ed92b40326170d86d25a48bb9bad37fcc1c8 100644 --- a/operators/epsilon/P2ElementwiseEpsilonIcosahedralShellMap_2_2.cpp +++ b/operators/epsilon/P2ElementwiseEpsilonIcosahedralShellMap_2_2.cpp @@ -154,7 +154,7 @@ void P2ElementwiseEpsilonIcosahedralShellMap_2_2::apply( const P2Function< real_ this->timingTree_->start( "kernel" ); - apply_macro_3D( + apply_P2ElementwiseEpsilonIcosahedralShellMap_2_2_macro_3D( _data_dstEdge, _data_dstVertex, @@ -190,6 +190,7 @@ void P2ElementwiseEpsilonIcosahedralShellMap_2_2::apply( const P2Function< real_ thrVertex_0, thrVertex_1, thrVertex_2 ); + this->timingTree_->stop( "kernel" ); } @@ -285,7 +286,7 @@ void P2ElementwiseEpsilonIcosahedralShellMap_2_2::toMatrix( const std::shared_pt this->timingTree_->start( "kernel" ); - toMatrix_macro_3D( + toMatrix_P2ElementwiseEpsilonIcosahedralShellMap_2_2_macro_3D( _data_dstEdge, _data_dstVertex, @@ -322,6 +323,7 @@ void P2ElementwiseEpsilonIcosahedralShellMap_2_2::toMatrix( const std::shared_pt thrVertex_0, thrVertex_1, thrVertex_2 ); + this->timingTree_->stop( "kernel" ); } } @@ -401,7 +403,7 @@ void P2ElementwiseEpsilonIcosahedralShellMap_2_2::computeInverseDiagonalOperator this->timingTree_->start( "kernel" ); - computeInverseDiagonalOperatorValues_macro_3D( + computeInverseDiagonalOperatorValues_P2ElementwiseEpsilonIcosahedralShellMap_2_2_macro_3D( _data_invDiag_Edge, _data_invDiag_Vertex, @@ -435,6 +437,7 @@ void P2ElementwiseEpsilonIcosahedralShellMap_2_2::computeInverseDiagonalOperator thrVertex_0, thrVertex_1, thrVertex_2 ); + this->timingTree_->stop( "kernel" ); } diff --git a/operators/epsilon/P2ElementwiseEpsilonIcosahedralShellMap_2_2.hpp b/operators/epsilon/P2ElementwiseEpsilonIcosahedralShellMap_2_2.hpp index 49b5755f67f2242b636d3927f2f92d614a9e02a0..ce9ba70fd8b7c33a562b3e22ab326b1222fcbe4d 100644 --- a/operators/epsilon/P2ElementwiseEpsilonIcosahedralShellMap_2_2.hpp +++ b/operators/epsilon/P2ElementwiseEpsilonIcosahedralShellMap_2_2.hpp @@ -29,6 +29,7 @@ #include "core/DataTypes.h" #include "hyteg/LikwidWrapper.hpp" +#include "hyteg/boundary/BoundaryConditions.hpp" #include "hyteg/communication/Syncing.hpp" #include "hyteg/edgedofspace/EdgeDoFMacroCell.hpp" #include "hyteg/geometry/IcosahedralShellMap.hpp" @@ -37,6 +38,7 @@ #include "hyteg/primitivestorage/PrimitiveStorage.hpp" #include "hyteg/solvers/Smoothables.hpp" #include "hyteg/sparseassembly/SparseMatrixProxy.hpp" +#include "hyteg/types/types.hpp" #define FUNC_PREFIX @@ -89,125 +91,140 @@ class P2ElementwiseEpsilonIcosahedralShellMap_2_2 : public Operator< P2Function< protected: private: - /// Kernel type: apply + /// Integral: P2ElementwiseEpsilonIcosahedralShellMap_2_2 + /// - volume element: tetrahedron, dim: 3, vertices: 4, spacedim: 3 + /// - kernel type: apply + /// - loop strategy: SAWTOOTH /// - quadrature rule: Hammer-Marlowe-Stroud 3 | points: 5, degree: 3 + /// - blending map: IcosahedralShellMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 3038 4397 66 15 5 0 0 1 - void apply_macro_3D( real_t* RESTRICT _data_dstEdge, - real_t* RESTRICT _data_dstVertex, - real_t* RESTRICT _data_muEdge, - real_t* RESTRICT _data_muVertex, - real_t* RESTRICT _data_srcEdge, - real_t* RESTRICT _data_srcVertex, - real_t forVertex_0, - real_t forVertex_1, - real_t forVertex_2, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_0comp2, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_1comp2, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - real_t macro_vertex_coord_id_2comp2, - real_t macro_vertex_coord_id_3comp0, - real_t macro_vertex_coord_id_3comp1, - real_t macro_vertex_coord_id_3comp2, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float, - real_t radRayVertex, - real_t radRefVertex, - real_t rayVertex_0, - real_t rayVertex_1, - real_t rayVertex_2, - real_t refVertex_0, - real_t refVertex_1, - real_t refVertex_2, - real_t thrVertex_0, - real_t thrVertex_1, - real_t thrVertex_2 ) const; - /// Kernel type: toMatrix + void apply_P2ElementwiseEpsilonIcosahedralShellMap_2_2_macro_3D( real_t* RESTRICT _data_dstEdge, + real_t* RESTRICT _data_dstVertex, + real_t* RESTRICT _data_muEdge, + real_t* RESTRICT _data_muVertex, + real_t* RESTRICT _data_srcEdge, + real_t* RESTRICT _data_srcVertex, + real_t forVertex_0, + real_t forVertex_1, + real_t forVertex_2, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_0comp2, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_1comp2, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + real_t macro_vertex_coord_id_2comp2, + real_t macro_vertex_coord_id_3comp0, + real_t macro_vertex_coord_id_3comp1, + real_t macro_vertex_coord_id_3comp2, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float, + real_t radRayVertex, + real_t radRefVertex, + real_t rayVertex_0, + real_t rayVertex_1, + real_t rayVertex_2, + real_t refVertex_0, + real_t refVertex_1, + real_t refVertex_2, + real_t thrVertex_0, + real_t thrVertex_1, + real_t thrVertex_2 ) const; + + /// Integral: P2ElementwiseEpsilonIcosahedralShellMap_2_2 + /// - volume element: tetrahedron, dim: 3, vertices: 4, spacedim: 3 + /// - kernel type: toMatrix + /// - loop strategy: SAWTOOTH /// - quadrature rule: Hammer-Marlowe-Stroud 3 | points: 5, degree: 3 + /// - blending map: IcosahedralShellMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 2938 4297 66 15 5 0 0 4 - void toMatrix_macro_3D( idx_t* RESTRICT _data_dstEdge, - idx_t* RESTRICT _data_dstVertex, - real_t* RESTRICT _data_muEdge, - real_t* RESTRICT _data_muVertex, - idx_t* RESTRICT _data_srcEdge, - idx_t* RESTRICT _data_srcVertex, - real_t forVertex_0, - real_t forVertex_1, - real_t forVertex_2, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_0comp2, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_1comp2, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - real_t macro_vertex_coord_id_2comp2, - real_t macro_vertex_coord_id_3comp0, - real_t macro_vertex_coord_id_3comp1, - real_t macro_vertex_coord_id_3comp2, - std::shared_ptr< SparseMatrixProxy > mat, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float, - real_t radRayVertex, - real_t radRefVertex, - real_t rayVertex_0, - real_t rayVertex_1, - real_t rayVertex_2, - real_t refVertex_0, - real_t refVertex_1, - real_t refVertex_2, - real_t thrVertex_0, - real_t thrVertex_1, - real_t thrVertex_2 ) const; - /// Kernel type: computeInverseDiagonalOperatorValues + void toMatrix_P2ElementwiseEpsilonIcosahedralShellMap_2_2_macro_3D( idx_t* RESTRICT _data_dstEdge, + idx_t* RESTRICT _data_dstVertex, + real_t* RESTRICT _data_muEdge, + real_t* RESTRICT _data_muVertex, + idx_t* RESTRICT _data_srcEdge, + idx_t* RESTRICT _data_srcVertex, + real_t forVertex_0, + real_t forVertex_1, + real_t forVertex_2, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_0comp2, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_1comp2, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + real_t macro_vertex_coord_id_2comp2, + real_t macro_vertex_coord_id_3comp0, + real_t macro_vertex_coord_id_3comp1, + real_t macro_vertex_coord_id_3comp2, + std::shared_ptr< SparseMatrixProxy > mat, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float, + real_t radRayVertex, + real_t radRefVertex, + real_t rayVertex_0, + real_t rayVertex_1, + real_t rayVertex_2, + real_t refVertex_0, + real_t refVertex_1, + real_t refVertex_2, + real_t thrVertex_0, + real_t thrVertex_1, + real_t thrVertex_2 ) const; + + /// Integral: P2ElementwiseEpsilonIcosahedralShellMap_2_2 + /// - volume element: tetrahedron, dim: 3, vertices: 4, spacedim: 3 + /// - kernel type: computeInverseDiagonalOperatorValues + /// - loop strategy: SAWTOOTH /// - quadrature rule: Hammer-Marlowe-Stroud 3 | points: 5, degree: 3 + /// - blending map: IcosahedralShellMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 2048 3397 66 15 5 0 0 1 - void computeInverseDiagonalOperatorValues_macro_3D( real_t* RESTRICT _data_invDiag_Edge, - real_t* RESTRICT _data_invDiag_Vertex, - real_t* RESTRICT _data_muEdge, - real_t* RESTRICT _data_muVertex, - real_t forVertex_0, - real_t forVertex_1, - real_t forVertex_2, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_0comp2, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_1comp2, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - real_t macro_vertex_coord_id_2comp2, - real_t macro_vertex_coord_id_3comp0, - real_t macro_vertex_coord_id_3comp1, - real_t macro_vertex_coord_id_3comp2, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float, - real_t radRayVertex, - real_t radRefVertex, - real_t rayVertex_0, - real_t rayVertex_1, - real_t rayVertex_2, - real_t refVertex_0, - real_t refVertex_1, - real_t refVertex_2, - real_t thrVertex_0, - real_t thrVertex_1, - real_t thrVertex_2 ) const; + void computeInverseDiagonalOperatorValues_P2ElementwiseEpsilonIcosahedralShellMap_2_2_macro_3D( + real_t* RESTRICT _data_invDiag_Edge, + real_t* RESTRICT _data_invDiag_Vertex, + real_t* RESTRICT _data_muEdge, + real_t* RESTRICT _data_muVertex, + real_t forVertex_0, + real_t forVertex_1, + real_t forVertex_2, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_0comp2, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_1comp2, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + real_t macro_vertex_coord_id_2comp2, + real_t macro_vertex_coord_id_3comp0, + real_t macro_vertex_coord_id_3comp1, + real_t macro_vertex_coord_id_3comp2, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float, + real_t radRayVertex, + real_t radRefVertex, + real_t rayVertex_0, + real_t rayVertex_1, + real_t rayVertex_2, + real_t refVertex_0, + real_t refVertex_1, + real_t refVertex_2, + real_t thrVertex_0, + real_t thrVertex_1, + real_t thrVertex_2 ) const; std::shared_ptr< P2Function< real_t > > invDiag_; P2Function< real_t > mu; diff --git a/operators/epsilon/P2ElementwiseEpsilon_0_0.cpp b/operators/epsilon/P2ElementwiseEpsilon_0_0.cpp index a9ec5f856935651c47768837184a6a50e63cc376..0fdca154f69ec176f796ad69feb6acb9ef7c193d 100644 --- a/operators/epsilon/P2ElementwiseEpsilon_0_0.cpp +++ b/operators/epsilon/P2ElementwiseEpsilon_0_0.cpp @@ -137,7 +137,7 @@ void P2ElementwiseEpsilon_0_0::apply( const P2Function< real_t >& src, this->timingTree_->start( "kernel" ); - apply_macro_3D( + apply_P2ElementwiseEpsilon_0_0_macro_3D( _data_dstEdge, _data_dstVertex, @@ -159,6 +159,7 @@ void P2ElementwiseEpsilon_0_0::apply( const P2Function< real_t >& src, macro_vertex_coord_id_3comp2, micro_edges_per_macro_edge, micro_edges_per_macro_edge_float ); + this->timingTree_->stop( "kernel" ); } @@ -228,7 +229,7 @@ void P2ElementwiseEpsilon_0_0::apply( const P2Function< real_t >& src, this->timingTree_->start( "kernel" ); - apply_macro_2D( + apply_P2ElementwiseEpsilon_0_0_macro_2D( _data_dstEdge, _data_dstVertex, @@ -244,6 +245,7 @@ void P2ElementwiseEpsilon_0_0::apply( const P2Function< real_t >& src, macro_vertex_coord_id_2comp1, micro_edges_per_macro_edge, micro_edges_per_macro_edge_float ); + this->timingTree_->stop( "kernel" ); } @@ -314,7 +316,7 @@ void P2ElementwiseEpsilon_0_0::toMatrix( const std::shared_ptr< SparseMatrixProx this->timingTree_->start( "kernel" ); - toMatrix_macro_3D( + toMatrix_P2ElementwiseEpsilon_0_0_macro_3D( _data_dstEdge, _data_dstVertex, @@ -337,6 +339,7 @@ void P2ElementwiseEpsilon_0_0::toMatrix( const std::shared_ptr< SparseMatrixProx mat, micro_edges_per_macro_edge, micro_edges_per_macro_edge_float ); + this->timingTree_->stop( "kernel" ); } } @@ -369,7 +372,7 @@ void P2ElementwiseEpsilon_0_0::toMatrix( const std::shared_ptr< SparseMatrixProx this->timingTree_->start( "kernel" ); - toMatrix_macro_2D( + toMatrix_P2ElementwiseEpsilon_0_0_macro_2D( _data_dstEdge, _data_dstVertex, @@ -386,6 +389,7 @@ void P2ElementwiseEpsilon_0_0::toMatrix( const std::shared_ptr< SparseMatrixProx mat, micro_edges_per_macro_edge, micro_edges_per_macro_edge_float ); + this->timingTree_->stop( "kernel" ); } } @@ -440,7 +444,7 @@ void P2ElementwiseEpsilon_0_0::computeInverseDiagonalOperatorValues() this->timingTree_->start( "kernel" ); - computeInverseDiagonalOperatorValues_macro_3D( + computeInverseDiagonalOperatorValues_P2ElementwiseEpsilon_0_0_macro_3D( _data_invDiag_Edge, _data_invDiag_Vertex, @@ -460,6 +464,7 @@ void P2ElementwiseEpsilon_0_0::computeInverseDiagonalOperatorValues() macro_vertex_coord_id_3comp2, micro_edges_per_macro_edge, micro_edges_per_macro_edge_float ); + this->timingTree_->stop( "kernel" ); } @@ -504,7 +509,7 @@ void P2ElementwiseEpsilon_0_0::computeInverseDiagonalOperatorValues() this->timingTree_->start( "kernel" ); - computeInverseDiagonalOperatorValues_macro_2D( + computeInverseDiagonalOperatorValues_P2ElementwiseEpsilon_0_0_macro_2D( _data_invDiag_Edge, _data_invDiag_Vertex, @@ -518,6 +523,7 @@ void P2ElementwiseEpsilon_0_0::computeInverseDiagonalOperatorValues() macro_vertex_coord_id_2comp1, micro_edges_per_macro_edge, micro_edges_per_macro_edge_float ); + this->timingTree_->stop( "kernel" ); } diff --git a/operators/epsilon/P2ElementwiseEpsilon_0_0.hpp b/operators/epsilon/P2ElementwiseEpsilon_0_0.hpp index 0c72a0ab089644cd66fb471e898aa68f307b149f..2c51d0da70f62aac4a3fe03c1f97e16a4bc29f9b 100644 --- a/operators/epsilon/P2ElementwiseEpsilon_0_0.hpp +++ b/operators/epsilon/P2ElementwiseEpsilon_0_0.hpp @@ -29,6 +29,7 @@ #include "core/DataTypes.h" #include "hyteg/LikwidWrapper.hpp" +#include "hyteg/boundary/BoundaryConditions.hpp" #include "hyteg/communication/Syncing.hpp" #include "hyteg/edgedofspace/EdgeDoFMacroCell.hpp" #include "hyteg/operators/Operator.hpp" @@ -36,6 +37,7 @@ #include "hyteg/primitivestorage/PrimitiveStorage.hpp" #include "hyteg/solvers/Smoothables.hpp" #include "hyteg/sparseassembly/SparseMatrixProxy.hpp" +#include "hyteg/types/types.hpp" #define FUNC_PREFIX @@ -88,142 +90,171 @@ class P2ElementwiseEpsilon_0_0 : public Operator< P2Function< real_t >, P2Functi protected: private: - /// Kernel type: apply + /// Integral: P2ElementwiseEpsilon_0_0 + /// - volume element: triangle, dim: 2, vertices: 3, spacedim: 2 + /// - kernel type: apply + /// - loop strategy: SAWTOOTH /// - quadrature rule: Dunavant 2 | points: 3, degree: 2 + /// - blending map: IdentityMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 146 144 12 0 0 0 0 1 - void apply_macro_2D( real_t* RESTRICT _data_dstEdge, - real_t* RESTRICT _data_dstVertex, - real_t* RESTRICT _data_muEdge, - real_t* RESTRICT _data_muVertex, - real_t* RESTRICT _data_srcEdge, - real_t* RESTRICT _data_srcVertex, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float ) const; - /// Kernel type: apply + void apply_P2ElementwiseEpsilon_0_0_macro_2D( real_t* RESTRICT _data_dstEdge, + real_t* RESTRICT _data_dstVertex, + real_t* RESTRICT _data_muEdge, + real_t* RESTRICT _data_muVertex, + real_t* RESTRICT _data_srcEdge, + real_t* RESTRICT _data_srcVertex, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float ) const; + + /// Integral: P2ElementwiseEpsilon_0_0 + /// - volume element: tetrahedron, dim: 3, vertices: 4, spacedim: 3 + /// - kernel type: apply + /// - loop strategy: SAWTOOTH /// - quadrature rule: Hammer-Marlowe-Stroud 1 | points: 4, degree: 2 + /// - blending map: IdentityMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 449 436 36 0 0 0 0 1 - void apply_macro_3D( real_t* RESTRICT _data_dstEdge, - real_t* RESTRICT _data_dstVertex, - real_t* RESTRICT _data_muEdge, - real_t* RESTRICT _data_muVertex, - real_t* RESTRICT _data_srcEdge, - real_t* RESTRICT _data_srcVertex, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_0comp2, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_1comp2, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - real_t macro_vertex_coord_id_2comp2, - real_t macro_vertex_coord_id_3comp0, - real_t macro_vertex_coord_id_3comp1, - real_t macro_vertex_coord_id_3comp2, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float ) const; - /// Kernel type: toMatrix + void apply_P2ElementwiseEpsilon_0_0_macro_3D( real_t* RESTRICT _data_dstEdge, + real_t* RESTRICT _data_dstVertex, + real_t* RESTRICT _data_muEdge, + real_t* RESTRICT _data_muVertex, + real_t* RESTRICT _data_srcEdge, + real_t* RESTRICT _data_srcVertex, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_0comp2, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_1comp2, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + real_t macro_vertex_coord_id_2comp2, + real_t macro_vertex_coord_id_3comp0, + real_t macro_vertex_coord_id_3comp1, + real_t macro_vertex_coord_id_3comp2, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float ) const; + + /// Integral: P2ElementwiseEpsilon_0_0 + /// - volume element: triangle, dim: 2, vertices: 3, spacedim: 2 + /// - kernel type: toMatrix + /// - loop strategy: SAWTOOTH /// - quadrature rule: Dunavant 2 | points: 3, degree: 2 + /// - blending map: IdentityMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 110 108 12 0 0 0 0 4 - void toMatrix_macro_2D( idx_t* RESTRICT _data_dstEdge, - idx_t* RESTRICT _data_dstVertex, - real_t* RESTRICT _data_muEdge, - real_t* RESTRICT _data_muVertex, - idx_t* RESTRICT _data_srcEdge, - idx_t* RESTRICT _data_srcVertex, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - std::shared_ptr< SparseMatrixProxy > mat, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float ) const; - /// Kernel type: toMatrix + void toMatrix_P2ElementwiseEpsilon_0_0_macro_2D( idx_t* RESTRICT _data_dstEdge, + idx_t* RESTRICT _data_dstVertex, + real_t* RESTRICT _data_muEdge, + real_t* RESTRICT _data_muVertex, + idx_t* RESTRICT _data_srcEdge, + idx_t* RESTRICT _data_srcVertex, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + std::shared_ptr< SparseMatrixProxy > mat, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float ) const; + + /// Integral: P2ElementwiseEpsilon_0_0 + /// - volume element: tetrahedron, dim: 3, vertices: 4, spacedim: 3 + /// - kernel type: toMatrix + /// - loop strategy: SAWTOOTH /// - quadrature rule: Hammer-Marlowe-Stroud 1 | points: 4, degree: 2 + /// - blending map: IdentityMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 349 336 36 0 0 0 0 4 - void toMatrix_macro_3D( idx_t* RESTRICT _data_dstEdge, - idx_t* RESTRICT _data_dstVertex, - real_t* RESTRICT _data_muEdge, - real_t* RESTRICT _data_muVertex, - idx_t* RESTRICT _data_srcEdge, - idx_t* RESTRICT _data_srcVertex, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_0comp2, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_1comp2, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - real_t macro_vertex_coord_id_2comp2, - real_t macro_vertex_coord_id_3comp0, - real_t macro_vertex_coord_id_3comp1, - real_t macro_vertex_coord_id_3comp2, - std::shared_ptr< SparseMatrixProxy > mat, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float ) const; - /// Kernel type: computeInverseDiagonalOperatorValues + void toMatrix_P2ElementwiseEpsilon_0_0_macro_3D( idx_t* RESTRICT _data_dstEdge, + idx_t* RESTRICT _data_dstVertex, + real_t* RESTRICT _data_muEdge, + real_t* RESTRICT _data_muVertex, + idx_t* RESTRICT _data_srcEdge, + idx_t* RESTRICT _data_srcVertex, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_0comp2, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_1comp2, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + real_t macro_vertex_coord_id_2comp2, + real_t macro_vertex_coord_id_3comp0, + real_t macro_vertex_coord_id_3comp1, + real_t macro_vertex_coord_id_3comp2, + std::shared_ptr< SparseMatrixProxy > mat, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float ) const; + + /// Integral: P2ElementwiseEpsilon_0_0 + /// - volume element: triangle, dim: 2, vertices: 3, spacedim: 2 + /// - kernel type: computeInverseDiagonalOperatorValues + /// - loop strategy: SAWTOOTH /// - quadrature rule: Dunavant 2 | points: 3, degree: 2 + /// - blending map: IdentityMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 71 63 12 0 0 0 0 1 - void computeInverseDiagonalOperatorValues_macro_2D( real_t* RESTRICT _data_invDiag_Edge, - real_t* RESTRICT _data_invDiag_Vertex, - real_t* RESTRICT _data_muEdge, - real_t* RESTRICT _data_muVertex, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float ) const; - /// Kernel type: computeInverseDiagonalOperatorValues + void computeInverseDiagonalOperatorValues_P2ElementwiseEpsilon_0_0_macro_2D( real_t* RESTRICT _data_invDiag_Edge, + real_t* RESTRICT _data_invDiag_Vertex, + real_t* RESTRICT _data_muEdge, + real_t* RESTRICT _data_muVertex, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float ) const; + + /// Integral: P2ElementwiseEpsilon_0_0 + /// - volume element: tetrahedron, dim: 3, vertices: 4, spacedim: 3 + /// - kernel type: computeInverseDiagonalOperatorValues + /// - loop strategy: SAWTOOTH /// - quadrature rule: Hammer-Marlowe-Stroud 1 | points: 4, degree: 2 + /// - blending map: IdentityMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 179 156 36 0 0 0 0 1 - void computeInverseDiagonalOperatorValues_macro_3D( real_t* RESTRICT _data_invDiag_Edge, - real_t* RESTRICT _data_invDiag_Vertex, - real_t* RESTRICT _data_muEdge, - real_t* RESTRICT _data_muVertex, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_0comp2, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_1comp2, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - real_t macro_vertex_coord_id_2comp2, - real_t macro_vertex_coord_id_3comp0, - real_t macro_vertex_coord_id_3comp1, - real_t macro_vertex_coord_id_3comp2, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float ) const; + void computeInverseDiagonalOperatorValues_P2ElementwiseEpsilon_0_0_macro_3D( real_t* RESTRICT _data_invDiag_Edge, + real_t* RESTRICT _data_invDiag_Vertex, + real_t* RESTRICT _data_muEdge, + real_t* RESTRICT _data_muVertex, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_0comp2, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_1comp2, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + real_t macro_vertex_coord_id_2comp2, + real_t macro_vertex_coord_id_3comp0, + real_t macro_vertex_coord_id_3comp1, + real_t macro_vertex_coord_id_3comp2, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float ) const; std::shared_ptr< P2Function< real_t > > invDiag_; P2Function< real_t > mu; diff --git a/operators/epsilon/P2ElementwiseEpsilon_0_1.cpp b/operators/epsilon/P2ElementwiseEpsilon_0_1.cpp index 96e1003e67fc58961f4f629f393087868d0f78ba..e1c16d43d2476fc12947f5aeccaeeba50d48f4e7 100644 --- a/operators/epsilon/P2ElementwiseEpsilon_0_1.cpp +++ b/operators/epsilon/P2ElementwiseEpsilon_0_1.cpp @@ -137,7 +137,7 @@ void P2ElementwiseEpsilon_0_1::apply( const P2Function< real_t >& src, this->timingTree_->start( "kernel" ); - apply_macro_3D( + apply_P2ElementwiseEpsilon_0_1_macro_3D( _data_dstEdge, _data_dstVertex, @@ -159,6 +159,7 @@ void P2ElementwiseEpsilon_0_1::apply( const P2Function< real_t >& src, macro_vertex_coord_id_3comp2, micro_edges_per_macro_edge, micro_edges_per_macro_edge_float ); + this->timingTree_->stop( "kernel" ); } @@ -228,7 +229,7 @@ void P2ElementwiseEpsilon_0_1::apply( const P2Function< real_t >& src, this->timingTree_->start( "kernel" ); - apply_macro_2D( + apply_P2ElementwiseEpsilon_0_1_macro_2D( _data_dstEdge, _data_dstVertex, @@ -244,6 +245,7 @@ void P2ElementwiseEpsilon_0_1::apply( const P2Function< real_t >& src, macro_vertex_coord_id_2comp1, micro_edges_per_macro_edge, micro_edges_per_macro_edge_float ); + this->timingTree_->stop( "kernel" ); } @@ -314,7 +316,7 @@ void P2ElementwiseEpsilon_0_1::toMatrix( const std::shared_ptr< SparseMatrixProx this->timingTree_->start( "kernel" ); - toMatrix_macro_3D( + toMatrix_P2ElementwiseEpsilon_0_1_macro_3D( _data_dstEdge, _data_dstVertex, @@ -337,6 +339,7 @@ void P2ElementwiseEpsilon_0_1::toMatrix( const std::shared_ptr< SparseMatrixProx mat, micro_edges_per_macro_edge, micro_edges_per_macro_edge_float ); + this->timingTree_->stop( "kernel" ); } } @@ -369,7 +372,7 @@ void P2ElementwiseEpsilon_0_1::toMatrix( const std::shared_ptr< SparseMatrixProx this->timingTree_->start( "kernel" ); - toMatrix_macro_2D( + toMatrix_P2ElementwiseEpsilon_0_1_macro_2D( _data_dstEdge, _data_dstVertex, @@ -386,6 +389,7 @@ void P2ElementwiseEpsilon_0_1::toMatrix( const std::shared_ptr< SparseMatrixProx mat, micro_edges_per_macro_edge, micro_edges_per_macro_edge_float ); + this->timingTree_->stop( "kernel" ); } } diff --git a/operators/epsilon/P2ElementwiseEpsilon_0_1.hpp b/operators/epsilon/P2ElementwiseEpsilon_0_1.hpp index a27df5e5a16970d56902e88784e948b33ce6167b..78c96a476c90f7ae98560c7ec988640437a4f8b0 100644 --- a/operators/epsilon/P2ElementwiseEpsilon_0_1.hpp +++ b/operators/epsilon/P2ElementwiseEpsilon_0_1.hpp @@ -29,12 +29,14 @@ #include "core/DataTypes.h" #include "hyteg/LikwidWrapper.hpp" +#include "hyteg/boundary/BoundaryConditions.hpp" #include "hyteg/communication/Syncing.hpp" #include "hyteg/edgedofspace/EdgeDoFMacroCell.hpp" #include "hyteg/operators/Operator.hpp" #include "hyteg/p2functionspace/P2Function.hpp" #include "hyteg/primitivestorage/PrimitiveStorage.hpp" #include "hyteg/sparseassembly/SparseMatrixProxy.hpp" +#include "hyteg/types/types.hpp" #define FUNC_PREFIX @@ -82,100 +84,119 @@ class P2ElementwiseEpsilon_0_1 : public Operator< P2Function< real_t >, P2Functi protected: private: - /// Kernel type: apply + /// Integral: P2ElementwiseEpsilon_0_1 + /// - volume element: triangle, dim: 2, vertices: 3, spacedim: 2 + /// - kernel type: apply + /// - loop strategy: SAWTOOTH /// - quadrature rule: Dunavant 2 | points: 3, degree: 2 + /// - blending map: IdentityMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 191 189 12 0 0 0 0 1 - void apply_macro_2D( real_t* RESTRICT _data_dstEdge, - real_t* RESTRICT _data_dstVertex, - real_t* RESTRICT _data_muEdge, - real_t* RESTRICT _data_muVertex, - real_t* RESTRICT _data_srcEdge, - real_t* RESTRICT _data_srcVertex, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float ) const; - /// Kernel type: apply + void apply_P2ElementwiseEpsilon_0_1_macro_2D( real_t* RESTRICT _data_dstEdge, + real_t* RESTRICT _data_dstVertex, + real_t* RESTRICT _data_muEdge, + real_t* RESTRICT _data_muVertex, + real_t* RESTRICT _data_srcEdge, + real_t* RESTRICT _data_srcVertex, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float ) const; + + /// Integral: P2ElementwiseEpsilon_0_1 + /// - volume element: tetrahedron, dim: 3, vertices: 4, spacedim: 3 + /// - kernel type: apply + /// - loop strategy: SAWTOOTH /// - quadrature rule: Hammer-Marlowe-Stroud 1 | points: 4, degree: 2 + /// - blending map: IdentityMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 629 616 36 0 0 0 0 1 - void apply_macro_3D( real_t* RESTRICT _data_dstEdge, - real_t* RESTRICT _data_dstVertex, - real_t* RESTRICT _data_muEdge, - real_t* RESTRICT _data_muVertex, - real_t* RESTRICT _data_srcEdge, - real_t* RESTRICT _data_srcVertex, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_0comp2, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_1comp2, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - real_t macro_vertex_coord_id_2comp2, - real_t macro_vertex_coord_id_3comp0, - real_t macro_vertex_coord_id_3comp1, - real_t macro_vertex_coord_id_3comp2, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float ) const; - /// Kernel type: toMatrix + void apply_P2ElementwiseEpsilon_0_1_macro_3D( real_t* RESTRICT _data_dstEdge, + real_t* RESTRICT _data_dstVertex, + real_t* RESTRICT _data_muEdge, + real_t* RESTRICT _data_muVertex, + real_t* RESTRICT _data_srcEdge, + real_t* RESTRICT _data_srcVertex, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_0comp2, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_1comp2, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + real_t macro_vertex_coord_id_2comp2, + real_t macro_vertex_coord_id_3comp0, + real_t macro_vertex_coord_id_3comp1, + real_t macro_vertex_coord_id_3comp2, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float ) const; + + /// Integral: P2ElementwiseEpsilon_0_1 + /// - volume element: triangle, dim: 2, vertices: 3, spacedim: 2 + /// - kernel type: toMatrix + /// - loop strategy: SAWTOOTH /// - quadrature rule: Dunavant 2 | points: 3, degree: 2 + /// - blending map: IdentityMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 155 153 12 0 0 0 0 4 - void toMatrix_macro_2D( idx_t* RESTRICT _data_dstEdge, - idx_t* RESTRICT _data_dstVertex, - real_t* RESTRICT _data_muEdge, - real_t* RESTRICT _data_muVertex, - idx_t* RESTRICT _data_srcEdge, - idx_t* RESTRICT _data_srcVertex, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - std::shared_ptr< SparseMatrixProxy > mat, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float ) const; - /// Kernel type: toMatrix + void toMatrix_P2ElementwiseEpsilon_0_1_macro_2D( idx_t* RESTRICT _data_dstEdge, + idx_t* RESTRICT _data_dstVertex, + real_t* RESTRICT _data_muEdge, + real_t* RESTRICT _data_muVertex, + idx_t* RESTRICT _data_srcEdge, + idx_t* RESTRICT _data_srcVertex, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + std::shared_ptr< SparseMatrixProxy > mat, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float ) const; + + /// Integral: P2ElementwiseEpsilon_0_1 + /// - volume element: tetrahedron, dim: 3, vertices: 4, spacedim: 3 + /// - kernel type: toMatrix + /// - loop strategy: SAWTOOTH /// - quadrature rule: Hammer-Marlowe-Stroud 1 | points: 4, degree: 2 + /// - blending map: IdentityMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 529 516 36 0 0 0 0 4 - void toMatrix_macro_3D( idx_t* RESTRICT _data_dstEdge, - idx_t* RESTRICT _data_dstVertex, - real_t* RESTRICT _data_muEdge, - real_t* RESTRICT _data_muVertex, - idx_t* RESTRICT _data_srcEdge, - idx_t* RESTRICT _data_srcVertex, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_0comp2, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_1comp2, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - real_t macro_vertex_coord_id_2comp2, - real_t macro_vertex_coord_id_3comp0, - real_t macro_vertex_coord_id_3comp1, - real_t macro_vertex_coord_id_3comp2, - std::shared_ptr< SparseMatrixProxy > mat, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float ) const; + void toMatrix_P2ElementwiseEpsilon_0_1_macro_3D( idx_t* RESTRICT _data_dstEdge, + idx_t* RESTRICT _data_dstVertex, + real_t* RESTRICT _data_muEdge, + real_t* RESTRICT _data_muVertex, + idx_t* RESTRICT _data_srcEdge, + idx_t* RESTRICT _data_srcVertex, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_0comp2, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_1comp2, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + real_t macro_vertex_coord_id_2comp2, + real_t macro_vertex_coord_id_3comp0, + real_t macro_vertex_coord_id_3comp1, + real_t macro_vertex_coord_id_3comp2, + std::shared_ptr< SparseMatrixProxy > mat, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float ) const; P2Function< real_t > mu; }; diff --git a/operators/epsilon/P2ElementwiseEpsilon_0_2.cpp b/operators/epsilon/P2ElementwiseEpsilon_0_2.cpp index 5a5ece8eccdc5517a90d28e5f65ee66dff61e19d..49e11fc2447c69d98f5d2549848f54851baa3317 100644 --- a/operators/epsilon/P2ElementwiseEpsilon_0_2.cpp +++ b/operators/epsilon/P2ElementwiseEpsilon_0_2.cpp @@ -136,7 +136,7 @@ void P2ElementwiseEpsilon_0_2::apply( const P2Function< real_t >& src, this->timingTree_->start( "kernel" ); - apply_macro_3D( + apply_P2ElementwiseEpsilon_0_2_macro_3D( _data_dstEdge, _data_dstVertex, @@ -158,6 +158,7 @@ void P2ElementwiseEpsilon_0_2::apply( const P2Function< real_t >& src, macro_vertex_coord_id_3comp2, micro_edges_per_macro_edge, micro_edges_per_macro_edge_float ); + this->timingTree_->stop( "kernel" ); } @@ -236,7 +237,7 @@ void P2ElementwiseEpsilon_0_2::toMatrix( const std::shared_ptr< SparseMatrixProx this->timingTree_->start( "kernel" ); - toMatrix_macro_3D( + toMatrix_P2ElementwiseEpsilon_0_2_macro_3D( _data_dstEdge, _data_dstVertex, @@ -259,6 +260,7 @@ void P2ElementwiseEpsilon_0_2::toMatrix( const std::shared_ptr< SparseMatrixProx mat, micro_edges_per_macro_edge, micro_edges_per_macro_edge_float ); + this->timingTree_->stop( "kernel" ); } } diff --git a/operators/epsilon/P2ElementwiseEpsilon_0_2.hpp b/operators/epsilon/P2ElementwiseEpsilon_0_2.hpp index 6d2a661629de153b545c3ed373ba2d39853b2973..ab6f49ec4e9c910babf56f549fd30d9efb92cd11 100644 --- a/operators/epsilon/P2ElementwiseEpsilon_0_2.hpp +++ b/operators/epsilon/P2ElementwiseEpsilon_0_2.hpp @@ -29,12 +29,14 @@ #include "core/DataTypes.h" #include "hyteg/LikwidWrapper.hpp" +#include "hyteg/boundary/BoundaryConditions.hpp" #include "hyteg/communication/Syncing.hpp" #include "hyteg/edgedofspace/EdgeDoFMacroCell.hpp" #include "hyteg/operators/Operator.hpp" #include "hyteg/p2functionspace/P2Function.hpp" #include "hyteg/primitivestorage/PrimitiveStorage.hpp" #include "hyteg/sparseassembly/SparseMatrixProxy.hpp" +#include "hyteg/types/types.hpp" #define FUNC_PREFIX @@ -82,59 +84,68 @@ class P2ElementwiseEpsilon_0_2 : public Operator< P2Function< real_t >, P2Functi protected: private: - /// Kernel type: apply + /// Integral: P2ElementwiseEpsilon_0_2 + /// - volume element: tetrahedron, dim: 3, vertices: 4, spacedim: 3 + /// - kernel type: apply + /// - loop strategy: SAWTOOTH /// - quadrature rule: Hammer-Marlowe-Stroud 1 | points: 4, degree: 2 + /// - blending map: IdentityMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 629 616 36 0 0 0 0 1 - void apply_macro_3D( real_t* RESTRICT _data_dstEdge, - real_t* RESTRICT _data_dstVertex, - real_t* RESTRICT _data_muEdge, - real_t* RESTRICT _data_muVertex, - real_t* RESTRICT _data_srcEdge, - real_t* RESTRICT _data_srcVertex, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_0comp2, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_1comp2, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - real_t macro_vertex_coord_id_2comp2, - real_t macro_vertex_coord_id_3comp0, - real_t macro_vertex_coord_id_3comp1, - real_t macro_vertex_coord_id_3comp2, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float ) const; - /// Kernel type: toMatrix + void apply_P2ElementwiseEpsilon_0_2_macro_3D( real_t* RESTRICT _data_dstEdge, + real_t* RESTRICT _data_dstVertex, + real_t* RESTRICT _data_muEdge, + real_t* RESTRICT _data_muVertex, + real_t* RESTRICT _data_srcEdge, + real_t* RESTRICT _data_srcVertex, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_0comp2, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_1comp2, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + real_t macro_vertex_coord_id_2comp2, + real_t macro_vertex_coord_id_3comp0, + real_t macro_vertex_coord_id_3comp1, + real_t macro_vertex_coord_id_3comp2, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float ) const; + + /// Integral: P2ElementwiseEpsilon_0_2 + /// - volume element: tetrahedron, dim: 3, vertices: 4, spacedim: 3 + /// - kernel type: toMatrix + /// - loop strategy: SAWTOOTH /// - quadrature rule: Hammer-Marlowe-Stroud 1 | points: 4, degree: 2 + /// - blending map: IdentityMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 529 516 36 0 0 0 0 4 - void toMatrix_macro_3D( idx_t* RESTRICT _data_dstEdge, - idx_t* RESTRICT _data_dstVertex, - real_t* RESTRICT _data_muEdge, - real_t* RESTRICT _data_muVertex, - idx_t* RESTRICT _data_srcEdge, - idx_t* RESTRICT _data_srcVertex, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_0comp2, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_1comp2, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - real_t macro_vertex_coord_id_2comp2, - real_t macro_vertex_coord_id_3comp0, - real_t macro_vertex_coord_id_3comp1, - real_t macro_vertex_coord_id_3comp2, - std::shared_ptr< SparseMatrixProxy > mat, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float ) const; + void toMatrix_P2ElementwiseEpsilon_0_2_macro_3D( idx_t* RESTRICT _data_dstEdge, + idx_t* RESTRICT _data_dstVertex, + real_t* RESTRICT _data_muEdge, + real_t* RESTRICT _data_muVertex, + idx_t* RESTRICT _data_srcEdge, + idx_t* RESTRICT _data_srcVertex, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_0comp2, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_1comp2, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + real_t macro_vertex_coord_id_2comp2, + real_t macro_vertex_coord_id_3comp0, + real_t macro_vertex_coord_id_3comp1, + real_t macro_vertex_coord_id_3comp2, + std::shared_ptr< SparseMatrixProxy > mat, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float ) const; P2Function< real_t > mu; }; diff --git a/operators/epsilon/P2ElementwiseEpsilon_1_0.cpp b/operators/epsilon/P2ElementwiseEpsilon_1_0.cpp index 2381b5eddc7330f72cd04d58509f770a0660d294..666edf6b186a5c3756113039400ec11be306e167 100644 --- a/operators/epsilon/P2ElementwiseEpsilon_1_0.cpp +++ b/operators/epsilon/P2ElementwiseEpsilon_1_0.cpp @@ -137,7 +137,7 @@ void P2ElementwiseEpsilon_1_0::apply( const P2Function< real_t >& src, this->timingTree_->start( "kernel" ); - apply_macro_3D( + apply_P2ElementwiseEpsilon_1_0_macro_3D( _data_dstEdge, _data_dstVertex, @@ -159,6 +159,7 @@ void P2ElementwiseEpsilon_1_0::apply( const P2Function< real_t >& src, macro_vertex_coord_id_3comp2, micro_edges_per_macro_edge, micro_edges_per_macro_edge_float ); + this->timingTree_->stop( "kernel" ); } @@ -228,7 +229,7 @@ void P2ElementwiseEpsilon_1_0::apply( const P2Function< real_t >& src, this->timingTree_->start( "kernel" ); - apply_macro_2D( + apply_P2ElementwiseEpsilon_1_0_macro_2D( _data_dstEdge, _data_dstVertex, @@ -244,6 +245,7 @@ void P2ElementwiseEpsilon_1_0::apply( const P2Function< real_t >& src, macro_vertex_coord_id_2comp1, micro_edges_per_macro_edge, micro_edges_per_macro_edge_float ); + this->timingTree_->stop( "kernel" ); } @@ -314,7 +316,7 @@ void P2ElementwiseEpsilon_1_0::toMatrix( const std::shared_ptr< SparseMatrixProx this->timingTree_->start( "kernel" ); - toMatrix_macro_3D( + toMatrix_P2ElementwiseEpsilon_1_0_macro_3D( _data_dstEdge, _data_dstVertex, @@ -337,6 +339,7 @@ void P2ElementwiseEpsilon_1_0::toMatrix( const std::shared_ptr< SparseMatrixProx mat, micro_edges_per_macro_edge, micro_edges_per_macro_edge_float ); + this->timingTree_->stop( "kernel" ); } } @@ -369,7 +372,7 @@ void P2ElementwiseEpsilon_1_0::toMatrix( const std::shared_ptr< SparseMatrixProx this->timingTree_->start( "kernel" ); - toMatrix_macro_2D( + toMatrix_P2ElementwiseEpsilon_1_0_macro_2D( _data_dstEdge, _data_dstVertex, @@ -386,6 +389,7 @@ void P2ElementwiseEpsilon_1_0::toMatrix( const std::shared_ptr< SparseMatrixProx mat, micro_edges_per_macro_edge, micro_edges_per_macro_edge_float ); + this->timingTree_->stop( "kernel" ); } } diff --git a/operators/epsilon/P2ElementwiseEpsilon_1_0.hpp b/operators/epsilon/P2ElementwiseEpsilon_1_0.hpp index 7ef9941787cce892290815c4a5181ae187493337..a7fdce7d0c3ef5e3033fbcdb152ef5cea887d736 100644 --- a/operators/epsilon/P2ElementwiseEpsilon_1_0.hpp +++ b/operators/epsilon/P2ElementwiseEpsilon_1_0.hpp @@ -29,12 +29,14 @@ #include "core/DataTypes.h" #include "hyteg/LikwidWrapper.hpp" +#include "hyteg/boundary/BoundaryConditions.hpp" #include "hyteg/communication/Syncing.hpp" #include "hyteg/edgedofspace/EdgeDoFMacroCell.hpp" #include "hyteg/operators/Operator.hpp" #include "hyteg/p2functionspace/P2Function.hpp" #include "hyteg/primitivestorage/PrimitiveStorage.hpp" #include "hyteg/sparseassembly/SparseMatrixProxy.hpp" +#include "hyteg/types/types.hpp" #define FUNC_PREFIX @@ -82,100 +84,119 @@ class P2ElementwiseEpsilon_1_0 : public Operator< P2Function< real_t >, P2Functi protected: private: - /// Kernel type: apply + /// Integral: P2ElementwiseEpsilon_1_0 + /// - volume element: triangle, dim: 2, vertices: 3, spacedim: 2 + /// - kernel type: apply + /// - loop strategy: SAWTOOTH /// - quadrature rule: Dunavant 2 | points: 3, degree: 2 + /// - blending map: IdentityMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 191 189 12 0 0 0 0 1 - void apply_macro_2D( real_t* RESTRICT _data_dstEdge, - real_t* RESTRICT _data_dstVertex, - real_t* RESTRICT _data_muEdge, - real_t* RESTRICT _data_muVertex, - real_t* RESTRICT _data_srcEdge, - real_t* RESTRICT _data_srcVertex, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float ) const; - /// Kernel type: apply + void apply_P2ElementwiseEpsilon_1_0_macro_2D( real_t* RESTRICT _data_dstEdge, + real_t* RESTRICT _data_dstVertex, + real_t* RESTRICT _data_muEdge, + real_t* RESTRICT _data_muVertex, + real_t* RESTRICT _data_srcEdge, + real_t* RESTRICT _data_srcVertex, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float ) const; + + /// Integral: P2ElementwiseEpsilon_1_0 + /// - volume element: tetrahedron, dim: 3, vertices: 4, spacedim: 3 + /// - kernel type: apply + /// - loop strategy: SAWTOOTH /// - quadrature rule: Hammer-Marlowe-Stroud 1 | points: 4, degree: 2 + /// - blending map: IdentityMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 629 616 36 0 0 0 0 1 - void apply_macro_3D( real_t* RESTRICT _data_dstEdge, - real_t* RESTRICT _data_dstVertex, - real_t* RESTRICT _data_muEdge, - real_t* RESTRICT _data_muVertex, - real_t* RESTRICT _data_srcEdge, - real_t* RESTRICT _data_srcVertex, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_0comp2, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_1comp2, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - real_t macro_vertex_coord_id_2comp2, - real_t macro_vertex_coord_id_3comp0, - real_t macro_vertex_coord_id_3comp1, - real_t macro_vertex_coord_id_3comp2, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float ) const; - /// Kernel type: toMatrix + void apply_P2ElementwiseEpsilon_1_0_macro_3D( real_t* RESTRICT _data_dstEdge, + real_t* RESTRICT _data_dstVertex, + real_t* RESTRICT _data_muEdge, + real_t* RESTRICT _data_muVertex, + real_t* RESTRICT _data_srcEdge, + real_t* RESTRICT _data_srcVertex, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_0comp2, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_1comp2, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + real_t macro_vertex_coord_id_2comp2, + real_t macro_vertex_coord_id_3comp0, + real_t macro_vertex_coord_id_3comp1, + real_t macro_vertex_coord_id_3comp2, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float ) const; + + /// Integral: P2ElementwiseEpsilon_1_0 + /// - volume element: triangle, dim: 2, vertices: 3, spacedim: 2 + /// - kernel type: toMatrix + /// - loop strategy: SAWTOOTH /// - quadrature rule: Dunavant 2 | points: 3, degree: 2 + /// - blending map: IdentityMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 155 153 12 0 0 0 0 4 - void toMatrix_macro_2D( idx_t* RESTRICT _data_dstEdge, - idx_t* RESTRICT _data_dstVertex, - real_t* RESTRICT _data_muEdge, - real_t* RESTRICT _data_muVertex, - idx_t* RESTRICT _data_srcEdge, - idx_t* RESTRICT _data_srcVertex, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - std::shared_ptr< SparseMatrixProxy > mat, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float ) const; - /// Kernel type: toMatrix + void toMatrix_P2ElementwiseEpsilon_1_0_macro_2D( idx_t* RESTRICT _data_dstEdge, + idx_t* RESTRICT _data_dstVertex, + real_t* RESTRICT _data_muEdge, + real_t* RESTRICT _data_muVertex, + idx_t* RESTRICT _data_srcEdge, + idx_t* RESTRICT _data_srcVertex, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + std::shared_ptr< SparseMatrixProxy > mat, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float ) const; + + /// Integral: P2ElementwiseEpsilon_1_0 + /// - volume element: tetrahedron, dim: 3, vertices: 4, spacedim: 3 + /// - kernel type: toMatrix + /// - loop strategy: SAWTOOTH /// - quadrature rule: Hammer-Marlowe-Stroud 1 | points: 4, degree: 2 + /// - blending map: IdentityMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 529 516 36 0 0 0 0 4 - void toMatrix_macro_3D( idx_t* RESTRICT _data_dstEdge, - idx_t* RESTRICT _data_dstVertex, - real_t* RESTRICT _data_muEdge, - real_t* RESTRICT _data_muVertex, - idx_t* RESTRICT _data_srcEdge, - idx_t* RESTRICT _data_srcVertex, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_0comp2, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_1comp2, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - real_t macro_vertex_coord_id_2comp2, - real_t macro_vertex_coord_id_3comp0, - real_t macro_vertex_coord_id_3comp1, - real_t macro_vertex_coord_id_3comp2, - std::shared_ptr< SparseMatrixProxy > mat, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float ) const; + void toMatrix_P2ElementwiseEpsilon_1_0_macro_3D( idx_t* RESTRICT _data_dstEdge, + idx_t* RESTRICT _data_dstVertex, + real_t* RESTRICT _data_muEdge, + real_t* RESTRICT _data_muVertex, + idx_t* RESTRICT _data_srcEdge, + idx_t* RESTRICT _data_srcVertex, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_0comp2, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_1comp2, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + real_t macro_vertex_coord_id_2comp2, + real_t macro_vertex_coord_id_3comp0, + real_t macro_vertex_coord_id_3comp1, + real_t macro_vertex_coord_id_3comp2, + std::shared_ptr< SparseMatrixProxy > mat, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float ) const; P2Function< real_t > mu; }; diff --git a/operators/epsilon/P2ElementwiseEpsilon_1_1.cpp b/operators/epsilon/P2ElementwiseEpsilon_1_1.cpp index 3abb5d43bc6695cf8f35ab2db456b223fa25ccd2..37551a93d05992bda5bf90571cd931e5fa16e9c4 100644 --- a/operators/epsilon/P2ElementwiseEpsilon_1_1.cpp +++ b/operators/epsilon/P2ElementwiseEpsilon_1_1.cpp @@ -137,7 +137,7 @@ void P2ElementwiseEpsilon_1_1::apply( const P2Function< real_t >& src, this->timingTree_->start( "kernel" ); - apply_macro_3D( + apply_P2ElementwiseEpsilon_1_1_macro_3D( _data_dstEdge, _data_dstVertex, @@ -159,6 +159,7 @@ void P2ElementwiseEpsilon_1_1::apply( const P2Function< real_t >& src, macro_vertex_coord_id_3comp2, micro_edges_per_macro_edge, micro_edges_per_macro_edge_float ); + this->timingTree_->stop( "kernel" ); } @@ -228,7 +229,7 @@ void P2ElementwiseEpsilon_1_1::apply( const P2Function< real_t >& src, this->timingTree_->start( "kernel" ); - apply_macro_2D( + apply_P2ElementwiseEpsilon_1_1_macro_2D( _data_dstEdge, _data_dstVertex, @@ -244,6 +245,7 @@ void P2ElementwiseEpsilon_1_1::apply( const P2Function< real_t >& src, macro_vertex_coord_id_2comp1, micro_edges_per_macro_edge, micro_edges_per_macro_edge_float ); + this->timingTree_->stop( "kernel" ); } @@ -314,7 +316,7 @@ void P2ElementwiseEpsilon_1_1::toMatrix( const std::shared_ptr< SparseMatrixProx this->timingTree_->start( "kernel" ); - toMatrix_macro_3D( + toMatrix_P2ElementwiseEpsilon_1_1_macro_3D( _data_dstEdge, _data_dstVertex, @@ -337,6 +339,7 @@ void P2ElementwiseEpsilon_1_1::toMatrix( const std::shared_ptr< SparseMatrixProx mat, micro_edges_per_macro_edge, micro_edges_per_macro_edge_float ); + this->timingTree_->stop( "kernel" ); } } @@ -369,7 +372,7 @@ void P2ElementwiseEpsilon_1_1::toMatrix( const std::shared_ptr< SparseMatrixProx this->timingTree_->start( "kernel" ); - toMatrix_macro_2D( + toMatrix_P2ElementwiseEpsilon_1_1_macro_2D( _data_dstEdge, _data_dstVertex, @@ -386,6 +389,7 @@ void P2ElementwiseEpsilon_1_1::toMatrix( const std::shared_ptr< SparseMatrixProx mat, micro_edges_per_macro_edge, micro_edges_per_macro_edge_float ); + this->timingTree_->stop( "kernel" ); } } @@ -440,7 +444,7 @@ void P2ElementwiseEpsilon_1_1::computeInverseDiagonalOperatorValues() this->timingTree_->start( "kernel" ); - computeInverseDiagonalOperatorValues_macro_3D( + computeInverseDiagonalOperatorValues_P2ElementwiseEpsilon_1_1_macro_3D( _data_invDiag_Edge, _data_invDiag_Vertex, @@ -460,6 +464,7 @@ void P2ElementwiseEpsilon_1_1::computeInverseDiagonalOperatorValues() macro_vertex_coord_id_3comp2, micro_edges_per_macro_edge, micro_edges_per_macro_edge_float ); + this->timingTree_->stop( "kernel" ); } @@ -504,7 +509,7 @@ void P2ElementwiseEpsilon_1_1::computeInverseDiagonalOperatorValues() this->timingTree_->start( "kernel" ); - computeInverseDiagonalOperatorValues_macro_2D( + computeInverseDiagonalOperatorValues_P2ElementwiseEpsilon_1_1_macro_2D( _data_invDiag_Edge, _data_invDiag_Vertex, @@ -518,6 +523,7 @@ void P2ElementwiseEpsilon_1_1::computeInverseDiagonalOperatorValues() macro_vertex_coord_id_2comp1, micro_edges_per_macro_edge, micro_edges_per_macro_edge_float ); + this->timingTree_->stop( "kernel" ); } diff --git a/operators/epsilon/P2ElementwiseEpsilon_1_1.hpp b/operators/epsilon/P2ElementwiseEpsilon_1_1.hpp index 13bcd9f672cfcbd2c9e70fb9fe4dc422f9921998..db1ad8ea53df234ea165b24451f6364be345fc23 100644 --- a/operators/epsilon/P2ElementwiseEpsilon_1_1.hpp +++ b/operators/epsilon/P2ElementwiseEpsilon_1_1.hpp @@ -29,6 +29,7 @@ #include "core/DataTypes.h" #include "hyteg/LikwidWrapper.hpp" +#include "hyteg/boundary/BoundaryConditions.hpp" #include "hyteg/communication/Syncing.hpp" #include "hyteg/edgedofspace/EdgeDoFMacroCell.hpp" #include "hyteg/operators/Operator.hpp" @@ -36,6 +37,7 @@ #include "hyteg/primitivestorage/PrimitiveStorage.hpp" #include "hyteg/solvers/Smoothables.hpp" #include "hyteg/sparseassembly/SparseMatrixProxy.hpp" +#include "hyteg/types/types.hpp" #define FUNC_PREFIX @@ -88,142 +90,171 @@ class P2ElementwiseEpsilon_1_1 : public Operator< P2Function< real_t >, P2Functi protected: private: - /// Kernel type: apply + /// Integral: P2ElementwiseEpsilon_1_1 + /// - volume element: triangle, dim: 2, vertices: 3, spacedim: 2 + /// - kernel type: apply + /// - loop strategy: SAWTOOTH /// - quadrature rule: Dunavant 2 | points: 3, degree: 2 + /// - blending map: IdentityMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 146 144 12 0 0 0 0 1 - void apply_macro_2D( real_t* RESTRICT _data_dstEdge, - real_t* RESTRICT _data_dstVertex, - real_t* RESTRICT _data_muEdge, - real_t* RESTRICT _data_muVertex, - real_t* RESTRICT _data_srcEdge, - real_t* RESTRICT _data_srcVertex, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float ) const; - /// Kernel type: apply + void apply_P2ElementwiseEpsilon_1_1_macro_2D( real_t* RESTRICT _data_dstEdge, + real_t* RESTRICT _data_dstVertex, + real_t* RESTRICT _data_muEdge, + real_t* RESTRICT _data_muVertex, + real_t* RESTRICT _data_srcEdge, + real_t* RESTRICT _data_srcVertex, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float ) const; + + /// Integral: P2ElementwiseEpsilon_1_1 + /// - volume element: tetrahedron, dim: 3, vertices: 4, spacedim: 3 + /// - kernel type: apply + /// - loop strategy: SAWTOOTH /// - quadrature rule: Hammer-Marlowe-Stroud 1 | points: 4, degree: 2 + /// - blending map: IdentityMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 449 436 36 0 0 0 0 1 - void apply_macro_3D( real_t* RESTRICT _data_dstEdge, - real_t* RESTRICT _data_dstVertex, - real_t* RESTRICT _data_muEdge, - real_t* RESTRICT _data_muVertex, - real_t* RESTRICT _data_srcEdge, - real_t* RESTRICT _data_srcVertex, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_0comp2, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_1comp2, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - real_t macro_vertex_coord_id_2comp2, - real_t macro_vertex_coord_id_3comp0, - real_t macro_vertex_coord_id_3comp1, - real_t macro_vertex_coord_id_3comp2, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float ) const; - /// Kernel type: toMatrix + void apply_P2ElementwiseEpsilon_1_1_macro_3D( real_t* RESTRICT _data_dstEdge, + real_t* RESTRICT _data_dstVertex, + real_t* RESTRICT _data_muEdge, + real_t* RESTRICT _data_muVertex, + real_t* RESTRICT _data_srcEdge, + real_t* RESTRICT _data_srcVertex, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_0comp2, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_1comp2, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + real_t macro_vertex_coord_id_2comp2, + real_t macro_vertex_coord_id_3comp0, + real_t macro_vertex_coord_id_3comp1, + real_t macro_vertex_coord_id_3comp2, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float ) const; + + /// Integral: P2ElementwiseEpsilon_1_1 + /// - volume element: triangle, dim: 2, vertices: 3, spacedim: 2 + /// - kernel type: toMatrix + /// - loop strategy: SAWTOOTH /// - quadrature rule: Dunavant 2 | points: 3, degree: 2 + /// - blending map: IdentityMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 110 108 12 0 0 0 0 4 - void toMatrix_macro_2D( idx_t* RESTRICT _data_dstEdge, - idx_t* RESTRICT _data_dstVertex, - real_t* RESTRICT _data_muEdge, - real_t* RESTRICT _data_muVertex, - idx_t* RESTRICT _data_srcEdge, - idx_t* RESTRICT _data_srcVertex, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - std::shared_ptr< SparseMatrixProxy > mat, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float ) const; - /// Kernel type: toMatrix + void toMatrix_P2ElementwiseEpsilon_1_1_macro_2D( idx_t* RESTRICT _data_dstEdge, + idx_t* RESTRICT _data_dstVertex, + real_t* RESTRICT _data_muEdge, + real_t* RESTRICT _data_muVertex, + idx_t* RESTRICT _data_srcEdge, + idx_t* RESTRICT _data_srcVertex, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + std::shared_ptr< SparseMatrixProxy > mat, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float ) const; + + /// Integral: P2ElementwiseEpsilon_1_1 + /// - volume element: tetrahedron, dim: 3, vertices: 4, spacedim: 3 + /// - kernel type: toMatrix + /// - loop strategy: SAWTOOTH /// - quadrature rule: Hammer-Marlowe-Stroud 1 | points: 4, degree: 2 + /// - blending map: IdentityMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 349 336 36 0 0 0 0 4 - void toMatrix_macro_3D( idx_t* RESTRICT _data_dstEdge, - idx_t* RESTRICT _data_dstVertex, - real_t* RESTRICT _data_muEdge, - real_t* RESTRICT _data_muVertex, - idx_t* RESTRICT _data_srcEdge, - idx_t* RESTRICT _data_srcVertex, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_0comp2, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_1comp2, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - real_t macro_vertex_coord_id_2comp2, - real_t macro_vertex_coord_id_3comp0, - real_t macro_vertex_coord_id_3comp1, - real_t macro_vertex_coord_id_3comp2, - std::shared_ptr< SparseMatrixProxy > mat, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float ) const; - /// Kernel type: computeInverseDiagonalOperatorValues + void toMatrix_P2ElementwiseEpsilon_1_1_macro_3D( idx_t* RESTRICT _data_dstEdge, + idx_t* RESTRICT _data_dstVertex, + real_t* RESTRICT _data_muEdge, + real_t* RESTRICT _data_muVertex, + idx_t* RESTRICT _data_srcEdge, + idx_t* RESTRICT _data_srcVertex, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_0comp2, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_1comp2, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + real_t macro_vertex_coord_id_2comp2, + real_t macro_vertex_coord_id_3comp0, + real_t macro_vertex_coord_id_3comp1, + real_t macro_vertex_coord_id_3comp2, + std::shared_ptr< SparseMatrixProxy > mat, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float ) const; + + /// Integral: P2ElementwiseEpsilon_1_1 + /// - volume element: triangle, dim: 2, vertices: 3, spacedim: 2 + /// - kernel type: computeInverseDiagonalOperatorValues + /// - loop strategy: SAWTOOTH /// - quadrature rule: Dunavant 2 | points: 3, degree: 2 + /// - blending map: IdentityMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 71 63 12 0 0 0 0 1 - void computeInverseDiagonalOperatorValues_macro_2D( real_t* RESTRICT _data_invDiag_Edge, - real_t* RESTRICT _data_invDiag_Vertex, - real_t* RESTRICT _data_muEdge, - real_t* RESTRICT _data_muVertex, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float ) const; - /// Kernel type: computeInverseDiagonalOperatorValues + void computeInverseDiagonalOperatorValues_P2ElementwiseEpsilon_1_1_macro_2D( real_t* RESTRICT _data_invDiag_Edge, + real_t* RESTRICT _data_invDiag_Vertex, + real_t* RESTRICT _data_muEdge, + real_t* RESTRICT _data_muVertex, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float ) const; + + /// Integral: P2ElementwiseEpsilon_1_1 + /// - volume element: tetrahedron, dim: 3, vertices: 4, spacedim: 3 + /// - kernel type: computeInverseDiagonalOperatorValues + /// - loop strategy: SAWTOOTH /// - quadrature rule: Hammer-Marlowe-Stroud 1 | points: 4, degree: 2 + /// - blending map: IdentityMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 179 156 36 0 0 0 0 1 - void computeInverseDiagonalOperatorValues_macro_3D( real_t* RESTRICT _data_invDiag_Edge, - real_t* RESTRICT _data_invDiag_Vertex, - real_t* RESTRICT _data_muEdge, - real_t* RESTRICT _data_muVertex, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_0comp2, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_1comp2, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - real_t macro_vertex_coord_id_2comp2, - real_t macro_vertex_coord_id_3comp0, - real_t macro_vertex_coord_id_3comp1, - real_t macro_vertex_coord_id_3comp2, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float ) const; + void computeInverseDiagonalOperatorValues_P2ElementwiseEpsilon_1_1_macro_3D( real_t* RESTRICT _data_invDiag_Edge, + real_t* RESTRICT _data_invDiag_Vertex, + real_t* RESTRICT _data_muEdge, + real_t* RESTRICT _data_muVertex, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_0comp2, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_1comp2, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + real_t macro_vertex_coord_id_2comp2, + real_t macro_vertex_coord_id_3comp0, + real_t macro_vertex_coord_id_3comp1, + real_t macro_vertex_coord_id_3comp2, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float ) const; std::shared_ptr< P2Function< real_t > > invDiag_; P2Function< real_t > mu; diff --git a/operators/epsilon/P2ElementwiseEpsilon_1_2.cpp b/operators/epsilon/P2ElementwiseEpsilon_1_2.cpp index a82145ad949916be01ca2876321e9e6fb7feac9e..229162c817bd7a9a52634503453693bbab462e32 100644 --- a/operators/epsilon/P2ElementwiseEpsilon_1_2.cpp +++ b/operators/epsilon/P2ElementwiseEpsilon_1_2.cpp @@ -136,7 +136,7 @@ void P2ElementwiseEpsilon_1_2::apply( const P2Function< real_t >& src, this->timingTree_->start( "kernel" ); - apply_macro_3D( + apply_P2ElementwiseEpsilon_1_2_macro_3D( _data_dstEdge, _data_dstVertex, @@ -158,6 +158,7 @@ void P2ElementwiseEpsilon_1_2::apply( const P2Function< real_t >& src, macro_vertex_coord_id_3comp2, micro_edges_per_macro_edge, micro_edges_per_macro_edge_float ); + this->timingTree_->stop( "kernel" ); } @@ -236,7 +237,7 @@ void P2ElementwiseEpsilon_1_2::toMatrix( const std::shared_ptr< SparseMatrixProx this->timingTree_->start( "kernel" ); - toMatrix_macro_3D( + toMatrix_P2ElementwiseEpsilon_1_2_macro_3D( _data_dstEdge, _data_dstVertex, @@ -259,6 +260,7 @@ void P2ElementwiseEpsilon_1_2::toMatrix( const std::shared_ptr< SparseMatrixProx mat, micro_edges_per_macro_edge, micro_edges_per_macro_edge_float ); + this->timingTree_->stop( "kernel" ); } } diff --git a/operators/epsilon/P2ElementwiseEpsilon_1_2.hpp b/operators/epsilon/P2ElementwiseEpsilon_1_2.hpp index 061fe5347a06176420d12d80b7adc77c9852077f..cc82cde075edd05abc49d57cf7b3e3bd42b6e157 100644 --- a/operators/epsilon/P2ElementwiseEpsilon_1_2.hpp +++ b/operators/epsilon/P2ElementwiseEpsilon_1_2.hpp @@ -29,12 +29,14 @@ #include "core/DataTypes.h" #include "hyteg/LikwidWrapper.hpp" +#include "hyteg/boundary/BoundaryConditions.hpp" #include "hyteg/communication/Syncing.hpp" #include "hyteg/edgedofspace/EdgeDoFMacroCell.hpp" #include "hyteg/operators/Operator.hpp" #include "hyteg/p2functionspace/P2Function.hpp" #include "hyteg/primitivestorage/PrimitiveStorage.hpp" #include "hyteg/sparseassembly/SparseMatrixProxy.hpp" +#include "hyteg/types/types.hpp" #define FUNC_PREFIX @@ -82,59 +84,68 @@ class P2ElementwiseEpsilon_1_2 : public Operator< P2Function< real_t >, P2Functi protected: private: - /// Kernel type: apply + /// Integral: P2ElementwiseEpsilon_1_2 + /// - volume element: tetrahedron, dim: 3, vertices: 4, spacedim: 3 + /// - kernel type: apply + /// - loop strategy: SAWTOOTH /// - quadrature rule: Hammer-Marlowe-Stroud 1 | points: 4, degree: 2 + /// - blending map: IdentityMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 629 616 36 0 0 0 0 1 - void apply_macro_3D( real_t* RESTRICT _data_dstEdge, - real_t* RESTRICT _data_dstVertex, - real_t* RESTRICT _data_muEdge, - real_t* RESTRICT _data_muVertex, - real_t* RESTRICT _data_srcEdge, - real_t* RESTRICT _data_srcVertex, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_0comp2, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_1comp2, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - real_t macro_vertex_coord_id_2comp2, - real_t macro_vertex_coord_id_3comp0, - real_t macro_vertex_coord_id_3comp1, - real_t macro_vertex_coord_id_3comp2, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float ) const; - /// Kernel type: toMatrix + void apply_P2ElementwiseEpsilon_1_2_macro_3D( real_t* RESTRICT _data_dstEdge, + real_t* RESTRICT _data_dstVertex, + real_t* RESTRICT _data_muEdge, + real_t* RESTRICT _data_muVertex, + real_t* RESTRICT _data_srcEdge, + real_t* RESTRICT _data_srcVertex, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_0comp2, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_1comp2, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + real_t macro_vertex_coord_id_2comp2, + real_t macro_vertex_coord_id_3comp0, + real_t macro_vertex_coord_id_3comp1, + real_t macro_vertex_coord_id_3comp2, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float ) const; + + /// Integral: P2ElementwiseEpsilon_1_2 + /// - volume element: tetrahedron, dim: 3, vertices: 4, spacedim: 3 + /// - kernel type: toMatrix + /// - loop strategy: SAWTOOTH /// - quadrature rule: Hammer-Marlowe-Stroud 1 | points: 4, degree: 2 + /// - blending map: IdentityMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 529 516 36 0 0 0 0 4 - void toMatrix_macro_3D( idx_t* RESTRICT _data_dstEdge, - idx_t* RESTRICT _data_dstVertex, - real_t* RESTRICT _data_muEdge, - real_t* RESTRICT _data_muVertex, - idx_t* RESTRICT _data_srcEdge, - idx_t* RESTRICT _data_srcVertex, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_0comp2, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_1comp2, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - real_t macro_vertex_coord_id_2comp2, - real_t macro_vertex_coord_id_3comp0, - real_t macro_vertex_coord_id_3comp1, - real_t macro_vertex_coord_id_3comp2, - std::shared_ptr< SparseMatrixProxy > mat, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float ) const; + void toMatrix_P2ElementwiseEpsilon_1_2_macro_3D( idx_t* RESTRICT _data_dstEdge, + idx_t* RESTRICT _data_dstVertex, + real_t* RESTRICT _data_muEdge, + real_t* RESTRICT _data_muVertex, + idx_t* RESTRICT _data_srcEdge, + idx_t* RESTRICT _data_srcVertex, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_0comp2, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_1comp2, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + real_t macro_vertex_coord_id_2comp2, + real_t macro_vertex_coord_id_3comp0, + real_t macro_vertex_coord_id_3comp1, + real_t macro_vertex_coord_id_3comp2, + std::shared_ptr< SparseMatrixProxy > mat, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float ) const; P2Function< real_t > mu; }; diff --git a/operators/epsilon/P2ElementwiseEpsilon_2_0.cpp b/operators/epsilon/P2ElementwiseEpsilon_2_0.cpp index 88fcdb37d65588787037c2784e2f72ee29aa10f4..42e1e428ea505ddf9b31b62643005ec11afe4abf 100644 --- a/operators/epsilon/P2ElementwiseEpsilon_2_0.cpp +++ b/operators/epsilon/P2ElementwiseEpsilon_2_0.cpp @@ -136,7 +136,7 @@ void P2ElementwiseEpsilon_2_0::apply( const P2Function< real_t >& src, this->timingTree_->start( "kernel" ); - apply_macro_3D( + apply_P2ElementwiseEpsilon_2_0_macro_3D( _data_dstEdge, _data_dstVertex, @@ -158,6 +158,7 @@ void P2ElementwiseEpsilon_2_0::apply( const P2Function< real_t >& src, macro_vertex_coord_id_3comp2, micro_edges_per_macro_edge, micro_edges_per_macro_edge_float ); + this->timingTree_->stop( "kernel" ); } @@ -236,7 +237,7 @@ void P2ElementwiseEpsilon_2_0::toMatrix( const std::shared_ptr< SparseMatrixProx this->timingTree_->start( "kernel" ); - toMatrix_macro_3D( + toMatrix_P2ElementwiseEpsilon_2_0_macro_3D( _data_dstEdge, _data_dstVertex, @@ -259,6 +260,7 @@ void P2ElementwiseEpsilon_2_0::toMatrix( const std::shared_ptr< SparseMatrixProx mat, micro_edges_per_macro_edge, micro_edges_per_macro_edge_float ); + this->timingTree_->stop( "kernel" ); } } diff --git a/operators/epsilon/P2ElementwiseEpsilon_2_0.hpp b/operators/epsilon/P2ElementwiseEpsilon_2_0.hpp index 7313eff988d81036ed5d706185322a1fb388606d..bfb70f486d1a444c0522954d84798717b769fb50 100644 --- a/operators/epsilon/P2ElementwiseEpsilon_2_0.hpp +++ b/operators/epsilon/P2ElementwiseEpsilon_2_0.hpp @@ -29,12 +29,14 @@ #include "core/DataTypes.h" #include "hyteg/LikwidWrapper.hpp" +#include "hyteg/boundary/BoundaryConditions.hpp" #include "hyteg/communication/Syncing.hpp" #include "hyteg/edgedofspace/EdgeDoFMacroCell.hpp" #include "hyteg/operators/Operator.hpp" #include "hyteg/p2functionspace/P2Function.hpp" #include "hyteg/primitivestorage/PrimitiveStorage.hpp" #include "hyteg/sparseassembly/SparseMatrixProxy.hpp" +#include "hyteg/types/types.hpp" #define FUNC_PREFIX @@ -82,59 +84,68 @@ class P2ElementwiseEpsilon_2_0 : public Operator< P2Function< real_t >, P2Functi protected: private: - /// Kernel type: apply + /// Integral: P2ElementwiseEpsilon_2_0 + /// - volume element: tetrahedron, dim: 3, vertices: 4, spacedim: 3 + /// - kernel type: apply + /// - loop strategy: SAWTOOTH /// - quadrature rule: Hammer-Marlowe-Stroud 1 | points: 4, degree: 2 + /// - blending map: IdentityMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 629 616 36 0 0 0 0 1 - void apply_macro_3D( real_t* RESTRICT _data_dstEdge, - real_t* RESTRICT _data_dstVertex, - real_t* RESTRICT _data_muEdge, - real_t* RESTRICT _data_muVertex, - real_t* RESTRICT _data_srcEdge, - real_t* RESTRICT _data_srcVertex, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_0comp2, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_1comp2, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - real_t macro_vertex_coord_id_2comp2, - real_t macro_vertex_coord_id_3comp0, - real_t macro_vertex_coord_id_3comp1, - real_t macro_vertex_coord_id_3comp2, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float ) const; - /// Kernel type: toMatrix + void apply_P2ElementwiseEpsilon_2_0_macro_3D( real_t* RESTRICT _data_dstEdge, + real_t* RESTRICT _data_dstVertex, + real_t* RESTRICT _data_muEdge, + real_t* RESTRICT _data_muVertex, + real_t* RESTRICT _data_srcEdge, + real_t* RESTRICT _data_srcVertex, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_0comp2, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_1comp2, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + real_t macro_vertex_coord_id_2comp2, + real_t macro_vertex_coord_id_3comp0, + real_t macro_vertex_coord_id_3comp1, + real_t macro_vertex_coord_id_3comp2, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float ) const; + + /// Integral: P2ElementwiseEpsilon_2_0 + /// - volume element: tetrahedron, dim: 3, vertices: 4, spacedim: 3 + /// - kernel type: toMatrix + /// - loop strategy: SAWTOOTH /// - quadrature rule: Hammer-Marlowe-Stroud 1 | points: 4, degree: 2 + /// - blending map: IdentityMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 529 516 36 0 0 0 0 4 - void toMatrix_macro_3D( idx_t* RESTRICT _data_dstEdge, - idx_t* RESTRICT _data_dstVertex, - real_t* RESTRICT _data_muEdge, - real_t* RESTRICT _data_muVertex, - idx_t* RESTRICT _data_srcEdge, - idx_t* RESTRICT _data_srcVertex, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_0comp2, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_1comp2, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - real_t macro_vertex_coord_id_2comp2, - real_t macro_vertex_coord_id_3comp0, - real_t macro_vertex_coord_id_3comp1, - real_t macro_vertex_coord_id_3comp2, - std::shared_ptr< SparseMatrixProxy > mat, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float ) const; + void toMatrix_P2ElementwiseEpsilon_2_0_macro_3D( idx_t* RESTRICT _data_dstEdge, + idx_t* RESTRICT _data_dstVertex, + real_t* RESTRICT _data_muEdge, + real_t* RESTRICT _data_muVertex, + idx_t* RESTRICT _data_srcEdge, + idx_t* RESTRICT _data_srcVertex, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_0comp2, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_1comp2, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + real_t macro_vertex_coord_id_2comp2, + real_t macro_vertex_coord_id_3comp0, + real_t macro_vertex_coord_id_3comp1, + real_t macro_vertex_coord_id_3comp2, + std::shared_ptr< SparseMatrixProxy > mat, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float ) const; P2Function< real_t > mu; }; diff --git a/operators/epsilon/P2ElementwiseEpsilon_2_1.cpp b/operators/epsilon/P2ElementwiseEpsilon_2_1.cpp index 2e531a7c550cff316402e4ebd3606dbb2bafe24f..4f5d19bc90cfcf0c5e0bce4e1dafd2b1fd422c79 100644 --- a/operators/epsilon/P2ElementwiseEpsilon_2_1.cpp +++ b/operators/epsilon/P2ElementwiseEpsilon_2_1.cpp @@ -136,7 +136,7 @@ void P2ElementwiseEpsilon_2_1::apply( const P2Function< real_t >& src, this->timingTree_->start( "kernel" ); - apply_macro_3D( + apply_P2ElementwiseEpsilon_2_1_macro_3D( _data_dstEdge, _data_dstVertex, @@ -158,6 +158,7 @@ void P2ElementwiseEpsilon_2_1::apply( const P2Function< real_t >& src, macro_vertex_coord_id_3comp2, micro_edges_per_macro_edge, micro_edges_per_macro_edge_float ); + this->timingTree_->stop( "kernel" ); } @@ -236,7 +237,7 @@ void P2ElementwiseEpsilon_2_1::toMatrix( const std::shared_ptr< SparseMatrixProx this->timingTree_->start( "kernel" ); - toMatrix_macro_3D( + toMatrix_P2ElementwiseEpsilon_2_1_macro_3D( _data_dstEdge, _data_dstVertex, @@ -259,6 +260,7 @@ void P2ElementwiseEpsilon_2_1::toMatrix( const std::shared_ptr< SparseMatrixProx mat, micro_edges_per_macro_edge, micro_edges_per_macro_edge_float ); + this->timingTree_->stop( "kernel" ); } } diff --git a/operators/epsilon/P2ElementwiseEpsilon_2_1.hpp b/operators/epsilon/P2ElementwiseEpsilon_2_1.hpp index 9ab103ad3bfc2460ad3d5a82db4cc406f7e2aa43..7487ce1a856e1f5720245fcac04b934c5c4d32a3 100644 --- a/operators/epsilon/P2ElementwiseEpsilon_2_1.hpp +++ b/operators/epsilon/P2ElementwiseEpsilon_2_1.hpp @@ -29,12 +29,14 @@ #include "core/DataTypes.h" #include "hyteg/LikwidWrapper.hpp" +#include "hyteg/boundary/BoundaryConditions.hpp" #include "hyteg/communication/Syncing.hpp" #include "hyteg/edgedofspace/EdgeDoFMacroCell.hpp" #include "hyteg/operators/Operator.hpp" #include "hyteg/p2functionspace/P2Function.hpp" #include "hyteg/primitivestorage/PrimitiveStorage.hpp" #include "hyteg/sparseassembly/SparseMatrixProxy.hpp" +#include "hyteg/types/types.hpp" #define FUNC_PREFIX @@ -82,59 +84,68 @@ class P2ElementwiseEpsilon_2_1 : public Operator< P2Function< real_t >, P2Functi protected: private: - /// Kernel type: apply + /// Integral: P2ElementwiseEpsilon_2_1 + /// - volume element: tetrahedron, dim: 3, vertices: 4, spacedim: 3 + /// - kernel type: apply + /// - loop strategy: SAWTOOTH /// - quadrature rule: Hammer-Marlowe-Stroud 1 | points: 4, degree: 2 + /// - blending map: IdentityMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 629 616 36 0 0 0 0 1 - void apply_macro_3D( real_t* RESTRICT _data_dstEdge, - real_t* RESTRICT _data_dstVertex, - real_t* RESTRICT _data_muEdge, - real_t* RESTRICT _data_muVertex, - real_t* RESTRICT _data_srcEdge, - real_t* RESTRICT _data_srcVertex, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_0comp2, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_1comp2, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - real_t macro_vertex_coord_id_2comp2, - real_t macro_vertex_coord_id_3comp0, - real_t macro_vertex_coord_id_3comp1, - real_t macro_vertex_coord_id_3comp2, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float ) const; - /// Kernel type: toMatrix + void apply_P2ElementwiseEpsilon_2_1_macro_3D( real_t* RESTRICT _data_dstEdge, + real_t* RESTRICT _data_dstVertex, + real_t* RESTRICT _data_muEdge, + real_t* RESTRICT _data_muVertex, + real_t* RESTRICT _data_srcEdge, + real_t* RESTRICT _data_srcVertex, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_0comp2, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_1comp2, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + real_t macro_vertex_coord_id_2comp2, + real_t macro_vertex_coord_id_3comp0, + real_t macro_vertex_coord_id_3comp1, + real_t macro_vertex_coord_id_3comp2, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float ) const; + + /// Integral: P2ElementwiseEpsilon_2_1 + /// - volume element: tetrahedron, dim: 3, vertices: 4, spacedim: 3 + /// - kernel type: toMatrix + /// - loop strategy: SAWTOOTH /// - quadrature rule: Hammer-Marlowe-Stroud 1 | points: 4, degree: 2 + /// - blending map: IdentityMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 529 516 36 0 0 0 0 4 - void toMatrix_macro_3D( idx_t* RESTRICT _data_dstEdge, - idx_t* RESTRICT _data_dstVertex, - real_t* RESTRICT _data_muEdge, - real_t* RESTRICT _data_muVertex, - idx_t* RESTRICT _data_srcEdge, - idx_t* RESTRICT _data_srcVertex, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_0comp2, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_1comp2, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - real_t macro_vertex_coord_id_2comp2, - real_t macro_vertex_coord_id_3comp0, - real_t macro_vertex_coord_id_3comp1, - real_t macro_vertex_coord_id_3comp2, - std::shared_ptr< SparseMatrixProxy > mat, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float ) const; + void toMatrix_P2ElementwiseEpsilon_2_1_macro_3D( idx_t* RESTRICT _data_dstEdge, + idx_t* RESTRICT _data_dstVertex, + real_t* RESTRICT _data_muEdge, + real_t* RESTRICT _data_muVertex, + idx_t* RESTRICT _data_srcEdge, + idx_t* RESTRICT _data_srcVertex, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_0comp2, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_1comp2, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + real_t macro_vertex_coord_id_2comp2, + real_t macro_vertex_coord_id_3comp0, + real_t macro_vertex_coord_id_3comp1, + real_t macro_vertex_coord_id_3comp2, + std::shared_ptr< SparseMatrixProxy > mat, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float ) const; P2Function< real_t > mu; }; diff --git a/operators/epsilon/P2ElementwiseEpsilon_2_2.cpp b/operators/epsilon/P2ElementwiseEpsilon_2_2.cpp index 257b62280fba7678b1cf7665b43c1a255c4b57fe..74d1a338bcd96931676c6b1b0b318d0f1fb5a7d1 100644 --- a/operators/epsilon/P2ElementwiseEpsilon_2_2.cpp +++ b/operators/epsilon/P2ElementwiseEpsilon_2_2.cpp @@ -136,7 +136,7 @@ void P2ElementwiseEpsilon_2_2::apply( const P2Function< real_t >& src, this->timingTree_->start( "kernel" ); - apply_macro_3D( + apply_P2ElementwiseEpsilon_2_2_macro_3D( _data_dstEdge, _data_dstVertex, @@ -158,6 +158,7 @@ void P2ElementwiseEpsilon_2_2::apply( const P2Function< real_t >& src, macro_vertex_coord_id_3comp2, micro_edges_per_macro_edge, micro_edges_per_macro_edge_float ); + this->timingTree_->stop( "kernel" ); } @@ -236,7 +237,7 @@ void P2ElementwiseEpsilon_2_2::toMatrix( const std::shared_ptr< SparseMatrixProx this->timingTree_->start( "kernel" ); - toMatrix_macro_3D( + toMatrix_P2ElementwiseEpsilon_2_2_macro_3D( _data_dstEdge, _data_dstVertex, @@ -259,6 +260,7 @@ void P2ElementwiseEpsilon_2_2::toMatrix( const std::shared_ptr< SparseMatrixProx mat, micro_edges_per_macro_edge, micro_edges_per_macro_edge_float ); + this->timingTree_->stop( "kernel" ); } } @@ -321,7 +323,7 @@ void P2ElementwiseEpsilon_2_2::computeInverseDiagonalOperatorValues() this->timingTree_->start( "kernel" ); - computeInverseDiagonalOperatorValues_macro_3D( + computeInverseDiagonalOperatorValues_P2ElementwiseEpsilon_2_2_macro_3D( _data_invDiag_Edge, _data_invDiag_Vertex, @@ -341,6 +343,7 @@ void P2ElementwiseEpsilon_2_2::computeInverseDiagonalOperatorValues() macro_vertex_coord_id_3comp2, micro_edges_per_macro_edge, micro_edges_per_macro_edge_float ); + this->timingTree_->stop( "kernel" ); } diff --git a/operators/epsilon/P2ElementwiseEpsilon_2_2.hpp b/operators/epsilon/P2ElementwiseEpsilon_2_2.hpp index c6677d45b6575362d1e429628ab0d4bec8a0f12a..444c126667784b8a3e1f8eaec226f7a935473a0a 100644 --- a/operators/epsilon/P2ElementwiseEpsilon_2_2.hpp +++ b/operators/epsilon/P2ElementwiseEpsilon_2_2.hpp @@ -29,6 +29,7 @@ #include "core/DataTypes.h" #include "hyteg/LikwidWrapper.hpp" +#include "hyteg/boundary/BoundaryConditions.hpp" #include "hyteg/communication/Syncing.hpp" #include "hyteg/edgedofspace/EdgeDoFMacroCell.hpp" #include "hyteg/operators/Operator.hpp" @@ -36,6 +37,7 @@ #include "hyteg/primitivestorage/PrimitiveStorage.hpp" #include "hyteg/solvers/Smoothables.hpp" #include "hyteg/sparseassembly/SparseMatrixProxy.hpp" +#include "hyteg/types/types.hpp" #define FUNC_PREFIX @@ -88,83 +90,97 @@ class P2ElementwiseEpsilon_2_2 : public Operator< P2Function< real_t >, P2Functi protected: private: - /// Kernel type: apply + /// Integral: P2ElementwiseEpsilon_2_2 + /// - volume element: tetrahedron, dim: 3, vertices: 4, spacedim: 3 + /// - kernel type: apply + /// - loop strategy: SAWTOOTH /// - quadrature rule: Hammer-Marlowe-Stroud 1 | points: 4, degree: 2 + /// - blending map: IdentityMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 449 436 36 0 0 0 0 1 - void apply_macro_3D( real_t* RESTRICT _data_dstEdge, - real_t* RESTRICT _data_dstVertex, - real_t* RESTRICT _data_muEdge, - real_t* RESTRICT _data_muVertex, - real_t* RESTRICT _data_srcEdge, - real_t* RESTRICT _data_srcVertex, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_0comp2, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_1comp2, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - real_t macro_vertex_coord_id_2comp2, - real_t macro_vertex_coord_id_3comp0, - real_t macro_vertex_coord_id_3comp1, - real_t macro_vertex_coord_id_3comp2, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float ) const; - /// Kernel type: toMatrix + void apply_P2ElementwiseEpsilon_2_2_macro_3D( real_t* RESTRICT _data_dstEdge, + real_t* RESTRICT _data_dstVertex, + real_t* RESTRICT _data_muEdge, + real_t* RESTRICT _data_muVertex, + real_t* RESTRICT _data_srcEdge, + real_t* RESTRICT _data_srcVertex, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_0comp2, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_1comp2, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + real_t macro_vertex_coord_id_2comp2, + real_t macro_vertex_coord_id_3comp0, + real_t macro_vertex_coord_id_3comp1, + real_t macro_vertex_coord_id_3comp2, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float ) const; + + /// Integral: P2ElementwiseEpsilon_2_2 + /// - volume element: tetrahedron, dim: 3, vertices: 4, spacedim: 3 + /// - kernel type: toMatrix + /// - loop strategy: SAWTOOTH /// - quadrature rule: Hammer-Marlowe-Stroud 1 | points: 4, degree: 2 + /// - blending map: IdentityMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 349 336 36 0 0 0 0 4 - void toMatrix_macro_3D( idx_t* RESTRICT _data_dstEdge, - idx_t* RESTRICT _data_dstVertex, - real_t* RESTRICT _data_muEdge, - real_t* RESTRICT _data_muVertex, - idx_t* RESTRICT _data_srcEdge, - idx_t* RESTRICT _data_srcVertex, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_0comp2, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_1comp2, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - real_t macro_vertex_coord_id_2comp2, - real_t macro_vertex_coord_id_3comp0, - real_t macro_vertex_coord_id_3comp1, - real_t macro_vertex_coord_id_3comp2, - std::shared_ptr< SparseMatrixProxy > mat, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float ) const; - /// Kernel type: computeInverseDiagonalOperatorValues + void toMatrix_P2ElementwiseEpsilon_2_2_macro_3D( idx_t* RESTRICT _data_dstEdge, + idx_t* RESTRICT _data_dstVertex, + real_t* RESTRICT _data_muEdge, + real_t* RESTRICT _data_muVertex, + idx_t* RESTRICT _data_srcEdge, + idx_t* RESTRICT _data_srcVertex, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_0comp2, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_1comp2, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + real_t macro_vertex_coord_id_2comp2, + real_t macro_vertex_coord_id_3comp0, + real_t macro_vertex_coord_id_3comp1, + real_t macro_vertex_coord_id_3comp2, + std::shared_ptr< SparseMatrixProxy > mat, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float ) const; + + /// Integral: P2ElementwiseEpsilon_2_2 + /// - volume element: tetrahedron, dim: 3, vertices: 4, spacedim: 3 + /// - kernel type: computeInverseDiagonalOperatorValues + /// - loop strategy: SAWTOOTH /// - quadrature rule: Hammer-Marlowe-Stroud 1 | points: 4, degree: 2 + /// - blending map: IdentityMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 179 156 36 0 0 0 0 1 - void computeInverseDiagonalOperatorValues_macro_3D( real_t* RESTRICT _data_invDiag_Edge, - real_t* RESTRICT _data_invDiag_Vertex, - real_t* RESTRICT _data_muEdge, - real_t* RESTRICT _data_muVertex, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_0comp2, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_1comp2, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - real_t macro_vertex_coord_id_2comp2, - real_t macro_vertex_coord_id_3comp0, - real_t macro_vertex_coord_id_3comp1, - real_t macro_vertex_coord_id_3comp2, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float ) const; + void computeInverseDiagonalOperatorValues_P2ElementwiseEpsilon_2_2_macro_3D( real_t* RESTRICT _data_invDiag_Edge, + real_t* RESTRICT _data_invDiag_Vertex, + real_t* RESTRICT _data_muEdge, + real_t* RESTRICT _data_muVertex, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_0comp2, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_1comp2, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + real_t macro_vertex_coord_id_2comp2, + real_t macro_vertex_coord_id_3comp0, + real_t macro_vertex_coord_id_3comp1, + real_t macro_vertex_coord_id_3comp2, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float ) const; std::shared_ptr< P2Function< real_t > > invDiag_; P2Function< real_t > mu; diff --git a/operators/epsilon/avx/P2ElementwiseEpsilonAnnulusMap_0_0_apply_macro_2D.cpp b/operators/epsilon/avx/P2ElementwiseEpsilonAnnulusMap_0_0_apply_P2ElementwiseEpsilonAnnulusMap_0_0_macro_2D.cpp similarity index 99% rename from operators/epsilon/avx/P2ElementwiseEpsilonAnnulusMap_0_0_apply_macro_2D.cpp rename to operators/epsilon/avx/P2ElementwiseEpsilonAnnulusMap_0_0_apply_P2ElementwiseEpsilonAnnulusMap_0_0_macro_2D.cpp index ce842b48341190b729a60e90b99e57d6bf910bb9..b47e68a3617f86be04b94f374e0048a51927708c 100644 --- a/operators/epsilon/avx/P2ElementwiseEpsilonAnnulusMap_0_0_apply_macro_2D.cpp +++ b/operators/epsilon/avx/P2ElementwiseEpsilonAnnulusMap_0_0_apply_P2ElementwiseEpsilonAnnulusMap_0_0_macro_2D.cpp @@ -45,6 +45,10 @@ + + + + @@ -58,7 +62,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseEpsilonAnnulusMap_0_0::apply_macro_2D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const +void P2ElementwiseEpsilonAnnulusMap_0_0::apply_P2ElementwiseEpsilonAnnulusMap_0_0_macro_2D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const { { const real_t _data_q_w [] = {-0.28125, 0.26041666666666669, 0.26041666666666669, 0.26041666666666669}; diff --git a/operators/epsilon/avx/P2ElementwiseEpsilonAnnulusMap_0_0_computeInverseDiagonalOperatorValues_macro_2D.cpp b/operators/epsilon/avx/P2ElementwiseEpsilonAnnulusMap_0_0_computeInverseDiagonalOperatorValues_P2ElementwiseEpsilonAnnulusMap_0_0_macro_2D.cpp similarity index 99% rename from operators/epsilon/avx/P2ElementwiseEpsilonAnnulusMap_0_0_computeInverseDiagonalOperatorValues_macro_2D.cpp rename to operators/epsilon/avx/P2ElementwiseEpsilonAnnulusMap_0_0_computeInverseDiagonalOperatorValues_P2ElementwiseEpsilonAnnulusMap_0_0_macro_2D.cpp index ef8e4674b7e965e53fc9e975014166530124c370..bbe9480edd548a7f7a36becb1c39cd8c31a32990 100644 --- a/operators/epsilon/avx/P2ElementwiseEpsilonAnnulusMap_0_0_computeInverseDiagonalOperatorValues_macro_2D.cpp +++ b/operators/epsilon/avx/P2ElementwiseEpsilonAnnulusMap_0_0_computeInverseDiagonalOperatorValues_P2ElementwiseEpsilonAnnulusMap_0_0_macro_2D.cpp @@ -45,6 +45,10 @@ + + + + @@ -58,7 +62,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseEpsilonAnnulusMap_0_0::computeInverseDiagonalOperatorValues_macro_2D( real_t * RESTRICT _data_invDiag_Edge, real_t * RESTRICT _data_invDiag_Vertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const +void P2ElementwiseEpsilonAnnulusMap_0_0::computeInverseDiagonalOperatorValues_P2ElementwiseEpsilonAnnulusMap_0_0_macro_2D( real_t * RESTRICT _data_invDiag_Edge, real_t * RESTRICT _data_invDiag_Vertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const { { const real_t _data_q_w [] = {-0.28125, 0.26041666666666669, 0.26041666666666669, 0.26041666666666669}; diff --git a/operators/epsilon/avx/P2ElementwiseEpsilonAnnulusMap_0_1_apply_macro_2D.cpp b/operators/epsilon/avx/P2ElementwiseEpsilonAnnulusMap_0_1_apply_P2ElementwiseEpsilonAnnulusMap_0_1_macro_2D.cpp similarity index 99% rename from operators/epsilon/avx/P2ElementwiseEpsilonAnnulusMap_0_1_apply_macro_2D.cpp rename to operators/epsilon/avx/P2ElementwiseEpsilonAnnulusMap_0_1_apply_P2ElementwiseEpsilonAnnulusMap_0_1_macro_2D.cpp index bd671a16cfb56bf5113f2fd8a3ca176cc26e960a..d9cd38d52e1f250d8d0129622a6f38e52fd00836 100644 --- a/operators/epsilon/avx/P2ElementwiseEpsilonAnnulusMap_0_1_apply_macro_2D.cpp +++ b/operators/epsilon/avx/P2ElementwiseEpsilonAnnulusMap_0_1_apply_P2ElementwiseEpsilonAnnulusMap_0_1_macro_2D.cpp @@ -43,6 +43,10 @@ + + + + @@ -56,7 +60,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseEpsilonAnnulusMap_0_1::apply_macro_2D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const +void P2ElementwiseEpsilonAnnulusMap_0_1::apply_P2ElementwiseEpsilonAnnulusMap_0_1_macro_2D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const { { const real_t _data_q_w [] = {-0.28125, 0.26041666666666669, 0.26041666666666669, 0.26041666666666669}; diff --git a/operators/epsilon/avx/P2ElementwiseEpsilonAnnulusMap_1_0_apply_macro_2D.cpp b/operators/epsilon/avx/P2ElementwiseEpsilonAnnulusMap_1_0_apply_P2ElementwiseEpsilonAnnulusMap_1_0_macro_2D.cpp similarity index 99% rename from operators/epsilon/avx/P2ElementwiseEpsilonAnnulusMap_1_0_apply_macro_2D.cpp rename to operators/epsilon/avx/P2ElementwiseEpsilonAnnulusMap_1_0_apply_P2ElementwiseEpsilonAnnulusMap_1_0_macro_2D.cpp index b84cbeabd8dcc6892af2a9c0978edf3d2a06a713..59e329a785958cafcdd4c253247e8f1de2116c88 100644 --- a/operators/epsilon/avx/P2ElementwiseEpsilonAnnulusMap_1_0_apply_macro_2D.cpp +++ b/operators/epsilon/avx/P2ElementwiseEpsilonAnnulusMap_1_0_apply_P2ElementwiseEpsilonAnnulusMap_1_0_macro_2D.cpp @@ -43,6 +43,10 @@ + + + + @@ -56,7 +60,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseEpsilonAnnulusMap_1_0::apply_macro_2D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const +void P2ElementwiseEpsilonAnnulusMap_1_0::apply_P2ElementwiseEpsilonAnnulusMap_1_0_macro_2D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const { { const real_t _data_q_w [] = {-0.28125, 0.26041666666666669, 0.26041666666666669, 0.26041666666666669}; diff --git a/operators/epsilon/avx/P2ElementwiseEpsilonAnnulusMap_1_1_apply_macro_2D.cpp b/operators/epsilon/avx/P2ElementwiseEpsilonAnnulusMap_1_1_apply_P2ElementwiseEpsilonAnnulusMap_1_1_macro_2D.cpp similarity index 99% rename from operators/epsilon/avx/P2ElementwiseEpsilonAnnulusMap_1_1_apply_macro_2D.cpp rename to operators/epsilon/avx/P2ElementwiseEpsilonAnnulusMap_1_1_apply_P2ElementwiseEpsilonAnnulusMap_1_1_macro_2D.cpp index 56b89e6ec639d678225fc257415dd374fa40466b..3baa7447cf85f8d4aabe5fff60e54e25f4a7d337 100644 --- a/operators/epsilon/avx/P2ElementwiseEpsilonAnnulusMap_1_1_apply_macro_2D.cpp +++ b/operators/epsilon/avx/P2ElementwiseEpsilonAnnulusMap_1_1_apply_P2ElementwiseEpsilonAnnulusMap_1_1_macro_2D.cpp @@ -45,6 +45,10 @@ + + + + @@ -58,7 +62,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseEpsilonAnnulusMap_1_1::apply_macro_2D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const +void P2ElementwiseEpsilonAnnulusMap_1_1::apply_P2ElementwiseEpsilonAnnulusMap_1_1_macro_2D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const { { const real_t _data_q_w [] = {-0.28125, 0.26041666666666669, 0.26041666666666669, 0.26041666666666669}; diff --git a/operators/epsilon/avx/P2ElementwiseEpsilonAnnulusMap_1_1_computeInverseDiagonalOperatorValues_macro_2D.cpp b/operators/epsilon/avx/P2ElementwiseEpsilonAnnulusMap_1_1_computeInverseDiagonalOperatorValues_P2ElementwiseEpsilonAnnulusMap_1_1_macro_2D.cpp similarity index 99% rename from operators/epsilon/avx/P2ElementwiseEpsilonAnnulusMap_1_1_computeInverseDiagonalOperatorValues_macro_2D.cpp rename to operators/epsilon/avx/P2ElementwiseEpsilonAnnulusMap_1_1_computeInverseDiagonalOperatorValues_P2ElementwiseEpsilonAnnulusMap_1_1_macro_2D.cpp index b4709967373753655ed04c7566e671ffb74e9a9c..3a670082c87c56f2a2aa517d3267a13833ea1e96 100644 --- a/operators/epsilon/avx/P2ElementwiseEpsilonAnnulusMap_1_1_computeInverseDiagonalOperatorValues_macro_2D.cpp +++ b/operators/epsilon/avx/P2ElementwiseEpsilonAnnulusMap_1_1_computeInverseDiagonalOperatorValues_P2ElementwiseEpsilonAnnulusMap_1_1_macro_2D.cpp @@ -45,6 +45,10 @@ + + + + @@ -58,7 +62,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseEpsilonAnnulusMap_1_1::computeInverseDiagonalOperatorValues_macro_2D( real_t * RESTRICT _data_invDiag_Edge, real_t * RESTRICT _data_invDiag_Vertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const +void P2ElementwiseEpsilonAnnulusMap_1_1::computeInverseDiagonalOperatorValues_P2ElementwiseEpsilonAnnulusMap_1_1_macro_2D( real_t * RESTRICT _data_invDiag_Edge, real_t * RESTRICT _data_invDiag_Vertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const { { const real_t _data_q_w [] = {-0.28125, 0.26041666666666669, 0.26041666666666669, 0.26041666666666669}; diff --git a/operators/epsilon/avx/P2ElementwiseEpsilonIcosahedralShellMap_0_0_apply_macro_3D.cpp b/operators/epsilon/avx/P2ElementwiseEpsilonIcosahedralShellMap_0_0_apply_P2ElementwiseEpsilonIcosahedralShellMap_0_0_macro_3D.cpp similarity index 99% rename from operators/epsilon/avx/P2ElementwiseEpsilonIcosahedralShellMap_0_0_apply_macro_3D.cpp rename to operators/epsilon/avx/P2ElementwiseEpsilonIcosahedralShellMap_0_0_apply_P2ElementwiseEpsilonIcosahedralShellMap_0_0_macro_3D.cpp index 48a9372c612dc03495a6e6b8ca5460adb16099f9..396e50cb96bccc7c7023bff6b7b6978b3fd8fb16 100644 --- a/operators/epsilon/avx/P2ElementwiseEpsilonIcosahedralShellMap_0_0_apply_macro_3D.cpp +++ b/operators/epsilon/avx/P2ElementwiseEpsilonIcosahedralShellMap_0_0_apply_P2ElementwiseEpsilonIcosahedralShellMap_0_0_macro_3D.cpp @@ -45,6 +45,10 @@ + + + + @@ -58,7 +62,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseEpsilonIcosahedralShellMap_0_0::apply_macro_3D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const +void P2ElementwiseEpsilonIcosahedralShellMap_0_0::apply_P2ElementwiseEpsilonIcosahedralShellMap_0_0_macro_3D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const { { const real_t _data_q_w [] = {-0.1333333333333333, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983}; diff --git a/operators/epsilon/avx/P2ElementwiseEpsilonIcosahedralShellMap_0_0_computeInverseDiagonalOperatorValues_macro_3D.cpp b/operators/epsilon/avx/P2ElementwiseEpsilonIcosahedralShellMap_0_0_computeInverseDiagonalOperatorValues_P2ElementwiseEpsilonIcosahedralShellMap_0_0_macro_3D.cpp similarity index 99% rename from operators/epsilon/avx/P2ElementwiseEpsilonIcosahedralShellMap_0_0_computeInverseDiagonalOperatorValues_macro_3D.cpp rename to operators/epsilon/avx/P2ElementwiseEpsilonIcosahedralShellMap_0_0_computeInverseDiagonalOperatorValues_P2ElementwiseEpsilonIcosahedralShellMap_0_0_macro_3D.cpp index 43f779eed17a6c224e7d5b26399b981314225e42..491f16c851c4af84406f7baea7193e49a60da523 100644 --- a/operators/epsilon/avx/P2ElementwiseEpsilonIcosahedralShellMap_0_0_computeInverseDiagonalOperatorValues_macro_3D.cpp +++ b/operators/epsilon/avx/P2ElementwiseEpsilonIcosahedralShellMap_0_0_computeInverseDiagonalOperatorValues_P2ElementwiseEpsilonIcosahedralShellMap_0_0_macro_3D.cpp @@ -45,6 +45,10 @@ + + + + @@ -58,7 +62,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseEpsilonIcosahedralShellMap_0_0::computeInverseDiagonalOperatorValues_macro_3D( real_t * RESTRICT _data_invDiag_Edge, real_t * RESTRICT _data_invDiag_Vertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const +void P2ElementwiseEpsilonIcosahedralShellMap_0_0::computeInverseDiagonalOperatorValues_P2ElementwiseEpsilonIcosahedralShellMap_0_0_macro_3D( real_t * RESTRICT _data_invDiag_Edge, real_t * RESTRICT _data_invDiag_Vertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const { { const real_t _data_q_w [] = {-0.1333333333333333, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983}; diff --git a/operators/epsilon/avx/P2ElementwiseEpsilonIcosahedralShellMap_0_1_apply_macro_3D.cpp b/operators/epsilon/avx/P2ElementwiseEpsilonIcosahedralShellMap_0_1_apply_P2ElementwiseEpsilonIcosahedralShellMap_0_1_macro_3D.cpp similarity index 99% rename from operators/epsilon/avx/P2ElementwiseEpsilonIcosahedralShellMap_0_1_apply_macro_3D.cpp rename to operators/epsilon/avx/P2ElementwiseEpsilonIcosahedralShellMap_0_1_apply_P2ElementwiseEpsilonIcosahedralShellMap_0_1_macro_3D.cpp index 9cd33c419affa03e7c9ddc9ec3cf8f901082f171..46604aa5e5f6529b019749857ef60b7ce6964a11 100644 --- a/operators/epsilon/avx/P2ElementwiseEpsilonIcosahedralShellMap_0_1_apply_macro_3D.cpp +++ b/operators/epsilon/avx/P2ElementwiseEpsilonIcosahedralShellMap_0_1_apply_P2ElementwiseEpsilonIcosahedralShellMap_0_1_macro_3D.cpp @@ -43,6 +43,10 @@ + + + + @@ -56,7 +60,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseEpsilonIcosahedralShellMap_0_1::apply_macro_3D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const +void P2ElementwiseEpsilonIcosahedralShellMap_0_1::apply_P2ElementwiseEpsilonIcosahedralShellMap_0_1_macro_3D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const { { const real_t _data_q_w [] = {-0.1333333333333333, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983}; diff --git a/operators/epsilon/avx/P2ElementwiseEpsilonIcosahedralShellMap_0_2_apply_macro_3D.cpp b/operators/epsilon/avx/P2ElementwiseEpsilonIcosahedralShellMap_0_2_apply_P2ElementwiseEpsilonIcosahedralShellMap_0_2_macro_3D.cpp similarity index 99% rename from operators/epsilon/avx/P2ElementwiseEpsilonIcosahedralShellMap_0_2_apply_macro_3D.cpp rename to operators/epsilon/avx/P2ElementwiseEpsilonIcosahedralShellMap_0_2_apply_P2ElementwiseEpsilonIcosahedralShellMap_0_2_macro_3D.cpp index 4c6a2797baf3010c2b6c79869bdcd528013a0abc..d95e76e4c7e37506c590de90a62aa948768af062 100644 --- a/operators/epsilon/avx/P2ElementwiseEpsilonIcosahedralShellMap_0_2_apply_macro_3D.cpp +++ b/operators/epsilon/avx/P2ElementwiseEpsilonIcosahedralShellMap_0_2_apply_P2ElementwiseEpsilonIcosahedralShellMap_0_2_macro_3D.cpp @@ -43,6 +43,10 @@ + + + + @@ -56,7 +60,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseEpsilonIcosahedralShellMap_0_2::apply_macro_3D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const +void P2ElementwiseEpsilonIcosahedralShellMap_0_2::apply_P2ElementwiseEpsilonIcosahedralShellMap_0_2_macro_3D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const { { const real_t _data_q_w [] = {-0.1333333333333333, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983}; diff --git a/operators/epsilon/avx/P2ElementwiseEpsilonIcosahedralShellMap_1_0_apply_macro_3D.cpp b/operators/epsilon/avx/P2ElementwiseEpsilonIcosahedralShellMap_1_0_apply_P2ElementwiseEpsilonIcosahedralShellMap_1_0_macro_3D.cpp similarity index 99% rename from operators/epsilon/avx/P2ElementwiseEpsilonIcosahedralShellMap_1_0_apply_macro_3D.cpp rename to operators/epsilon/avx/P2ElementwiseEpsilonIcosahedralShellMap_1_0_apply_P2ElementwiseEpsilonIcosahedralShellMap_1_0_macro_3D.cpp index 7b29795582b67d129b669b0d31bdbfd239008ba9..d6e58a78f8e7c051b229677b346699d915876f7a 100644 --- a/operators/epsilon/avx/P2ElementwiseEpsilonIcosahedralShellMap_1_0_apply_macro_3D.cpp +++ b/operators/epsilon/avx/P2ElementwiseEpsilonIcosahedralShellMap_1_0_apply_P2ElementwiseEpsilonIcosahedralShellMap_1_0_macro_3D.cpp @@ -43,6 +43,10 @@ + + + + @@ -56,7 +60,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseEpsilonIcosahedralShellMap_1_0::apply_macro_3D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const +void P2ElementwiseEpsilonIcosahedralShellMap_1_0::apply_P2ElementwiseEpsilonIcosahedralShellMap_1_0_macro_3D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const { { const real_t _data_q_w [] = {-0.1333333333333333, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983}; diff --git a/operators/epsilon/avx/P2ElementwiseEpsilonIcosahedralShellMap_1_1_apply_macro_3D.cpp b/operators/epsilon/avx/P2ElementwiseEpsilonIcosahedralShellMap_1_1_apply_P2ElementwiseEpsilonIcosahedralShellMap_1_1_macro_3D.cpp similarity index 99% rename from operators/epsilon/avx/P2ElementwiseEpsilonIcosahedralShellMap_1_1_apply_macro_3D.cpp rename to operators/epsilon/avx/P2ElementwiseEpsilonIcosahedralShellMap_1_1_apply_P2ElementwiseEpsilonIcosahedralShellMap_1_1_macro_3D.cpp index 598674e804f0f8d807955533f56e5df958893743..689f4fe709c52be5e184f58bdb98b48355391cac 100644 --- a/operators/epsilon/avx/P2ElementwiseEpsilonIcosahedralShellMap_1_1_apply_macro_3D.cpp +++ b/operators/epsilon/avx/P2ElementwiseEpsilonIcosahedralShellMap_1_1_apply_P2ElementwiseEpsilonIcosahedralShellMap_1_1_macro_3D.cpp @@ -45,6 +45,10 @@ + + + + @@ -58,7 +62,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseEpsilonIcosahedralShellMap_1_1::apply_macro_3D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const +void P2ElementwiseEpsilonIcosahedralShellMap_1_1::apply_P2ElementwiseEpsilonIcosahedralShellMap_1_1_macro_3D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const { { const real_t _data_q_w [] = {-0.1333333333333333, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983}; diff --git a/operators/epsilon/avx/P2ElementwiseEpsilonIcosahedralShellMap_1_1_computeInverseDiagonalOperatorValues_macro_3D.cpp b/operators/epsilon/avx/P2ElementwiseEpsilonIcosahedralShellMap_1_1_computeInverseDiagonalOperatorValues_P2ElementwiseEpsilonIcosahedralShellMap_1_1_macro_3D.cpp similarity index 99% rename from operators/epsilon/avx/P2ElementwiseEpsilonIcosahedralShellMap_1_1_computeInverseDiagonalOperatorValues_macro_3D.cpp rename to operators/epsilon/avx/P2ElementwiseEpsilonIcosahedralShellMap_1_1_computeInverseDiagonalOperatorValues_P2ElementwiseEpsilonIcosahedralShellMap_1_1_macro_3D.cpp index 005e27965ec8755a17dff59f97aeaf83dd084fc7..3f3d7eb375fff16eec59e90b5d2766f8e0c496a1 100644 --- a/operators/epsilon/avx/P2ElementwiseEpsilonIcosahedralShellMap_1_1_computeInverseDiagonalOperatorValues_macro_3D.cpp +++ b/operators/epsilon/avx/P2ElementwiseEpsilonIcosahedralShellMap_1_1_computeInverseDiagonalOperatorValues_P2ElementwiseEpsilonIcosahedralShellMap_1_1_macro_3D.cpp @@ -45,6 +45,10 @@ + + + + @@ -58,7 +62,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseEpsilonIcosahedralShellMap_1_1::computeInverseDiagonalOperatorValues_macro_3D( real_t * RESTRICT _data_invDiag_Edge, real_t * RESTRICT _data_invDiag_Vertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const +void P2ElementwiseEpsilonIcosahedralShellMap_1_1::computeInverseDiagonalOperatorValues_P2ElementwiseEpsilonIcosahedralShellMap_1_1_macro_3D( real_t * RESTRICT _data_invDiag_Edge, real_t * RESTRICT _data_invDiag_Vertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const { { const real_t _data_q_w [] = {-0.1333333333333333, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983}; diff --git a/operators/epsilon/avx/P2ElementwiseEpsilonIcosahedralShellMap_1_2_apply_macro_3D.cpp b/operators/epsilon/avx/P2ElementwiseEpsilonIcosahedralShellMap_1_2_apply_P2ElementwiseEpsilonIcosahedralShellMap_1_2_macro_3D.cpp similarity index 99% rename from operators/epsilon/avx/P2ElementwiseEpsilonIcosahedralShellMap_1_2_apply_macro_3D.cpp rename to operators/epsilon/avx/P2ElementwiseEpsilonIcosahedralShellMap_1_2_apply_P2ElementwiseEpsilonIcosahedralShellMap_1_2_macro_3D.cpp index bafa0fafededaf17892a0e54d66e0a1b955052f9..4ec48e9bbbec3599c6202280c32b48474af18062 100644 --- a/operators/epsilon/avx/P2ElementwiseEpsilonIcosahedralShellMap_1_2_apply_macro_3D.cpp +++ b/operators/epsilon/avx/P2ElementwiseEpsilonIcosahedralShellMap_1_2_apply_P2ElementwiseEpsilonIcosahedralShellMap_1_2_macro_3D.cpp @@ -43,6 +43,10 @@ + + + + @@ -56,7 +60,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseEpsilonIcosahedralShellMap_1_2::apply_macro_3D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const +void P2ElementwiseEpsilonIcosahedralShellMap_1_2::apply_P2ElementwiseEpsilonIcosahedralShellMap_1_2_macro_3D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const { { const real_t _data_q_w [] = {-0.1333333333333333, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983}; diff --git a/operators/epsilon/avx/P2ElementwiseEpsilonIcosahedralShellMap_2_0_apply_macro_3D.cpp b/operators/epsilon/avx/P2ElementwiseEpsilonIcosahedralShellMap_2_0_apply_P2ElementwiseEpsilonIcosahedralShellMap_2_0_macro_3D.cpp similarity index 99% rename from operators/epsilon/avx/P2ElementwiseEpsilonIcosahedralShellMap_2_0_apply_macro_3D.cpp rename to operators/epsilon/avx/P2ElementwiseEpsilonIcosahedralShellMap_2_0_apply_P2ElementwiseEpsilonIcosahedralShellMap_2_0_macro_3D.cpp index 17d106ef3944a0659abbfd3b5ea4044155a4a5cd..878c913ba8bb1aeab17ecd18818b9291c590a6cd 100644 --- a/operators/epsilon/avx/P2ElementwiseEpsilonIcosahedralShellMap_2_0_apply_macro_3D.cpp +++ b/operators/epsilon/avx/P2ElementwiseEpsilonIcosahedralShellMap_2_0_apply_P2ElementwiseEpsilonIcosahedralShellMap_2_0_macro_3D.cpp @@ -43,6 +43,10 @@ + + + + @@ -56,7 +60,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseEpsilonIcosahedralShellMap_2_0::apply_macro_3D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const +void P2ElementwiseEpsilonIcosahedralShellMap_2_0::apply_P2ElementwiseEpsilonIcosahedralShellMap_2_0_macro_3D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const { { const real_t _data_q_w [] = {-0.1333333333333333, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983}; diff --git a/operators/epsilon/avx/P2ElementwiseEpsilonIcosahedralShellMap_2_1_apply_macro_3D.cpp b/operators/epsilon/avx/P2ElementwiseEpsilonIcosahedralShellMap_2_1_apply_P2ElementwiseEpsilonIcosahedralShellMap_2_1_macro_3D.cpp similarity index 99% rename from operators/epsilon/avx/P2ElementwiseEpsilonIcosahedralShellMap_2_1_apply_macro_3D.cpp rename to operators/epsilon/avx/P2ElementwiseEpsilonIcosahedralShellMap_2_1_apply_P2ElementwiseEpsilonIcosahedralShellMap_2_1_macro_3D.cpp index 66e3853b91c44d51c81199487df6e5ddc8960676..a8829f8f5e665d677d67880785cf7235bf5d72d8 100644 --- a/operators/epsilon/avx/P2ElementwiseEpsilonIcosahedralShellMap_2_1_apply_macro_3D.cpp +++ b/operators/epsilon/avx/P2ElementwiseEpsilonIcosahedralShellMap_2_1_apply_P2ElementwiseEpsilonIcosahedralShellMap_2_1_macro_3D.cpp @@ -43,6 +43,10 @@ + + + + @@ -56,7 +60,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseEpsilonIcosahedralShellMap_2_1::apply_macro_3D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const +void P2ElementwiseEpsilonIcosahedralShellMap_2_1::apply_P2ElementwiseEpsilonIcosahedralShellMap_2_1_macro_3D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const { { const real_t _data_q_w [] = {-0.1333333333333333, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983}; diff --git a/operators/epsilon/avx/P2ElementwiseEpsilonIcosahedralShellMap_2_2_apply_macro_3D.cpp b/operators/epsilon/avx/P2ElementwiseEpsilonIcosahedralShellMap_2_2_apply_P2ElementwiseEpsilonIcosahedralShellMap_2_2_macro_3D.cpp similarity index 99% rename from operators/epsilon/avx/P2ElementwiseEpsilonIcosahedralShellMap_2_2_apply_macro_3D.cpp rename to operators/epsilon/avx/P2ElementwiseEpsilonIcosahedralShellMap_2_2_apply_P2ElementwiseEpsilonIcosahedralShellMap_2_2_macro_3D.cpp index 9c538b0381b20fc38332c9479e4b795abb4f6379..7d75a6ae2d3c7d4d16c813628a03d9e9d6ff3e5f 100644 --- a/operators/epsilon/avx/P2ElementwiseEpsilonIcosahedralShellMap_2_2_apply_macro_3D.cpp +++ b/operators/epsilon/avx/P2ElementwiseEpsilonIcosahedralShellMap_2_2_apply_P2ElementwiseEpsilonIcosahedralShellMap_2_2_macro_3D.cpp @@ -45,6 +45,10 @@ + + + + @@ -58,7 +62,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseEpsilonIcosahedralShellMap_2_2::apply_macro_3D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const +void P2ElementwiseEpsilonIcosahedralShellMap_2_2::apply_P2ElementwiseEpsilonIcosahedralShellMap_2_2_macro_3D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const { { const real_t _data_q_w [] = {-0.1333333333333333, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983}; diff --git a/operators/epsilon/avx/P2ElementwiseEpsilonIcosahedralShellMap_2_2_computeInverseDiagonalOperatorValues_macro_3D.cpp b/operators/epsilon/avx/P2ElementwiseEpsilonIcosahedralShellMap_2_2_computeInverseDiagonalOperatorValues_P2ElementwiseEpsilonIcosahedralShellMap_2_2_macro_3D.cpp similarity index 99% rename from operators/epsilon/avx/P2ElementwiseEpsilonIcosahedralShellMap_2_2_computeInverseDiagonalOperatorValues_macro_3D.cpp rename to operators/epsilon/avx/P2ElementwiseEpsilonIcosahedralShellMap_2_2_computeInverseDiagonalOperatorValues_P2ElementwiseEpsilonIcosahedralShellMap_2_2_macro_3D.cpp index 012da9509da4fb2996eea9df4d658f11b18a1c79..112c7d971fe62d47afbc2071929f351265608776 100644 --- a/operators/epsilon/avx/P2ElementwiseEpsilonIcosahedralShellMap_2_2_computeInverseDiagonalOperatorValues_macro_3D.cpp +++ b/operators/epsilon/avx/P2ElementwiseEpsilonIcosahedralShellMap_2_2_computeInverseDiagonalOperatorValues_P2ElementwiseEpsilonIcosahedralShellMap_2_2_macro_3D.cpp @@ -45,6 +45,10 @@ + + + + @@ -58,7 +62,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseEpsilonIcosahedralShellMap_2_2::computeInverseDiagonalOperatorValues_macro_3D( real_t * RESTRICT _data_invDiag_Edge, real_t * RESTRICT _data_invDiag_Vertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const +void P2ElementwiseEpsilonIcosahedralShellMap_2_2::computeInverseDiagonalOperatorValues_P2ElementwiseEpsilonIcosahedralShellMap_2_2_macro_3D( real_t * RESTRICT _data_invDiag_Edge, real_t * RESTRICT _data_invDiag_Vertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const { { const real_t _data_q_w [] = {-0.1333333333333333, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983}; diff --git a/operators/epsilon/avx/P2ElementwiseEpsilon_0_0_apply_macro_2D.cpp b/operators/epsilon/avx/P2ElementwiseEpsilon_0_0_apply_P2ElementwiseEpsilon_0_0_macro_2D.cpp similarity index 99% rename from operators/epsilon/avx/P2ElementwiseEpsilon_0_0_apply_macro_2D.cpp rename to operators/epsilon/avx/P2ElementwiseEpsilon_0_0_apply_P2ElementwiseEpsilon_0_0_macro_2D.cpp index 42313668edfa230f0a4a6621e3559ac628caf730..63d3593b3c9fd9cfb995df83a3b6968d969d904e 100644 --- a/operators/epsilon/avx/P2ElementwiseEpsilon_0_0_apply_macro_2D.cpp +++ b/operators/epsilon/avx/P2ElementwiseEpsilon_0_0_apply_P2ElementwiseEpsilon_0_0_macro_2D.cpp @@ -43,6 +43,10 @@ + + + + @@ -56,7 +60,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseEpsilon_0_0::apply_macro_2D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P2ElementwiseEpsilon_0_0::apply_P2ElementwiseEpsilon_0_0_macro_2D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t _data_q_w [] = {0.16666666666666666, 0.16666666666666666, 0.16666666666666666}; diff --git a/operators/epsilon/avx/P2ElementwiseEpsilon_0_0_apply_macro_3D.cpp b/operators/epsilon/avx/P2ElementwiseEpsilon_0_0_apply_P2ElementwiseEpsilon_0_0_macro_3D.cpp similarity index 99% rename from operators/epsilon/avx/P2ElementwiseEpsilon_0_0_apply_macro_3D.cpp rename to operators/epsilon/avx/P2ElementwiseEpsilon_0_0_apply_P2ElementwiseEpsilon_0_0_macro_3D.cpp index a8bd09682401a495d324bb8ac2df30ef68f35499..36f61a5e25171c1659daa48f1135f34ec60a31d3 100644 --- a/operators/epsilon/avx/P2ElementwiseEpsilon_0_0_apply_macro_3D.cpp +++ b/operators/epsilon/avx/P2ElementwiseEpsilon_0_0_apply_P2ElementwiseEpsilon_0_0_macro_3D.cpp @@ -43,6 +43,10 @@ + + + + @@ -56,7 +60,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseEpsilon_0_0::apply_macro_3D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P2ElementwiseEpsilon_0_0::apply_P2ElementwiseEpsilon_0_0_macro_3D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t _data_q_w [] = {0.041666666666666657, 0.041666666666666657, 0.041666666666666657, 0.041666666666666657}; diff --git a/operators/epsilon/avx/P2ElementwiseEpsilon_0_0_computeInverseDiagonalOperatorValues_macro_2D.cpp b/operators/epsilon/avx/P2ElementwiseEpsilon_0_0_computeInverseDiagonalOperatorValues_P2ElementwiseEpsilon_0_0_macro_2D.cpp similarity index 99% rename from operators/epsilon/avx/P2ElementwiseEpsilon_0_0_computeInverseDiagonalOperatorValues_macro_2D.cpp rename to operators/epsilon/avx/P2ElementwiseEpsilon_0_0_computeInverseDiagonalOperatorValues_P2ElementwiseEpsilon_0_0_macro_2D.cpp index 32acd0de960cb39a8a33ba1d4ccd07083b244822..9b7c8917f03026099d26ba0905e68f42637bc2cf 100644 --- a/operators/epsilon/avx/P2ElementwiseEpsilon_0_0_computeInverseDiagonalOperatorValues_macro_2D.cpp +++ b/operators/epsilon/avx/P2ElementwiseEpsilon_0_0_computeInverseDiagonalOperatorValues_P2ElementwiseEpsilon_0_0_macro_2D.cpp @@ -43,6 +43,10 @@ + + + + @@ -56,7 +60,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseEpsilon_0_0::computeInverseDiagonalOperatorValues_macro_2D( real_t * RESTRICT _data_invDiag_Edge, real_t * RESTRICT _data_invDiag_Vertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P2ElementwiseEpsilon_0_0::computeInverseDiagonalOperatorValues_P2ElementwiseEpsilon_0_0_macro_2D( real_t * RESTRICT _data_invDiag_Edge, real_t * RESTRICT _data_invDiag_Vertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t _data_q_w [] = {0.16666666666666666, 0.16666666666666666, 0.16666666666666666}; diff --git a/operators/epsilon/avx/P2ElementwiseEpsilon_0_0_computeInverseDiagonalOperatorValues_macro_3D.cpp b/operators/epsilon/avx/P2ElementwiseEpsilon_0_0_computeInverseDiagonalOperatorValues_P2ElementwiseEpsilon_0_0_macro_3D.cpp similarity index 99% rename from operators/epsilon/avx/P2ElementwiseEpsilon_0_0_computeInverseDiagonalOperatorValues_macro_3D.cpp rename to operators/epsilon/avx/P2ElementwiseEpsilon_0_0_computeInverseDiagonalOperatorValues_P2ElementwiseEpsilon_0_0_macro_3D.cpp index 2313d32f842173c3a8ea40aad4b249b90bfec7f6..21a76a0d68a480236b602019c7bec1a4120fd3ce 100644 --- a/operators/epsilon/avx/P2ElementwiseEpsilon_0_0_computeInverseDiagonalOperatorValues_macro_3D.cpp +++ b/operators/epsilon/avx/P2ElementwiseEpsilon_0_0_computeInverseDiagonalOperatorValues_P2ElementwiseEpsilon_0_0_macro_3D.cpp @@ -43,6 +43,10 @@ + + + + @@ -56,7 +60,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseEpsilon_0_0::computeInverseDiagonalOperatorValues_macro_3D( real_t * RESTRICT _data_invDiag_Edge, real_t * RESTRICT _data_invDiag_Vertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P2ElementwiseEpsilon_0_0::computeInverseDiagonalOperatorValues_P2ElementwiseEpsilon_0_0_macro_3D( real_t * RESTRICT _data_invDiag_Edge, real_t * RESTRICT _data_invDiag_Vertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t _data_q_w [] = {0.041666666666666657, 0.041666666666666657, 0.041666666666666657, 0.041666666666666657}; diff --git a/operators/epsilon/avx/P2ElementwiseEpsilon_0_1_apply_macro_2D.cpp b/operators/epsilon/avx/P2ElementwiseEpsilon_0_1_apply_P2ElementwiseEpsilon_0_1_macro_2D.cpp similarity index 99% rename from operators/epsilon/avx/P2ElementwiseEpsilon_0_1_apply_macro_2D.cpp rename to operators/epsilon/avx/P2ElementwiseEpsilon_0_1_apply_P2ElementwiseEpsilon_0_1_macro_2D.cpp index e991c681a91a9ea5c48bf15fc7eed6bdce42cdbe..bc7617f321cff8dc17d0ab9e0fbf7ccfcdb90d1b 100644 --- a/operators/epsilon/avx/P2ElementwiseEpsilon_0_1_apply_macro_2D.cpp +++ b/operators/epsilon/avx/P2ElementwiseEpsilon_0_1_apply_P2ElementwiseEpsilon_0_1_macro_2D.cpp @@ -41,6 +41,10 @@ + + + + @@ -54,7 +58,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseEpsilon_0_1::apply_macro_2D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P2ElementwiseEpsilon_0_1::apply_P2ElementwiseEpsilon_0_1_macro_2D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t _data_q_w [] = {0.16666666666666666, 0.16666666666666666, 0.16666666666666666}; diff --git a/operators/epsilon/avx/P2ElementwiseEpsilon_0_1_apply_macro_3D.cpp b/operators/epsilon/avx/P2ElementwiseEpsilon_0_1_apply_P2ElementwiseEpsilon_0_1_macro_3D.cpp similarity index 99% rename from operators/epsilon/avx/P2ElementwiseEpsilon_0_1_apply_macro_3D.cpp rename to operators/epsilon/avx/P2ElementwiseEpsilon_0_1_apply_P2ElementwiseEpsilon_0_1_macro_3D.cpp index 729b987d334e73b20e0a74b5e80f3c7a2fb93a67..8e91b3bcddb6d6e5744aba1f1b89c5ed560d7833 100644 --- a/operators/epsilon/avx/P2ElementwiseEpsilon_0_1_apply_macro_3D.cpp +++ b/operators/epsilon/avx/P2ElementwiseEpsilon_0_1_apply_P2ElementwiseEpsilon_0_1_macro_3D.cpp @@ -41,6 +41,10 @@ + + + + @@ -54,7 +58,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseEpsilon_0_1::apply_macro_3D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P2ElementwiseEpsilon_0_1::apply_P2ElementwiseEpsilon_0_1_macro_3D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t _data_q_w [] = {0.041666666666666657, 0.041666666666666657, 0.041666666666666657, 0.041666666666666657}; diff --git a/operators/epsilon/avx/P2ElementwiseEpsilon_0_2_apply_macro_3D.cpp b/operators/epsilon/avx/P2ElementwiseEpsilon_0_2_apply_P2ElementwiseEpsilon_0_2_macro_3D.cpp similarity index 99% rename from operators/epsilon/avx/P2ElementwiseEpsilon_0_2_apply_macro_3D.cpp rename to operators/epsilon/avx/P2ElementwiseEpsilon_0_2_apply_P2ElementwiseEpsilon_0_2_macro_3D.cpp index fdc396b287a2ff000baae5642d806dd6108eaf35..71be2bfdd443e2bcf7d660422bc9b8c10e5a8046 100644 --- a/operators/epsilon/avx/P2ElementwiseEpsilon_0_2_apply_macro_3D.cpp +++ b/operators/epsilon/avx/P2ElementwiseEpsilon_0_2_apply_P2ElementwiseEpsilon_0_2_macro_3D.cpp @@ -41,6 +41,10 @@ + + + + @@ -54,7 +58,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseEpsilon_0_2::apply_macro_3D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P2ElementwiseEpsilon_0_2::apply_P2ElementwiseEpsilon_0_2_macro_3D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t _data_q_w [] = {0.041666666666666657, 0.041666666666666657, 0.041666666666666657, 0.041666666666666657}; diff --git a/operators/epsilon/avx/P2ElementwiseEpsilon_1_0_apply_macro_2D.cpp b/operators/epsilon/avx/P2ElementwiseEpsilon_1_0_apply_P2ElementwiseEpsilon_1_0_macro_2D.cpp similarity index 99% rename from operators/epsilon/avx/P2ElementwiseEpsilon_1_0_apply_macro_2D.cpp rename to operators/epsilon/avx/P2ElementwiseEpsilon_1_0_apply_P2ElementwiseEpsilon_1_0_macro_2D.cpp index 228462fe2b0b81b9042b0033df9b2529227170b4..49b83fbe0cb2c2b6170fb4d4140beb377be1ecf0 100644 --- a/operators/epsilon/avx/P2ElementwiseEpsilon_1_0_apply_macro_2D.cpp +++ b/operators/epsilon/avx/P2ElementwiseEpsilon_1_0_apply_P2ElementwiseEpsilon_1_0_macro_2D.cpp @@ -41,6 +41,10 @@ + + + + @@ -54,7 +58,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseEpsilon_1_0::apply_macro_2D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P2ElementwiseEpsilon_1_0::apply_P2ElementwiseEpsilon_1_0_macro_2D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t _data_q_w [] = {0.16666666666666666, 0.16666666666666666, 0.16666666666666666}; diff --git a/operators/epsilon/avx/P2ElementwiseEpsilon_1_0_apply_macro_3D.cpp b/operators/epsilon/avx/P2ElementwiseEpsilon_1_0_apply_P2ElementwiseEpsilon_1_0_macro_3D.cpp similarity index 99% rename from operators/epsilon/avx/P2ElementwiseEpsilon_1_0_apply_macro_3D.cpp rename to operators/epsilon/avx/P2ElementwiseEpsilon_1_0_apply_P2ElementwiseEpsilon_1_0_macro_3D.cpp index 64c043ffee60d3207f9970628cb7385c6f56f7e3..3494ba7f429537cd15040798abd5b081057474bf 100644 --- a/operators/epsilon/avx/P2ElementwiseEpsilon_1_0_apply_macro_3D.cpp +++ b/operators/epsilon/avx/P2ElementwiseEpsilon_1_0_apply_P2ElementwiseEpsilon_1_0_macro_3D.cpp @@ -41,6 +41,10 @@ + + + + @@ -54,7 +58,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseEpsilon_1_0::apply_macro_3D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P2ElementwiseEpsilon_1_0::apply_P2ElementwiseEpsilon_1_0_macro_3D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t _data_q_w [] = {0.041666666666666657, 0.041666666666666657, 0.041666666666666657, 0.041666666666666657}; diff --git a/operators/epsilon/avx/P2ElementwiseEpsilon_1_1_apply_macro_2D.cpp b/operators/epsilon/avx/P2ElementwiseEpsilon_1_1_apply_P2ElementwiseEpsilon_1_1_macro_2D.cpp similarity index 99% rename from operators/epsilon/avx/P2ElementwiseEpsilon_1_1_apply_macro_2D.cpp rename to operators/epsilon/avx/P2ElementwiseEpsilon_1_1_apply_P2ElementwiseEpsilon_1_1_macro_2D.cpp index a89339ae093a101e60d35bee6e98a5de8720d427..6df910959159410fa4cc992a32deabfc78c90692 100644 --- a/operators/epsilon/avx/P2ElementwiseEpsilon_1_1_apply_macro_2D.cpp +++ b/operators/epsilon/avx/P2ElementwiseEpsilon_1_1_apply_P2ElementwiseEpsilon_1_1_macro_2D.cpp @@ -43,6 +43,10 @@ + + + + @@ -56,7 +60,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseEpsilon_1_1::apply_macro_2D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P2ElementwiseEpsilon_1_1::apply_P2ElementwiseEpsilon_1_1_macro_2D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t _data_q_w [] = {0.16666666666666666, 0.16666666666666666, 0.16666666666666666}; diff --git a/operators/epsilon/avx/P2ElementwiseEpsilon_1_1_apply_macro_3D.cpp b/operators/epsilon/avx/P2ElementwiseEpsilon_1_1_apply_P2ElementwiseEpsilon_1_1_macro_3D.cpp similarity index 99% rename from operators/epsilon/avx/P2ElementwiseEpsilon_1_1_apply_macro_3D.cpp rename to operators/epsilon/avx/P2ElementwiseEpsilon_1_1_apply_P2ElementwiseEpsilon_1_1_macro_3D.cpp index 76f79e5e52ecf078067f7f0e1198b6c7ca4adf33..ae126d2a2b63450ae9c3d86f94c740dacf0a5695 100644 --- a/operators/epsilon/avx/P2ElementwiseEpsilon_1_1_apply_macro_3D.cpp +++ b/operators/epsilon/avx/P2ElementwiseEpsilon_1_1_apply_P2ElementwiseEpsilon_1_1_macro_3D.cpp @@ -43,6 +43,10 @@ + + + + @@ -56,7 +60,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseEpsilon_1_1::apply_macro_3D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P2ElementwiseEpsilon_1_1::apply_P2ElementwiseEpsilon_1_1_macro_3D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t _data_q_w [] = {0.041666666666666657, 0.041666666666666657, 0.041666666666666657, 0.041666666666666657}; diff --git a/operators/epsilon/avx/P2ElementwiseEpsilon_1_1_computeInverseDiagonalOperatorValues_macro_2D.cpp b/operators/epsilon/avx/P2ElementwiseEpsilon_1_1_computeInverseDiagonalOperatorValues_P2ElementwiseEpsilon_1_1_macro_2D.cpp similarity index 99% rename from operators/epsilon/avx/P2ElementwiseEpsilon_1_1_computeInverseDiagonalOperatorValues_macro_2D.cpp rename to operators/epsilon/avx/P2ElementwiseEpsilon_1_1_computeInverseDiagonalOperatorValues_P2ElementwiseEpsilon_1_1_macro_2D.cpp index d9e7eb50368e01c3c96318e20f99c8d408bb853f..b55ea264a275313db91c173914d0c96ad4b7fb2f 100644 --- a/operators/epsilon/avx/P2ElementwiseEpsilon_1_1_computeInverseDiagonalOperatorValues_macro_2D.cpp +++ b/operators/epsilon/avx/P2ElementwiseEpsilon_1_1_computeInverseDiagonalOperatorValues_P2ElementwiseEpsilon_1_1_macro_2D.cpp @@ -43,6 +43,10 @@ + + + + @@ -56,7 +60,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseEpsilon_1_1::computeInverseDiagonalOperatorValues_macro_2D( real_t * RESTRICT _data_invDiag_Edge, real_t * RESTRICT _data_invDiag_Vertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P2ElementwiseEpsilon_1_1::computeInverseDiagonalOperatorValues_P2ElementwiseEpsilon_1_1_macro_2D( real_t * RESTRICT _data_invDiag_Edge, real_t * RESTRICT _data_invDiag_Vertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t _data_q_w [] = {0.16666666666666666, 0.16666666666666666, 0.16666666666666666}; diff --git a/operators/epsilon/avx/P2ElementwiseEpsilon_1_1_computeInverseDiagonalOperatorValues_macro_3D.cpp b/operators/epsilon/avx/P2ElementwiseEpsilon_1_1_computeInverseDiagonalOperatorValues_P2ElementwiseEpsilon_1_1_macro_3D.cpp similarity index 99% rename from operators/epsilon/avx/P2ElementwiseEpsilon_1_1_computeInverseDiagonalOperatorValues_macro_3D.cpp rename to operators/epsilon/avx/P2ElementwiseEpsilon_1_1_computeInverseDiagonalOperatorValues_P2ElementwiseEpsilon_1_1_macro_3D.cpp index e63208613f9ae90369e60af02fdbac2483f4c9da..11c5733e7ff69b4482447ec684a3271da8901b58 100644 --- a/operators/epsilon/avx/P2ElementwiseEpsilon_1_1_computeInverseDiagonalOperatorValues_macro_3D.cpp +++ b/operators/epsilon/avx/P2ElementwiseEpsilon_1_1_computeInverseDiagonalOperatorValues_P2ElementwiseEpsilon_1_1_macro_3D.cpp @@ -43,6 +43,10 @@ + + + + @@ -56,7 +60,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseEpsilon_1_1::computeInverseDiagonalOperatorValues_macro_3D( real_t * RESTRICT _data_invDiag_Edge, real_t * RESTRICT _data_invDiag_Vertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P2ElementwiseEpsilon_1_1::computeInverseDiagonalOperatorValues_P2ElementwiseEpsilon_1_1_macro_3D( real_t * RESTRICT _data_invDiag_Edge, real_t * RESTRICT _data_invDiag_Vertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t _data_q_w [] = {0.041666666666666657, 0.041666666666666657, 0.041666666666666657, 0.041666666666666657}; diff --git a/operators/epsilon/avx/P2ElementwiseEpsilon_1_2_apply_macro_3D.cpp b/operators/epsilon/avx/P2ElementwiseEpsilon_1_2_apply_P2ElementwiseEpsilon_1_2_macro_3D.cpp similarity index 99% rename from operators/epsilon/avx/P2ElementwiseEpsilon_1_2_apply_macro_3D.cpp rename to operators/epsilon/avx/P2ElementwiseEpsilon_1_2_apply_P2ElementwiseEpsilon_1_2_macro_3D.cpp index 13ddacc43a8b087cdab1be0bf06db682bc751462..86cf0c46119bf3088319a2a9ad90775b9e826ff3 100644 --- a/operators/epsilon/avx/P2ElementwiseEpsilon_1_2_apply_macro_3D.cpp +++ b/operators/epsilon/avx/P2ElementwiseEpsilon_1_2_apply_P2ElementwiseEpsilon_1_2_macro_3D.cpp @@ -41,6 +41,10 @@ + + + + @@ -54,7 +58,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseEpsilon_1_2::apply_macro_3D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P2ElementwiseEpsilon_1_2::apply_P2ElementwiseEpsilon_1_2_macro_3D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t _data_q_w [] = {0.041666666666666657, 0.041666666666666657, 0.041666666666666657, 0.041666666666666657}; diff --git a/operators/epsilon/avx/P2ElementwiseEpsilon_2_0_apply_macro_3D.cpp b/operators/epsilon/avx/P2ElementwiseEpsilon_2_0_apply_P2ElementwiseEpsilon_2_0_macro_3D.cpp similarity index 99% rename from operators/epsilon/avx/P2ElementwiseEpsilon_2_0_apply_macro_3D.cpp rename to operators/epsilon/avx/P2ElementwiseEpsilon_2_0_apply_P2ElementwiseEpsilon_2_0_macro_3D.cpp index 55827d3e64702566b434dc7106f3a9a5fa2ab75b..067d5ebdcae0c60402a754b36d20cb4e2bd72e9e 100644 --- a/operators/epsilon/avx/P2ElementwiseEpsilon_2_0_apply_macro_3D.cpp +++ b/operators/epsilon/avx/P2ElementwiseEpsilon_2_0_apply_P2ElementwiseEpsilon_2_0_macro_3D.cpp @@ -41,6 +41,10 @@ + + + + @@ -54,7 +58,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseEpsilon_2_0::apply_macro_3D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P2ElementwiseEpsilon_2_0::apply_P2ElementwiseEpsilon_2_0_macro_3D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t _data_q_w [] = {0.041666666666666657, 0.041666666666666657, 0.041666666666666657, 0.041666666666666657}; diff --git a/operators/epsilon/avx/P2ElementwiseEpsilon_2_1_apply_macro_3D.cpp b/operators/epsilon/avx/P2ElementwiseEpsilon_2_1_apply_P2ElementwiseEpsilon_2_1_macro_3D.cpp similarity index 99% rename from operators/epsilon/avx/P2ElementwiseEpsilon_2_1_apply_macro_3D.cpp rename to operators/epsilon/avx/P2ElementwiseEpsilon_2_1_apply_P2ElementwiseEpsilon_2_1_macro_3D.cpp index 1da51cbdf511961d2210ec896e41c2e81bdd81ff..872f66dbe30d035d7a44f0856ae9216c8c9c3204 100644 --- a/operators/epsilon/avx/P2ElementwiseEpsilon_2_1_apply_macro_3D.cpp +++ b/operators/epsilon/avx/P2ElementwiseEpsilon_2_1_apply_P2ElementwiseEpsilon_2_1_macro_3D.cpp @@ -41,6 +41,10 @@ + + + + @@ -54,7 +58,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseEpsilon_2_1::apply_macro_3D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P2ElementwiseEpsilon_2_1::apply_P2ElementwiseEpsilon_2_1_macro_3D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t _data_q_w [] = {0.041666666666666657, 0.041666666666666657, 0.041666666666666657, 0.041666666666666657}; diff --git a/operators/epsilon/avx/P2ElementwiseEpsilon_2_2_apply_macro_3D.cpp b/operators/epsilon/avx/P2ElementwiseEpsilon_2_2_apply_P2ElementwiseEpsilon_2_2_macro_3D.cpp similarity index 99% rename from operators/epsilon/avx/P2ElementwiseEpsilon_2_2_apply_macro_3D.cpp rename to operators/epsilon/avx/P2ElementwiseEpsilon_2_2_apply_P2ElementwiseEpsilon_2_2_macro_3D.cpp index cfb3b76fddb96f54d11cd00de92a5ff54302221e..41c47dd7ef5c779244d0cabb33259bfc26748603 100644 --- a/operators/epsilon/avx/P2ElementwiseEpsilon_2_2_apply_macro_3D.cpp +++ b/operators/epsilon/avx/P2ElementwiseEpsilon_2_2_apply_P2ElementwiseEpsilon_2_2_macro_3D.cpp @@ -43,6 +43,10 @@ + + + + @@ -56,7 +60,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseEpsilon_2_2::apply_macro_3D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P2ElementwiseEpsilon_2_2::apply_P2ElementwiseEpsilon_2_2_macro_3D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t _data_q_w [] = {0.041666666666666657, 0.041666666666666657, 0.041666666666666657, 0.041666666666666657}; diff --git a/operators/epsilon/avx/P2ElementwiseEpsilon_2_2_computeInverseDiagonalOperatorValues_macro_3D.cpp b/operators/epsilon/avx/P2ElementwiseEpsilon_2_2_computeInverseDiagonalOperatorValues_P2ElementwiseEpsilon_2_2_macro_3D.cpp similarity index 99% rename from operators/epsilon/avx/P2ElementwiseEpsilon_2_2_computeInverseDiagonalOperatorValues_macro_3D.cpp rename to operators/epsilon/avx/P2ElementwiseEpsilon_2_2_computeInverseDiagonalOperatorValues_P2ElementwiseEpsilon_2_2_macro_3D.cpp index af6887f0463b628cd2259c1d5ed97be0d79e2403..bfbc9bdcd4ef98d1895cd215c04647e5e68987d6 100644 --- a/operators/epsilon/avx/P2ElementwiseEpsilon_2_2_computeInverseDiagonalOperatorValues_macro_3D.cpp +++ b/operators/epsilon/avx/P2ElementwiseEpsilon_2_2_computeInverseDiagonalOperatorValues_P2ElementwiseEpsilon_2_2_macro_3D.cpp @@ -43,6 +43,10 @@ + + + + @@ -56,7 +60,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseEpsilon_2_2::computeInverseDiagonalOperatorValues_macro_3D( real_t * RESTRICT _data_invDiag_Edge, real_t * RESTRICT _data_invDiag_Vertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P2ElementwiseEpsilon_2_2::computeInverseDiagonalOperatorValues_P2ElementwiseEpsilon_2_2_macro_3D( real_t * RESTRICT _data_invDiag_Edge, real_t * RESTRICT _data_invDiag_Vertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t _data_q_w [] = {0.041666666666666657, 0.041666666666666657, 0.041666666666666657, 0.041666666666666657}; diff --git a/operators/epsilon/noarch/P2ElementwiseEpsilonAnnulusMap_0_0_apply_macro_2D.cpp b/operators/epsilon/noarch/P2ElementwiseEpsilonAnnulusMap_0_0_apply_P2ElementwiseEpsilonAnnulusMap_0_0_macro_2D.cpp similarity index 98% rename from operators/epsilon/noarch/P2ElementwiseEpsilonAnnulusMap_0_0_apply_macro_2D.cpp rename to operators/epsilon/noarch/P2ElementwiseEpsilonAnnulusMap_0_0_apply_P2ElementwiseEpsilonAnnulusMap_0_0_macro_2D.cpp index 635fcc9ac66e5f19b4130084bc339f64a05652f6..8859de5a8546140de11d6293356313e083b15886 100644 --- a/operators/epsilon/noarch/P2ElementwiseEpsilonAnnulusMap_0_0_apply_macro_2D.cpp +++ b/operators/epsilon/noarch/P2ElementwiseEpsilonAnnulusMap_0_0_apply_P2ElementwiseEpsilonAnnulusMap_0_0_macro_2D.cpp @@ -45,6 +45,10 @@ + + + + @@ -58,7 +62,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseEpsilonAnnulusMap_0_0::apply_macro_2D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const +void P2ElementwiseEpsilonAnnulusMap_0_0::apply_P2ElementwiseEpsilonAnnulusMap_0_0_macro_2D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const { { const real_t _data_q_w [] = {-0.28125, 0.26041666666666669, 0.26041666666666669, 0.26041666666666669}; diff --git a/operators/epsilon/noarch/P2ElementwiseEpsilonAnnulusMap_0_0_computeInverseDiagonalOperatorValues_macro_2D.cpp b/operators/epsilon/noarch/P2ElementwiseEpsilonAnnulusMap_0_0_computeInverseDiagonalOperatorValues_P2ElementwiseEpsilonAnnulusMap_0_0_macro_2D.cpp similarity index 98% rename from operators/epsilon/noarch/P2ElementwiseEpsilonAnnulusMap_0_0_computeInverseDiagonalOperatorValues_macro_2D.cpp rename to operators/epsilon/noarch/P2ElementwiseEpsilonAnnulusMap_0_0_computeInverseDiagonalOperatorValues_P2ElementwiseEpsilonAnnulusMap_0_0_macro_2D.cpp index f624a38172be37c70085f712254ef4dff92938da..7023d510e26d209bbc2955c69a55813231bf5b04 100644 --- a/operators/epsilon/noarch/P2ElementwiseEpsilonAnnulusMap_0_0_computeInverseDiagonalOperatorValues_macro_2D.cpp +++ b/operators/epsilon/noarch/P2ElementwiseEpsilonAnnulusMap_0_0_computeInverseDiagonalOperatorValues_P2ElementwiseEpsilonAnnulusMap_0_0_macro_2D.cpp @@ -45,6 +45,10 @@ + + + + @@ -58,7 +62,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseEpsilonAnnulusMap_0_0::computeInverseDiagonalOperatorValues_macro_2D( real_t * RESTRICT _data_invDiag_Edge, real_t * RESTRICT _data_invDiag_Vertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const +void P2ElementwiseEpsilonAnnulusMap_0_0::computeInverseDiagonalOperatorValues_P2ElementwiseEpsilonAnnulusMap_0_0_macro_2D( real_t * RESTRICT _data_invDiag_Edge, real_t * RESTRICT _data_invDiag_Vertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const { { const real_t _data_q_w [] = {-0.28125, 0.26041666666666669, 0.26041666666666669, 0.26041666666666669}; diff --git a/operators/epsilon/noarch/P2ElementwiseEpsilonAnnulusMap_0_0_toMatrix_macro_2D.cpp b/operators/epsilon/noarch/P2ElementwiseEpsilonAnnulusMap_0_0_toMatrix_P2ElementwiseEpsilonAnnulusMap_0_0_macro_2D.cpp similarity index 98% rename from operators/epsilon/noarch/P2ElementwiseEpsilonAnnulusMap_0_0_toMatrix_macro_2D.cpp rename to operators/epsilon/noarch/P2ElementwiseEpsilonAnnulusMap_0_0_toMatrix_P2ElementwiseEpsilonAnnulusMap_0_0_macro_2D.cpp index 45eb4ac0cd582c0b02dfdbc389dc89146c75ed1c..f88bffdcbed9c5fa14fabb7578ed6950bfd6e09b 100644 --- a/operators/epsilon/noarch/P2ElementwiseEpsilonAnnulusMap_0_0_toMatrix_macro_2D.cpp +++ b/operators/epsilon/noarch/P2ElementwiseEpsilonAnnulusMap_0_0_toMatrix_P2ElementwiseEpsilonAnnulusMap_0_0_macro_2D.cpp @@ -45,6 +45,10 @@ + + + + @@ -58,7 +62,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseEpsilonAnnulusMap_0_0::toMatrix_macro_2D( idx_t * RESTRICT _data_dstEdge, idx_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, idx_t * RESTRICT _data_srcEdge, idx_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const +void P2ElementwiseEpsilonAnnulusMap_0_0::toMatrix_P2ElementwiseEpsilonAnnulusMap_0_0_macro_2D( idx_t * RESTRICT _data_dstEdge, idx_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, idx_t * RESTRICT _data_srcEdge, idx_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const { { const real_t _data_q_w [] = {-0.28125, 0.26041666666666669, 0.26041666666666669, 0.26041666666666669}; diff --git a/operators/epsilon/noarch/P2ElementwiseEpsilonAnnulusMap_0_1_apply_macro_2D.cpp b/operators/epsilon/noarch/P2ElementwiseEpsilonAnnulusMap_0_1_apply_P2ElementwiseEpsilonAnnulusMap_0_1_macro_2D.cpp similarity index 99% rename from operators/epsilon/noarch/P2ElementwiseEpsilonAnnulusMap_0_1_apply_macro_2D.cpp rename to operators/epsilon/noarch/P2ElementwiseEpsilonAnnulusMap_0_1_apply_P2ElementwiseEpsilonAnnulusMap_0_1_macro_2D.cpp index b0ba0ca12ccf3266ea365981b8cf0d40fb2e0b55..b6cdec48039cc6c253317495314f3b7e8cd4024c 100644 --- a/operators/epsilon/noarch/P2ElementwiseEpsilonAnnulusMap_0_1_apply_macro_2D.cpp +++ b/operators/epsilon/noarch/P2ElementwiseEpsilonAnnulusMap_0_1_apply_P2ElementwiseEpsilonAnnulusMap_0_1_macro_2D.cpp @@ -43,6 +43,10 @@ + + + + @@ -56,7 +60,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseEpsilonAnnulusMap_0_1::apply_macro_2D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const +void P2ElementwiseEpsilonAnnulusMap_0_1::apply_P2ElementwiseEpsilonAnnulusMap_0_1_macro_2D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const { { const real_t _data_q_w [] = {-0.28125, 0.26041666666666669, 0.26041666666666669, 0.26041666666666669}; diff --git a/operators/epsilon/noarch/P2ElementwiseEpsilonAnnulusMap_0_1_toMatrix_macro_2D.cpp b/operators/epsilon/noarch/P2ElementwiseEpsilonAnnulusMap_0_1_toMatrix_P2ElementwiseEpsilonAnnulusMap_0_1_macro_2D.cpp similarity index 99% rename from operators/epsilon/noarch/P2ElementwiseEpsilonAnnulusMap_0_1_toMatrix_macro_2D.cpp rename to operators/epsilon/noarch/P2ElementwiseEpsilonAnnulusMap_0_1_toMatrix_P2ElementwiseEpsilonAnnulusMap_0_1_macro_2D.cpp index 6368a6512b9cb2e5cb48edd5a3f2feaa9abd456c..ef2f5199590107df1ca310ac77359ce2046df300 100644 --- a/operators/epsilon/noarch/P2ElementwiseEpsilonAnnulusMap_0_1_toMatrix_macro_2D.cpp +++ b/operators/epsilon/noarch/P2ElementwiseEpsilonAnnulusMap_0_1_toMatrix_P2ElementwiseEpsilonAnnulusMap_0_1_macro_2D.cpp @@ -43,6 +43,10 @@ + + + + @@ -56,7 +60,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseEpsilonAnnulusMap_0_1::toMatrix_macro_2D( idx_t * RESTRICT _data_dstEdge, idx_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, idx_t * RESTRICT _data_srcEdge, idx_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const +void P2ElementwiseEpsilonAnnulusMap_0_1::toMatrix_P2ElementwiseEpsilonAnnulusMap_0_1_macro_2D( idx_t * RESTRICT _data_dstEdge, idx_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, idx_t * RESTRICT _data_srcEdge, idx_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const { { const real_t _data_q_w [] = {-0.28125, 0.26041666666666669, 0.26041666666666669, 0.26041666666666669}; diff --git a/operators/epsilon/noarch/P2ElementwiseEpsilonAnnulusMap_1_0_apply_macro_2D.cpp b/operators/epsilon/noarch/P2ElementwiseEpsilonAnnulusMap_1_0_apply_P2ElementwiseEpsilonAnnulusMap_1_0_macro_2D.cpp similarity index 99% rename from operators/epsilon/noarch/P2ElementwiseEpsilonAnnulusMap_1_0_apply_macro_2D.cpp rename to operators/epsilon/noarch/P2ElementwiseEpsilonAnnulusMap_1_0_apply_P2ElementwiseEpsilonAnnulusMap_1_0_macro_2D.cpp index 39c9b04364f2d1b370297b2bfd70d1b8dcfec148..d46b11316c58aa4d8a692474390b37a503db4abf 100644 --- a/operators/epsilon/noarch/P2ElementwiseEpsilonAnnulusMap_1_0_apply_macro_2D.cpp +++ b/operators/epsilon/noarch/P2ElementwiseEpsilonAnnulusMap_1_0_apply_P2ElementwiseEpsilonAnnulusMap_1_0_macro_2D.cpp @@ -43,6 +43,10 @@ + + + + @@ -56,7 +60,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseEpsilonAnnulusMap_1_0::apply_macro_2D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const +void P2ElementwiseEpsilonAnnulusMap_1_0::apply_P2ElementwiseEpsilonAnnulusMap_1_0_macro_2D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const { { const real_t _data_q_w [] = {-0.28125, 0.26041666666666669, 0.26041666666666669, 0.26041666666666669}; diff --git a/operators/epsilon/noarch/P2ElementwiseEpsilonAnnulusMap_1_0_toMatrix_macro_2D.cpp b/operators/epsilon/noarch/P2ElementwiseEpsilonAnnulusMap_1_0_toMatrix_P2ElementwiseEpsilonAnnulusMap_1_0_macro_2D.cpp similarity index 99% rename from operators/epsilon/noarch/P2ElementwiseEpsilonAnnulusMap_1_0_toMatrix_macro_2D.cpp rename to operators/epsilon/noarch/P2ElementwiseEpsilonAnnulusMap_1_0_toMatrix_P2ElementwiseEpsilonAnnulusMap_1_0_macro_2D.cpp index f5a609f811fe236025e24e6e4ba200965d494e18..17e0289792dc71fcabdb8b01a3c26b71e44c9a28 100644 --- a/operators/epsilon/noarch/P2ElementwiseEpsilonAnnulusMap_1_0_toMatrix_macro_2D.cpp +++ b/operators/epsilon/noarch/P2ElementwiseEpsilonAnnulusMap_1_0_toMatrix_P2ElementwiseEpsilonAnnulusMap_1_0_macro_2D.cpp @@ -43,6 +43,10 @@ + + + + @@ -56,7 +60,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseEpsilonAnnulusMap_1_0::toMatrix_macro_2D( idx_t * RESTRICT _data_dstEdge, idx_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, idx_t * RESTRICT _data_srcEdge, idx_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const +void P2ElementwiseEpsilonAnnulusMap_1_0::toMatrix_P2ElementwiseEpsilonAnnulusMap_1_0_macro_2D( idx_t * RESTRICT _data_dstEdge, idx_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, idx_t * RESTRICT _data_srcEdge, idx_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const { { const real_t _data_q_w [] = {-0.28125, 0.26041666666666669, 0.26041666666666669, 0.26041666666666669}; diff --git a/operators/epsilon/noarch/P2ElementwiseEpsilonAnnulusMap_1_1_apply_macro_2D.cpp b/operators/epsilon/noarch/P2ElementwiseEpsilonAnnulusMap_1_1_apply_P2ElementwiseEpsilonAnnulusMap_1_1_macro_2D.cpp similarity index 98% rename from operators/epsilon/noarch/P2ElementwiseEpsilonAnnulusMap_1_1_apply_macro_2D.cpp rename to operators/epsilon/noarch/P2ElementwiseEpsilonAnnulusMap_1_1_apply_P2ElementwiseEpsilonAnnulusMap_1_1_macro_2D.cpp index b6cfddabbfac1b109c1ea3dd3e1ff7116db7e94e..9ebafa72ada76841238a8a2997da9ab9d7bb6eee 100644 --- a/operators/epsilon/noarch/P2ElementwiseEpsilonAnnulusMap_1_1_apply_macro_2D.cpp +++ b/operators/epsilon/noarch/P2ElementwiseEpsilonAnnulusMap_1_1_apply_P2ElementwiseEpsilonAnnulusMap_1_1_macro_2D.cpp @@ -45,6 +45,10 @@ + + + + @@ -58,7 +62,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseEpsilonAnnulusMap_1_1::apply_macro_2D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const +void P2ElementwiseEpsilonAnnulusMap_1_1::apply_P2ElementwiseEpsilonAnnulusMap_1_1_macro_2D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const { { const real_t _data_q_w [] = {-0.28125, 0.26041666666666669, 0.26041666666666669, 0.26041666666666669}; diff --git a/operators/epsilon/noarch/P2ElementwiseEpsilonAnnulusMap_1_1_computeInverseDiagonalOperatorValues_macro_2D.cpp b/operators/epsilon/noarch/P2ElementwiseEpsilonAnnulusMap_1_1_computeInverseDiagonalOperatorValues_P2ElementwiseEpsilonAnnulusMap_1_1_macro_2D.cpp similarity index 98% rename from operators/epsilon/noarch/P2ElementwiseEpsilonAnnulusMap_1_1_computeInverseDiagonalOperatorValues_macro_2D.cpp rename to operators/epsilon/noarch/P2ElementwiseEpsilonAnnulusMap_1_1_computeInverseDiagonalOperatorValues_P2ElementwiseEpsilonAnnulusMap_1_1_macro_2D.cpp index 50bb3db14a8f7e9ab573a011435dba0b9988f56d..2a2c83dc0a378f71c17d03d23271519a19407bbc 100644 --- a/operators/epsilon/noarch/P2ElementwiseEpsilonAnnulusMap_1_1_computeInverseDiagonalOperatorValues_macro_2D.cpp +++ b/operators/epsilon/noarch/P2ElementwiseEpsilonAnnulusMap_1_1_computeInverseDiagonalOperatorValues_P2ElementwiseEpsilonAnnulusMap_1_1_macro_2D.cpp @@ -45,6 +45,10 @@ + + + + @@ -58,7 +62,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseEpsilonAnnulusMap_1_1::computeInverseDiagonalOperatorValues_macro_2D( real_t * RESTRICT _data_invDiag_Edge, real_t * RESTRICT _data_invDiag_Vertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const +void P2ElementwiseEpsilonAnnulusMap_1_1::computeInverseDiagonalOperatorValues_P2ElementwiseEpsilonAnnulusMap_1_1_macro_2D( real_t * RESTRICT _data_invDiag_Edge, real_t * RESTRICT _data_invDiag_Vertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const { { const real_t _data_q_w [] = {-0.28125, 0.26041666666666669, 0.26041666666666669, 0.26041666666666669}; diff --git a/operators/epsilon/noarch/P2ElementwiseEpsilonAnnulusMap_1_1_toMatrix_macro_2D.cpp b/operators/epsilon/noarch/P2ElementwiseEpsilonAnnulusMap_1_1_toMatrix_P2ElementwiseEpsilonAnnulusMap_1_1_macro_2D.cpp similarity index 98% rename from operators/epsilon/noarch/P2ElementwiseEpsilonAnnulusMap_1_1_toMatrix_macro_2D.cpp rename to operators/epsilon/noarch/P2ElementwiseEpsilonAnnulusMap_1_1_toMatrix_P2ElementwiseEpsilonAnnulusMap_1_1_macro_2D.cpp index ae3d2188941de94ba3dd995d05ba457fc6fe1bc3..4ff5f3ce3ad573d7481ecb806e9d97baccf1df3e 100644 --- a/operators/epsilon/noarch/P2ElementwiseEpsilonAnnulusMap_1_1_toMatrix_macro_2D.cpp +++ b/operators/epsilon/noarch/P2ElementwiseEpsilonAnnulusMap_1_1_toMatrix_P2ElementwiseEpsilonAnnulusMap_1_1_macro_2D.cpp @@ -45,6 +45,10 @@ + + + + @@ -58,7 +62,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseEpsilonAnnulusMap_1_1::toMatrix_macro_2D( idx_t * RESTRICT _data_dstEdge, idx_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, idx_t * RESTRICT _data_srcEdge, idx_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const +void P2ElementwiseEpsilonAnnulusMap_1_1::toMatrix_P2ElementwiseEpsilonAnnulusMap_1_1_macro_2D( idx_t * RESTRICT _data_dstEdge, idx_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, idx_t * RESTRICT _data_srcEdge, idx_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const { { const real_t _data_q_w [] = {-0.28125, 0.26041666666666669, 0.26041666666666669, 0.26041666666666669}; diff --git a/operators/epsilon/noarch/P2ElementwiseEpsilonIcosahedralShellMap_0_0_apply_macro_3D.cpp b/operators/epsilon/noarch/P2ElementwiseEpsilonIcosahedralShellMap_0_0_apply_P2ElementwiseEpsilonIcosahedralShellMap_0_0_macro_3D.cpp similarity index 99% rename from operators/epsilon/noarch/P2ElementwiseEpsilonIcosahedralShellMap_0_0_apply_macro_3D.cpp rename to operators/epsilon/noarch/P2ElementwiseEpsilonIcosahedralShellMap_0_0_apply_P2ElementwiseEpsilonIcosahedralShellMap_0_0_macro_3D.cpp index 8d3f4915931242002c20f664e6907c4740cce8a6..d948db60161faa797ae84cf2b2715c772e5bcb40 100644 --- a/operators/epsilon/noarch/P2ElementwiseEpsilonIcosahedralShellMap_0_0_apply_macro_3D.cpp +++ b/operators/epsilon/noarch/P2ElementwiseEpsilonIcosahedralShellMap_0_0_apply_P2ElementwiseEpsilonIcosahedralShellMap_0_0_macro_3D.cpp @@ -45,6 +45,10 @@ + + + + @@ -58,7 +62,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseEpsilonIcosahedralShellMap_0_0::apply_macro_3D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const +void P2ElementwiseEpsilonIcosahedralShellMap_0_0::apply_P2ElementwiseEpsilonIcosahedralShellMap_0_0_macro_3D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const { { const real_t _data_q_w [] = {-0.1333333333333333, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983}; diff --git a/operators/epsilon/noarch/P2ElementwiseEpsilonIcosahedralShellMap_0_0_computeInverseDiagonalOperatorValues_macro_3D.cpp b/operators/epsilon/noarch/P2ElementwiseEpsilonIcosahedralShellMap_0_0_computeInverseDiagonalOperatorValues_P2ElementwiseEpsilonIcosahedralShellMap_0_0_macro_3D.cpp similarity index 99% rename from operators/epsilon/noarch/P2ElementwiseEpsilonIcosahedralShellMap_0_0_computeInverseDiagonalOperatorValues_macro_3D.cpp rename to operators/epsilon/noarch/P2ElementwiseEpsilonIcosahedralShellMap_0_0_computeInverseDiagonalOperatorValues_P2ElementwiseEpsilonIcosahedralShellMap_0_0_macro_3D.cpp index 0429ae906b5a1a9d60616606cb9bf393537f33a2..14a150f5d3cc418c43bc5a529195e3c79125ef64 100644 --- a/operators/epsilon/noarch/P2ElementwiseEpsilonIcosahedralShellMap_0_0_computeInverseDiagonalOperatorValues_macro_3D.cpp +++ b/operators/epsilon/noarch/P2ElementwiseEpsilonIcosahedralShellMap_0_0_computeInverseDiagonalOperatorValues_P2ElementwiseEpsilonIcosahedralShellMap_0_0_macro_3D.cpp @@ -45,6 +45,10 @@ + + + + @@ -58,7 +62,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseEpsilonIcosahedralShellMap_0_0::computeInverseDiagonalOperatorValues_macro_3D( real_t * RESTRICT _data_invDiag_Edge, real_t * RESTRICT _data_invDiag_Vertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const +void P2ElementwiseEpsilonIcosahedralShellMap_0_0::computeInverseDiagonalOperatorValues_P2ElementwiseEpsilonIcosahedralShellMap_0_0_macro_3D( real_t * RESTRICT _data_invDiag_Edge, real_t * RESTRICT _data_invDiag_Vertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const { { const real_t _data_q_w [] = {-0.1333333333333333, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983}; diff --git a/operators/epsilon/noarch/P2ElementwiseEpsilonIcosahedralShellMap_0_0_toMatrix_macro_3D.cpp b/operators/epsilon/noarch/P2ElementwiseEpsilonIcosahedralShellMap_0_0_toMatrix_P2ElementwiseEpsilonIcosahedralShellMap_0_0_macro_3D.cpp similarity index 99% rename from operators/epsilon/noarch/P2ElementwiseEpsilonIcosahedralShellMap_0_0_toMatrix_macro_3D.cpp rename to operators/epsilon/noarch/P2ElementwiseEpsilonIcosahedralShellMap_0_0_toMatrix_P2ElementwiseEpsilonIcosahedralShellMap_0_0_macro_3D.cpp index 6e00075f57f1a0829ed3032501226b53a65d20c0..24630d2cb59d7bcc62bd9713bd2a0d901892f485 100644 --- a/operators/epsilon/noarch/P2ElementwiseEpsilonIcosahedralShellMap_0_0_toMatrix_macro_3D.cpp +++ b/operators/epsilon/noarch/P2ElementwiseEpsilonIcosahedralShellMap_0_0_toMatrix_P2ElementwiseEpsilonIcosahedralShellMap_0_0_macro_3D.cpp @@ -45,6 +45,10 @@ + + + + @@ -58,7 +62,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseEpsilonIcosahedralShellMap_0_0::toMatrix_macro_3D( idx_t * RESTRICT _data_dstEdge, idx_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, idx_t * RESTRICT _data_srcEdge, idx_t * RESTRICT _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const +void P2ElementwiseEpsilonIcosahedralShellMap_0_0::toMatrix_P2ElementwiseEpsilonIcosahedralShellMap_0_0_macro_3D( idx_t * RESTRICT _data_dstEdge, idx_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, idx_t * RESTRICT _data_srcEdge, idx_t * RESTRICT _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const { { const real_t _data_q_w [] = {-0.1333333333333333, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983}; diff --git a/operators/epsilon/noarch/P2ElementwiseEpsilonIcosahedralShellMap_0_1_apply_macro_3D.cpp b/operators/epsilon/noarch/P2ElementwiseEpsilonIcosahedralShellMap_0_1_apply_P2ElementwiseEpsilonIcosahedralShellMap_0_1_macro_3D.cpp similarity index 99% rename from operators/epsilon/noarch/P2ElementwiseEpsilonIcosahedralShellMap_0_1_apply_macro_3D.cpp rename to operators/epsilon/noarch/P2ElementwiseEpsilonIcosahedralShellMap_0_1_apply_P2ElementwiseEpsilonIcosahedralShellMap_0_1_macro_3D.cpp index 9c2f8b1311845742a2e1a6434a9e64a2636b54bc..0d6cc1524bf28342c4d31083d62431a543d28502 100644 --- a/operators/epsilon/noarch/P2ElementwiseEpsilonIcosahedralShellMap_0_1_apply_macro_3D.cpp +++ b/operators/epsilon/noarch/P2ElementwiseEpsilonIcosahedralShellMap_0_1_apply_P2ElementwiseEpsilonIcosahedralShellMap_0_1_macro_3D.cpp @@ -43,6 +43,10 @@ + + + + @@ -56,7 +60,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseEpsilonIcosahedralShellMap_0_1::apply_macro_3D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const +void P2ElementwiseEpsilonIcosahedralShellMap_0_1::apply_P2ElementwiseEpsilonIcosahedralShellMap_0_1_macro_3D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const { { const real_t _data_q_w [] = {-0.1333333333333333, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983}; diff --git a/operators/epsilon/noarch/P2ElementwiseEpsilonIcosahedralShellMap_0_1_toMatrix_macro_3D.cpp b/operators/epsilon/noarch/P2ElementwiseEpsilonIcosahedralShellMap_0_1_toMatrix_P2ElementwiseEpsilonIcosahedralShellMap_0_1_macro_3D.cpp similarity index 99% rename from operators/epsilon/noarch/P2ElementwiseEpsilonIcosahedralShellMap_0_1_toMatrix_macro_3D.cpp rename to operators/epsilon/noarch/P2ElementwiseEpsilonIcosahedralShellMap_0_1_toMatrix_P2ElementwiseEpsilonIcosahedralShellMap_0_1_macro_3D.cpp index 9746d2389718625042db7743c43d86f0fac5d795..cf07447dbc75247fd1f4366cdf8926ddf981fd95 100644 --- a/operators/epsilon/noarch/P2ElementwiseEpsilonIcosahedralShellMap_0_1_toMatrix_macro_3D.cpp +++ b/operators/epsilon/noarch/P2ElementwiseEpsilonIcosahedralShellMap_0_1_toMatrix_P2ElementwiseEpsilonIcosahedralShellMap_0_1_macro_3D.cpp @@ -43,6 +43,10 @@ + + + + @@ -56,7 +60,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseEpsilonIcosahedralShellMap_0_1::toMatrix_macro_3D( idx_t * RESTRICT _data_dstEdge, idx_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, idx_t * RESTRICT _data_srcEdge, idx_t * RESTRICT _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const +void P2ElementwiseEpsilonIcosahedralShellMap_0_1::toMatrix_P2ElementwiseEpsilonIcosahedralShellMap_0_1_macro_3D( idx_t * RESTRICT _data_dstEdge, idx_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, idx_t * RESTRICT _data_srcEdge, idx_t * RESTRICT _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const { { const real_t _data_q_w [] = {-0.1333333333333333, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983}; diff --git a/operators/epsilon/noarch/P2ElementwiseEpsilonIcosahedralShellMap_0_2_apply_macro_3D.cpp b/operators/epsilon/noarch/P2ElementwiseEpsilonIcosahedralShellMap_0_2_apply_P2ElementwiseEpsilonIcosahedralShellMap_0_2_macro_3D.cpp similarity index 99% rename from operators/epsilon/noarch/P2ElementwiseEpsilonIcosahedralShellMap_0_2_apply_macro_3D.cpp rename to operators/epsilon/noarch/P2ElementwiseEpsilonIcosahedralShellMap_0_2_apply_P2ElementwiseEpsilonIcosahedralShellMap_0_2_macro_3D.cpp index 40e85e4935d4741bc49f26edbc70fd20a49d5285..093daae6d7d71fb6644c33c5027b8d4cac7e8fb6 100644 --- a/operators/epsilon/noarch/P2ElementwiseEpsilonIcosahedralShellMap_0_2_apply_macro_3D.cpp +++ b/operators/epsilon/noarch/P2ElementwiseEpsilonIcosahedralShellMap_0_2_apply_P2ElementwiseEpsilonIcosahedralShellMap_0_2_macro_3D.cpp @@ -43,6 +43,10 @@ + + + + @@ -56,7 +60,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseEpsilonIcosahedralShellMap_0_2::apply_macro_3D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const +void P2ElementwiseEpsilonIcosahedralShellMap_0_2::apply_P2ElementwiseEpsilonIcosahedralShellMap_0_2_macro_3D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const { { const real_t _data_q_w [] = {-0.1333333333333333, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983}; diff --git a/operators/epsilon/noarch/P2ElementwiseEpsilonIcosahedralShellMap_0_2_toMatrix_macro_3D.cpp b/operators/epsilon/noarch/P2ElementwiseEpsilonIcosahedralShellMap_0_2_toMatrix_P2ElementwiseEpsilonIcosahedralShellMap_0_2_macro_3D.cpp similarity index 99% rename from operators/epsilon/noarch/P2ElementwiseEpsilonIcosahedralShellMap_0_2_toMatrix_macro_3D.cpp rename to operators/epsilon/noarch/P2ElementwiseEpsilonIcosahedralShellMap_0_2_toMatrix_P2ElementwiseEpsilonIcosahedralShellMap_0_2_macro_3D.cpp index 2d1ae586d14d3851147d5132a6bffa85f7385698..933f47e117ee85e322fbe86aa9551f1642146b09 100644 --- a/operators/epsilon/noarch/P2ElementwiseEpsilonIcosahedralShellMap_0_2_toMatrix_macro_3D.cpp +++ b/operators/epsilon/noarch/P2ElementwiseEpsilonIcosahedralShellMap_0_2_toMatrix_P2ElementwiseEpsilonIcosahedralShellMap_0_2_macro_3D.cpp @@ -43,6 +43,10 @@ + + + + @@ -56,7 +60,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseEpsilonIcosahedralShellMap_0_2::toMatrix_macro_3D( idx_t * RESTRICT _data_dstEdge, idx_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, idx_t * RESTRICT _data_srcEdge, idx_t * RESTRICT _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const +void P2ElementwiseEpsilonIcosahedralShellMap_0_2::toMatrix_P2ElementwiseEpsilonIcosahedralShellMap_0_2_macro_3D( idx_t * RESTRICT _data_dstEdge, idx_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, idx_t * RESTRICT _data_srcEdge, idx_t * RESTRICT _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const { { const real_t _data_q_w [] = {-0.1333333333333333, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983}; diff --git a/operators/epsilon/noarch/P2ElementwiseEpsilonIcosahedralShellMap_1_0_apply_macro_3D.cpp b/operators/epsilon/noarch/P2ElementwiseEpsilonIcosahedralShellMap_1_0_apply_P2ElementwiseEpsilonIcosahedralShellMap_1_0_macro_3D.cpp similarity index 99% rename from operators/epsilon/noarch/P2ElementwiseEpsilonIcosahedralShellMap_1_0_apply_macro_3D.cpp rename to operators/epsilon/noarch/P2ElementwiseEpsilonIcosahedralShellMap_1_0_apply_P2ElementwiseEpsilonIcosahedralShellMap_1_0_macro_3D.cpp index a3c8f8317622412572f21988a63e4ca63e93a2ef..8ec79a9fd9072ccc1672e8b0d633fa9c3f0c42b3 100644 --- a/operators/epsilon/noarch/P2ElementwiseEpsilonIcosahedralShellMap_1_0_apply_macro_3D.cpp +++ b/operators/epsilon/noarch/P2ElementwiseEpsilonIcosahedralShellMap_1_0_apply_P2ElementwiseEpsilonIcosahedralShellMap_1_0_macro_3D.cpp @@ -43,6 +43,10 @@ + + + + @@ -56,7 +60,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseEpsilonIcosahedralShellMap_1_0::apply_macro_3D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const +void P2ElementwiseEpsilonIcosahedralShellMap_1_0::apply_P2ElementwiseEpsilonIcosahedralShellMap_1_0_macro_3D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const { { const real_t _data_q_w [] = {-0.1333333333333333, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983}; diff --git a/operators/epsilon/noarch/P2ElementwiseEpsilonIcosahedralShellMap_1_0_toMatrix_macro_3D.cpp b/operators/epsilon/noarch/P2ElementwiseEpsilonIcosahedralShellMap_1_0_toMatrix_P2ElementwiseEpsilonIcosahedralShellMap_1_0_macro_3D.cpp similarity index 99% rename from operators/epsilon/noarch/P2ElementwiseEpsilonIcosahedralShellMap_1_0_toMatrix_macro_3D.cpp rename to operators/epsilon/noarch/P2ElementwiseEpsilonIcosahedralShellMap_1_0_toMatrix_P2ElementwiseEpsilonIcosahedralShellMap_1_0_macro_3D.cpp index 6ea180e2fdec870568eb165eadf187ee54c21fdf..35c9f7c5eacd16ee420c956dcde7fe29a6ef7ca6 100644 --- a/operators/epsilon/noarch/P2ElementwiseEpsilonIcosahedralShellMap_1_0_toMatrix_macro_3D.cpp +++ b/operators/epsilon/noarch/P2ElementwiseEpsilonIcosahedralShellMap_1_0_toMatrix_P2ElementwiseEpsilonIcosahedralShellMap_1_0_macro_3D.cpp @@ -43,6 +43,10 @@ + + + + @@ -56,7 +60,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseEpsilonIcosahedralShellMap_1_0::toMatrix_macro_3D( idx_t * RESTRICT _data_dstEdge, idx_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, idx_t * RESTRICT _data_srcEdge, idx_t * RESTRICT _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const +void P2ElementwiseEpsilonIcosahedralShellMap_1_0::toMatrix_P2ElementwiseEpsilonIcosahedralShellMap_1_0_macro_3D( idx_t * RESTRICT _data_dstEdge, idx_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, idx_t * RESTRICT _data_srcEdge, idx_t * RESTRICT _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const { { const real_t _data_q_w [] = {-0.1333333333333333, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983}; diff --git a/operators/epsilon/noarch/P2ElementwiseEpsilonIcosahedralShellMap_1_1_apply_macro_3D.cpp b/operators/epsilon/noarch/P2ElementwiseEpsilonIcosahedralShellMap_1_1_apply_P2ElementwiseEpsilonIcosahedralShellMap_1_1_macro_3D.cpp similarity index 99% rename from operators/epsilon/noarch/P2ElementwiseEpsilonIcosahedralShellMap_1_1_apply_macro_3D.cpp rename to operators/epsilon/noarch/P2ElementwiseEpsilonIcosahedralShellMap_1_1_apply_P2ElementwiseEpsilonIcosahedralShellMap_1_1_macro_3D.cpp index b8fa2f15711d3d0dd01897bed65b0239367a7eca..92ad7a474173d29870e7e27668f7f01dd83774ac 100644 --- a/operators/epsilon/noarch/P2ElementwiseEpsilonIcosahedralShellMap_1_1_apply_macro_3D.cpp +++ b/operators/epsilon/noarch/P2ElementwiseEpsilonIcosahedralShellMap_1_1_apply_P2ElementwiseEpsilonIcosahedralShellMap_1_1_macro_3D.cpp @@ -45,6 +45,10 @@ + + + + @@ -58,7 +62,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseEpsilonIcosahedralShellMap_1_1::apply_macro_3D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const +void P2ElementwiseEpsilonIcosahedralShellMap_1_1::apply_P2ElementwiseEpsilonIcosahedralShellMap_1_1_macro_3D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const { { const real_t _data_q_w [] = {-0.1333333333333333, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983}; diff --git a/operators/epsilon/noarch/P2ElementwiseEpsilonIcosahedralShellMap_1_1_computeInverseDiagonalOperatorValues_macro_3D.cpp b/operators/epsilon/noarch/P2ElementwiseEpsilonIcosahedralShellMap_1_1_computeInverseDiagonalOperatorValues_P2ElementwiseEpsilonIcosahedralShellMap_1_1_macro_3D.cpp similarity index 99% rename from operators/epsilon/noarch/P2ElementwiseEpsilonIcosahedralShellMap_1_1_computeInverseDiagonalOperatorValues_macro_3D.cpp rename to operators/epsilon/noarch/P2ElementwiseEpsilonIcosahedralShellMap_1_1_computeInverseDiagonalOperatorValues_P2ElementwiseEpsilonIcosahedralShellMap_1_1_macro_3D.cpp index d6ed3aaa535f6e41ad5e66e6ce9d761f5ed557a3..a2803b26873837f5677d4cd56b80b73961a3a43a 100644 --- a/operators/epsilon/noarch/P2ElementwiseEpsilonIcosahedralShellMap_1_1_computeInverseDiagonalOperatorValues_macro_3D.cpp +++ b/operators/epsilon/noarch/P2ElementwiseEpsilonIcosahedralShellMap_1_1_computeInverseDiagonalOperatorValues_P2ElementwiseEpsilonIcosahedralShellMap_1_1_macro_3D.cpp @@ -45,6 +45,10 @@ + + + + @@ -58,7 +62,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseEpsilonIcosahedralShellMap_1_1::computeInverseDiagonalOperatorValues_macro_3D( real_t * RESTRICT _data_invDiag_Edge, real_t * RESTRICT _data_invDiag_Vertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const +void P2ElementwiseEpsilonIcosahedralShellMap_1_1::computeInverseDiagonalOperatorValues_P2ElementwiseEpsilonIcosahedralShellMap_1_1_macro_3D( real_t * RESTRICT _data_invDiag_Edge, real_t * RESTRICT _data_invDiag_Vertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const { { const real_t _data_q_w [] = {-0.1333333333333333, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983}; diff --git a/operators/epsilon/noarch/P2ElementwiseEpsilonIcosahedralShellMap_1_1_toMatrix_macro_3D.cpp b/operators/epsilon/noarch/P2ElementwiseEpsilonIcosahedralShellMap_1_1_toMatrix_P2ElementwiseEpsilonIcosahedralShellMap_1_1_macro_3D.cpp similarity index 99% rename from operators/epsilon/noarch/P2ElementwiseEpsilonIcosahedralShellMap_1_1_toMatrix_macro_3D.cpp rename to operators/epsilon/noarch/P2ElementwiseEpsilonIcosahedralShellMap_1_1_toMatrix_P2ElementwiseEpsilonIcosahedralShellMap_1_1_macro_3D.cpp index 913751dd1d20e788fff39a0db1540c2c63fa4f29..6b80cdad43b2a0a941dc1b43dd0cd8068180a733 100644 --- a/operators/epsilon/noarch/P2ElementwiseEpsilonIcosahedralShellMap_1_1_toMatrix_macro_3D.cpp +++ b/operators/epsilon/noarch/P2ElementwiseEpsilonIcosahedralShellMap_1_1_toMatrix_P2ElementwiseEpsilonIcosahedralShellMap_1_1_macro_3D.cpp @@ -45,6 +45,10 @@ + + + + @@ -58,7 +62,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseEpsilonIcosahedralShellMap_1_1::toMatrix_macro_3D( idx_t * RESTRICT _data_dstEdge, idx_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, idx_t * RESTRICT _data_srcEdge, idx_t * RESTRICT _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const +void P2ElementwiseEpsilonIcosahedralShellMap_1_1::toMatrix_P2ElementwiseEpsilonIcosahedralShellMap_1_1_macro_3D( idx_t * RESTRICT _data_dstEdge, idx_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, idx_t * RESTRICT _data_srcEdge, idx_t * RESTRICT _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const { { const real_t _data_q_w [] = {-0.1333333333333333, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983}; diff --git a/operators/epsilon/noarch/P2ElementwiseEpsilonIcosahedralShellMap_1_2_apply_macro_3D.cpp b/operators/epsilon/noarch/P2ElementwiseEpsilonIcosahedralShellMap_1_2_apply_P2ElementwiseEpsilonIcosahedralShellMap_1_2_macro_3D.cpp similarity index 99% rename from operators/epsilon/noarch/P2ElementwiseEpsilonIcosahedralShellMap_1_2_apply_macro_3D.cpp rename to operators/epsilon/noarch/P2ElementwiseEpsilonIcosahedralShellMap_1_2_apply_P2ElementwiseEpsilonIcosahedralShellMap_1_2_macro_3D.cpp index 1d04f1a4f77734960fff1b5e18b2cb454d22a806..f1756dbf5a2a6a0682041219f7ba265d07d9aff6 100644 --- a/operators/epsilon/noarch/P2ElementwiseEpsilonIcosahedralShellMap_1_2_apply_macro_3D.cpp +++ b/operators/epsilon/noarch/P2ElementwiseEpsilonIcosahedralShellMap_1_2_apply_P2ElementwiseEpsilonIcosahedralShellMap_1_2_macro_3D.cpp @@ -43,6 +43,10 @@ + + + + @@ -56,7 +60,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseEpsilonIcosahedralShellMap_1_2::apply_macro_3D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const +void P2ElementwiseEpsilonIcosahedralShellMap_1_2::apply_P2ElementwiseEpsilonIcosahedralShellMap_1_2_macro_3D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const { { const real_t _data_q_w [] = {-0.1333333333333333, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983}; diff --git a/operators/epsilon/noarch/P2ElementwiseEpsilonIcosahedralShellMap_1_2_toMatrix_macro_3D.cpp b/operators/epsilon/noarch/P2ElementwiseEpsilonIcosahedralShellMap_1_2_toMatrix_P2ElementwiseEpsilonIcosahedralShellMap_1_2_macro_3D.cpp similarity index 99% rename from operators/epsilon/noarch/P2ElementwiseEpsilonIcosahedralShellMap_1_2_toMatrix_macro_3D.cpp rename to operators/epsilon/noarch/P2ElementwiseEpsilonIcosahedralShellMap_1_2_toMatrix_P2ElementwiseEpsilonIcosahedralShellMap_1_2_macro_3D.cpp index 4a33acf83b5fc17c1ec940d83737fa0a0c7967ad..d44e9560875eba77c5c2aafe4b595ae541a29964 100644 --- a/operators/epsilon/noarch/P2ElementwiseEpsilonIcosahedralShellMap_1_2_toMatrix_macro_3D.cpp +++ b/operators/epsilon/noarch/P2ElementwiseEpsilonIcosahedralShellMap_1_2_toMatrix_P2ElementwiseEpsilonIcosahedralShellMap_1_2_macro_3D.cpp @@ -43,6 +43,10 @@ + + + + @@ -56,7 +60,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseEpsilonIcosahedralShellMap_1_2::toMatrix_macro_3D( idx_t * RESTRICT _data_dstEdge, idx_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, idx_t * RESTRICT _data_srcEdge, idx_t * RESTRICT _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const +void P2ElementwiseEpsilonIcosahedralShellMap_1_2::toMatrix_P2ElementwiseEpsilonIcosahedralShellMap_1_2_macro_3D( idx_t * RESTRICT _data_dstEdge, idx_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, idx_t * RESTRICT _data_srcEdge, idx_t * RESTRICT _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const { { const real_t _data_q_w [] = {-0.1333333333333333, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983}; diff --git a/operators/epsilon/noarch/P2ElementwiseEpsilonIcosahedralShellMap_2_0_apply_macro_3D.cpp b/operators/epsilon/noarch/P2ElementwiseEpsilonIcosahedralShellMap_2_0_apply_P2ElementwiseEpsilonIcosahedralShellMap_2_0_macro_3D.cpp similarity index 99% rename from operators/epsilon/noarch/P2ElementwiseEpsilonIcosahedralShellMap_2_0_apply_macro_3D.cpp rename to operators/epsilon/noarch/P2ElementwiseEpsilonIcosahedralShellMap_2_0_apply_P2ElementwiseEpsilonIcosahedralShellMap_2_0_macro_3D.cpp index 19f5036b86e75f7fa5187aec81b31fe585236eac..59e820346769fa8b5fa38c9b83bcb1e346c9d97d 100644 --- a/operators/epsilon/noarch/P2ElementwiseEpsilonIcosahedralShellMap_2_0_apply_macro_3D.cpp +++ b/operators/epsilon/noarch/P2ElementwiseEpsilonIcosahedralShellMap_2_0_apply_P2ElementwiseEpsilonIcosahedralShellMap_2_0_macro_3D.cpp @@ -43,6 +43,10 @@ + + + + @@ -56,7 +60,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseEpsilonIcosahedralShellMap_2_0::apply_macro_3D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const +void P2ElementwiseEpsilonIcosahedralShellMap_2_0::apply_P2ElementwiseEpsilonIcosahedralShellMap_2_0_macro_3D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const { { const real_t _data_q_w [] = {-0.1333333333333333, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983}; diff --git a/operators/epsilon/noarch/P2ElementwiseEpsilonIcosahedralShellMap_2_0_toMatrix_macro_3D.cpp b/operators/epsilon/noarch/P2ElementwiseEpsilonIcosahedralShellMap_2_0_toMatrix_P2ElementwiseEpsilonIcosahedralShellMap_2_0_macro_3D.cpp similarity index 99% rename from operators/epsilon/noarch/P2ElementwiseEpsilonIcosahedralShellMap_2_0_toMatrix_macro_3D.cpp rename to operators/epsilon/noarch/P2ElementwiseEpsilonIcosahedralShellMap_2_0_toMatrix_P2ElementwiseEpsilonIcosahedralShellMap_2_0_macro_3D.cpp index 246bf4b2da79f5d19f2862a78336950b461aa166..4472c135f937213142c571e638710b3749539684 100644 --- a/operators/epsilon/noarch/P2ElementwiseEpsilonIcosahedralShellMap_2_0_toMatrix_macro_3D.cpp +++ b/operators/epsilon/noarch/P2ElementwiseEpsilonIcosahedralShellMap_2_0_toMatrix_P2ElementwiseEpsilonIcosahedralShellMap_2_0_macro_3D.cpp @@ -43,6 +43,10 @@ + + + + @@ -56,7 +60,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseEpsilonIcosahedralShellMap_2_0::toMatrix_macro_3D( idx_t * RESTRICT _data_dstEdge, idx_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, idx_t * RESTRICT _data_srcEdge, idx_t * RESTRICT _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const +void P2ElementwiseEpsilonIcosahedralShellMap_2_0::toMatrix_P2ElementwiseEpsilonIcosahedralShellMap_2_0_macro_3D( idx_t * RESTRICT _data_dstEdge, idx_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, idx_t * RESTRICT _data_srcEdge, idx_t * RESTRICT _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const { { const real_t _data_q_w [] = {-0.1333333333333333, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983}; diff --git a/operators/epsilon/noarch/P2ElementwiseEpsilonIcosahedralShellMap_2_1_apply_macro_3D.cpp b/operators/epsilon/noarch/P2ElementwiseEpsilonIcosahedralShellMap_2_1_apply_P2ElementwiseEpsilonIcosahedralShellMap_2_1_macro_3D.cpp similarity index 99% rename from operators/epsilon/noarch/P2ElementwiseEpsilonIcosahedralShellMap_2_1_apply_macro_3D.cpp rename to operators/epsilon/noarch/P2ElementwiseEpsilonIcosahedralShellMap_2_1_apply_P2ElementwiseEpsilonIcosahedralShellMap_2_1_macro_3D.cpp index 4839662b38c60fdbb160424cffe5b232247c5b5b..3697fa5a2e46407bc06973046e054f38781bcb23 100644 --- a/operators/epsilon/noarch/P2ElementwiseEpsilonIcosahedralShellMap_2_1_apply_macro_3D.cpp +++ b/operators/epsilon/noarch/P2ElementwiseEpsilonIcosahedralShellMap_2_1_apply_P2ElementwiseEpsilonIcosahedralShellMap_2_1_macro_3D.cpp @@ -43,6 +43,10 @@ + + + + @@ -56,7 +60,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseEpsilonIcosahedralShellMap_2_1::apply_macro_3D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const +void P2ElementwiseEpsilonIcosahedralShellMap_2_1::apply_P2ElementwiseEpsilonIcosahedralShellMap_2_1_macro_3D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const { { const real_t _data_q_w [] = {-0.1333333333333333, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983}; diff --git a/operators/epsilon/noarch/P2ElementwiseEpsilonIcosahedralShellMap_2_1_toMatrix_macro_3D.cpp b/operators/epsilon/noarch/P2ElementwiseEpsilonIcosahedralShellMap_2_1_toMatrix_P2ElementwiseEpsilonIcosahedralShellMap_2_1_macro_3D.cpp similarity index 99% rename from operators/epsilon/noarch/P2ElementwiseEpsilonIcosahedralShellMap_2_1_toMatrix_macro_3D.cpp rename to operators/epsilon/noarch/P2ElementwiseEpsilonIcosahedralShellMap_2_1_toMatrix_P2ElementwiseEpsilonIcosahedralShellMap_2_1_macro_3D.cpp index ac3e7fe12495445b4903aa558a940381c2ed0314..f6eaedfd38e58a798b96dc952dc3d9af7c5bfb20 100644 --- a/operators/epsilon/noarch/P2ElementwiseEpsilonIcosahedralShellMap_2_1_toMatrix_macro_3D.cpp +++ b/operators/epsilon/noarch/P2ElementwiseEpsilonIcosahedralShellMap_2_1_toMatrix_P2ElementwiseEpsilonIcosahedralShellMap_2_1_macro_3D.cpp @@ -43,6 +43,10 @@ + + + + @@ -56,7 +60,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseEpsilonIcosahedralShellMap_2_1::toMatrix_macro_3D( idx_t * RESTRICT _data_dstEdge, idx_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, idx_t * RESTRICT _data_srcEdge, idx_t * RESTRICT _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const +void P2ElementwiseEpsilonIcosahedralShellMap_2_1::toMatrix_P2ElementwiseEpsilonIcosahedralShellMap_2_1_macro_3D( idx_t * RESTRICT _data_dstEdge, idx_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, idx_t * RESTRICT _data_srcEdge, idx_t * RESTRICT _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const { { const real_t _data_q_w [] = {-0.1333333333333333, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983}; diff --git a/operators/epsilon/noarch/P2ElementwiseEpsilonIcosahedralShellMap_2_2_apply_macro_3D.cpp b/operators/epsilon/noarch/P2ElementwiseEpsilonIcosahedralShellMap_2_2_apply_P2ElementwiseEpsilonIcosahedralShellMap_2_2_macro_3D.cpp similarity index 99% rename from operators/epsilon/noarch/P2ElementwiseEpsilonIcosahedralShellMap_2_2_apply_macro_3D.cpp rename to operators/epsilon/noarch/P2ElementwiseEpsilonIcosahedralShellMap_2_2_apply_P2ElementwiseEpsilonIcosahedralShellMap_2_2_macro_3D.cpp index f35664d07720a433bb5ba8caf2304faadb1862d3..81b2f33830a35e583c2fde35bf5378928154e32f 100644 --- a/operators/epsilon/noarch/P2ElementwiseEpsilonIcosahedralShellMap_2_2_apply_macro_3D.cpp +++ b/operators/epsilon/noarch/P2ElementwiseEpsilonIcosahedralShellMap_2_2_apply_P2ElementwiseEpsilonIcosahedralShellMap_2_2_macro_3D.cpp @@ -45,6 +45,10 @@ + + + + @@ -58,7 +62,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseEpsilonIcosahedralShellMap_2_2::apply_macro_3D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const +void P2ElementwiseEpsilonIcosahedralShellMap_2_2::apply_P2ElementwiseEpsilonIcosahedralShellMap_2_2_macro_3D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const { { const real_t _data_q_w [] = {-0.1333333333333333, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983}; diff --git a/operators/epsilon/noarch/P2ElementwiseEpsilonIcosahedralShellMap_2_2_computeInverseDiagonalOperatorValues_macro_3D.cpp b/operators/epsilon/noarch/P2ElementwiseEpsilonIcosahedralShellMap_2_2_computeInverseDiagonalOperatorValues_P2ElementwiseEpsilonIcosahedralShellMap_2_2_macro_3D.cpp similarity index 99% rename from operators/epsilon/noarch/P2ElementwiseEpsilonIcosahedralShellMap_2_2_computeInverseDiagonalOperatorValues_macro_3D.cpp rename to operators/epsilon/noarch/P2ElementwiseEpsilonIcosahedralShellMap_2_2_computeInverseDiagonalOperatorValues_P2ElementwiseEpsilonIcosahedralShellMap_2_2_macro_3D.cpp index 995e8d7d9633207b4426967b9535c9c948fb9582..5c8cadef3f01cf5974e559a52887112ca9532e46 100644 --- a/operators/epsilon/noarch/P2ElementwiseEpsilonIcosahedralShellMap_2_2_computeInverseDiagonalOperatorValues_macro_3D.cpp +++ b/operators/epsilon/noarch/P2ElementwiseEpsilonIcosahedralShellMap_2_2_computeInverseDiagonalOperatorValues_P2ElementwiseEpsilonIcosahedralShellMap_2_2_macro_3D.cpp @@ -45,6 +45,10 @@ + + + + @@ -58,7 +62,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseEpsilonIcosahedralShellMap_2_2::computeInverseDiagonalOperatorValues_macro_3D( real_t * RESTRICT _data_invDiag_Edge, real_t * RESTRICT _data_invDiag_Vertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const +void P2ElementwiseEpsilonIcosahedralShellMap_2_2::computeInverseDiagonalOperatorValues_P2ElementwiseEpsilonIcosahedralShellMap_2_2_macro_3D( real_t * RESTRICT _data_invDiag_Edge, real_t * RESTRICT _data_invDiag_Vertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const { { const real_t _data_q_w [] = {-0.1333333333333333, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983}; diff --git a/operators/epsilon/noarch/P2ElementwiseEpsilonIcosahedralShellMap_2_2_toMatrix_macro_3D.cpp b/operators/epsilon/noarch/P2ElementwiseEpsilonIcosahedralShellMap_2_2_toMatrix_P2ElementwiseEpsilonIcosahedralShellMap_2_2_macro_3D.cpp similarity index 99% rename from operators/epsilon/noarch/P2ElementwiseEpsilonIcosahedralShellMap_2_2_toMatrix_macro_3D.cpp rename to operators/epsilon/noarch/P2ElementwiseEpsilonIcosahedralShellMap_2_2_toMatrix_P2ElementwiseEpsilonIcosahedralShellMap_2_2_macro_3D.cpp index 48ccdc57b5b2ba7243a93520f1f7086a3935e9d5..4a401e54e6708c4a397337f89816a4a7f0b17ba4 100644 --- a/operators/epsilon/noarch/P2ElementwiseEpsilonIcosahedralShellMap_2_2_toMatrix_macro_3D.cpp +++ b/operators/epsilon/noarch/P2ElementwiseEpsilonIcosahedralShellMap_2_2_toMatrix_P2ElementwiseEpsilonIcosahedralShellMap_2_2_macro_3D.cpp @@ -45,6 +45,10 @@ + + + + @@ -58,7 +62,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseEpsilonIcosahedralShellMap_2_2::toMatrix_macro_3D( idx_t * RESTRICT _data_dstEdge, idx_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, idx_t * RESTRICT _data_srcEdge, idx_t * RESTRICT _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const +void P2ElementwiseEpsilonIcosahedralShellMap_2_2::toMatrix_P2ElementwiseEpsilonIcosahedralShellMap_2_2_macro_3D( idx_t * RESTRICT _data_dstEdge, idx_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, idx_t * RESTRICT _data_srcEdge, idx_t * RESTRICT _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const { { const real_t _data_q_w [] = {-0.1333333333333333, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983}; diff --git a/operators/epsilon/noarch/P2ElementwiseEpsilon_0_0_apply_macro_2D.cpp b/operators/epsilon/noarch/P2ElementwiseEpsilon_0_0_apply_P2ElementwiseEpsilon_0_0_macro_2D.cpp similarity index 99% rename from operators/epsilon/noarch/P2ElementwiseEpsilon_0_0_apply_macro_2D.cpp rename to operators/epsilon/noarch/P2ElementwiseEpsilon_0_0_apply_P2ElementwiseEpsilon_0_0_macro_2D.cpp index a8a5ea94a0266fb9cc957ac2f4576a9a787a93b3..e7f157ffed584b3bc3ef9861ab4540f64ca9f700 100644 --- a/operators/epsilon/noarch/P2ElementwiseEpsilon_0_0_apply_macro_2D.cpp +++ b/operators/epsilon/noarch/P2ElementwiseEpsilon_0_0_apply_P2ElementwiseEpsilon_0_0_macro_2D.cpp @@ -43,6 +43,10 @@ + + + + @@ -56,7 +60,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseEpsilon_0_0::apply_macro_2D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P2ElementwiseEpsilon_0_0::apply_P2ElementwiseEpsilon_0_0_macro_2D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t _data_q_w [] = {0.16666666666666666, 0.16666666666666666, 0.16666666666666666}; diff --git a/operators/epsilon/noarch/P2ElementwiseEpsilon_0_0_apply_macro_3D.cpp b/operators/epsilon/noarch/P2ElementwiseEpsilon_0_0_apply_P2ElementwiseEpsilon_0_0_macro_3D.cpp similarity index 99% rename from operators/epsilon/noarch/P2ElementwiseEpsilon_0_0_apply_macro_3D.cpp rename to operators/epsilon/noarch/P2ElementwiseEpsilon_0_0_apply_P2ElementwiseEpsilon_0_0_macro_3D.cpp index 6a6c83f8564e359f21ae7d27e89f2dc0a3f6464b..48e29e4aeee1c9cf524bf79c39578c338228cf0e 100644 --- a/operators/epsilon/noarch/P2ElementwiseEpsilon_0_0_apply_macro_3D.cpp +++ b/operators/epsilon/noarch/P2ElementwiseEpsilon_0_0_apply_P2ElementwiseEpsilon_0_0_macro_3D.cpp @@ -43,6 +43,10 @@ + + + + @@ -56,7 +60,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseEpsilon_0_0::apply_macro_3D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P2ElementwiseEpsilon_0_0::apply_P2ElementwiseEpsilon_0_0_macro_3D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t _data_q_w [] = {0.041666666666666657, 0.041666666666666657, 0.041666666666666657, 0.041666666666666657}; diff --git a/operators/epsilon/noarch/P2ElementwiseEpsilon_0_0_computeInverseDiagonalOperatorValues_macro_2D.cpp b/operators/epsilon/noarch/P2ElementwiseEpsilon_0_0_computeInverseDiagonalOperatorValues_P2ElementwiseEpsilon_0_0_macro_2D.cpp similarity index 99% rename from operators/epsilon/noarch/P2ElementwiseEpsilon_0_0_computeInverseDiagonalOperatorValues_macro_2D.cpp rename to operators/epsilon/noarch/P2ElementwiseEpsilon_0_0_computeInverseDiagonalOperatorValues_P2ElementwiseEpsilon_0_0_macro_2D.cpp index 29960423a07940ef19cee960f7530f9a5203d995..03bb47dded32d6f22769bcf0be54f62950dac810 100644 --- a/operators/epsilon/noarch/P2ElementwiseEpsilon_0_0_computeInverseDiagonalOperatorValues_macro_2D.cpp +++ b/operators/epsilon/noarch/P2ElementwiseEpsilon_0_0_computeInverseDiagonalOperatorValues_P2ElementwiseEpsilon_0_0_macro_2D.cpp @@ -43,6 +43,10 @@ + + + + @@ -56,7 +60,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseEpsilon_0_0::computeInverseDiagonalOperatorValues_macro_2D( real_t * RESTRICT _data_invDiag_Edge, real_t * RESTRICT _data_invDiag_Vertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P2ElementwiseEpsilon_0_0::computeInverseDiagonalOperatorValues_P2ElementwiseEpsilon_0_0_macro_2D( real_t * RESTRICT _data_invDiag_Edge, real_t * RESTRICT _data_invDiag_Vertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t _data_q_w [] = {0.16666666666666666, 0.16666666666666666, 0.16666666666666666}; diff --git a/operators/epsilon/noarch/P2ElementwiseEpsilon_0_0_computeInverseDiagonalOperatorValues_macro_3D.cpp b/operators/epsilon/noarch/P2ElementwiseEpsilon_0_0_computeInverseDiagonalOperatorValues_P2ElementwiseEpsilon_0_0_macro_3D.cpp similarity index 99% rename from operators/epsilon/noarch/P2ElementwiseEpsilon_0_0_computeInverseDiagonalOperatorValues_macro_3D.cpp rename to operators/epsilon/noarch/P2ElementwiseEpsilon_0_0_computeInverseDiagonalOperatorValues_P2ElementwiseEpsilon_0_0_macro_3D.cpp index 5a6d6a110c15ebc0cf0766e62b7cf47b57c1d4e4..d5c47673e8ae90becd34b01cfcbf37debec95615 100644 --- a/operators/epsilon/noarch/P2ElementwiseEpsilon_0_0_computeInverseDiagonalOperatorValues_macro_3D.cpp +++ b/operators/epsilon/noarch/P2ElementwiseEpsilon_0_0_computeInverseDiagonalOperatorValues_P2ElementwiseEpsilon_0_0_macro_3D.cpp @@ -43,6 +43,10 @@ + + + + @@ -56,7 +60,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseEpsilon_0_0::computeInverseDiagonalOperatorValues_macro_3D( real_t * RESTRICT _data_invDiag_Edge, real_t * RESTRICT _data_invDiag_Vertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P2ElementwiseEpsilon_0_0::computeInverseDiagonalOperatorValues_P2ElementwiseEpsilon_0_0_macro_3D( real_t * RESTRICT _data_invDiag_Edge, real_t * RESTRICT _data_invDiag_Vertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t _data_q_w [] = {0.041666666666666657, 0.041666666666666657, 0.041666666666666657, 0.041666666666666657}; diff --git a/operators/epsilon/noarch/P2ElementwiseEpsilon_0_0_toMatrix_macro_2D.cpp b/operators/epsilon/noarch/P2ElementwiseEpsilon_0_0_toMatrix_P2ElementwiseEpsilon_0_0_macro_2D.cpp similarity index 99% rename from operators/epsilon/noarch/P2ElementwiseEpsilon_0_0_toMatrix_macro_2D.cpp rename to operators/epsilon/noarch/P2ElementwiseEpsilon_0_0_toMatrix_P2ElementwiseEpsilon_0_0_macro_2D.cpp index 196b06313b742e1a997ffd9dfbe75d19db1fdfff..f5a61bc662f7c5d102b5d6bb7a9052864a8a43fe 100644 --- a/operators/epsilon/noarch/P2ElementwiseEpsilon_0_0_toMatrix_macro_2D.cpp +++ b/operators/epsilon/noarch/P2ElementwiseEpsilon_0_0_toMatrix_P2ElementwiseEpsilon_0_0_macro_2D.cpp @@ -43,6 +43,10 @@ + + + + @@ -56,7 +60,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseEpsilon_0_0::toMatrix_macro_2D( idx_t * RESTRICT _data_dstEdge, idx_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, idx_t * RESTRICT _data_srcEdge, idx_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P2ElementwiseEpsilon_0_0::toMatrix_P2ElementwiseEpsilon_0_0_macro_2D( idx_t * RESTRICT _data_dstEdge, idx_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, idx_t * RESTRICT _data_srcEdge, idx_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t _data_q_w [] = {0.16666666666666666, 0.16666666666666666, 0.16666666666666666}; diff --git a/operators/epsilon/noarch/P2ElementwiseEpsilon_0_0_toMatrix_macro_3D.cpp b/operators/epsilon/noarch/P2ElementwiseEpsilon_0_0_toMatrix_P2ElementwiseEpsilon_0_0_macro_3D.cpp similarity index 99% rename from operators/epsilon/noarch/P2ElementwiseEpsilon_0_0_toMatrix_macro_3D.cpp rename to operators/epsilon/noarch/P2ElementwiseEpsilon_0_0_toMatrix_P2ElementwiseEpsilon_0_0_macro_3D.cpp index 75e2a24b00d9e18e465a31c27f468225c34221a0..e380f4ef09bfd1bec9f2a88dc37d49cd4446a687 100644 --- a/operators/epsilon/noarch/P2ElementwiseEpsilon_0_0_toMatrix_macro_3D.cpp +++ b/operators/epsilon/noarch/P2ElementwiseEpsilon_0_0_toMatrix_P2ElementwiseEpsilon_0_0_macro_3D.cpp @@ -43,6 +43,10 @@ + + + + @@ -56,7 +60,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseEpsilon_0_0::toMatrix_macro_3D( idx_t * RESTRICT _data_dstEdge, idx_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, idx_t * RESTRICT _data_srcEdge, idx_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P2ElementwiseEpsilon_0_0::toMatrix_P2ElementwiseEpsilon_0_0_macro_3D( idx_t * RESTRICT _data_dstEdge, idx_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, idx_t * RESTRICT _data_srcEdge, idx_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t _data_q_w [] = {0.041666666666666657, 0.041666666666666657, 0.041666666666666657, 0.041666666666666657}; diff --git a/operators/epsilon/noarch/P2ElementwiseEpsilon_0_1_apply_macro_2D.cpp b/operators/epsilon/noarch/P2ElementwiseEpsilon_0_1_apply_P2ElementwiseEpsilon_0_1_macro_2D.cpp similarity index 99% rename from operators/epsilon/noarch/P2ElementwiseEpsilon_0_1_apply_macro_2D.cpp rename to operators/epsilon/noarch/P2ElementwiseEpsilon_0_1_apply_P2ElementwiseEpsilon_0_1_macro_2D.cpp index b43c253201fda45ab8561666e976d137ee822678..5b6f7087be78d8fc035a333d80064e317abb6eb0 100644 --- a/operators/epsilon/noarch/P2ElementwiseEpsilon_0_1_apply_macro_2D.cpp +++ b/operators/epsilon/noarch/P2ElementwiseEpsilon_0_1_apply_P2ElementwiseEpsilon_0_1_macro_2D.cpp @@ -41,6 +41,10 @@ + + + + @@ -54,7 +58,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseEpsilon_0_1::apply_macro_2D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P2ElementwiseEpsilon_0_1::apply_P2ElementwiseEpsilon_0_1_macro_2D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t _data_q_w [] = {0.16666666666666666, 0.16666666666666666, 0.16666666666666666}; diff --git a/operators/epsilon/noarch/P2ElementwiseEpsilon_0_1_apply_macro_3D.cpp b/operators/epsilon/noarch/P2ElementwiseEpsilon_0_1_apply_P2ElementwiseEpsilon_0_1_macro_3D.cpp similarity index 99% rename from operators/epsilon/noarch/P2ElementwiseEpsilon_0_1_apply_macro_3D.cpp rename to operators/epsilon/noarch/P2ElementwiseEpsilon_0_1_apply_P2ElementwiseEpsilon_0_1_macro_3D.cpp index 377700e3f9b03baad867f3ba5c5bc71ee4a579e9..674d6dbb5b653f4620e3f72935bf285bafc09aee 100644 --- a/operators/epsilon/noarch/P2ElementwiseEpsilon_0_1_apply_macro_3D.cpp +++ b/operators/epsilon/noarch/P2ElementwiseEpsilon_0_1_apply_P2ElementwiseEpsilon_0_1_macro_3D.cpp @@ -41,6 +41,10 @@ + + + + @@ -54,7 +58,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseEpsilon_0_1::apply_macro_3D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P2ElementwiseEpsilon_0_1::apply_P2ElementwiseEpsilon_0_1_macro_3D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t _data_q_w [] = {0.041666666666666657, 0.041666666666666657, 0.041666666666666657, 0.041666666666666657}; diff --git a/operators/epsilon/noarch/P2ElementwiseEpsilon_0_1_toMatrix_macro_2D.cpp b/operators/epsilon/noarch/P2ElementwiseEpsilon_0_1_toMatrix_P2ElementwiseEpsilon_0_1_macro_2D.cpp similarity index 99% rename from operators/epsilon/noarch/P2ElementwiseEpsilon_0_1_toMatrix_macro_2D.cpp rename to operators/epsilon/noarch/P2ElementwiseEpsilon_0_1_toMatrix_P2ElementwiseEpsilon_0_1_macro_2D.cpp index f112ea9aeb48ed5cd7b0c26bdf04fc0901542b77..5c9266e129781a3fd3394b6b0f885e4f008c6dc8 100644 --- a/operators/epsilon/noarch/P2ElementwiseEpsilon_0_1_toMatrix_macro_2D.cpp +++ b/operators/epsilon/noarch/P2ElementwiseEpsilon_0_1_toMatrix_P2ElementwiseEpsilon_0_1_macro_2D.cpp @@ -41,6 +41,10 @@ + + + + @@ -54,7 +58,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseEpsilon_0_1::toMatrix_macro_2D( idx_t * RESTRICT _data_dstEdge, idx_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, idx_t * RESTRICT _data_srcEdge, idx_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P2ElementwiseEpsilon_0_1::toMatrix_P2ElementwiseEpsilon_0_1_macro_2D( idx_t * RESTRICT _data_dstEdge, idx_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, idx_t * RESTRICT _data_srcEdge, idx_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t _data_q_w [] = {0.16666666666666666, 0.16666666666666666, 0.16666666666666666}; diff --git a/operators/epsilon/noarch/P2ElementwiseEpsilon_0_1_toMatrix_macro_3D.cpp b/operators/epsilon/noarch/P2ElementwiseEpsilon_0_1_toMatrix_P2ElementwiseEpsilon_0_1_macro_3D.cpp similarity index 99% rename from operators/epsilon/noarch/P2ElementwiseEpsilon_0_1_toMatrix_macro_3D.cpp rename to operators/epsilon/noarch/P2ElementwiseEpsilon_0_1_toMatrix_P2ElementwiseEpsilon_0_1_macro_3D.cpp index 5df17aeb5f9e6a7fcd53fb35101cb399f0787590..790083361d7a0bb54ecc3575700e39027fd55555 100644 --- a/operators/epsilon/noarch/P2ElementwiseEpsilon_0_1_toMatrix_macro_3D.cpp +++ b/operators/epsilon/noarch/P2ElementwiseEpsilon_0_1_toMatrix_P2ElementwiseEpsilon_0_1_macro_3D.cpp @@ -41,6 +41,10 @@ + + + + @@ -54,7 +58,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseEpsilon_0_1::toMatrix_macro_3D( idx_t * RESTRICT _data_dstEdge, idx_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, idx_t * RESTRICT _data_srcEdge, idx_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P2ElementwiseEpsilon_0_1::toMatrix_P2ElementwiseEpsilon_0_1_macro_3D( idx_t * RESTRICT _data_dstEdge, idx_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, idx_t * RESTRICT _data_srcEdge, idx_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t _data_q_w [] = {0.041666666666666657, 0.041666666666666657, 0.041666666666666657, 0.041666666666666657}; diff --git a/operators/epsilon/noarch/P2ElementwiseEpsilon_0_2_apply_macro_3D.cpp b/operators/epsilon/noarch/P2ElementwiseEpsilon_0_2_apply_P2ElementwiseEpsilon_0_2_macro_3D.cpp similarity index 99% rename from operators/epsilon/noarch/P2ElementwiseEpsilon_0_2_apply_macro_3D.cpp rename to operators/epsilon/noarch/P2ElementwiseEpsilon_0_2_apply_P2ElementwiseEpsilon_0_2_macro_3D.cpp index 833d608056a8132e6aafb20bac953cbc074f8d01..178addd61f3989dad00fca33bff0ce0c6c87fb10 100644 --- a/operators/epsilon/noarch/P2ElementwiseEpsilon_0_2_apply_macro_3D.cpp +++ b/operators/epsilon/noarch/P2ElementwiseEpsilon_0_2_apply_P2ElementwiseEpsilon_0_2_macro_3D.cpp @@ -41,6 +41,10 @@ + + + + @@ -54,7 +58,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseEpsilon_0_2::apply_macro_3D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P2ElementwiseEpsilon_0_2::apply_P2ElementwiseEpsilon_0_2_macro_3D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t _data_q_w [] = {0.041666666666666657, 0.041666666666666657, 0.041666666666666657, 0.041666666666666657}; diff --git a/operators/epsilon/noarch/P2ElementwiseEpsilon_0_2_toMatrix_macro_3D.cpp b/operators/epsilon/noarch/P2ElementwiseEpsilon_0_2_toMatrix_P2ElementwiseEpsilon_0_2_macro_3D.cpp similarity index 99% rename from operators/epsilon/noarch/P2ElementwiseEpsilon_0_2_toMatrix_macro_3D.cpp rename to operators/epsilon/noarch/P2ElementwiseEpsilon_0_2_toMatrix_P2ElementwiseEpsilon_0_2_macro_3D.cpp index c0a3819d0b332a361bbdd90815c063bdd6d8d2d4..81d08aeca668e726b66b9922da4a06db414b99d7 100644 --- a/operators/epsilon/noarch/P2ElementwiseEpsilon_0_2_toMatrix_macro_3D.cpp +++ b/operators/epsilon/noarch/P2ElementwiseEpsilon_0_2_toMatrix_P2ElementwiseEpsilon_0_2_macro_3D.cpp @@ -41,6 +41,10 @@ + + + + @@ -54,7 +58,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseEpsilon_0_2::toMatrix_macro_3D( idx_t * RESTRICT _data_dstEdge, idx_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, idx_t * RESTRICT _data_srcEdge, idx_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P2ElementwiseEpsilon_0_2::toMatrix_P2ElementwiseEpsilon_0_2_macro_3D( idx_t * RESTRICT _data_dstEdge, idx_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, idx_t * RESTRICT _data_srcEdge, idx_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t _data_q_w [] = {0.041666666666666657, 0.041666666666666657, 0.041666666666666657, 0.041666666666666657}; diff --git a/operators/epsilon/noarch/P2ElementwiseEpsilon_1_0_apply_macro_2D.cpp b/operators/epsilon/noarch/P2ElementwiseEpsilon_1_0_apply_P2ElementwiseEpsilon_1_0_macro_2D.cpp similarity index 99% rename from operators/epsilon/noarch/P2ElementwiseEpsilon_1_0_apply_macro_2D.cpp rename to operators/epsilon/noarch/P2ElementwiseEpsilon_1_0_apply_P2ElementwiseEpsilon_1_0_macro_2D.cpp index 626c276595940da33d0677d0cbc504c2efee0094..9c1b8a28c702fb7d34567a407d63192b47877c8d 100644 --- a/operators/epsilon/noarch/P2ElementwiseEpsilon_1_0_apply_macro_2D.cpp +++ b/operators/epsilon/noarch/P2ElementwiseEpsilon_1_0_apply_P2ElementwiseEpsilon_1_0_macro_2D.cpp @@ -41,6 +41,10 @@ + + + + @@ -54,7 +58,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseEpsilon_1_0::apply_macro_2D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P2ElementwiseEpsilon_1_0::apply_P2ElementwiseEpsilon_1_0_macro_2D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t _data_q_w [] = {0.16666666666666666, 0.16666666666666666, 0.16666666666666666}; diff --git a/operators/epsilon/noarch/P2ElementwiseEpsilon_1_0_apply_macro_3D.cpp b/operators/epsilon/noarch/P2ElementwiseEpsilon_1_0_apply_P2ElementwiseEpsilon_1_0_macro_3D.cpp similarity index 99% rename from operators/epsilon/noarch/P2ElementwiseEpsilon_1_0_apply_macro_3D.cpp rename to operators/epsilon/noarch/P2ElementwiseEpsilon_1_0_apply_P2ElementwiseEpsilon_1_0_macro_3D.cpp index ce752ff6bf1d0446c948a6df0a7ef8b38fffbd0e..60cf4d519dcde16a8dc2b7b36cae5969a29a8399 100644 --- a/operators/epsilon/noarch/P2ElementwiseEpsilon_1_0_apply_macro_3D.cpp +++ b/operators/epsilon/noarch/P2ElementwiseEpsilon_1_0_apply_P2ElementwiseEpsilon_1_0_macro_3D.cpp @@ -41,6 +41,10 @@ + + + + @@ -54,7 +58,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseEpsilon_1_0::apply_macro_3D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P2ElementwiseEpsilon_1_0::apply_P2ElementwiseEpsilon_1_0_macro_3D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t _data_q_w [] = {0.041666666666666657, 0.041666666666666657, 0.041666666666666657, 0.041666666666666657}; diff --git a/operators/epsilon/noarch/P2ElementwiseEpsilon_1_0_toMatrix_macro_2D.cpp b/operators/epsilon/noarch/P2ElementwiseEpsilon_1_0_toMatrix_P2ElementwiseEpsilon_1_0_macro_2D.cpp similarity index 99% rename from operators/epsilon/noarch/P2ElementwiseEpsilon_1_0_toMatrix_macro_2D.cpp rename to operators/epsilon/noarch/P2ElementwiseEpsilon_1_0_toMatrix_P2ElementwiseEpsilon_1_0_macro_2D.cpp index e6b12c783a51175b2d94a43061ce5efd9f48d75e..85f92d5e4380abebbc3016754d990c5a11c5f086 100644 --- a/operators/epsilon/noarch/P2ElementwiseEpsilon_1_0_toMatrix_macro_2D.cpp +++ b/operators/epsilon/noarch/P2ElementwiseEpsilon_1_0_toMatrix_P2ElementwiseEpsilon_1_0_macro_2D.cpp @@ -41,6 +41,10 @@ + + + + @@ -54,7 +58,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseEpsilon_1_0::toMatrix_macro_2D( idx_t * RESTRICT _data_dstEdge, idx_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, idx_t * RESTRICT _data_srcEdge, idx_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P2ElementwiseEpsilon_1_0::toMatrix_P2ElementwiseEpsilon_1_0_macro_2D( idx_t * RESTRICT _data_dstEdge, idx_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, idx_t * RESTRICT _data_srcEdge, idx_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t _data_q_w [] = {0.16666666666666666, 0.16666666666666666, 0.16666666666666666}; diff --git a/operators/epsilon/noarch/P2ElementwiseEpsilon_1_0_toMatrix_macro_3D.cpp b/operators/epsilon/noarch/P2ElementwiseEpsilon_1_0_toMatrix_P2ElementwiseEpsilon_1_0_macro_3D.cpp similarity index 99% rename from operators/epsilon/noarch/P2ElementwiseEpsilon_1_0_toMatrix_macro_3D.cpp rename to operators/epsilon/noarch/P2ElementwiseEpsilon_1_0_toMatrix_P2ElementwiseEpsilon_1_0_macro_3D.cpp index ff45b435243856b4381b3ca2511b861d2366596a..ec5f7bea0e89a7d2c53558473d6cca1e56936a37 100644 --- a/operators/epsilon/noarch/P2ElementwiseEpsilon_1_0_toMatrix_macro_3D.cpp +++ b/operators/epsilon/noarch/P2ElementwiseEpsilon_1_0_toMatrix_P2ElementwiseEpsilon_1_0_macro_3D.cpp @@ -41,6 +41,10 @@ + + + + @@ -54,7 +58,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseEpsilon_1_0::toMatrix_macro_3D( idx_t * RESTRICT _data_dstEdge, idx_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, idx_t * RESTRICT _data_srcEdge, idx_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P2ElementwiseEpsilon_1_0::toMatrix_P2ElementwiseEpsilon_1_0_macro_3D( idx_t * RESTRICT _data_dstEdge, idx_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, idx_t * RESTRICT _data_srcEdge, idx_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t _data_q_w [] = {0.041666666666666657, 0.041666666666666657, 0.041666666666666657, 0.041666666666666657}; diff --git a/operators/epsilon/noarch/P2ElementwiseEpsilon_1_1_apply_macro_2D.cpp b/operators/epsilon/noarch/P2ElementwiseEpsilon_1_1_apply_P2ElementwiseEpsilon_1_1_macro_2D.cpp similarity index 99% rename from operators/epsilon/noarch/P2ElementwiseEpsilon_1_1_apply_macro_2D.cpp rename to operators/epsilon/noarch/P2ElementwiseEpsilon_1_1_apply_P2ElementwiseEpsilon_1_1_macro_2D.cpp index eedbea32bd7c5549c3b2eb026a8a5cbeeb36e395..39298ace0bdd387862ac0e01a27e8c26cd4aa377 100644 --- a/operators/epsilon/noarch/P2ElementwiseEpsilon_1_1_apply_macro_2D.cpp +++ b/operators/epsilon/noarch/P2ElementwiseEpsilon_1_1_apply_P2ElementwiseEpsilon_1_1_macro_2D.cpp @@ -43,6 +43,10 @@ + + + + @@ -56,7 +60,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseEpsilon_1_1::apply_macro_2D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P2ElementwiseEpsilon_1_1::apply_P2ElementwiseEpsilon_1_1_macro_2D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t _data_q_w [] = {0.16666666666666666, 0.16666666666666666, 0.16666666666666666}; diff --git a/operators/epsilon/noarch/P2ElementwiseEpsilon_1_1_apply_macro_3D.cpp b/operators/epsilon/noarch/P2ElementwiseEpsilon_1_1_apply_P2ElementwiseEpsilon_1_1_macro_3D.cpp similarity index 99% rename from operators/epsilon/noarch/P2ElementwiseEpsilon_1_1_apply_macro_3D.cpp rename to operators/epsilon/noarch/P2ElementwiseEpsilon_1_1_apply_P2ElementwiseEpsilon_1_1_macro_3D.cpp index d449ae6a42e2fdd3f8cc1417dcef8e6b0070ff2b..026cd8dd08a4c3da772bdc542f278ed75cb64f14 100644 --- a/operators/epsilon/noarch/P2ElementwiseEpsilon_1_1_apply_macro_3D.cpp +++ b/operators/epsilon/noarch/P2ElementwiseEpsilon_1_1_apply_P2ElementwiseEpsilon_1_1_macro_3D.cpp @@ -43,6 +43,10 @@ + + + + @@ -56,7 +60,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseEpsilon_1_1::apply_macro_3D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P2ElementwiseEpsilon_1_1::apply_P2ElementwiseEpsilon_1_1_macro_3D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t _data_q_w [] = {0.041666666666666657, 0.041666666666666657, 0.041666666666666657, 0.041666666666666657}; diff --git a/operators/epsilon/noarch/P2ElementwiseEpsilon_1_1_computeInverseDiagonalOperatorValues_macro_2D.cpp b/operators/epsilon/noarch/P2ElementwiseEpsilon_1_1_computeInverseDiagonalOperatorValues_P2ElementwiseEpsilon_1_1_macro_2D.cpp similarity index 99% rename from operators/epsilon/noarch/P2ElementwiseEpsilon_1_1_computeInverseDiagonalOperatorValues_macro_2D.cpp rename to operators/epsilon/noarch/P2ElementwiseEpsilon_1_1_computeInverseDiagonalOperatorValues_P2ElementwiseEpsilon_1_1_macro_2D.cpp index 91d9e9ca6f03aabb0ae06c24f384af3974a5e287..cea03d04b1b58c18f33bafb088e775df2ad95092 100644 --- a/operators/epsilon/noarch/P2ElementwiseEpsilon_1_1_computeInverseDiagonalOperatorValues_macro_2D.cpp +++ b/operators/epsilon/noarch/P2ElementwiseEpsilon_1_1_computeInverseDiagonalOperatorValues_P2ElementwiseEpsilon_1_1_macro_2D.cpp @@ -43,6 +43,10 @@ + + + + @@ -56,7 +60,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseEpsilon_1_1::computeInverseDiagonalOperatorValues_macro_2D( real_t * RESTRICT _data_invDiag_Edge, real_t * RESTRICT _data_invDiag_Vertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P2ElementwiseEpsilon_1_1::computeInverseDiagonalOperatorValues_P2ElementwiseEpsilon_1_1_macro_2D( real_t * RESTRICT _data_invDiag_Edge, real_t * RESTRICT _data_invDiag_Vertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t _data_q_w [] = {0.16666666666666666, 0.16666666666666666, 0.16666666666666666}; diff --git a/operators/epsilon/noarch/P2ElementwiseEpsilon_1_1_computeInverseDiagonalOperatorValues_macro_3D.cpp b/operators/epsilon/noarch/P2ElementwiseEpsilon_1_1_computeInverseDiagonalOperatorValues_P2ElementwiseEpsilon_1_1_macro_3D.cpp similarity index 99% rename from operators/epsilon/noarch/P2ElementwiseEpsilon_1_1_computeInverseDiagonalOperatorValues_macro_3D.cpp rename to operators/epsilon/noarch/P2ElementwiseEpsilon_1_1_computeInverseDiagonalOperatorValues_P2ElementwiseEpsilon_1_1_macro_3D.cpp index 883735ab75b5c75a4d9a1223f1973a18d18f1e3e..0a111455a9fad80e920aba1d3aca28a92dc7ded1 100644 --- a/operators/epsilon/noarch/P2ElementwiseEpsilon_1_1_computeInverseDiagonalOperatorValues_macro_3D.cpp +++ b/operators/epsilon/noarch/P2ElementwiseEpsilon_1_1_computeInverseDiagonalOperatorValues_P2ElementwiseEpsilon_1_1_macro_3D.cpp @@ -43,6 +43,10 @@ + + + + @@ -56,7 +60,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseEpsilon_1_1::computeInverseDiagonalOperatorValues_macro_3D( real_t * RESTRICT _data_invDiag_Edge, real_t * RESTRICT _data_invDiag_Vertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P2ElementwiseEpsilon_1_1::computeInverseDiagonalOperatorValues_P2ElementwiseEpsilon_1_1_macro_3D( real_t * RESTRICT _data_invDiag_Edge, real_t * RESTRICT _data_invDiag_Vertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t _data_q_w [] = {0.041666666666666657, 0.041666666666666657, 0.041666666666666657, 0.041666666666666657}; diff --git a/operators/epsilon/noarch/P2ElementwiseEpsilon_1_1_toMatrix_macro_2D.cpp b/operators/epsilon/noarch/P2ElementwiseEpsilon_1_1_toMatrix_P2ElementwiseEpsilon_1_1_macro_2D.cpp similarity index 99% rename from operators/epsilon/noarch/P2ElementwiseEpsilon_1_1_toMatrix_macro_2D.cpp rename to operators/epsilon/noarch/P2ElementwiseEpsilon_1_1_toMatrix_P2ElementwiseEpsilon_1_1_macro_2D.cpp index 79549f479cc19721853d37966d2ced2124c913f2..a58872ea6e4fd0e282fa1151d448fdb8ae106896 100644 --- a/operators/epsilon/noarch/P2ElementwiseEpsilon_1_1_toMatrix_macro_2D.cpp +++ b/operators/epsilon/noarch/P2ElementwiseEpsilon_1_1_toMatrix_P2ElementwiseEpsilon_1_1_macro_2D.cpp @@ -43,6 +43,10 @@ + + + + @@ -56,7 +60,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseEpsilon_1_1::toMatrix_macro_2D( idx_t * RESTRICT _data_dstEdge, idx_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, idx_t * RESTRICT _data_srcEdge, idx_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P2ElementwiseEpsilon_1_1::toMatrix_P2ElementwiseEpsilon_1_1_macro_2D( idx_t * RESTRICT _data_dstEdge, idx_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, idx_t * RESTRICT _data_srcEdge, idx_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t _data_q_w [] = {0.16666666666666666, 0.16666666666666666, 0.16666666666666666}; diff --git a/operators/epsilon/noarch/P2ElementwiseEpsilon_1_1_toMatrix_macro_3D.cpp b/operators/epsilon/noarch/P2ElementwiseEpsilon_1_1_toMatrix_P2ElementwiseEpsilon_1_1_macro_3D.cpp similarity index 99% rename from operators/epsilon/noarch/P2ElementwiseEpsilon_1_1_toMatrix_macro_3D.cpp rename to operators/epsilon/noarch/P2ElementwiseEpsilon_1_1_toMatrix_P2ElementwiseEpsilon_1_1_macro_3D.cpp index aff549f40fbac07c1beaa35f8e72aa504e8063a3..1b08642e6f899e140c7fe55daaa2942635162946 100644 --- a/operators/epsilon/noarch/P2ElementwiseEpsilon_1_1_toMatrix_macro_3D.cpp +++ b/operators/epsilon/noarch/P2ElementwiseEpsilon_1_1_toMatrix_P2ElementwiseEpsilon_1_1_macro_3D.cpp @@ -43,6 +43,10 @@ + + + + @@ -56,7 +60,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseEpsilon_1_1::toMatrix_macro_3D( idx_t * RESTRICT _data_dstEdge, idx_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, idx_t * RESTRICT _data_srcEdge, idx_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P2ElementwiseEpsilon_1_1::toMatrix_P2ElementwiseEpsilon_1_1_macro_3D( idx_t * RESTRICT _data_dstEdge, idx_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, idx_t * RESTRICT _data_srcEdge, idx_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t _data_q_w [] = {0.041666666666666657, 0.041666666666666657, 0.041666666666666657, 0.041666666666666657}; diff --git a/operators/epsilon/noarch/P2ElementwiseEpsilon_1_2_apply_macro_3D.cpp b/operators/epsilon/noarch/P2ElementwiseEpsilon_1_2_apply_P2ElementwiseEpsilon_1_2_macro_3D.cpp similarity index 99% rename from operators/epsilon/noarch/P2ElementwiseEpsilon_1_2_apply_macro_3D.cpp rename to operators/epsilon/noarch/P2ElementwiseEpsilon_1_2_apply_P2ElementwiseEpsilon_1_2_macro_3D.cpp index b3b27120b4166ea9e78d6b7f62f8ff36d80ac5a1..49e96a4512824b2026667933bd9bef26d9541dfa 100644 --- a/operators/epsilon/noarch/P2ElementwiseEpsilon_1_2_apply_macro_3D.cpp +++ b/operators/epsilon/noarch/P2ElementwiseEpsilon_1_2_apply_P2ElementwiseEpsilon_1_2_macro_3D.cpp @@ -41,6 +41,10 @@ + + + + @@ -54,7 +58,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseEpsilon_1_2::apply_macro_3D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P2ElementwiseEpsilon_1_2::apply_P2ElementwiseEpsilon_1_2_macro_3D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t _data_q_w [] = {0.041666666666666657, 0.041666666666666657, 0.041666666666666657, 0.041666666666666657}; diff --git a/operators/epsilon/noarch/P2ElementwiseEpsilon_1_2_toMatrix_macro_3D.cpp b/operators/epsilon/noarch/P2ElementwiseEpsilon_1_2_toMatrix_P2ElementwiseEpsilon_1_2_macro_3D.cpp similarity index 99% rename from operators/epsilon/noarch/P2ElementwiseEpsilon_1_2_toMatrix_macro_3D.cpp rename to operators/epsilon/noarch/P2ElementwiseEpsilon_1_2_toMatrix_P2ElementwiseEpsilon_1_2_macro_3D.cpp index 8772aa7bd64fd23a9b3541017ee264deede45887..09b18c985d16548288122140f1e56312f675b0bd 100644 --- a/operators/epsilon/noarch/P2ElementwiseEpsilon_1_2_toMatrix_macro_3D.cpp +++ b/operators/epsilon/noarch/P2ElementwiseEpsilon_1_2_toMatrix_P2ElementwiseEpsilon_1_2_macro_3D.cpp @@ -41,6 +41,10 @@ + + + + @@ -54,7 +58,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseEpsilon_1_2::toMatrix_macro_3D( idx_t * RESTRICT _data_dstEdge, idx_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, idx_t * RESTRICT _data_srcEdge, idx_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P2ElementwiseEpsilon_1_2::toMatrix_P2ElementwiseEpsilon_1_2_macro_3D( idx_t * RESTRICT _data_dstEdge, idx_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, idx_t * RESTRICT _data_srcEdge, idx_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t _data_q_w [] = {0.041666666666666657, 0.041666666666666657, 0.041666666666666657, 0.041666666666666657}; diff --git a/operators/epsilon/noarch/P2ElementwiseEpsilon_2_0_apply_macro_3D.cpp b/operators/epsilon/noarch/P2ElementwiseEpsilon_2_0_apply_P2ElementwiseEpsilon_2_0_macro_3D.cpp similarity index 99% rename from operators/epsilon/noarch/P2ElementwiseEpsilon_2_0_apply_macro_3D.cpp rename to operators/epsilon/noarch/P2ElementwiseEpsilon_2_0_apply_P2ElementwiseEpsilon_2_0_macro_3D.cpp index b0dcd4264c066b021c9ebf8bd9c1963168c2ac16..7f3b1f1f45b1c4f144c0ccbcade2575f39a1e5e5 100644 --- a/operators/epsilon/noarch/P2ElementwiseEpsilon_2_0_apply_macro_3D.cpp +++ b/operators/epsilon/noarch/P2ElementwiseEpsilon_2_0_apply_P2ElementwiseEpsilon_2_0_macro_3D.cpp @@ -41,6 +41,10 @@ + + + + @@ -54,7 +58,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseEpsilon_2_0::apply_macro_3D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P2ElementwiseEpsilon_2_0::apply_P2ElementwiseEpsilon_2_0_macro_3D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t _data_q_w [] = {0.041666666666666657, 0.041666666666666657, 0.041666666666666657, 0.041666666666666657}; diff --git a/operators/epsilon/noarch/P2ElementwiseEpsilon_2_0_toMatrix_macro_3D.cpp b/operators/epsilon/noarch/P2ElementwiseEpsilon_2_0_toMatrix_P2ElementwiseEpsilon_2_0_macro_3D.cpp similarity index 99% rename from operators/epsilon/noarch/P2ElementwiseEpsilon_2_0_toMatrix_macro_3D.cpp rename to operators/epsilon/noarch/P2ElementwiseEpsilon_2_0_toMatrix_P2ElementwiseEpsilon_2_0_macro_3D.cpp index 97ade2533af600940a013a224089a92e2309fa78..d66a4e3f1a9d1e1eb14cfb810816dd85731f530d 100644 --- a/operators/epsilon/noarch/P2ElementwiseEpsilon_2_0_toMatrix_macro_3D.cpp +++ b/operators/epsilon/noarch/P2ElementwiseEpsilon_2_0_toMatrix_P2ElementwiseEpsilon_2_0_macro_3D.cpp @@ -41,6 +41,10 @@ + + + + @@ -54,7 +58,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseEpsilon_2_0::toMatrix_macro_3D( idx_t * RESTRICT _data_dstEdge, idx_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, idx_t * RESTRICT _data_srcEdge, idx_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P2ElementwiseEpsilon_2_0::toMatrix_P2ElementwiseEpsilon_2_0_macro_3D( idx_t * RESTRICT _data_dstEdge, idx_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, idx_t * RESTRICT _data_srcEdge, idx_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t _data_q_w [] = {0.041666666666666657, 0.041666666666666657, 0.041666666666666657, 0.041666666666666657}; diff --git a/operators/epsilon/noarch/P2ElementwiseEpsilon_2_1_apply_macro_3D.cpp b/operators/epsilon/noarch/P2ElementwiseEpsilon_2_1_apply_P2ElementwiseEpsilon_2_1_macro_3D.cpp similarity index 99% rename from operators/epsilon/noarch/P2ElementwiseEpsilon_2_1_apply_macro_3D.cpp rename to operators/epsilon/noarch/P2ElementwiseEpsilon_2_1_apply_P2ElementwiseEpsilon_2_1_macro_3D.cpp index fd8f4f22ac7efd03ac13faaa88a93268942c4476..b0e72a0264a69b733752d7bb3662570592caa46e 100644 --- a/operators/epsilon/noarch/P2ElementwiseEpsilon_2_1_apply_macro_3D.cpp +++ b/operators/epsilon/noarch/P2ElementwiseEpsilon_2_1_apply_P2ElementwiseEpsilon_2_1_macro_3D.cpp @@ -41,6 +41,10 @@ + + + + @@ -54,7 +58,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseEpsilon_2_1::apply_macro_3D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P2ElementwiseEpsilon_2_1::apply_P2ElementwiseEpsilon_2_1_macro_3D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t _data_q_w [] = {0.041666666666666657, 0.041666666666666657, 0.041666666666666657, 0.041666666666666657}; diff --git a/operators/epsilon/noarch/P2ElementwiseEpsilon_2_1_toMatrix_macro_3D.cpp b/operators/epsilon/noarch/P2ElementwiseEpsilon_2_1_toMatrix_P2ElementwiseEpsilon_2_1_macro_3D.cpp similarity index 99% rename from operators/epsilon/noarch/P2ElementwiseEpsilon_2_1_toMatrix_macro_3D.cpp rename to operators/epsilon/noarch/P2ElementwiseEpsilon_2_1_toMatrix_P2ElementwiseEpsilon_2_1_macro_3D.cpp index f8c35e998268d83b9825f4d973923a7377cb822d..ece958130f8742ea33e928ab19c471460ea70171 100644 --- a/operators/epsilon/noarch/P2ElementwiseEpsilon_2_1_toMatrix_macro_3D.cpp +++ b/operators/epsilon/noarch/P2ElementwiseEpsilon_2_1_toMatrix_P2ElementwiseEpsilon_2_1_macro_3D.cpp @@ -41,6 +41,10 @@ + + + + @@ -54,7 +58,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseEpsilon_2_1::toMatrix_macro_3D( idx_t * RESTRICT _data_dstEdge, idx_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, idx_t * RESTRICT _data_srcEdge, idx_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P2ElementwiseEpsilon_2_1::toMatrix_P2ElementwiseEpsilon_2_1_macro_3D( idx_t * RESTRICT _data_dstEdge, idx_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, idx_t * RESTRICT _data_srcEdge, idx_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t _data_q_w [] = {0.041666666666666657, 0.041666666666666657, 0.041666666666666657, 0.041666666666666657}; diff --git a/operators/epsilon/noarch/P2ElementwiseEpsilon_2_2_apply_macro_3D.cpp b/operators/epsilon/noarch/P2ElementwiseEpsilon_2_2_apply_P2ElementwiseEpsilon_2_2_macro_3D.cpp similarity index 99% rename from operators/epsilon/noarch/P2ElementwiseEpsilon_2_2_apply_macro_3D.cpp rename to operators/epsilon/noarch/P2ElementwiseEpsilon_2_2_apply_P2ElementwiseEpsilon_2_2_macro_3D.cpp index 443f240571e32d859dc12910e86c21dfe2e57079..4058126ad655e7f213249fde9dfe4a78b2eeb8d8 100644 --- a/operators/epsilon/noarch/P2ElementwiseEpsilon_2_2_apply_macro_3D.cpp +++ b/operators/epsilon/noarch/P2ElementwiseEpsilon_2_2_apply_P2ElementwiseEpsilon_2_2_macro_3D.cpp @@ -43,6 +43,10 @@ + + + + @@ -56,7 +60,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseEpsilon_2_2::apply_macro_3D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P2ElementwiseEpsilon_2_2::apply_P2ElementwiseEpsilon_2_2_macro_3D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t _data_q_w [] = {0.041666666666666657, 0.041666666666666657, 0.041666666666666657, 0.041666666666666657}; diff --git a/operators/epsilon/noarch/P2ElementwiseEpsilon_2_2_computeInverseDiagonalOperatorValues_macro_3D.cpp b/operators/epsilon/noarch/P2ElementwiseEpsilon_2_2_computeInverseDiagonalOperatorValues_P2ElementwiseEpsilon_2_2_macro_3D.cpp similarity index 99% rename from operators/epsilon/noarch/P2ElementwiseEpsilon_2_2_computeInverseDiagonalOperatorValues_macro_3D.cpp rename to operators/epsilon/noarch/P2ElementwiseEpsilon_2_2_computeInverseDiagonalOperatorValues_P2ElementwiseEpsilon_2_2_macro_3D.cpp index f243bfa057298ca6a920afc16dbe4ecb96b559ec..74288889076cdb684af66ac8047d6688b8d22463 100644 --- a/operators/epsilon/noarch/P2ElementwiseEpsilon_2_2_computeInverseDiagonalOperatorValues_macro_3D.cpp +++ b/operators/epsilon/noarch/P2ElementwiseEpsilon_2_2_computeInverseDiagonalOperatorValues_P2ElementwiseEpsilon_2_2_macro_3D.cpp @@ -43,6 +43,10 @@ + + + + @@ -56,7 +60,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseEpsilon_2_2::computeInverseDiagonalOperatorValues_macro_3D( real_t * RESTRICT _data_invDiag_Edge, real_t * RESTRICT _data_invDiag_Vertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P2ElementwiseEpsilon_2_2::computeInverseDiagonalOperatorValues_P2ElementwiseEpsilon_2_2_macro_3D( real_t * RESTRICT _data_invDiag_Edge, real_t * RESTRICT _data_invDiag_Vertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t _data_q_w [] = {0.041666666666666657, 0.041666666666666657, 0.041666666666666657, 0.041666666666666657}; diff --git a/operators/epsilon/noarch/P2ElementwiseEpsilon_2_2_toMatrix_macro_3D.cpp b/operators/epsilon/noarch/P2ElementwiseEpsilon_2_2_toMatrix_P2ElementwiseEpsilon_2_2_macro_3D.cpp similarity index 99% rename from operators/epsilon/noarch/P2ElementwiseEpsilon_2_2_toMatrix_macro_3D.cpp rename to operators/epsilon/noarch/P2ElementwiseEpsilon_2_2_toMatrix_P2ElementwiseEpsilon_2_2_macro_3D.cpp index 30cc56a5db2b872485161f07a0584996e8542a8d..9801afb4ce964c299df3d053384fe46a50d7f01d 100644 --- a/operators/epsilon/noarch/P2ElementwiseEpsilon_2_2_toMatrix_macro_3D.cpp +++ b/operators/epsilon/noarch/P2ElementwiseEpsilon_2_2_toMatrix_P2ElementwiseEpsilon_2_2_macro_3D.cpp @@ -43,6 +43,10 @@ + + + + @@ -56,7 +60,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseEpsilon_2_2::toMatrix_macro_3D( idx_t * RESTRICT _data_dstEdge, idx_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, idx_t * RESTRICT _data_srcEdge, idx_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P2ElementwiseEpsilon_2_2::toMatrix_P2ElementwiseEpsilon_2_2_macro_3D( idx_t * RESTRICT _data_dstEdge, idx_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, idx_t * RESTRICT _data_srcEdge, idx_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t _data_q_w [] = {0.041666666666666657, 0.041666666666666657, 0.041666666666666657, 0.041666666666666657}; diff --git a/operators/full_stokes/CMakeLists.txt b/operators/full_stokes/CMakeLists.txt index cd9d5f1b39da4c7f2d472e1274cd61e7203f952a..4e9d365a922b86a10e689fceae667eb204ba0dec 100644 --- a/operators/full_stokes/CMakeLists.txt +++ b/operators/full_stokes/CMakeLists.txt @@ -49,108 +49,108 @@ add_library( opgen-full_stokes if(HYTEG_BUILD_WITH_AVX AND WALBERLA_DOUBLE_ACCURACY) target_sources(opgen-full_stokes PRIVATE - avx/P2ElementwiseFullStokesAnnulusMap_0_0_apply_macro_2D.cpp - avx/P2ElementwiseFullStokesAnnulusMap_0_0_computeInverseDiagonalOperatorValues_macro_2D.cpp - avx/P2ElementwiseFullStokesAnnulusMap_0_1_apply_macro_2D.cpp - avx/P2ElementwiseFullStokesAnnulusMap_1_0_apply_macro_2D.cpp - avx/P2ElementwiseFullStokesAnnulusMap_1_1_apply_macro_2D.cpp - avx/P2ElementwiseFullStokesAnnulusMap_1_1_computeInverseDiagonalOperatorValues_macro_2D.cpp - avx/P2ElementwiseFullStokesIcosahedralShellMap_0_0_apply_macro_3D.cpp - avx/P2ElementwiseFullStokesIcosahedralShellMap_0_0_computeInverseDiagonalOperatorValues_macro_3D.cpp - avx/P2ElementwiseFullStokesIcosahedralShellMap_0_1_apply_macro_3D.cpp - avx/P2ElementwiseFullStokesIcosahedralShellMap_0_2_apply_macro_3D.cpp - avx/P2ElementwiseFullStokesIcosahedralShellMap_1_0_apply_macro_3D.cpp - avx/P2ElementwiseFullStokesIcosahedralShellMap_1_1_apply_macro_3D.cpp - avx/P2ElementwiseFullStokesIcosahedralShellMap_1_1_computeInverseDiagonalOperatorValues_macro_3D.cpp - avx/P2ElementwiseFullStokesIcosahedralShellMap_1_2_apply_macro_3D.cpp - avx/P2ElementwiseFullStokesIcosahedralShellMap_2_0_apply_macro_3D.cpp - avx/P2ElementwiseFullStokesIcosahedralShellMap_2_1_apply_macro_3D.cpp - avx/P2ElementwiseFullStokesIcosahedralShellMap_2_2_apply_macro_3D.cpp - avx/P2ElementwiseFullStokesIcosahedralShellMap_2_2_computeInverseDiagonalOperatorValues_macro_3D.cpp - avx/P2ElementwiseFullStokes_0_0_apply_macro_2D.cpp - avx/P2ElementwiseFullStokes_0_0_apply_macro_3D.cpp - avx/P2ElementwiseFullStokes_0_0_computeInverseDiagonalOperatorValues_macro_2D.cpp - avx/P2ElementwiseFullStokes_0_0_computeInverseDiagonalOperatorValues_macro_3D.cpp - avx/P2ElementwiseFullStokes_0_1_apply_macro_2D.cpp - avx/P2ElementwiseFullStokes_0_1_apply_macro_3D.cpp - avx/P2ElementwiseFullStokes_0_2_apply_macro_3D.cpp - avx/P2ElementwiseFullStokes_1_0_apply_macro_2D.cpp - avx/P2ElementwiseFullStokes_1_0_apply_macro_3D.cpp - avx/P2ElementwiseFullStokes_1_1_apply_macro_2D.cpp - avx/P2ElementwiseFullStokes_1_1_apply_macro_3D.cpp - avx/P2ElementwiseFullStokes_1_1_computeInverseDiagonalOperatorValues_macro_2D.cpp - avx/P2ElementwiseFullStokes_1_1_computeInverseDiagonalOperatorValues_macro_3D.cpp - avx/P2ElementwiseFullStokes_1_2_apply_macro_3D.cpp - avx/P2ElementwiseFullStokes_2_0_apply_macro_3D.cpp - avx/P2ElementwiseFullStokes_2_1_apply_macro_3D.cpp - avx/P2ElementwiseFullStokes_2_2_apply_macro_3D.cpp - avx/P2ElementwiseFullStokes_2_2_computeInverseDiagonalOperatorValues_macro_3D.cpp - noarch/P2ElementwiseFullStokesAnnulusMap_0_0_toMatrix_macro_2D.cpp - noarch/P2ElementwiseFullStokesAnnulusMap_0_1_toMatrix_macro_2D.cpp - noarch/P2ElementwiseFullStokesAnnulusMap_1_0_toMatrix_macro_2D.cpp - noarch/P2ElementwiseFullStokesAnnulusMap_1_1_toMatrix_macro_2D.cpp - noarch/P2ElementwiseFullStokesIcosahedralShellMap_0_0_toMatrix_macro_3D.cpp - noarch/P2ElementwiseFullStokesIcosahedralShellMap_0_1_toMatrix_macro_3D.cpp - noarch/P2ElementwiseFullStokesIcosahedralShellMap_0_2_toMatrix_macro_3D.cpp - noarch/P2ElementwiseFullStokesIcosahedralShellMap_1_0_toMatrix_macro_3D.cpp - noarch/P2ElementwiseFullStokesIcosahedralShellMap_1_1_toMatrix_macro_3D.cpp - noarch/P2ElementwiseFullStokesIcosahedralShellMap_1_2_toMatrix_macro_3D.cpp - noarch/P2ElementwiseFullStokesIcosahedralShellMap_2_0_toMatrix_macro_3D.cpp - noarch/P2ElementwiseFullStokesIcosahedralShellMap_2_1_toMatrix_macro_3D.cpp - noarch/P2ElementwiseFullStokesIcosahedralShellMap_2_2_toMatrix_macro_3D.cpp - noarch/P2ElementwiseFullStokes_0_0_toMatrix_macro_2D.cpp - noarch/P2ElementwiseFullStokes_0_0_toMatrix_macro_3D.cpp - noarch/P2ElementwiseFullStokes_0_1_toMatrix_macro_2D.cpp - noarch/P2ElementwiseFullStokes_0_1_toMatrix_macro_3D.cpp - noarch/P2ElementwiseFullStokes_0_2_toMatrix_macro_3D.cpp - noarch/P2ElementwiseFullStokes_1_0_toMatrix_macro_2D.cpp - noarch/P2ElementwiseFullStokes_1_0_toMatrix_macro_3D.cpp - noarch/P2ElementwiseFullStokes_1_1_toMatrix_macro_2D.cpp - noarch/P2ElementwiseFullStokes_1_1_toMatrix_macro_3D.cpp - noarch/P2ElementwiseFullStokes_1_2_toMatrix_macro_3D.cpp - noarch/P2ElementwiseFullStokes_2_0_toMatrix_macro_3D.cpp - noarch/P2ElementwiseFullStokes_2_1_toMatrix_macro_3D.cpp - noarch/P2ElementwiseFullStokes_2_2_toMatrix_macro_3D.cpp + avx/P2ElementwiseFullStokesAnnulusMap_0_0_apply_P2ElementwiseFullStokesAnnulusMap_0_0_macro_2D.cpp + avx/P2ElementwiseFullStokesAnnulusMap_0_0_computeInverseDiagonalOperatorValues_P2ElementwiseFullStokesAnnulusMap_0_0_macro_2D.cpp + avx/P2ElementwiseFullStokesAnnulusMap_0_1_apply_P2ElementwiseFullStokesAnnulusMap_0_1_macro_2D.cpp + avx/P2ElementwiseFullStokesAnnulusMap_1_0_apply_P2ElementwiseFullStokesAnnulusMap_1_0_macro_2D.cpp + avx/P2ElementwiseFullStokesAnnulusMap_1_1_apply_P2ElementwiseFullStokesAnnulusMap_1_1_macro_2D.cpp + avx/P2ElementwiseFullStokesAnnulusMap_1_1_computeInverseDiagonalOperatorValues_P2ElementwiseFullStokesAnnulusMap_1_1_macro_2D.cpp + avx/P2ElementwiseFullStokesIcosahedralShellMap_0_0_apply_P2ElementwiseFullStokesIcosahedralShellMap_0_0_macro_3D.cpp + avx/P2ElementwiseFullStokesIcosahedralShellMap_0_0_computeInverseDiagonalOperatorValues_P2ElementwiseFullStokesIcosahedralShellMap_0_0_macro_3D.cpp + avx/P2ElementwiseFullStokesIcosahedralShellMap_0_1_apply_P2ElementwiseFullStokesIcosahedralShellMap_0_1_macro_3D.cpp + avx/P2ElementwiseFullStokesIcosahedralShellMap_0_2_apply_P2ElementwiseFullStokesIcosahedralShellMap_0_2_macro_3D.cpp + avx/P2ElementwiseFullStokesIcosahedralShellMap_1_0_apply_P2ElementwiseFullStokesIcosahedralShellMap_1_0_macro_3D.cpp + avx/P2ElementwiseFullStokesIcosahedralShellMap_1_1_apply_P2ElementwiseFullStokesIcosahedralShellMap_1_1_macro_3D.cpp + avx/P2ElementwiseFullStokesIcosahedralShellMap_1_1_computeInverseDiagonalOperatorValues_P2ElementwiseFullStokesIcosahedralShellMap_1_1_macro_3D.cpp + avx/P2ElementwiseFullStokesIcosahedralShellMap_1_2_apply_P2ElementwiseFullStokesIcosahedralShellMap_1_2_macro_3D.cpp + avx/P2ElementwiseFullStokesIcosahedralShellMap_2_0_apply_P2ElementwiseFullStokesIcosahedralShellMap_2_0_macro_3D.cpp + avx/P2ElementwiseFullStokesIcosahedralShellMap_2_1_apply_P2ElementwiseFullStokesIcosahedralShellMap_2_1_macro_3D.cpp + avx/P2ElementwiseFullStokesIcosahedralShellMap_2_2_apply_P2ElementwiseFullStokesIcosahedralShellMap_2_2_macro_3D.cpp + avx/P2ElementwiseFullStokesIcosahedralShellMap_2_2_computeInverseDiagonalOperatorValues_P2ElementwiseFullStokesIcosahedralShellMap_2_2_macro_3D.cpp + avx/P2ElementwiseFullStokes_0_0_apply_P2ElementwiseFullStokes_0_0_macro_2D.cpp + avx/P2ElementwiseFullStokes_0_0_apply_P2ElementwiseFullStokes_0_0_macro_3D.cpp + avx/P2ElementwiseFullStokes_0_0_computeInverseDiagonalOperatorValues_P2ElementwiseFullStokes_0_0_macro_2D.cpp + avx/P2ElementwiseFullStokes_0_0_computeInverseDiagonalOperatorValues_P2ElementwiseFullStokes_0_0_macro_3D.cpp + avx/P2ElementwiseFullStokes_0_1_apply_P2ElementwiseFullStokes_0_1_macro_2D.cpp + avx/P2ElementwiseFullStokes_0_1_apply_P2ElementwiseFullStokes_0_1_macro_3D.cpp + avx/P2ElementwiseFullStokes_0_2_apply_P2ElementwiseFullStokes_0_2_macro_3D.cpp + avx/P2ElementwiseFullStokes_1_0_apply_P2ElementwiseFullStokes_1_0_macro_2D.cpp + avx/P2ElementwiseFullStokes_1_0_apply_P2ElementwiseFullStokes_1_0_macro_3D.cpp + avx/P2ElementwiseFullStokes_1_1_apply_P2ElementwiseFullStokes_1_1_macro_2D.cpp + avx/P2ElementwiseFullStokes_1_1_apply_P2ElementwiseFullStokes_1_1_macro_3D.cpp + avx/P2ElementwiseFullStokes_1_1_computeInverseDiagonalOperatorValues_P2ElementwiseFullStokes_1_1_macro_2D.cpp + avx/P2ElementwiseFullStokes_1_1_computeInverseDiagonalOperatorValues_P2ElementwiseFullStokes_1_1_macro_3D.cpp + avx/P2ElementwiseFullStokes_1_2_apply_P2ElementwiseFullStokes_1_2_macro_3D.cpp + avx/P2ElementwiseFullStokes_2_0_apply_P2ElementwiseFullStokes_2_0_macro_3D.cpp + avx/P2ElementwiseFullStokes_2_1_apply_P2ElementwiseFullStokes_2_1_macro_3D.cpp + avx/P2ElementwiseFullStokes_2_2_apply_P2ElementwiseFullStokes_2_2_macro_3D.cpp + avx/P2ElementwiseFullStokes_2_2_computeInverseDiagonalOperatorValues_P2ElementwiseFullStokes_2_2_macro_3D.cpp + noarch/P2ElementwiseFullStokesAnnulusMap_0_0_toMatrix_P2ElementwiseFullStokesAnnulusMap_0_0_macro_2D.cpp + noarch/P2ElementwiseFullStokesAnnulusMap_0_1_toMatrix_P2ElementwiseFullStokesAnnulusMap_0_1_macro_2D.cpp + noarch/P2ElementwiseFullStokesAnnulusMap_1_0_toMatrix_P2ElementwiseFullStokesAnnulusMap_1_0_macro_2D.cpp + noarch/P2ElementwiseFullStokesAnnulusMap_1_1_toMatrix_P2ElementwiseFullStokesAnnulusMap_1_1_macro_2D.cpp + noarch/P2ElementwiseFullStokesIcosahedralShellMap_0_0_toMatrix_P2ElementwiseFullStokesIcosahedralShellMap_0_0_macro_3D.cpp + noarch/P2ElementwiseFullStokesIcosahedralShellMap_0_1_toMatrix_P2ElementwiseFullStokesIcosahedralShellMap_0_1_macro_3D.cpp + noarch/P2ElementwiseFullStokesIcosahedralShellMap_0_2_toMatrix_P2ElementwiseFullStokesIcosahedralShellMap_0_2_macro_3D.cpp + noarch/P2ElementwiseFullStokesIcosahedralShellMap_1_0_toMatrix_P2ElementwiseFullStokesIcosahedralShellMap_1_0_macro_3D.cpp + noarch/P2ElementwiseFullStokesIcosahedralShellMap_1_1_toMatrix_P2ElementwiseFullStokesIcosahedralShellMap_1_1_macro_3D.cpp + noarch/P2ElementwiseFullStokesIcosahedralShellMap_1_2_toMatrix_P2ElementwiseFullStokesIcosahedralShellMap_1_2_macro_3D.cpp + noarch/P2ElementwiseFullStokesIcosahedralShellMap_2_0_toMatrix_P2ElementwiseFullStokesIcosahedralShellMap_2_0_macro_3D.cpp + noarch/P2ElementwiseFullStokesIcosahedralShellMap_2_1_toMatrix_P2ElementwiseFullStokesIcosahedralShellMap_2_1_macro_3D.cpp + noarch/P2ElementwiseFullStokesIcosahedralShellMap_2_2_toMatrix_P2ElementwiseFullStokesIcosahedralShellMap_2_2_macro_3D.cpp + noarch/P2ElementwiseFullStokes_0_0_toMatrix_P2ElementwiseFullStokes_0_0_macro_2D.cpp + noarch/P2ElementwiseFullStokes_0_0_toMatrix_P2ElementwiseFullStokes_0_0_macro_3D.cpp + noarch/P2ElementwiseFullStokes_0_1_toMatrix_P2ElementwiseFullStokes_0_1_macro_2D.cpp + noarch/P2ElementwiseFullStokes_0_1_toMatrix_P2ElementwiseFullStokes_0_1_macro_3D.cpp + noarch/P2ElementwiseFullStokes_0_2_toMatrix_P2ElementwiseFullStokes_0_2_macro_3D.cpp + noarch/P2ElementwiseFullStokes_1_0_toMatrix_P2ElementwiseFullStokes_1_0_macro_2D.cpp + noarch/P2ElementwiseFullStokes_1_0_toMatrix_P2ElementwiseFullStokes_1_0_macro_3D.cpp + noarch/P2ElementwiseFullStokes_1_1_toMatrix_P2ElementwiseFullStokes_1_1_macro_2D.cpp + noarch/P2ElementwiseFullStokes_1_1_toMatrix_P2ElementwiseFullStokes_1_1_macro_3D.cpp + noarch/P2ElementwiseFullStokes_1_2_toMatrix_P2ElementwiseFullStokes_1_2_macro_3D.cpp + noarch/P2ElementwiseFullStokes_2_0_toMatrix_P2ElementwiseFullStokes_2_0_macro_3D.cpp + noarch/P2ElementwiseFullStokes_2_1_toMatrix_P2ElementwiseFullStokes_2_1_macro_3D.cpp + noarch/P2ElementwiseFullStokes_2_2_toMatrix_P2ElementwiseFullStokes_2_2_macro_3D.cpp ) set_source_files_properties( - avx/P2ElementwiseFullStokesAnnulusMap_0_0_apply_macro_2D.cpp - avx/P2ElementwiseFullStokesAnnulusMap_0_0_computeInverseDiagonalOperatorValues_macro_2D.cpp - avx/P2ElementwiseFullStokesAnnulusMap_0_1_apply_macro_2D.cpp - avx/P2ElementwiseFullStokesAnnulusMap_1_0_apply_macro_2D.cpp - avx/P2ElementwiseFullStokesAnnulusMap_1_1_apply_macro_2D.cpp - avx/P2ElementwiseFullStokesAnnulusMap_1_1_computeInverseDiagonalOperatorValues_macro_2D.cpp - avx/P2ElementwiseFullStokesIcosahedralShellMap_0_0_apply_macro_3D.cpp - avx/P2ElementwiseFullStokesIcosahedralShellMap_0_0_computeInverseDiagonalOperatorValues_macro_3D.cpp - avx/P2ElementwiseFullStokesIcosahedralShellMap_0_1_apply_macro_3D.cpp - avx/P2ElementwiseFullStokesIcosahedralShellMap_0_2_apply_macro_3D.cpp - avx/P2ElementwiseFullStokesIcosahedralShellMap_1_0_apply_macro_3D.cpp - avx/P2ElementwiseFullStokesIcosahedralShellMap_1_1_apply_macro_3D.cpp - avx/P2ElementwiseFullStokesIcosahedralShellMap_1_1_computeInverseDiagonalOperatorValues_macro_3D.cpp - avx/P2ElementwiseFullStokesIcosahedralShellMap_1_2_apply_macro_3D.cpp - avx/P2ElementwiseFullStokesIcosahedralShellMap_2_0_apply_macro_3D.cpp - avx/P2ElementwiseFullStokesIcosahedralShellMap_2_1_apply_macro_3D.cpp - avx/P2ElementwiseFullStokesIcosahedralShellMap_2_2_apply_macro_3D.cpp - avx/P2ElementwiseFullStokesIcosahedralShellMap_2_2_computeInverseDiagonalOperatorValues_macro_3D.cpp - avx/P2ElementwiseFullStokes_0_0_apply_macro_2D.cpp - avx/P2ElementwiseFullStokes_0_0_apply_macro_3D.cpp - avx/P2ElementwiseFullStokes_0_0_computeInverseDiagonalOperatorValues_macro_2D.cpp - avx/P2ElementwiseFullStokes_0_0_computeInverseDiagonalOperatorValues_macro_3D.cpp - avx/P2ElementwiseFullStokes_0_1_apply_macro_2D.cpp - avx/P2ElementwiseFullStokes_0_1_apply_macro_3D.cpp - avx/P2ElementwiseFullStokes_0_2_apply_macro_3D.cpp - avx/P2ElementwiseFullStokes_1_0_apply_macro_2D.cpp - avx/P2ElementwiseFullStokes_1_0_apply_macro_3D.cpp - avx/P2ElementwiseFullStokes_1_1_apply_macro_2D.cpp - avx/P2ElementwiseFullStokes_1_1_apply_macro_3D.cpp - avx/P2ElementwiseFullStokes_1_1_computeInverseDiagonalOperatorValues_macro_2D.cpp - avx/P2ElementwiseFullStokes_1_1_computeInverseDiagonalOperatorValues_macro_3D.cpp - avx/P2ElementwiseFullStokes_1_2_apply_macro_3D.cpp - avx/P2ElementwiseFullStokes_2_0_apply_macro_3D.cpp - avx/P2ElementwiseFullStokes_2_1_apply_macro_3D.cpp - avx/P2ElementwiseFullStokes_2_2_apply_macro_3D.cpp - avx/P2ElementwiseFullStokes_2_2_computeInverseDiagonalOperatorValues_macro_3D.cpp + avx/P2ElementwiseFullStokesAnnulusMap_0_0_apply_P2ElementwiseFullStokesAnnulusMap_0_0_macro_2D.cpp + avx/P2ElementwiseFullStokesAnnulusMap_0_0_computeInverseDiagonalOperatorValues_P2ElementwiseFullStokesAnnulusMap_0_0_macro_2D.cpp + avx/P2ElementwiseFullStokesAnnulusMap_0_1_apply_P2ElementwiseFullStokesAnnulusMap_0_1_macro_2D.cpp + avx/P2ElementwiseFullStokesAnnulusMap_1_0_apply_P2ElementwiseFullStokesAnnulusMap_1_0_macro_2D.cpp + avx/P2ElementwiseFullStokesAnnulusMap_1_1_apply_P2ElementwiseFullStokesAnnulusMap_1_1_macro_2D.cpp + avx/P2ElementwiseFullStokesAnnulusMap_1_1_computeInverseDiagonalOperatorValues_P2ElementwiseFullStokesAnnulusMap_1_1_macro_2D.cpp + avx/P2ElementwiseFullStokesIcosahedralShellMap_0_0_apply_P2ElementwiseFullStokesIcosahedralShellMap_0_0_macro_3D.cpp + avx/P2ElementwiseFullStokesIcosahedralShellMap_0_0_computeInverseDiagonalOperatorValues_P2ElementwiseFullStokesIcosahedralShellMap_0_0_macro_3D.cpp + avx/P2ElementwiseFullStokesIcosahedralShellMap_0_1_apply_P2ElementwiseFullStokesIcosahedralShellMap_0_1_macro_3D.cpp + avx/P2ElementwiseFullStokesIcosahedralShellMap_0_2_apply_P2ElementwiseFullStokesIcosahedralShellMap_0_2_macro_3D.cpp + avx/P2ElementwiseFullStokesIcosahedralShellMap_1_0_apply_P2ElementwiseFullStokesIcosahedralShellMap_1_0_macro_3D.cpp + avx/P2ElementwiseFullStokesIcosahedralShellMap_1_1_apply_P2ElementwiseFullStokesIcosahedralShellMap_1_1_macro_3D.cpp + avx/P2ElementwiseFullStokesIcosahedralShellMap_1_1_computeInverseDiagonalOperatorValues_P2ElementwiseFullStokesIcosahedralShellMap_1_1_macro_3D.cpp + avx/P2ElementwiseFullStokesIcosahedralShellMap_1_2_apply_P2ElementwiseFullStokesIcosahedralShellMap_1_2_macro_3D.cpp + avx/P2ElementwiseFullStokesIcosahedralShellMap_2_0_apply_P2ElementwiseFullStokesIcosahedralShellMap_2_0_macro_3D.cpp + avx/P2ElementwiseFullStokesIcosahedralShellMap_2_1_apply_P2ElementwiseFullStokesIcosahedralShellMap_2_1_macro_3D.cpp + avx/P2ElementwiseFullStokesIcosahedralShellMap_2_2_apply_P2ElementwiseFullStokesIcosahedralShellMap_2_2_macro_3D.cpp + avx/P2ElementwiseFullStokesIcosahedralShellMap_2_2_computeInverseDiagonalOperatorValues_P2ElementwiseFullStokesIcosahedralShellMap_2_2_macro_3D.cpp + avx/P2ElementwiseFullStokes_0_0_apply_P2ElementwiseFullStokes_0_0_macro_2D.cpp + avx/P2ElementwiseFullStokes_0_0_apply_P2ElementwiseFullStokes_0_0_macro_3D.cpp + avx/P2ElementwiseFullStokes_0_0_computeInverseDiagonalOperatorValues_P2ElementwiseFullStokes_0_0_macro_2D.cpp + avx/P2ElementwiseFullStokes_0_0_computeInverseDiagonalOperatorValues_P2ElementwiseFullStokes_0_0_macro_3D.cpp + avx/P2ElementwiseFullStokes_0_1_apply_P2ElementwiseFullStokes_0_1_macro_2D.cpp + avx/P2ElementwiseFullStokes_0_1_apply_P2ElementwiseFullStokes_0_1_macro_3D.cpp + avx/P2ElementwiseFullStokes_0_2_apply_P2ElementwiseFullStokes_0_2_macro_3D.cpp + avx/P2ElementwiseFullStokes_1_0_apply_P2ElementwiseFullStokes_1_0_macro_2D.cpp + avx/P2ElementwiseFullStokes_1_0_apply_P2ElementwiseFullStokes_1_0_macro_3D.cpp + avx/P2ElementwiseFullStokes_1_1_apply_P2ElementwiseFullStokes_1_1_macro_2D.cpp + avx/P2ElementwiseFullStokes_1_1_apply_P2ElementwiseFullStokes_1_1_macro_3D.cpp + avx/P2ElementwiseFullStokes_1_1_computeInverseDiagonalOperatorValues_P2ElementwiseFullStokes_1_1_macro_2D.cpp + avx/P2ElementwiseFullStokes_1_1_computeInverseDiagonalOperatorValues_P2ElementwiseFullStokes_1_1_macro_3D.cpp + avx/P2ElementwiseFullStokes_1_2_apply_P2ElementwiseFullStokes_1_2_macro_3D.cpp + avx/P2ElementwiseFullStokes_2_0_apply_P2ElementwiseFullStokes_2_0_macro_3D.cpp + avx/P2ElementwiseFullStokes_2_1_apply_P2ElementwiseFullStokes_2_1_macro_3D.cpp + avx/P2ElementwiseFullStokes_2_2_apply_P2ElementwiseFullStokes_2_2_macro_3D.cpp + avx/P2ElementwiseFullStokes_2_2_computeInverseDiagonalOperatorValues_P2ElementwiseFullStokes_2_2_macro_3D.cpp PROPERTIES COMPILE_OPTIONS ${HYTEG_COMPILER_NATIVE_FLAGS} ) @@ -161,68 +161,68 @@ else() target_sources(opgen-full_stokes PRIVATE - noarch/P2ElementwiseFullStokesAnnulusMap_0_0_apply_macro_2D.cpp - noarch/P2ElementwiseFullStokesAnnulusMap_0_0_computeInverseDiagonalOperatorValues_macro_2D.cpp - noarch/P2ElementwiseFullStokesAnnulusMap_0_0_toMatrix_macro_2D.cpp - noarch/P2ElementwiseFullStokesAnnulusMap_0_1_apply_macro_2D.cpp - noarch/P2ElementwiseFullStokesAnnulusMap_0_1_toMatrix_macro_2D.cpp - noarch/P2ElementwiseFullStokesAnnulusMap_1_0_apply_macro_2D.cpp - noarch/P2ElementwiseFullStokesAnnulusMap_1_0_toMatrix_macro_2D.cpp - noarch/P2ElementwiseFullStokesAnnulusMap_1_1_apply_macro_2D.cpp - noarch/P2ElementwiseFullStokesAnnulusMap_1_1_computeInverseDiagonalOperatorValues_macro_2D.cpp - noarch/P2ElementwiseFullStokesAnnulusMap_1_1_toMatrix_macro_2D.cpp - noarch/P2ElementwiseFullStokesIcosahedralShellMap_0_0_apply_macro_3D.cpp - noarch/P2ElementwiseFullStokesIcosahedralShellMap_0_0_computeInverseDiagonalOperatorValues_macro_3D.cpp - noarch/P2ElementwiseFullStokesIcosahedralShellMap_0_0_toMatrix_macro_3D.cpp - noarch/P2ElementwiseFullStokesIcosahedralShellMap_0_1_apply_macro_3D.cpp - noarch/P2ElementwiseFullStokesIcosahedralShellMap_0_1_toMatrix_macro_3D.cpp - noarch/P2ElementwiseFullStokesIcosahedralShellMap_0_2_apply_macro_3D.cpp - noarch/P2ElementwiseFullStokesIcosahedralShellMap_0_2_toMatrix_macro_3D.cpp - noarch/P2ElementwiseFullStokesIcosahedralShellMap_1_0_apply_macro_3D.cpp - noarch/P2ElementwiseFullStokesIcosahedralShellMap_1_0_toMatrix_macro_3D.cpp - noarch/P2ElementwiseFullStokesIcosahedralShellMap_1_1_apply_macro_3D.cpp - noarch/P2ElementwiseFullStokesIcosahedralShellMap_1_1_computeInverseDiagonalOperatorValues_macro_3D.cpp - noarch/P2ElementwiseFullStokesIcosahedralShellMap_1_1_toMatrix_macro_3D.cpp - noarch/P2ElementwiseFullStokesIcosahedralShellMap_1_2_apply_macro_3D.cpp - noarch/P2ElementwiseFullStokesIcosahedralShellMap_1_2_toMatrix_macro_3D.cpp - noarch/P2ElementwiseFullStokesIcosahedralShellMap_2_0_apply_macro_3D.cpp - noarch/P2ElementwiseFullStokesIcosahedralShellMap_2_0_toMatrix_macro_3D.cpp - noarch/P2ElementwiseFullStokesIcosahedralShellMap_2_1_apply_macro_3D.cpp - noarch/P2ElementwiseFullStokesIcosahedralShellMap_2_1_toMatrix_macro_3D.cpp - noarch/P2ElementwiseFullStokesIcosahedralShellMap_2_2_apply_macro_3D.cpp - noarch/P2ElementwiseFullStokesIcosahedralShellMap_2_2_computeInverseDiagonalOperatorValues_macro_3D.cpp - noarch/P2ElementwiseFullStokesIcosahedralShellMap_2_2_toMatrix_macro_3D.cpp - noarch/P2ElementwiseFullStokes_0_0_apply_macro_2D.cpp - noarch/P2ElementwiseFullStokes_0_0_apply_macro_3D.cpp - noarch/P2ElementwiseFullStokes_0_0_computeInverseDiagonalOperatorValues_macro_2D.cpp - noarch/P2ElementwiseFullStokes_0_0_computeInverseDiagonalOperatorValues_macro_3D.cpp - noarch/P2ElementwiseFullStokes_0_0_toMatrix_macro_2D.cpp - noarch/P2ElementwiseFullStokes_0_0_toMatrix_macro_3D.cpp - noarch/P2ElementwiseFullStokes_0_1_apply_macro_2D.cpp - noarch/P2ElementwiseFullStokes_0_1_apply_macro_3D.cpp - noarch/P2ElementwiseFullStokes_0_1_toMatrix_macro_2D.cpp - noarch/P2ElementwiseFullStokes_0_1_toMatrix_macro_3D.cpp - noarch/P2ElementwiseFullStokes_0_2_apply_macro_3D.cpp - noarch/P2ElementwiseFullStokes_0_2_toMatrix_macro_3D.cpp - noarch/P2ElementwiseFullStokes_1_0_apply_macro_2D.cpp - noarch/P2ElementwiseFullStokes_1_0_apply_macro_3D.cpp - noarch/P2ElementwiseFullStokes_1_0_toMatrix_macro_2D.cpp - noarch/P2ElementwiseFullStokes_1_0_toMatrix_macro_3D.cpp - noarch/P2ElementwiseFullStokes_1_1_apply_macro_2D.cpp - noarch/P2ElementwiseFullStokes_1_1_apply_macro_3D.cpp - noarch/P2ElementwiseFullStokes_1_1_computeInverseDiagonalOperatorValues_macro_2D.cpp - noarch/P2ElementwiseFullStokes_1_1_computeInverseDiagonalOperatorValues_macro_3D.cpp - noarch/P2ElementwiseFullStokes_1_1_toMatrix_macro_2D.cpp - noarch/P2ElementwiseFullStokes_1_1_toMatrix_macro_3D.cpp - noarch/P2ElementwiseFullStokes_1_2_apply_macro_3D.cpp - noarch/P2ElementwiseFullStokes_1_2_toMatrix_macro_3D.cpp - noarch/P2ElementwiseFullStokes_2_0_apply_macro_3D.cpp - noarch/P2ElementwiseFullStokes_2_0_toMatrix_macro_3D.cpp - noarch/P2ElementwiseFullStokes_2_1_apply_macro_3D.cpp - noarch/P2ElementwiseFullStokes_2_1_toMatrix_macro_3D.cpp - noarch/P2ElementwiseFullStokes_2_2_apply_macro_3D.cpp - noarch/P2ElementwiseFullStokes_2_2_computeInverseDiagonalOperatorValues_macro_3D.cpp - noarch/P2ElementwiseFullStokes_2_2_toMatrix_macro_3D.cpp + noarch/P2ElementwiseFullStokesAnnulusMap_0_0_apply_P2ElementwiseFullStokesAnnulusMap_0_0_macro_2D.cpp + noarch/P2ElementwiseFullStokesAnnulusMap_0_0_computeInverseDiagonalOperatorValues_P2ElementwiseFullStokesAnnulusMap_0_0_macro_2D.cpp + noarch/P2ElementwiseFullStokesAnnulusMap_0_0_toMatrix_P2ElementwiseFullStokesAnnulusMap_0_0_macro_2D.cpp + noarch/P2ElementwiseFullStokesAnnulusMap_0_1_apply_P2ElementwiseFullStokesAnnulusMap_0_1_macro_2D.cpp + noarch/P2ElementwiseFullStokesAnnulusMap_0_1_toMatrix_P2ElementwiseFullStokesAnnulusMap_0_1_macro_2D.cpp + noarch/P2ElementwiseFullStokesAnnulusMap_1_0_apply_P2ElementwiseFullStokesAnnulusMap_1_0_macro_2D.cpp + noarch/P2ElementwiseFullStokesAnnulusMap_1_0_toMatrix_P2ElementwiseFullStokesAnnulusMap_1_0_macro_2D.cpp + noarch/P2ElementwiseFullStokesAnnulusMap_1_1_apply_P2ElementwiseFullStokesAnnulusMap_1_1_macro_2D.cpp + noarch/P2ElementwiseFullStokesAnnulusMap_1_1_computeInverseDiagonalOperatorValues_P2ElementwiseFullStokesAnnulusMap_1_1_macro_2D.cpp + noarch/P2ElementwiseFullStokesAnnulusMap_1_1_toMatrix_P2ElementwiseFullStokesAnnulusMap_1_1_macro_2D.cpp + noarch/P2ElementwiseFullStokesIcosahedralShellMap_0_0_apply_P2ElementwiseFullStokesIcosahedralShellMap_0_0_macro_3D.cpp + noarch/P2ElementwiseFullStokesIcosahedralShellMap_0_0_computeInverseDiagonalOperatorValues_P2ElementwiseFullStokesIcosahedralShellMap_0_0_macro_3D.cpp + noarch/P2ElementwiseFullStokesIcosahedralShellMap_0_0_toMatrix_P2ElementwiseFullStokesIcosahedralShellMap_0_0_macro_3D.cpp + noarch/P2ElementwiseFullStokesIcosahedralShellMap_0_1_apply_P2ElementwiseFullStokesIcosahedralShellMap_0_1_macro_3D.cpp + noarch/P2ElementwiseFullStokesIcosahedralShellMap_0_1_toMatrix_P2ElementwiseFullStokesIcosahedralShellMap_0_1_macro_3D.cpp + noarch/P2ElementwiseFullStokesIcosahedralShellMap_0_2_apply_P2ElementwiseFullStokesIcosahedralShellMap_0_2_macro_3D.cpp + noarch/P2ElementwiseFullStokesIcosahedralShellMap_0_2_toMatrix_P2ElementwiseFullStokesIcosahedralShellMap_0_2_macro_3D.cpp + noarch/P2ElementwiseFullStokesIcosahedralShellMap_1_0_apply_P2ElementwiseFullStokesIcosahedralShellMap_1_0_macro_3D.cpp + noarch/P2ElementwiseFullStokesIcosahedralShellMap_1_0_toMatrix_P2ElementwiseFullStokesIcosahedralShellMap_1_0_macro_3D.cpp + noarch/P2ElementwiseFullStokesIcosahedralShellMap_1_1_apply_P2ElementwiseFullStokesIcosahedralShellMap_1_1_macro_3D.cpp + noarch/P2ElementwiseFullStokesIcosahedralShellMap_1_1_computeInverseDiagonalOperatorValues_P2ElementwiseFullStokesIcosahedralShellMap_1_1_macro_3D.cpp + noarch/P2ElementwiseFullStokesIcosahedralShellMap_1_1_toMatrix_P2ElementwiseFullStokesIcosahedralShellMap_1_1_macro_3D.cpp + noarch/P2ElementwiseFullStokesIcosahedralShellMap_1_2_apply_P2ElementwiseFullStokesIcosahedralShellMap_1_2_macro_3D.cpp + noarch/P2ElementwiseFullStokesIcosahedralShellMap_1_2_toMatrix_P2ElementwiseFullStokesIcosahedralShellMap_1_2_macro_3D.cpp + noarch/P2ElementwiseFullStokesIcosahedralShellMap_2_0_apply_P2ElementwiseFullStokesIcosahedralShellMap_2_0_macro_3D.cpp + noarch/P2ElementwiseFullStokesIcosahedralShellMap_2_0_toMatrix_P2ElementwiseFullStokesIcosahedralShellMap_2_0_macro_3D.cpp + noarch/P2ElementwiseFullStokesIcosahedralShellMap_2_1_apply_P2ElementwiseFullStokesIcosahedralShellMap_2_1_macro_3D.cpp + noarch/P2ElementwiseFullStokesIcosahedralShellMap_2_1_toMatrix_P2ElementwiseFullStokesIcosahedralShellMap_2_1_macro_3D.cpp + noarch/P2ElementwiseFullStokesIcosahedralShellMap_2_2_apply_P2ElementwiseFullStokesIcosahedralShellMap_2_2_macro_3D.cpp + noarch/P2ElementwiseFullStokesIcosahedralShellMap_2_2_computeInverseDiagonalOperatorValues_P2ElementwiseFullStokesIcosahedralShellMap_2_2_macro_3D.cpp + noarch/P2ElementwiseFullStokesIcosahedralShellMap_2_2_toMatrix_P2ElementwiseFullStokesIcosahedralShellMap_2_2_macro_3D.cpp + noarch/P2ElementwiseFullStokes_0_0_apply_P2ElementwiseFullStokes_0_0_macro_2D.cpp + noarch/P2ElementwiseFullStokes_0_0_apply_P2ElementwiseFullStokes_0_0_macro_3D.cpp + noarch/P2ElementwiseFullStokes_0_0_computeInverseDiagonalOperatorValues_P2ElementwiseFullStokes_0_0_macro_2D.cpp + noarch/P2ElementwiseFullStokes_0_0_computeInverseDiagonalOperatorValues_P2ElementwiseFullStokes_0_0_macro_3D.cpp + noarch/P2ElementwiseFullStokes_0_0_toMatrix_P2ElementwiseFullStokes_0_0_macro_2D.cpp + noarch/P2ElementwiseFullStokes_0_0_toMatrix_P2ElementwiseFullStokes_0_0_macro_3D.cpp + noarch/P2ElementwiseFullStokes_0_1_apply_P2ElementwiseFullStokes_0_1_macro_2D.cpp + noarch/P2ElementwiseFullStokes_0_1_apply_P2ElementwiseFullStokes_0_1_macro_3D.cpp + noarch/P2ElementwiseFullStokes_0_1_toMatrix_P2ElementwiseFullStokes_0_1_macro_2D.cpp + noarch/P2ElementwiseFullStokes_0_1_toMatrix_P2ElementwiseFullStokes_0_1_macro_3D.cpp + noarch/P2ElementwiseFullStokes_0_2_apply_P2ElementwiseFullStokes_0_2_macro_3D.cpp + noarch/P2ElementwiseFullStokes_0_2_toMatrix_P2ElementwiseFullStokes_0_2_macro_3D.cpp + noarch/P2ElementwiseFullStokes_1_0_apply_P2ElementwiseFullStokes_1_0_macro_2D.cpp + noarch/P2ElementwiseFullStokes_1_0_apply_P2ElementwiseFullStokes_1_0_macro_3D.cpp + noarch/P2ElementwiseFullStokes_1_0_toMatrix_P2ElementwiseFullStokes_1_0_macro_2D.cpp + noarch/P2ElementwiseFullStokes_1_0_toMatrix_P2ElementwiseFullStokes_1_0_macro_3D.cpp + noarch/P2ElementwiseFullStokes_1_1_apply_P2ElementwiseFullStokes_1_1_macro_2D.cpp + noarch/P2ElementwiseFullStokes_1_1_apply_P2ElementwiseFullStokes_1_1_macro_3D.cpp + noarch/P2ElementwiseFullStokes_1_1_computeInverseDiagonalOperatorValues_P2ElementwiseFullStokes_1_1_macro_2D.cpp + noarch/P2ElementwiseFullStokes_1_1_computeInverseDiagonalOperatorValues_P2ElementwiseFullStokes_1_1_macro_3D.cpp + noarch/P2ElementwiseFullStokes_1_1_toMatrix_P2ElementwiseFullStokes_1_1_macro_2D.cpp + noarch/P2ElementwiseFullStokes_1_1_toMatrix_P2ElementwiseFullStokes_1_1_macro_3D.cpp + noarch/P2ElementwiseFullStokes_1_2_apply_P2ElementwiseFullStokes_1_2_macro_3D.cpp + noarch/P2ElementwiseFullStokes_1_2_toMatrix_P2ElementwiseFullStokes_1_2_macro_3D.cpp + noarch/P2ElementwiseFullStokes_2_0_apply_P2ElementwiseFullStokes_2_0_macro_3D.cpp + noarch/P2ElementwiseFullStokes_2_0_toMatrix_P2ElementwiseFullStokes_2_0_macro_3D.cpp + noarch/P2ElementwiseFullStokes_2_1_apply_P2ElementwiseFullStokes_2_1_macro_3D.cpp + noarch/P2ElementwiseFullStokes_2_1_toMatrix_P2ElementwiseFullStokes_2_1_macro_3D.cpp + noarch/P2ElementwiseFullStokes_2_2_apply_P2ElementwiseFullStokes_2_2_macro_3D.cpp + noarch/P2ElementwiseFullStokes_2_2_computeInverseDiagonalOperatorValues_P2ElementwiseFullStokes_2_2_macro_3D.cpp + noarch/P2ElementwiseFullStokes_2_2_toMatrix_P2ElementwiseFullStokes_2_2_macro_3D.cpp ) endif() diff --git a/operators/full_stokes/P2ElementwiseFullStokesAnnulusMap_0_0.cpp b/operators/full_stokes/P2ElementwiseFullStokesAnnulusMap_0_0.cpp index 72dfac44106fd3624cefa2b84675d78ff55a12f9..58031f2ccee06309f5bb83d7036f5304e91de4fc 100644 --- a/operators/full_stokes/P2ElementwiseFullStokesAnnulusMap_0_0.cpp +++ b/operators/full_stokes/P2ElementwiseFullStokesAnnulusMap_0_0.cpp @@ -149,7 +149,7 @@ void P2ElementwiseFullStokesAnnulusMap_0_0::apply( const P2Function< real_t >& s this->timingTree_->start( "kernel" ); - apply_macro_2D( + apply_P2ElementwiseFullStokesAnnulusMap_0_0_macro_2D( _data_dstEdge, _data_dstVertex, @@ -173,6 +173,7 @@ void P2ElementwiseFullStokesAnnulusMap_0_0::apply( const P2Function< real_t >& s refVertex_1, thrVertex_0, thrVertex_1 ); + this->timingTree_->stop( "kernel" ); } @@ -256,7 +257,7 @@ void P2ElementwiseFullStokesAnnulusMap_0_0::toMatrix( const std::shared_ptr< Spa this->timingTree_->start( "kernel" ); - toMatrix_macro_2D( + toMatrix_P2ElementwiseFullStokesAnnulusMap_0_0_macro_2D( _data_dstEdge, _data_dstVertex, @@ -281,6 +282,7 @@ void P2ElementwiseFullStokesAnnulusMap_0_0::toMatrix( const std::shared_ptr< Spa refVertex_1, thrVertex_0, thrVertex_1 ); + this->timingTree_->stop( "kernel" ); } } @@ -349,7 +351,7 @@ void P2ElementwiseFullStokesAnnulusMap_0_0::computeInverseDiagonalOperatorValues this->timingTree_->start( "kernel" ); - computeInverseDiagonalOperatorValues_macro_2D( + computeInverseDiagonalOperatorValues_P2ElementwiseFullStokesAnnulusMap_0_0_macro_2D( _data_invDiag_Edge, _data_invDiag_Vertex, @@ -371,6 +373,7 @@ void P2ElementwiseFullStokesAnnulusMap_0_0::computeInverseDiagonalOperatorValues refVertex_1, thrVertex_0, thrVertex_1 ); + this->timingTree_->stop( "kernel" ); } diff --git a/operators/full_stokes/P2ElementwiseFullStokesAnnulusMap_0_0.hpp b/operators/full_stokes/P2ElementwiseFullStokesAnnulusMap_0_0.hpp index 579eb8f0df3c16f8f5b496961861e03ef1589245..2117e368e2918e827115dd787ac29a0cc5432d08 100644 --- a/operators/full_stokes/P2ElementwiseFullStokesAnnulusMap_0_0.hpp +++ b/operators/full_stokes/P2ElementwiseFullStokesAnnulusMap_0_0.hpp @@ -29,6 +29,7 @@ #include "core/DataTypes.h" #include "hyteg/LikwidWrapper.hpp" +#include "hyteg/boundary/BoundaryConditions.hpp" #include "hyteg/communication/Syncing.hpp" #include "hyteg/edgedofspace/EdgeDoFMacroCell.hpp" #include "hyteg/geometry/AnnulusMap.hpp" @@ -37,6 +38,7 @@ #include "hyteg/primitivestorage/PrimitiveStorage.hpp" #include "hyteg/solvers/Smoothables.hpp" #include "hyteg/sparseassembly/SparseMatrixProxy.hpp" +#include "hyteg/types/types.hpp" #define FUNC_PREFIX @@ -99,89 +101,104 @@ class P2ElementwiseFullStokesAnnulusMap_0_0 : public Operator< P2Function< real_ protected: private: - /// Kernel type: apply + /// Integral: P2ElementwiseFullStokesAnnulusMap_0_0 + /// - volume element: triangle, dim: 2, vertices: 3, spacedim: 2 + /// - kernel type: apply + /// - loop strategy: SAWTOOTH /// - quadrature rule: Dunavant 3 | points: 4, degree: 3 + /// - blending map: AnnulusMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 888 1304 28 20 4 0 0 1 - void apply_macro_2D( real_t* RESTRICT _data_dstEdge, - real_t* RESTRICT _data_dstVertex, - real_t* RESTRICT _data_muEdge, - real_t* RESTRICT _data_muVertex, - real_t* RESTRICT _data_srcEdge, - real_t* RESTRICT _data_srcVertex, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float, - real_t radRayVertex, - real_t radRefVertex, - real_t rayVertex_0, - real_t rayVertex_1, - real_t refVertex_0, - real_t refVertex_1, - real_t thrVertex_0, - real_t thrVertex_1 ) const; - /// Kernel type: toMatrix + void apply_P2ElementwiseFullStokesAnnulusMap_0_0_macro_2D( real_t* RESTRICT _data_dstEdge, + real_t* RESTRICT _data_dstVertex, + real_t* RESTRICT _data_muEdge, + real_t* RESTRICT _data_muVertex, + real_t* RESTRICT _data_srcEdge, + real_t* RESTRICT _data_srcVertex, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float, + real_t radRayVertex, + real_t radRefVertex, + real_t rayVertex_0, + real_t rayVertex_1, + real_t refVertex_0, + real_t refVertex_1, + real_t thrVertex_0, + real_t thrVertex_1 ) const; + + /// Integral: P2ElementwiseFullStokesAnnulusMap_0_0 + /// - volume element: triangle, dim: 2, vertices: 3, spacedim: 2 + /// - kernel type: toMatrix + /// - loop strategy: SAWTOOTH /// - quadrature rule: Dunavant 3 | points: 4, degree: 3 + /// - blending map: AnnulusMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 852 1268 28 20 4 0 0 4 - void toMatrix_macro_2D( idx_t* RESTRICT _data_dstEdge, - idx_t* RESTRICT _data_dstVertex, - real_t* RESTRICT _data_muEdge, - real_t* RESTRICT _data_muVertex, - idx_t* RESTRICT _data_srcEdge, - idx_t* RESTRICT _data_srcVertex, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - std::shared_ptr< SparseMatrixProxy > mat, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float, - real_t radRayVertex, - real_t radRefVertex, - real_t rayVertex_0, - real_t rayVertex_1, - real_t refVertex_0, - real_t refVertex_1, - real_t thrVertex_0, - real_t thrVertex_1 ) const; - /// Kernel type: computeInverseDiagonalOperatorValues + void toMatrix_P2ElementwiseFullStokesAnnulusMap_0_0_macro_2D( idx_t* RESTRICT _data_dstEdge, + idx_t* RESTRICT _data_dstVertex, + real_t* RESTRICT _data_muEdge, + real_t* RESTRICT _data_muVertex, + idx_t* RESTRICT _data_srcEdge, + idx_t* RESTRICT _data_srcVertex, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + std::shared_ptr< SparseMatrixProxy > mat, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float, + real_t radRayVertex, + real_t radRefVertex, + real_t rayVertex_0, + real_t rayVertex_1, + real_t refVertex_0, + real_t refVertex_1, + real_t thrVertex_0, + real_t thrVertex_1 ) const; + + /// Integral: P2ElementwiseFullStokesAnnulusMap_0_0 + /// - volume element: triangle, dim: 2, vertices: 3, spacedim: 2 + /// - kernel type: computeInverseDiagonalOperatorValues + /// - loop strategy: SAWTOOTH /// - quadrature rule: Dunavant 3 | points: 4, degree: 3 + /// - blending map: AnnulusMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 618 968 28 20 4 0 0 1 - void computeInverseDiagonalOperatorValues_macro_2D( real_t* RESTRICT _data_invDiag_Edge, - real_t* RESTRICT _data_invDiag_Vertex, - real_t* RESTRICT _data_muEdge, - real_t* RESTRICT _data_muVertex, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float, - real_t radRayVertex, - real_t radRefVertex, - real_t rayVertex_0, - real_t rayVertex_1, - real_t refVertex_0, - real_t refVertex_1, - real_t thrVertex_0, - real_t thrVertex_1 ) const; + void computeInverseDiagonalOperatorValues_P2ElementwiseFullStokesAnnulusMap_0_0_macro_2D( + real_t* RESTRICT _data_invDiag_Edge, + real_t* RESTRICT _data_invDiag_Vertex, + real_t* RESTRICT _data_muEdge, + real_t* RESTRICT _data_muVertex, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float, + real_t radRayVertex, + real_t radRefVertex, + real_t rayVertex_0, + real_t rayVertex_1, + real_t refVertex_0, + real_t refVertex_1, + real_t thrVertex_0, + real_t thrVertex_1 ) const; std::shared_ptr< P2Function< real_t > > invDiag_; P2Function< real_t > mu; diff --git a/operators/full_stokes/P2ElementwiseFullStokesAnnulusMap_0_1.cpp b/operators/full_stokes/P2ElementwiseFullStokesAnnulusMap_0_1.cpp index efbb3d8b97f718b180e085ac682c436386e6d97b..7127fc85e254baf6c74195cf38bb346b6335b3bb 100644 --- a/operators/full_stokes/P2ElementwiseFullStokesAnnulusMap_0_1.cpp +++ b/operators/full_stokes/P2ElementwiseFullStokesAnnulusMap_0_1.cpp @@ -149,7 +149,7 @@ void P2ElementwiseFullStokesAnnulusMap_0_1::apply( const P2Function< real_t >& s this->timingTree_->start( "kernel" ); - apply_macro_2D( + apply_P2ElementwiseFullStokesAnnulusMap_0_1_macro_2D( _data_dstEdge, _data_dstVertex, @@ -173,6 +173,7 @@ void P2ElementwiseFullStokesAnnulusMap_0_1::apply( const P2Function< real_t >& s refVertex_1, thrVertex_0, thrVertex_1 ); + this->timingTree_->stop( "kernel" ); } @@ -256,7 +257,7 @@ void P2ElementwiseFullStokesAnnulusMap_0_1::toMatrix( const std::shared_ptr< Spa this->timingTree_->start( "kernel" ); - toMatrix_macro_2D( + toMatrix_P2ElementwiseFullStokesAnnulusMap_0_1_macro_2D( _data_dstEdge, _data_dstVertex, @@ -281,6 +282,7 @@ void P2ElementwiseFullStokesAnnulusMap_0_1::toMatrix( const std::shared_ptr< Spa refVertex_1, thrVertex_0, thrVertex_1 ); + this->timingTree_->stop( "kernel" ); } } diff --git a/operators/full_stokes/P2ElementwiseFullStokesAnnulusMap_0_1.hpp b/operators/full_stokes/P2ElementwiseFullStokesAnnulusMap_0_1.hpp index 5c457c8d3d9667fba919a53103919376cae733af..6a380c6e5ba783ca98ab7d873e4919610861a790 100644 --- a/operators/full_stokes/P2ElementwiseFullStokesAnnulusMap_0_1.hpp +++ b/operators/full_stokes/P2ElementwiseFullStokesAnnulusMap_0_1.hpp @@ -29,6 +29,7 @@ #include "core/DataTypes.h" #include "hyteg/LikwidWrapper.hpp" +#include "hyteg/boundary/BoundaryConditions.hpp" #include "hyteg/communication/Syncing.hpp" #include "hyteg/edgedofspace/EdgeDoFMacroCell.hpp" #include "hyteg/geometry/AnnulusMap.hpp" @@ -36,6 +37,7 @@ #include "hyteg/p2functionspace/P2Function.hpp" #include "hyteg/primitivestorage/PrimitiveStorage.hpp" #include "hyteg/sparseassembly/SparseMatrixProxy.hpp" +#include "hyteg/types/types.hpp" #define FUNC_PREFIX @@ -93,63 +95,72 @@ class P2ElementwiseFullStokesAnnulusMap_0_1 : public Operator< P2Function< real_ protected: private: - /// Kernel type: apply + /// Integral: P2ElementwiseFullStokesAnnulusMap_0_1 + /// - volume element: triangle, dim: 2, vertices: 3, spacedim: 2 + /// - kernel type: apply + /// - loop strategy: SAWTOOTH /// - quadrature rule: Dunavant 3 | points: 4, degree: 3 + /// - blending map: AnnulusMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 1128 1452 28 20 4 0 0 1 - void apply_macro_2D( real_t* RESTRICT _data_dstEdge, - real_t* RESTRICT _data_dstVertex, - real_t* RESTRICT _data_muEdge, - real_t* RESTRICT _data_muVertex, - real_t* RESTRICT _data_srcEdge, - real_t* RESTRICT _data_srcVertex, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float, - real_t radRayVertex, - real_t radRefVertex, - real_t rayVertex_0, - real_t rayVertex_1, - real_t refVertex_0, - real_t refVertex_1, - real_t thrVertex_0, - real_t thrVertex_1 ) const; - /// Kernel type: toMatrix + void apply_P2ElementwiseFullStokesAnnulusMap_0_1_macro_2D( real_t* RESTRICT _data_dstEdge, + real_t* RESTRICT _data_dstVertex, + real_t* RESTRICT _data_muEdge, + real_t* RESTRICT _data_muVertex, + real_t* RESTRICT _data_srcEdge, + real_t* RESTRICT _data_srcVertex, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float, + real_t radRayVertex, + real_t radRefVertex, + real_t rayVertex_0, + real_t rayVertex_1, + real_t refVertex_0, + real_t refVertex_1, + real_t thrVertex_0, + real_t thrVertex_1 ) const; + + /// Integral: P2ElementwiseFullStokesAnnulusMap_0_1 + /// - volume element: triangle, dim: 2, vertices: 3, spacedim: 2 + /// - kernel type: toMatrix + /// - loop strategy: SAWTOOTH /// - quadrature rule: Dunavant 3 | points: 4, degree: 3 + /// - blending map: AnnulusMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 1092 1416 28 20 4 0 0 4 - void toMatrix_macro_2D( idx_t* RESTRICT _data_dstEdge, - idx_t* RESTRICT _data_dstVertex, - real_t* RESTRICT _data_muEdge, - real_t* RESTRICT _data_muVertex, - idx_t* RESTRICT _data_srcEdge, - idx_t* RESTRICT _data_srcVertex, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - std::shared_ptr< SparseMatrixProxy > mat, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float, - real_t radRayVertex, - real_t radRefVertex, - real_t rayVertex_0, - real_t rayVertex_1, - real_t refVertex_0, - real_t refVertex_1, - real_t thrVertex_0, - real_t thrVertex_1 ) const; + void toMatrix_P2ElementwiseFullStokesAnnulusMap_0_1_macro_2D( idx_t* RESTRICT _data_dstEdge, + idx_t* RESTRICT _data_dstVertex, + real_t* RESTRICT _data_muEdge, + real_t* RESTRICT _data_muVertex, + idx_t* RESTRICT _data_srcEdge, + idx_t* RESTRICT _data_srcVertex, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + std::shared_ptr< SparseMatrixProxy > mat, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float, + real_t radRayVertex, + real_t radRefVertex, + real_t rayVertex_0, + real_t rayVertex_1, + real_t refVertex_0, + real_t refVertex_1, + real_t thrVertex_0, + real_t thrVertex_1 ) const; P2Function< real_t > mu; }; diff --git a/operators/full_stokes/P2ElementwiseFullStokesAnnulusMap_1_0.cpp b/operators/full_stokes/P2ElementwiseFullStokesAnnulusMap_1_0.cpp index e701b20c4e7168141fb4451d45e917f10956909f..b618a343f23705c5528c0d3ede0cb94889b7ebe9 100644 --- a/operators/full_stokes/P2ElementwiseFullStokesAnnulusMap_1_0.cpp +++ b/operators/full_stokes/P2ElementwiseFullStokesAnnulusMap_1_0.cpp @@ -149,7 +149,7 @@ void P2ElementwiseFullStokesAnnulusMap_1_0::apply( const P2Function< real_t >& s this->timingTree_->start( "kernel" ); - apply_macro_2D( + apply_P2ElementwiseFullStokesAnnulusMap_1_0_macro_2D( _data_dstEdge, _data_dstVertex, @@ -173,6 +173,7 @@ void P2ElementwiseFullStokesAnnulusMap_1_0::apply( const P2Function< real_t >& s refVertex_1, thrVertex_0, thrVertex_1 ); + this->timingTree_->stop( "kernel" ); } @@ -256,7 +257,7 @@ void P2ElementwiseFullStokesAnnulusMap_1_0::toMatrix( const std::shared_ptr< Spa this->timingTree_->start( "kernel" ); - toMatrix_macro_2D( + toMatrix_P2ElementwiseFullStokesAnnulusMap_1_0_macro_2D( _data_dstEdge, _data_dstVertex, @@ -281,6 +282,7 @@ void P2ElementwiseFullStokesAnnulusMap_1_0::toMatrix( const std::shared_ptr< Spa refVertex_1, thrVertex_0, thrVertex_1 ); + this->timingTree_->stop( "kernel" ); } } diff --git a/operators/full_stokes/P2ElementwiseFullStokesAnnulusMap_1_0.hpp b/operators/full_stokes/P2ElementwiseFullStokesAnnulusMap_1_0.hpp index 299e3fe3ca0cbf1dfb42d6676f7f089060b89598..b6a4705808d5dc29abedaacbaf756eb0c28cd2b3 100644 --- a/operators/full_stokes/P2ElementwiseFullStokesAnnulusMap_1_0.hpp +++ b/operators/full_stokes/P2ElementwiseFullStokesAnnulusMap_1_0.hpp @@ -29,6 +29,7 @@ #include "core/DataTypes.h" #include "hyteg/LikwidWrapper.hpp" +#include "hyteg/boundary/BoundaryConditions.hpp" #include "hyteg/communication/Syncing.hpp" #include "hyteg/edgedofspace/EdgeDoFMacroCell.hpp" #include "hyteg/geometry/AnnulusMap.hpp" @@ -36,6 +37,7 @@ #include "hyteg/p2functionspace/P2Function.hpp" #include "hyteg/primitivestorage/PrimitiveStorage.hpp" #include "hyteg/sparseassembly/SparseMatrixProxy.hpp" +#include "hyteg/types/types.hpp" #define FUNC_PREFIX @@ -93,63 +95,72 @@ class P2ElementwiseFullStokesAnnulusMap_1_0 : public Operator< P2Function< real_ protected: private: - /// Kernel type: apply + /// Integral: P2ElementwiseFullStokesAnnulusMap_1_0 + /// - volume element: triangle, dim: 2, vertices: 3, spacedim: 2 + /// - kernel type: apply + /// - loop strategy: SAWTOOTH /// - quadrature rule: Dunavant 3 | points: 4, degree: 3 + /// - blending map: AnnulusMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 1128 1456 28 20 4 0 0 1 - void apply_macro_2D( real_t* RESTRICT _data_dstEdge, - real_t* RESTRICT _data_dstVertex, - real_t* RESTRICT _data_muEdge, - real_t* RESTRICT _data_muVertex, - real_t* RESTRICT _data_srcEdge, - real_t* RESTRICT _data_srcVertex, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float, - real_t radRayVertex, - real_t radRefVertex, - real_t rayVertex_0, - real_t rayVertex_1, - real_t refVertex_0, - real_t refVertex_1, - real_t thrVertex_0, - real_t thrVertex_1 ) const; - /// Kernel type: toMatrix + void apply_P2ElementwiseFullStokesAnnulusMap_1_0_macro_2D( real_t* RESTRICT _data_dstEdge, + real_t* RESTRICT _data_dstVertex, + real_t* RESTRICT _data_muEdge, + real_t* RESTRICT _data_muVertex, + real_t* RESTRICT _data_srcEdge, + real_t* RESTRICT _data_srcVertex, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float, + real_t radRayVertex, + real_t radRefVertex, + real_t rayVertex_0, + real_t rayVertex_1, + real_t refVertex_0, + real_t refVertex_1, + real_t thrVertex_0, + real_t thrVertex_1 ) const; + + /// Integral: P2ElementwiseFullStokesAnnulusMap_1_0 + /// - volume element: triangle, dim: 2, vertices: 3, spacedim: 2 + /// - kernel type: toMatrix + /// - loop strategy: SAWTOOTH /// - quadrature rule: Dunavant 3 | points: 4, degree: 3 + /// - blending map: AnnulusMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 1092 1420 28 20 4 0 0 4 - void toMatrix_macro_2D( idx_t* RESTRICT _data_dstEdge, - idx_t* RESTRICT _data_dstVertex, - real_t* RESTRICT _data_muEdge, - real_t* RESTRICT _data_muVertex, - idx_t* RESTRICT _data_srcEdge, - idx_t* RESTRICT _data_srcVertex, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - std::shared_ptr< SparseMatrixProxy > mat, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float, - real_t radRayVertex, - real_t radRefVertex, - real_t rayVertex_0, - real_t rayVertex_1, - real_t refVertex_0, - real_t refVertex_1, - real_t thrVertex_0, - real_t thrVertex_1 ) const; + void toMatrix_P2ElementwiseFullStokesAnnulusMap_1_0_macro_2D( idx_t* RESTRICT _data_dstEdge, + idx_t* RESTRICT _data_dstVertex, + real_t* RESTRICT _data_muEdge, + real_t* RESTRICT _data_muVertex, + idx_t* RESTRICT _data_srcEdge, + idx_t* RESTRICT _data_srcVertex, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + std::shared_ptr< SparseMatrixProxy > mat, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float, + real_t radRayVertex, + real_t radRefVertex, + real_t rayVertex_0, + real_t rayVertex_1, + real_t refVertex_0, + real_t refVertex_1, + real_t thrVertex_0, + real_t thrVertex_1 ) const; P2Function< real_t > mu; }; diff --git a/operators/full_stokes/P2ElementwiseFullStokesAnnulusMap_1_1.cpp b/operators/full_stokes/P2ElementwiseFullStokesAnnulusMap_1_1.cpp index 663ef74a2a26961022ca67fd09cf6bc3314bfbf9..99de0a35729a36caccb7e1dcb827c791e75bb710 100644 --- a/operators/full_stokes/P2ElementwiseFullStokesAnnulusMap_1_1.cpp +++ b/operators/full_stokes/P2ElementwiseFullStokesAnnulusMap_1_1.cpp @@ -149,7 +149,7 @@ void P2ElementwiseFullStokesAnnulusMap_1_1::apply( const P2Function< real_t >& s this->timingTree_->start( "kernel" ); - apply_macro_2D( + apply_P2ElementwiseFullStokesAnnulusMap_1_1_macro_2D( _data_dstEdge, _data_dstVertex, @@ -173,6 +173,7 @@ void P2ElementwiseFullStokesAnnulusMap_1_1::apply( const P2Function< real_t >& s refVertex_1, thrVertex_0, thrVertex_1 ); + this->timingTree_->stop( "kernel" ); } @@ -256,7 +257,7 @@ void P2ElementwiseFullStokesAnnulusMap_1_1::toMatrix( const std::shared_ptr< Spa this->timingTree_->start( "kernel" ); - toMatrix_macro_2D( + toMatrix_P2ElementwiseFullStokesAnnulusMap_1_1_macro_2D( _data_dstEdge, _data_dstVertex, @@ -281,6 +282,7 @@ void P2ElementwiseFullStokesAnnulusMap_1_1::toMatrix( const std::shared_ptr< Spa refVertex_1, thrVertex_0, thrVertex_1 ); + this->timingTree_->stop( "kernel" ); } } @@ -349,7 +351,7 @@ void P2ElementwiseFullStokesAnnulusMap_1_1::computeInverseDiagonalOperatorValues this->timingTree_->start( "kernel" ); - computeInverseDiagonalOperatorValues_macro_2D( + computeInverseDiagonalOperatorValues_P2ElementwiseFullStokesAnnulusMap_1_1_macro_2D( _data_invDiag_Edge, _data_invDiag_Vertex, @@ -371,6 +373,7 @@ void P2ElementwiseFullStokesAnnulusMap_1_1::computeInverseDiagonalOperatorValues refVertex_1, thrVertex_0, thrVertex_1 ); + this->timingTree_->stop( "kernel" ); } diff --git a/operators/full_stokes/P2ElementwiseFullStokesAnnulusMap_1_1.hpp b/operators/full_stokes/P2ElementwiseFullStokesAnnulusMap_1_1.hpp index 61485efc8b84f79c0a1b1d400cd89aa93e9352f4..01025b15ea9bc6e4b29950e7431e18adcb54e14e 100644 --- a/operators/full_stokes/P2ElementwiseFullStokesAnnulusMap_1_1.hpp +++ b/operators/full_stokes/P2ElementwiseFullStokesAnnulusMap_1_1.hpp @@ -29,6 +29,7 @@ #include "core/DataTypes.h" #include "hyteg/LikwidWrapper.hpp" +#include "hyteg/boundary/BoundaryConditions.hpp" #include "hyteg/communication/Syncing.hpp" #include "hyteg/edgedofspace/EdgeDoFMacroCell.hpp" #include "hyteg/geometry/AnnulusMap.hpp" @@ -37,6 +38,7 @@ #include "hyteg/primitivestorage/PrimitiveStorage.hpp" #include "hyteg/solvers/Smoothables.hpp" #include "hyteg/sparseassembly/SparseMatrixProxy.hpp" +#include "hyteg/types/types.hpp" #define FUNC_PREFIX @@ -99,89 +101,104 @@ class P2ElementwiseFullStokesAnnulusMap_1_1 : public Operator< P2Function< real_ protected: private: - /// Kernel type: apply + /// Integral: P2ElementwiseFullStokesAnnulusMap_1_1 + /// - volume element: triangle, dim: 2, vertices: 3, spacedim: 2 + /// - kernel type: apply + /// - loop strategy: SAWTOOTH /// - quadrature rule: Dunavant 3 | points: 4, degree: 3 + /// - blending map: AnnulusMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 888 1304 28 20 4 0 0 1 - void apply_macro_2D( real_t* RESTRICT _data_dstEdge, - real_t* RESTRICT _data_dstVertex, - real_t* RESTRICT _data_muEdge, - real_t* RESTRICT _data_muVertex, - real_t* RESTRICT _data_srcEdge, - real_t* RESTRICT _data_srcVertex, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float, - real_t radRayVertex, - real_t radRefVertex, - real_t rayVertex_0, - real_t rayVertex_1, - real_t refVertex_0, - real_t refVertex_1, - real_t thrVertex_0, - real_t thrVertex_1 ) const; - /// Kernel type: toMatrix + void apply_P2ElementwiseFullStokesAnnulusMap_1_1_macro_2D( real_t* RESTRICT _data_dstEdge, + real_t* RESTRICT _data_dstVertex, + real_t* RESTRICT _data_muEdge, + real_t* RESTRICT _data_muVertex, + real_t* RESTRICT _data_srcEdge, + real_t* RESTRICT _data_srcVertex, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float, + real_t radRayVertex, + real_t radRefVertex, + real_t rayVertex_0, + real_t rayVertex_1, + real_t refVertex_0, + real_t refVertex_1, + real_t thrVertex_0, + real_t thrVertex_1 ) const; + + /// Integral: P2ElementwiseFullStokesAnnulusMap_1_1 + /// - volume element: triangle, dim: 2, vertices: 3, spacedim: 2 + /// - kernel type: toMatrix + /// - loop strategy: SAWTOOTH /// - quadrature rule: Dunavant 3 | points: 4, degree: 3 + /// - blending map: AnnulusMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 852 1268 28 20 4 0 0 4 - void toMatrix_macro_2D( idx_t* RESTRICT _data_dstEdge, - idx_t* RESTRICT _data_dstVertex, - real_t* RESTRICT _data_muEdge, - real_t* RESTRICT _data_muVertex, - idx_t* RESTRICT _data_srcEdge, - idx_t* RESTRICT _data_srcVertex, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - std::shared_ptr< SparseMatrixProxy > mat, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float, - real_t radRayVertex, - real_t radRefVertex, - real_t rayVertex_0, - real_t rayVertex_1, - real_t refVertex_0, - real_t refVertex_1, - real_t thrVertex_0, - real_t thrVertex_1 ) const; - /// Kernel type: computeInverseDiagonalOperatorValues + void toMatrix_P2ElementwiseFullStokesAnnulusMap_1_1_macro_2D( idx_t* RESTRICT _data_dstEdge, + idx_t* RESTRICT _data_dstVertex, + real_t* RESTRICT _data_muEdge, + real_t* RESTRICT _data_muVertex, + idx_t* RESTRICT _data_srcEdge, + idx_t* RESTRICT _data_srcVertex, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + std::shared_ptr< SparseMatrixProxy > mat, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float, + real_t radRayVertex, + real_t radRefVertex, + real_t rayVertex_0, + real_t rayVertex_1, + real_t refVertex_0, + real_t refVertex_1, + real_t thrVertex_0, + real_t thrVertex_1 ) const; + + /// Integral: P2ElementwiseFullStokesAnnulusMap_1_1 + /// - volume element: triangle, dim: 2, vertices: 3, spacedim: 2 + /// - kernel type: computeInverseDiagonalOperatorValues + /// - loop strategy: SAWTOOTH /// - quadrature rule: Dunavant 3 | points: 4, degree: 3 + /// - blending map: AnnulusMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 618 968 28 20 4 0 0 1 - void computeInverseDiagonalOperatorValues_macro_2D( real_t* RESTRICT _data_invDiag_Edge, - real_t* RESTRICT _data_invDiag_Vertex, - real_t* RESTRICT _data_muEdge, - real_t* RESTRICT _data_muVertex, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float, - real_t radRayVertex, - real_t radRefVertex, - real_t rayVertex_0, - real_t rayVertex_1, - real_t refVertex_0, - real_t refVertex_1, - real_t thrVertex_0, - real_t thrVertex_1 ) const; + void computeInverseDiagonalOperatorValues_P2ElementwiseFullStokesAnnulusMap_1_1_macro_2D( + real_t* RESTRICT _data_invDiag_Edge, + real_t* RESTRICT _data_invDiag_Vertex, + real_t* RESTRICT _data_muEdge, + real_t* RESTRICT _data_muVertex, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float, + real_t radRayVertex, + real_t radRefVertex, + real_t rayVertex_0, + real_t rayVertex_1, + real_t refVertex_0, + real_t refVertex_1, + real_t thrVertex_0, + real_t thrVertex_1 ) const; std::shared_ptr< P2Function< real_t > > invDiag_; P2Function< real_t > mu; diff --git a/operators/full_stokes/P2ElementwiseFullStokesIcosahedralShellMap_0_0.cpp b/operators/full_stokes/P2ElementwiseFullStokesIcosahedralShellMap_0_0.cpp index 48536c3102a43d5abac4c0f1af6ae3f0986c3d8e..2ced3fd9e8f1b46621e29a3e46fc24ba9d39ec27 100644 --- a/operators/full_stokes/P2ElementwiseFullStokesIcosahedralShellMap_0_0.cpp +++ b/operators/full_stokes/P2ElementwiseFullStokesIcosahedralShellMap_0_0.cpp @@ -154,7 +154,7 @@ void P2ElementwiseFullStokesIcosahedralShellMap_0_0::apply( const P2Function< re this->timingTree_->start( "kernel" ); - apply_macro_3D( + apply_P2ElementwiseFullStokesIcosahedralShellMap_0_0_macro_3D( _data_dstEdge, _data_dstVertex, @@ -190,6 +190,7 @@ void P2ElementwiseFullStokesIcosahedralShellMap_0_0::apply( const P2Function< re thrVertex_0, thrVertex_1, thrVertex_2 ); + this->timingTree_->stop( "kernel" ); } @@ -285,7 +286,7 @@ void P2ElementwiseFullStokesIcosahedralShellMap_0_0::toMatrix( const std::shared this->timingTree_->start( "kernel" ); - toMatrix_macro_3D( + toMatrix_P2ElementwiseFullStokesIcosahedralShellMap_0_0_macro_3D( _data_dstEdge, _data_dstVertex, @@ -322,6 +323,7 @@ void P2ElementwiseFullStokesIcosahedralShellMap_0_0::toMatrix( const std::shared thrVertex_0, thrVertex_1, thrVertex_2 ); + this->timingTree_->stop( "kernel" ); } } @@ -401,7 +403,7 @@ void P2ElementwiseFullStokesIcosahedralShellMap_0_0::computeInverseDiagonalOpera this->timingTree_->start( "kernel" ); - computeInverseDiagonalOperatorValues_macro_3D( + computeInverseDiagonalOperatorValues_P2ElementwiseFullStokesIcosahedralShellMap_0_0_macro_3D( _data_invDiag_Edge, _data_invDiag_Vertex, @@ -435,6 +437,7 @@ void P2ElementwiseFullStokesIcosahedralShellMap_0_0::computeInverseDiagonalOpera thrVertex_0, thrVertex_1, thrVertex_2 ); + this->timingTree_->stop( "kernel" ); } diff --git a/operators/full_stokes/P2ElementwiseFullStokesIcosahedralShellMap_0_0.hpp b/operators/full_stokes/P2ElementwiseFullStokesIcosahedralShellMap_0_0.hpp index df0a75c683223cd8f6d19553fd906af5a7e3d4ae..a32d7e1c2f5092fa8c0ec1e0e538a63fc4f6493b 100644 --- a/operators/full_stokes/P2ElementwiseFullStokesIcosahedralShellMap_0_0.hpp +++ b/operators/full_stokes/P2ElementwiseFullStokesIcosahedralShellMap_0_0.hpp @@ -29,6 +29,7 @@ #include "core/DataTypes.h" #include "hyteg/LikwidWrapper.hpp" +#include "hyteg/boundary/BoundaryConditions.hpp" #include "hyteg/communication/Syncing.hpp" #include "hyteg/edgedofspace/EdgeDoFMacroCell.hpp" #include "hyteg/geometry/IcosahedralShellMap.hpp" @@ -37,6 +38,7 @@ #include "hyteg/primitivestorage/PrimitiveStorage.hpp" #include "hyteg/solvers/Smoothables.hpp" #include "hyteg/sparseassembly/SparseMatrixProxy.hpp" +#include "hyteg/types/types.hpp" #define FUNC_PREFIX @@ -99,125 +101,140 @@ class P2ElementwiseFullStokesIcosahedralShellMap_0_0 : public Operator< P2Functi protected: private: - /// Kernel type: apply + /// Integral: P2ElementwiseFullStokesIcosahedralShellMap_0_0 + /// - volume element: tetrahedron, dim: 3, vertices: 4, spacedim: 3 + /// - kernel type: apply + /// - loop strategy: SAWTOOTH /// - quadrature rule: Hammer-Marlowe-Stroud 3 | points: 5, degree: 3 + /// - blending map: IcosahedralShellMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 3513 5177 66 15 5 0 0 1 - void apply_macro_3D( real_t* RESTRICT _data_dstEdge, - real_t* RESTRICT _data_dstVertex, - real_t* RESTRICT _data_muEdge, - real_t* RESTRICT _data_muVertex, - real_t* RESTRICT _data_srcEdge, - real_t* RESTRICT _data_srcVertex, - real_t forVertex_0, - real_t forVertex_1, - real_t forVertex_2, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_0comp2, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_1comp2, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - real_t macro_vertex_coord_id_2comp2, - real_t macro_vertex_coord_id_3comp0, - real_t macro_vertex_coord_id_3comp1, - real_t macro_vertex_coord_id_3comp2, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float, - real_t radRayVertex, - real_t radRefVertex, - real_t rayVertex_0, - real_t rayVertex_1, - real_t rayVertex_2, - real_t refVertex_0, - real_t refVertex_1, - real_t refVertex_2, - real_t thrVertex_0, - real_t thrVertex_1, - real_t thrVertex_2 ) const; - /// Kernel type: toMatrix + void apply_P2ElementwiseFullStokesIcosahedralShellMap_0_0_macro_3D( real_t* RESTRICT _data_dstEdge, + real_t* RESTRICT _data_dstVertex, + real_t* RESTRICT _data_muEdge, + real_t* RESTRICT _data_muVertex, + real_t* RESTRICT _data_srcEdge, + real_t* RESTRICT _data_srcVertex, + real_t forVertex_0, + real_t forVertex_1, + real_t forVertex_2, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_0comp2, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_1comp2, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + real_t macro_vertex_coord_id_2comp2, + real_t macro_vertex_coord_id_3comp0, + real_t macro_vertex_coord_id_3comp1, + real_t macro_vertex_coord_id_3comp2, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float, + real_t radRayVertex, + real_t radRefVertex, + real_t rayVertex_0, + real_t rayVertex_1, + real_t rayVertex_2, + real_t refVertex_0, + real_t refVertex_1, + real_t refVertex_2, + real_t thrVertex_0, + real_t thrVertex_1, + real_t thrVertex_2 ) const; + + /// Integral: P2ElementwiseFullStokesIcosahedralShellMap_0_0 + /// - volume element: tetrahedron, dim: 3, vertices: 4, spacedim: 3 + /// - kernel type: toMatrix + /// - loop strategy: SAWTOOTH /// - quadrature rule: Hammer-Marlowe-Stroud 3 | points: 5, degree: 3 + /// - blending map: IcosahedralShellMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 3413 5077 66 15 5 0 0 4 - void toMatrix_macro_3D( idx_t* RESTRICT _data_dstEdge, - idx_t* RESTRICT _data_dstVertex, - real_t* RESTRICT _data_muEdge, - real_t* RESTRICT _data_muVertex, - idx_t* RESTRICT _data_srcEdge, - idx_t* RESTRICT _data_srcVertex, - real_t forVertex_0, - real_t forVertex_1, - real_t forVertex_2, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_0comp2, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_1comp2, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - real_t macro_vertex_coord_id_2comp2, - real_t macro_vertex_coord_id_3comp0, - real_t macro_vertex_coord_id_3comp1, - real_t macro_vertex_coord_id_3comp2, - std::shared_ptr< SparseMatrixProxy > mat, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float, - real_t radRayVertex, - real_t radRefVertex, - real_t rayVertex_0, - real_t rayVertex_1, - real_t rayVertex_2, - real_t refVertex_0, - real_t refVertex_1, - real_t refVertex_2, - real_t thrVertex_0, - real_t thrVertex_1, - real_t thrVertex_2 ) const; - /// Kernel type: computeInverseDiagonalOperatorValues + void toMatrix_P2ElementwiseFullStokesIcosahedralShellMap_0_0_macro_3D( idx_t* RESTRICT _data_dstEdge, + idx_t* RESTRICT _data_dstVertex, + real_t* RESTRICT _data_muEdge, + real_t* RESTRICT _data_muVertex, + idx_t* RESTRICT _data_srcEdge, + idx_t* RESTRICT _data_srcVertex, + real_t forVertex_0, + real_t forVertex_1, + real_t forVertex_2, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_0comp2, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_1comp2, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + real_t macro_vertex_coord_id_2comp2, + real_t macro_vertex_coord_id_3comp0, + real_t macro_vertex_coord_id_3comp1, + real_t macro_vertex_coord_id_3comp2, + std::shared_ptr< SparseMatrixProxy > mat, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float, + real_t radRayVertex, + real_t radRefVertex, + real_t rayVertex_0, + real_t rayVertex_1, + real_t rayVertex_2, + real_t refVertex_0, + real_t refVertex_1, + real_t refVertex_2, + real_t thrVertex_0, + real_t thrVertex_1, + real_t thrVertex_2 ) const; + + /// Integral: P2ElementwiseFullStokesIcosahedralShellMap_0_0 + /// - volume element: tetrahedron, dim: 3, vertices: 4, spacedim: 3 + /// - kernel type: computeInverseDiagonalOperatorValues + /// - loop strategy: SAWTOOTH /// - quadrature rule: Hammer-Marlowe-Stroud 3 | points: 5, degree: 3 + /// - blending map: IcosahedralShellMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 2298 3592 66 15 5 0 0 1 - void computeInverseDiagonalOperatorValues_macro_3D( real_t* RESTRICT _data_invDiag_Edge, - real_t* RESTRICT _data_invDiag_Vertex, - real_t* RESTRICT _data_muEdge, - real_t* RESTRICT _data_muVertex, - real_t forVertex_0, - real_t forVertex_1, - real_t forVertex_2, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_0comp2, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_1comp2, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - real_t macro_vertex_coord_id_2comp2, - real_t macro_vertex_coord_id_3comp0, - real_t macro_vertex_coord_id_3comp1, - real_t macro_vertex_coord_id_3comp2, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float, - real_t radRayVertex, - real_t radRefVertex, - real_t rayVertex_0, - real_t rayVertex_1, - real_t rayVertex_2, - real_t refVertex_0, - real_t refVertex_1, - real_t refVertex_2, - real_t thrVertex_0, - real_t thrVertex_1, - real_t thrVertex_2 ) const; + void computeInverseDiagonalOperatorValues_P2ElementwiseFullStokesIcosahedralShellMap_0_0_macro_3D( + real_t* RESTRICT _data_invDiag_Edge, + real_t* RESTRICT _data_invDiag_Vertex, + real_t* RESTRICT _data_muEdge, + real_t* RESTRICT _data_muVertex, + real_t forVertex_0, + real_t forVertex_1, + real_t forVertex_2, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_0comp2, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_1comp2, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + real_t macro_vertex_coord_id_2comp2, + real_t macro_vertex_coord_id_3comp0, + real_t macro_vertex_coord_id_3comp1, + real_t macro_vertex_coord_id_3comp2, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float, + real_t radRayVertex, + real_t radRefVertex, + real_t rayVertex_0, + real_t rayVertex_1, + real_t rayVertex_2, + real_t refVertex_0, + real_t refVertex_1, + real_t refVertex_2, + real_t thrVertex_0, + real_t thrVertex_1, + real_t thrVertex_2 ) const; std::shared_ptr< P2Function< real_t > > invDiag_; P2Function< real_t > mu; diff --git a/operators/full_stokes/P2ElementwiseFullStokesIcosahedralShellMap_0_1.cpp b/operators/full_stokes/P2ElementwiseFullStokesIcosahedralShellMap_0_1.cpp index da66b82107139278feb2088919f50c530bc6964e..3d449445e8deaf3457dc8752e936706852dad658 100644 --- a/operators/full_stokes/P2ElementwiseFullStokesIcosahedralShellMap_0_1.cpp +++ b/operators/full_stokes/P2ElementwiseFullStokesIcosahedralShellMap_0_1.cpp @@ -154,7 +154,7 @@ void P2ElementwiseFullStokesIcosahedralShellMap_0_1::apply( const P2Function< re this->timingTree_->start( "kernel" ); - apply_macro_3D( + apply_P2ElementwiseFullStokesIcosahedralShellMap_0_1_macro_3D( _data_dstEdge, _data_dstVertex, @@ -190,6 +190,7 @@ void P2ElementwiseFullStokesIcosahedralShellMap_0_1::apply( const P2Function< re thrVertex_0, thrVertex_1, thrVertex_2 ); + this->timingTree_->stop( "kernel" ); } @@ -285,7 +286,7 @@ void P2ElementwiseFullStokesIcosahedralShellMap_0_1::toMatrix( const std::shared this->timingTree_->start( "kernel" ); - toMatrix_macro_3D( + toMatrix_P2ElementwiseFullStokesIcosahedralShellMap_0_1_macro_3D( _data_dstEdge, _data_dstVertex, @@ -322,6 +323,7 @@ void P2ElementwiseFullStokesIcosahedralShellMap_0_1::toMatrix( const std::shared thrVertex_0, thrVertex_1, thrVertex_2 ); + this->timingTree_->stop( "kernel" ); } } diff --git a/operators/full_stokes/P2ElementwiseFullStokesIcosahedralShellMap_0_1.hpp b/operators/full_stokes/P2ElementwiseFullStokesIcosahedralShellMap_0_1.hpp index 7b524949cffd34e6b6d9b7e91d98f52e939defec..07133b03863f33dbe65a8d51eb07c9a462c00c02 100644 --- a/operators/full_stokes/P2ElementwiseFullStokesIcosahedralShellMap_0_1.hpp +++ b/operators/full_stokes/P2ElementwiseFullStokesIcosahedralShellMap_0_1.hpp @@ -29,6 +29,7 @@ #include "core/DataTypes.h" #include "hyteg/LikwidWrapper.hpp" +#include "hyteg/boundary/BoundaryConditions.hpp" #include "hyteg/communication/Syncing.hpp" #include "hyteg/edgedofspace/EdgeDoFMacroCell.hpp" #include "hyteg/geometry/IcosahedralShellMap.hpp" @@ -36,6 +37,7 @@ #include "hyteg/p2functionspace/P2Function.hpp" #include "hyteg/primitivestorage/PrimitiveStorage.hpp" #include "hyteg/sparseassembly/SparseMatrixProxy.hpp" +#include "hyteg/types/types.hpp" #define FUNC_PREFIX @@ -93,87 +95,96 @@ class P2ElementwiseFullStokesIcosahedralShellMap_0_1 : public Operator< P2Functi protected: private: - /// Kernel type: apply + /// Integral: P2ElementwiseFullStokesIcosahedralShellMap_0_1 + /// - volume element: tetrahedron, dim: 3, vertices: 4, spacedim: 3 + /// - kernel type: apply + /// - loop strategy: SAWTOOTH /// - quadrature rule: Hammer-Marlowe-Stroud 3 | points: 5, degree: 3 + /// - blending map: IcosahedralShellMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 4283 5397 66 15 5 0 0 1 - void apply_macro_3D( real_t* RESTRICT _data_dstEdge, - real_t* RESTRICT _data_dstVertex, - real_t* RESTRICT _data_muEdge, - real_t* RESTRICT _data_muVertex, - real_t* RESTRICT _data_srcEdge, - real_t* RESTRICT _data_srcVertex, - real_t forVertex_0, - real_t forVertex_1, - real_t forVertex_2, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_0comp2, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_1comp2, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - real_t macro_vertex_coord_id_2comp2, - real_t macro_vertex_coord_id_3comp0, - real_t macro_vertex_coord_id_3comp1, - real_t macro_vertex_coord_id_3comp2, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float, - real_t radRayVertex, - real_t radRefVertex, - real_t rayVertex_0, - real_t rayVertex_1, - real_t rayVertex_2, - real_t refVertex_0, - real_t refVertex_1, - real_t refVertex_2, - real_t thrVertex_0, - real_t thrVertex_1, - real_t thrVertex_2 ) const; - /// Kernel type: toMatrix + void apply_P2ElementwiseFullStokesIcosahedralShellMap_0_1_macro_3D( real_t* RESTRICT _data_dstEdge, + real_t* RESTRICT _data_dstVertex, + real_t* RESTRICT _data_muEdge, + real_t* RESTRICT _data_muVertex, + real_t* RESTRICT _data_srcEdge, + real_t* RESTRICT _data_srcVertex, + real_t forVertex_0, + real_t forVertex_1, + real_t forVertex_2, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_0comp2, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_1comp2, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + real_t macro_vertex_coord_id_2comp2, + real_t macro_vertex_coord_id_3comp0, + real_t macro_vertex_coord_id_3comp1, + real_t macro_vertex_coord_id_3comp2, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float, + real_t radRayVertex, + real_t radRefVertex, + real_t rayVertex_0, + real_t rayVertex_1, + real_t rayVertex_2, + real_t refVertex_0, + real_t refVertex_1, + real_t refVertex_2, + real_t thrVertex_0, + real_t thrVertex_1, + real_t thrVertex_2 ) const; + + /// Integral: P2ElementwiseFullStokesIcosahedralShellMap_0_1 + /// - volume element: tetrahedron, dim: 3, vertices: 4, spacedim: 3 + /// - kernel type: toMatrix + /// - loop strategy: SAWTOOTH /// - quadrature rule: Hammer-Marlowe-Stroud 3 | points: 5, degree: 3 + /// - blending map: IcosahedralShellMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 4183 5297 66 15 5 0 0 4 - void toMatrix_macro_3D( idx_t* RESTRICT _data_dstEdge, - idx_t* RESTRICT _data_dstVertex, - real_t* RESTRICT _data_muEdge, - real_t* RESTRICT _data_muVertex, - idx_t* RESTRICT _data_srcEdge, - idx_t* RESTRICT _data_srcVertex, - real_t forVertex_0, - real_t forVertex_1, - real_t forVertex_2, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_0comp2, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_1comp2, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - real_t macro_vertex_coord_id_2comp2, - real_t macro_vertex_coord_id_3comp0, - real_t macro_vertex_coord_id_3comp1, - real_t macro_vertex_coord_id_3comp2, - std::shared_ptr< SparseMatrixProxy > mat, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float, - real_t radRayVertex, - real_t radRefVertex, - real_t rayVertex_0, - real_t rayVertex_1, - real_t rayVertex_2, - real_t refVertex_0, - real_t refVertex_1, - real_t refVertex_2, - real_t thrVertex_0, - real_t thrVertex_1, - real_t thrVertex_2 ) const; + void toMatrix_P2ElementwiseFullStokesIcosahedralShellMap_0_1_macro_3D( idx_t* RESTRICT _data_dstEdge, + idx_t* RESTRICT _data_dstVertex, + real_t* RESTRICT _data_muEdge, + real_t* RESTRICT _data_muVertex, + idx_t* RESTRICT _data_srcEdge, + idx_t* RESTRICT _data_srcVertex, + real_t forVertex_0, + real_t forVertex_1, + real_t forVertex_2, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_0comp2, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_1comp2, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + real_t macro_vertex_coord_id_2comp2, + real_t macro_vertex_coord_id_3comp0, + real_t macro_vertex_coord_id_3comp1, + real_t macro_vertex_coord_id_3comp2, + std::shared_ptr< SparseMatrixProxy > mat, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float, + real_t radRayVertex, + real_t radRefVertex, + real_t rayVertex_0, + real_t rayVertex_1, + real_t rayVertex_2, + real_t refVertex_0, + real_t refVertex_1, + real_t refVertex_2, + real_t thrVertex_0, + real_t thrVertex_1, + real_t thrVertex_2 ) const; P2Function< real_t > mu; }; diff --git a/operators/full_stokes/P2ElementwiseFullStokesIcosahedralShellMap_0_2.cpp b/operators/full_stokes/P2ElementwiseFullStokesIcosahedralShellMap_0_2.cpp index 3f5efa0ba9414765f675b45d72fff4d9dc20c3e0..ff66e6ea3f2d7b4fa0385dd2fef29c63fed70a6d 100644 --- a/operators/full_stokes/P2ElementwiseFullStokesIcosahedralShellMap_0_2.cpp +++ b/operators/full_stokes/P2ElementwiseFullStokesIcosahedralShellMap_0_2.cpp @@ -154,7 +154,7 @@ void P2ElementwiseFullStokesIcosahedralShellMap_0_2::apply( const P2Function< re this->timingTree_->start( "kernel" ); - apply_macro_3D( + apply_P2ElementwiseFullStokesIcosahedralShellMap_0_2_macro_3D( _data_dstEdge, _data_dstVertex, @@ -190,6 +190,7 @@ void P2ElementwiseFullStokesIcosahedralShellMap_0_2::apply( const P2Function< re thrVertex_0, thrVertex_1, thrVertex_2 ); + this->timingTree_->stop( "kernel" ); } @@ -285,7 +286,7 @@ void P2ElementwiseFullStokesIcosahedralShellMap_0_2::toMatrix( const std::shared this->timingTree_->start( "kernel" ); - toMatrix_macro_3D( + toMatrix_P2ElementwiseFullStokesIcosahedralShellMap_0_2_macro_3D( _data_dstEdge, _data_dstVertex, @@ -322,6 +323,7 @@ void P2ElementwiseFullStokesIcosahedralShellMap_0_2::toMatrix( const std::shared thrVertex_0, thrVertex_1, thrVertex_2 ); + this->timingTree_->stop( "kernel" ); } } diff --git a/operators/full_stokes/P2ElementwiseFullStokesIcosahedralShellMap_0_2.hpp b/operators/full_stokes/P2ElementwiseFullStokesIcosahedralShellMap_0_2.hpp index 3f5c1ae6c0f48c2e133cb5dffc2d3a2aa533e4be..5e2cfe063704f7a5bf3b77812c0d3b1f9972c7a5 100644 --- a/operators/full_stokes/P2ElementwiseFullStokesIcosahedralShellMap_0_2.hpp +++ b/operators/full_stokes/P2ElementwiseFullStokesIcosahedralShellMap_0_2.hpp @@ -29,6 +29,7 @@ #include "core/DataTypes.h" #include "hyteg/LikwidWrapper.hpp" +#include "hyteg/boundary/BoundaryConditions.hpp" #include "hyteg/communication/Syncing.hpp" #include "hyteg/edgedofspace/EdgeDoFMacroCell.hpp" #include "hyteg/geometry/IcosahedralShellMap.hpp" @@ -36,6 +37,7 @@ #include "hyteg/p2functionspace/P2Function.hpp" #include "hyteg/primitivestorage/PrimitiveStorage.hpp" #include "hyteg/sparseassembly/SparseMatrixProxy.hpp" +#include "hyteg/types/types.hpp" #define FUNC_PREFIX @@ -93,87 +95,96 @@ class P2ElementwiseFullStokesIcosahedralShellMap_0_2 : public Operator< P2Functi protected: private: - /// Kernel type: apply + /// Integral: P2ElementwiseFullStokesIcosahedralShellMap_0_2 + /// - volume element: tetrahedron, dim: 3, vertices: 4, spacedim: 3 + /// - kernel type: apply + /// - loop strategy: SAWTOOTH /// - quadrature rule: Hammer-Marlowe-Stroud 3 | points: 5, degree: 3 + /// - blending map: IcosahedralShellMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 4283 5392 66 15 5 0 0 1 - void apply_macro_3D( real_t* RESTRICT _data_dstEdge, - real_t* RESTRICT _data_dstVertex, - real_t* RESTRICT _data_muEdge, - real_t* RESTRICT _data_muVertex, - real_t* RESTRICT _data_srcEdge, - real_t* RESTRICT _data_srcVertex, - real_t forVertex_0, - real_t forVertex_1, - real_t forVertex_2, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_0comp2, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_1comp2, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - real_t macro_vertex_coord_id_2comp2, - real_t macro_vertex_coord_id_3comp0, - real_t macro_vertex_coord_id_3comp1, - real_t macro_vertex_coord_id_3comp2, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float, - real_t radRayVertex, - real_t radRefVertex, - real_t rayVertex_0, - real_t rayVertex_1, - real_t rayVertex_2, - real_t refVertex_0, - real_t refVertex_1, - real_t refVertex_2, - real_t thrVertex_0, - real_t thrVertex_1, - real_t thrVertex_2 ) const; - /// Kernel type: toMatrix + void apply_P2ElementwiseFullStokesIcosahedralShellMap_0_2_macro_3D( real_t* RESTRICT _data_dstEdge, + real_t* RESTRICT _data_dstVertex, + real_t* RESTRICT _data_muEdge, + real_t* RESTRICT _data_muVertex, + real_t* RESTRICT _data_srcEdge, + real_t* RESTRICT _data_srcVertex, + real_t forVertex_0, + real_t forVertex_1, + real_t forVertex_2, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_0comp2, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_1comp2, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + real_t macro_vertex_coord_id_2comp2, + real_t macro_vertex_coord_id_3comp0, + real_t macro_vertex_coord_id_3comp1, + real_t macro_vertex_coord_id_3comp2, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float, + real_t radRayVertex, + real_t radRefVertex, + real_t rayVertex_0, + real_t rayVertex_1, + real_t rayVertex_2, + real_t refVertex_0, + real_t refVertex_1, + real_t refVertex_2, + real_t thrVertex_0, + real_t thrVertex_1, + real_t thrVertex_2 ) const; + + /// Integral: P2ElementwiseFullStokesIcosahedralShellMap_0_2 + /// - volume element: tetrahedron, dim: 3, vertices: 4, spacedim: 3 + /// - kernel type: toMatrix + /// - loop strategy: SAWTOOTH /// - quadrature rule: Hammer-Marlowe-Stroud 3 | points: 5, degree: 3 + /// - blending map: IcosahedralShellMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 4183 5292 66 15 5 0 0 4 - void toMatrix_macro_3D( idx_t* RESTRICT _data_dstEdge, - idx_t* RESTRICT _data_dstVertex, - real_t* RESTRICT _data_muEdge, - real_t* RESTRICT _data_muVertex, - idx_t* RESTRICT _data_srcEdge, - idx_t* RESTRICT _data_srcVertex, - real_t forVertex_0, - real_t forVertex_1, - real_t forVertex_2, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_0comp2, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_1comp2, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - real_t macro_vertex_coord_id_2comp2, - real_t macro_vertex_coord_id_3comp0, - real_t macro_vertex_coord_id_3comp1, - real_t macro_vertex_coord_id_3comp2, - std::shared_ptr< SparseMatrixProxy > mat, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float, - real_t radRayVertex, - real_t radRefVertex, - real_t rayVertex_0, - real_t rayVertex_1, - real_t rayVertex_2, - real_t refVertex_0, - real_t refVertex_1, - real_t refVertex_2, - real_t thrVertex_0, - real_t thrVertex_1, - real_t thrVertex_2 ) const; + void toMatrix_P2ElementwiseFullStokesIcosahedralShellMap_0_2_macro_3D( idx_t* RESTRICT _data_dstEdge, + idx_t* RESTRICT _data_dstVertex, + real_t* RESTRICT _data_muEdge, + real_t* RESTRICT _data_muVertex, + idx_t* RESTRICT _data_srcEdge, + idx_t* RESTRICT _data_srcVertex, + real_t forVertex_0, + real_t forVertex_1, + real_t forVertex_2, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_0comp2, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_1comp2, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + real_t macro_vertex_coord_id_2comp2, + real_t macro_vertex_coord_id_3comp0, + real_t macro_vertex_coord_id_3comp1, + real_t macro_vertex_coord_id_3comp2, + std::shared_ptr< SparseMatrixProxy > mat, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float, + real_t radRayVertex, + real_t radRefVertex, + real_t rayVertex_0, + real_t rayVertex_1, + real_t rayVertex_2, + real_t refVertex_0, + real_t refVertex_1, + real_t refVertex_2, + real_t thrVertex_0, + real_t thrVertex_1, + real_t thrVertex_2 ) const; P2Function< real_t > mu; }; diff --git a/operators/full_stokes/P2ElementwiseFullStokesIcosahedralShellMap_1_0.cpp b/operators/full_stokes/P2ElementwiseFullStokesIcosahedralShellMap_1_0.cpp index 8f1aebffc5be50d9777cd8dc2ee069c15df4e829..2b4f9a3a405c785fe16ce36e4274a1d9ba4dcd95 100644 --- a/operators/full_stokes/P2ElementwiseFullStokesIcosahedralShellMap_1_0.cpp +++ b/operators/full_stokes/P2ElementwiseFullStokesIcosahedralShellMap_1_0.cpp @@ -154,7 +154,7 @@ void P2ElementwiseFullStokesIcosahedralShellMap_1_0::apply( const P2Function< re this->timingTree_->start( "kernel" ); - apply_macro_3D( + apply_P2ElementwiseFullStokesIcosahedralShellMap_1_0_macro_3D( _data_dstEdge, _data_dstVertex, @@ -190,6 +190,7 @@ void P2ElementwiseFullStokesIcosahedralShellMap_1_0::apply( const P2Function< re thrVertex_0, thrVertex_1, thrVertex_2 ); + this->timingTree_->stop( "kernel" ); } @@ -285,7 +286,7 @@ void P2ElementwiseFullStokesIcosahedralShellMap_1_0::toMatrix( const std::shared this->timingTree_->start( "kernel" ); - toMatrix_macro_3D( + toMatrix_P2ElementwiseFullStokesIcosahedralShellMap_1_0_macro_3D( _data_dstEdge, _data_dstVertex, @@ -322,6 +323,7 @@ void P2ElementwiseFullStokesIcosahedralShellMap_1_0::toMatrix( const std::shared thrVertex_0, thrVertex_1, thrVertex_2 ); + this->timingTree_->stop( "kernel" ); } } diff --git a/operators/full_stokes/P2ElementwiseFullStokesIcosahedralShellMap_1_0.hpp b/operators/full_stokes/P2ElementwiseFullStokesIcosahedralShellMap_1_0.hpp index c0c066286f0cb8ec6d47fa6ba9b06f2d7331d6c0..283daa256f9f8c57a94a960c14c0bbebbff58fd6 100644 --- a/operators/full_stokes/P2ElementwiseFullStokesIcosahedralShellMap_1_0.hpp +++ b/operators/full_stokes/P2ElementwiseFullStokesIcosahedralShellMap_1_0.hpp @@ -29,6 +29,7 @@ #include "core/DataTypes.h" #include "hyteg/LikwidWrapper.hpp" +#include "hyteg/boundary/BoundaryConditions.hpp" #include "hyteg/communication/Syncing.hpp" #include "hyteg/edgedofspace/EdgeDoFMacroCell.hpp" #include "hyteg/geometry/IcosahedralShellMap.hpp" @@ -36,6 +37,7 @@ #include "hyteg/p2functionspace/P2Function.hpp" #include "hyteg/primitivestorage/PrimitiveStorage.hpp" #include "hyteg/sparseassembly/SparseMatrixProxy.hpp" +#include "hyteg/types/types.hpp" #define FUNC_PREFIX @@ -93,87 +95,96 @@ class P2ElementwiseFullStokesIcosahedralShellMap_1_0 : public Operator< P2Functi protected: private: - /// Kernel type: apply + /// Integral: P2ElementwiseFullStokesIcosahedralShellMap_1_0 + /// - volume element: tetrahedron, dim: 3, vertices: 4, spacedim: 3 + /// - kernel type: apply + /// - loop strategy: SAWTOOTH /// - quadrature rule: Hammer-Marlowe-Stroud 3 | points: 5, degree: 3 + /// - blending map: IcosahedralShellMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 4283 5387 66 15 5 0 0 1 - void apply_macro_3D( real_t* RESTRICT _data_dstEdge, - real_t* RESTRICT _data_dstVertex, - real_t* RESTRICT _data_muEdge, - real_t* RESTRICT _data_muVertex, - real_t* RESTRICT _data_srcEdge, - real_t* RESTRICT _data_srcVertex, - real_t forVertex_0, - real_t forVertex_1, - real_t forVertex_2, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_0comp2, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_1comp2, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - real_t macro_vertex_coord_id_2comp2, - real_t macro_vertex_coord_id_3comp0, - real_t macro_vertex_coord_id_3comp1, - real_t macro_vertex_coord_id_3comp2, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float, - real_t radRayVertex, - real_t radRefVertex, - real_t rayVertex_0, - real_t rayVertex_1, - real_t rayVertex_2, - real_t refVertex_0, - real_t refVertex_1, - real_t refVertex_2, - real_t thrVertex_0, - real_t thrVertex_1, - real_t thrVertex_2 ) const; - /// Kernel type: toMatrix + void apply_P2ElementwiseFullStokesIcosahedralShellMap_1_0_macro_3D( real_t* RESTRICT _data_dstEdge, + real_t* RESTRICT _data_dstVertex, + real_t* RESTRICT _data_muEdge, + real_t* RESTRICT _data_muVertex, + real_t* RESTRICT _data_srcEdge, + real_t* RESTRICT _data_srcVertex, + real_t forVertex_0, + real_t forVertex_1, + real_t forVertex_2, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_0comp2, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_1comp2, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + real_t macro_vertex_coord_id_2comp2, + real_t macro_vertex_coord_id_3comp0, + real_t macro_vertex_coord_id_3comp1, + real_t macro_vertex_coord_id_3comp2, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float, + real_t radRayVertex, + real_t radRefVertex, + real_t rayVertex_0, + real_t rayVertex_1, + real_t rayVertex_2, + real_t refVertex_0, + real_t refVertex_1, + real_t refVertex_2, + real_t thrVertex_0, + real_t thrVertex_1, + real_t thrVertex_2 ) const; + + /// Integral: P2ElementwiseFullStokesIcosahedralShellMap_1_0 + /// - volume element: tetrahedron, dim: 3, vertices: 4, spacedim: 3 + /// - kernel type: toMatrix + /// - loop strategy: SAWTOOTH /// - quadrature rule: Hammer-Marlowe-Stroud 3 | points: 5, degree: 3 + /// - blending map: IcosahedralShellMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 4183 5287 66 15 5 0 0 4 - void toMatrix_macro_3D( idx_t* RESTRICT _data_dstEdge, - idx_t* RESTRICT _data_dstVertex, - real_t* RESTRICT _data_muEdge, - real_t* RESTRICT _data_muVertex, - idx_t* RESTRICT _data_srcEdge, - idx_t* RESTRICT _data_srcVertex, - real_t forVertex_0, - real_t forVertex_1, - real_t forVertex_2, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_0comp2, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_1comp2, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - real_t macro_vertex_coord_id_2comp2, - real_t macro_vertex_coord_id_3comp0, - real_t macro_vertex_coord_id_3comp1, - real_t macro_vertex_coord_id_3comp2, - std::shared_ptr< SparseMatrixProxy > mat, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float, - real_t radRayVertex, - real_t radRefVertex, - real_t rayVertex_0, - real_t rayVertex_1, - real_t rayVertex_2, - real_t refVertex_0, - real_t refVertex_1, - real_t refVertex_2, - real_t thrVertex_0, - real_t thrVertex_1, - real_t thrVertex_2 ) const; + void toMatrix_P2ElementwiseFullStokesIcosahedralShellMap_1_0_macro_3D( idx_t* RESTRICT _data_dstEdge, + idx_t* RESTRICT _data_dstVertex, + real_t* RESTRICT _data_muEdge, + real_t* RESTRICT _data_muVertex, + idx_t* RESTRICT _data_srcEdge, + idx_t* RESTRICT _data_srcVertex, + real_t forVertex_0, + real_t forVertex_1, + real_t forVertex_2, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_0comp2, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_1comp2, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + real_t macro_vertex_coord_id_2comp2, + real_t macro_vertex_coord_id_3comp0, + real_t macro_vertex_coord_id_3comp1, + real_t macro_vertex_coord_id_3comp2, + std::shared_ptr< SparseMatrixProxy > mat, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float, + real_t radRayVertex, + real_t radRefVertex, + real_t rayVertex_0, + real_t rayVertex_1, + real_t rayVertex_2, + real_t refVertex_0, + real_t refVertex_1, + real_t refVertex_2, + real_t thrVertex_0, + real_t thrVertex_1, + real_t thrVertex_2 ) const; P2Function< real_t > mu; }; diff --git a/operators/full_stokes/P2ElementwiseFullStokesIcosahedralShellMap_1_1.cpp b/operators/full_stokes/P2ElementwiseFullStokesIcosahedralShellMap_1_1.cpp index 65de5632aeb767fa6904dd419c6cc498501b2797..fb682439b4d33adbd18d90cd2ce1b77eb348707d 100644 --- a/operators/full_stokes/P2ElementwiseFullStokesIcosahedralShellMap_1_1.cpp +++ b/operators/full_stokes/P2ElementwiseFullStokesIcosahedralShellMap_1_1.cpp @@ -154,7 +154,7 @@ void P2ElementwiseFullStokesIcosahedralShellMap_1_1::apply( const P2Function< re this->timingTree_->start( "kernel" ); - apply_macro_3D( + apply_P2ElementwiseFullStokesIcosahedralShellMap_1_1_macro_3D( _data_dstEdge, _data_dstVertex, @@ -190,6 +190,7 @@ void P2ElementwiseFullStokesIcosahedralShellMap_1_1::apply( const P2Function< re thrVertex_0, thrVertex_1, thrVertex_2 ); + this->timingTree_->stop( "kernel" ); } @@ -285,7 +286,7 @@ void P2ElementwiseFullStokesIcosahedralShellMap_1_1::toMatrix( const std::shared this->timingTree_->start( "kernel" ); - toMatrix_macro_3D( + toMatrix_P2ElementwiseFullStokesIcosahedralShellMap_1_1_macro_3D( _data_dstEdge, _data_dstVertex, @@ -322,6 +323,7 @@ void P2ElementwiseFullStokesIcosahedralShellMap_1_1::toMatrix( const std::shared thrVertex_0, thrVertex_1, thrVertex_2 ); + this->timingTree_->stop( "kernel" ); } } @@ -401,7 +403,7 @@ void P2ElementwiseFullStokesIcosahedralShellMap_1_1::computeInverseDiagonalOpera this->timingTree_->start( "kernel" ); - computeInverseDiagonalOperatorValues_macro_3D( + computeInverseDiagonalOperatorValues_P2ElementwiseFullStokesIcosahedralShellMap_1_1_macro_3D( _data_invDiag_Edge, _data_invDiag_Vertex, @@ -435,6 +437,7 @@ void P2ElementwiseFullStokesIcosahedralShellMap_1_1::computeInverseDiagonalOpera thrVertex_0, thrVertex_1, thrVertex_2 ); + this->timingTree_->stop( "kernel" ); } diff --git a/operators/full_stokes/P2ElementwiseFullStokesIcosahedralShellMap_1_1.hpp b/operators/full_stokes/P2ElementwiseFullStokesIcosahedralShellMap_1_1.hpp index b07be55019869d121926d7c579c68a9e43973181..69319c208b3cb7789675c4712130481854a1edef 100644 --- a/operators/full_stokes/P2ElementwiseFullStokesIcosahedralShellMap_1_1.hpp +++ b/operators/full_stokes/P2ElementwiseFullStokesIcosahedralShellMap_1_1.hpp @@ -29,6 +29,7 @@ #include "core/DataTypes.h" #include "hyteg/LikwidWrapper.hpp" +#include "hyteg/boundary/BoundaryConditions.hpp" #include "hyteg/communication/Syncing.hpp" #include "hyteg/edgedofspace/EdgeDoFMacroCell.hpp" #include "hyteg/geometry/IcosahedralShellMap.hpp" @@ -37,6 +38,7 @@ #include "hyteg/primitivestorage/PrimitiveStorage.hpp" #include "hyteg/solvers/Smoothables.hpp" #include "hyteg/sparseassembly/SparseMatrixProxy.hpp" +#include "hyteg/types/types.hpp" #define FUNC_PREFIX @@ -99,125 +101,140 @@ class P2ElementwiseFullStokesIcosahedralShellMap_1_1 : public Operator< P2Functi protected: private: - /// Kernel type: apply + /// Integral: P2ElementwiseFullStokesIcosahedralShellMap_1_1 + /// - volume element: tetrahedron, dim: 3, vertices: 4, spacedim: 3 + /// - kernel type: apply + /// - loop strategy: SAWTOOTH /// - quadrature rule: Hammer-Marlowe-Stroud 3 | points: 5, degree: 3 + /// - blending map: IcosahedralShellMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 3513 5187 66 15 5 0 0 1 - void apply_macro_3D( real_t* RESTRICT _data_dstEdge, - real_t* RESTRICT _data_dstVertex, - real_t* RESTRICT _data_muEdge, - real_t* RESTRICT _data_muVertex, - real_t* RESTRICT _data_srcEdge, - real_t* RESTRICT _data_srcVertex, - real_t forVertex_0, - real_t forVertex_1, - real_t forVertex_2, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_0comp2, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_1comp2, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - real_t macro_vertex_coord_id_2comp2, - real_t macro_vertex_coord_id_3comp0, - real_t macro_vertex_coord_id_3comp1, - real_t macro_vertex_coord_id_3comp2, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float, - real_t radRayVertex, - real_t radRefVertex, - real_t rayVertex_0, - real_t rayVertex_1, - real_t rayVertex_2, - real_t refVertex_0, - real_t refVertex_1, - real_t refVertex_2, - real_t thrVertex_0, - real_t thrVertex_1, - real_t thrVertex_2 ) const; - /// Kernel type: toMatrix + void apply_P2ElementwiseFullStokesIcosahedralShellMap_1_1_macro_3D( real_t* RESTRICT _data_dstEdge, + real_t* RESTRICT _data_dstVertex, + real_t* RESTRICT _data_muEdge, + real_t* RESTRICT _data_muVertex, + real_t* RESTRICT _data_srcEdge, + real_t* RESTRICT _data_srcVertex, + real_t forVertex_0, + real_t forVertex_1, + real_t forVertex_2, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_0comp2, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_1comp2, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + real_t macro_vertex_coord_id_2comp2, + real_t macro_vertex_coord_id_3comp0, + real_t macro_vertex_coord_id_3comp1, + real_t macro_vertex_coord_id_3comp2, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float, + real_t radRayVertex, + real_t radRefVertex, + real_t rayVertex_0, + real_t rayVertex_1, + real_t rayVertex_2, + real_t refVertex_0, + real_t refVertex_1, + real_t refVertex_2, + real_t thrVertex_0, + real_t thrVertex_1, + real_t thrVertex_2 ) const; + + /// Integral: P2ElementwiseFullStokesIcosahedralShellMap_1_1 + /// - volume element: tetrahedron, dim: 3, vertices: 4, spacedim: 3 + /// - kernel type: toMatrix + /// - loop strategy: SAWTOOTH /// - quadrature rule: Hammer-Marlowe-Stroud 3 | points: 5, degree: 3 + /// - blending map: IcosahedralShellMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 3413 5087 66 15 5 0 0 4 - void toMatrix_macro_3D( idx_t* RESTRICT _data_dstEdge, - idx_t* RESTRICT _data_dstVertex, - real_t* RESTRICT _data_muEdge, - real_t* RESTRICT _data_muVertex, - idx_t* RESTRICT _data_srcEdge, - idx_t* RESTRICT _data_srcVertex, - real_t forVertex_0, - real_t forVertex_1, - real_t forVertex_2, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_0comp2, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_1comp2, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - real_t macro_vertex_coord_id_2comp2, - real_t macro_vertex_coord_id_3comp0, - real_t macro_vertex_coord_id_3comp1, - real_t macro_vertex_coord_id_3comp2, - std::shared_ptr< SparseMatrixProxy > mat, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float, - real_t radRayVertex, - real_t radRefVertex, - real_t rayVertex_0, - real_t rayVertex_1, - real_t rayVertex_2, - real_t refVertex_0, - real_t refVertex_1, - real_t refVertex_2, - real_t thrVertex_0, - real_t thrVertex_1, - real_t thrVertex_2 ) const; - /// Kernel type: computeInverseDiagonalOperatorValues + void toMatrix_P2ElementwiseFullStokesIcosahedralShellMap_1_1_macro_3D( idx_t* RESTRICT _data_dstEdge, + idx_t* RESTRICT _data_dstVertex, + real_t* RESTRICT _data_muEdge, + real_t* RESTRICT _data_muVertex, + idx_t* RESTRICT _data_srcEdge, + idx_t* RESTRICT _data_srcVertex, + real_t forVertex_0, + real_t forVertex_1, + real_t forVertex_2, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_0comp2, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_1comp2, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + real_t macro_vertex_coord_id_2comp2, + real_t macro_vertex_coord_id_3comp0, + real_t macro_vertex_coord_id_3comp1, + real_t macro_vertex_coord_id_3comp2, + std::shared_ptr< SparseMatrixProxy > mat, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float, + real_t radRayVertex, + real_t radRefVertex, + real_t rayVertex_0, + real_t rayVertex_1, + real_t rayVertex_2, + real_t refVertex_0, + real_t refVertex_1, + real_t refVertex_2, + real_t thrVertex_0, + real_t thrVertex_1, + real_t thrVertex_2 ) const; + + /// Integral: P2ElementwiseFullStokesIcosahedralShellMap_1_1 + /// - volume element: tetrahedron, dim: 3, vertices: 4, spacedim: 3 + /// - kernel type: computeInverseDiagonalOperatorValues + /// - loop strategy: SAWTOOTH /// - quadrature rule: Hammer-Marlowe-Stroud 3 | points: 5, degree: 3 + /// - blending map: IcosahedralShellMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 2298 3602 66 15 5 0 0 1 - void computeInverseDiagonalOperatorValues_macro_3D( real_t* RESTRICT _data_invDiag_Edge, - real_t* RESTRICT _data_invDiag_Vertex, - real_t* RESTRICT _data_muEdge, - real_t* RESTRICT _data_muVertex, - real_t forVertex_0, - real_t forVertex_1, - real_t forVertex_2, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_0comp2, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_1comp2, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - real_t macro_vertex_coord_id_2comp2, - real_t macro_vertex_coord_id_3comp0, - real_t macro_vertex_coord_id_3comp1, - real_t macro_vertex_coord_id_3comp2, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float, - real_t radRayVertex, - real_t radRefVertex, - real_t rayVertex_0, - real_t rayVertex_1, - real_t rayVertex_2, - real_t refVertex_0, - real_t refVertex_1, - real_t refVertex_2, - real_t thrVertex_0, - real_t thrVertex_1, - real_t thrVertex_2 ) const; + void computeInverseDiagonalOperatorValues_P2ElementwiseFullStokesIcosahedralShellMap_1_1_macro_3D( + real_t* RESTRICT _data_invDiag_Edge, + real_t* RESTRICT _data_invDiag_Vertex, + real_t* RESTRICT _data_muEdge, + real_t* RESTRICT _data_muVertex, + real_t forVertex_0, + real_t forVertex_1, + real_t forVertex_2, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_0comp2, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_1comp2, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + real_t macro_vertex_coord_id_2comp2, + real_t macro_vertex_coord_id_3comp0, + real_t macro_vertex_coord_id_3comp1, + real_t macro_vertex_coord_id_3comp2, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float, + real_t radRayVertex, + real_t radRefVertex, + real_t rayVertex_0, + real_t rayVertex_1, + real_t rayVertex_2, + real_t refVertex_0, + real_t refVertex_1, + real_t refVertex_2, + real_t thrVertex_0, + real_t thrVertex_1, + real_t thrVertex_2 ) const; std::shared_ptr< P2Function< real_t > > invDiag_; P2Function< real_t > mu; diff --git a/operators/full_stokes/P2ElementwiseFullStokesIcosahedralShellMap_1_2.cpp b/operators/full_stokes/P2ElementwiseFullStokesIcosahedralShellMap_1_2.cpp index 931979320c7aecb4ead90e27635e7a5802a4aad0..89f9ae24f7464e0788e94ea7bf67e19128b80425 100644 --- a/operators/full_stokes/P2ElementwiseFullStokesIcosahedralShellMap_1_2.cpp +++ b/operators/full_stokes/P2ElementwiseFullStokesIcosahedralShellMap_1_2.cpp @@ -154,7 +154,7 @@ void P2ElementwiseFullStokesIcosahedralShellMap_1_2::apply( const P2Function< re this->timingTree_->start( "kernel" ); - apply_macro_3D( + apply_P2ElementwiseFullStokesIcosahedralShellMap_1_2_macro_3D( _data_dstEdge, _data_dstVertex, @@ -190,6 +190,7 @@ void P2ElementwiseFullStokesIcosahedralShellMap_1_2::apply( const P2Function< re thrVertex_0, thrVertex_1, thrVertex_2 ); + this->timingTree_->stop( "kernel" ); } @@ -285,7 +286,7 @@ void P2ElementwiseFullStokesIcosahedralShellMap_1_2::toMatrix( const std::shared this->timingTree_->start( "kernel" ); - toMatrix_macro_3D( + toMatrix_P2ElementwiseFullStokesIcosahedralShellMap_1_2_macro_3D( _data_dstEdge, _data_dstVertex, @@ -322,6 +323,7 @@ void P2ElementwiseFullStokesIcosahedralShellMap_1_2::toMatrix( const std::shared thrVertex_0, thrVertex_1, thrVertex_2 ); + this->timingTree_->stop( "kernel" ); } } diff --git a/operators/full_stokes/P2ElementwiseFullStokesIcosahedralShellMap_1_2.hpp b/operators/full_stokes/P2ElementwiseFullStokesIcosahedralShellMap_1_2.hpp index 12b40740df6bc8da78f24b546707185687f9945a..3949dc64ed3df097d65ed6675a554c556f25e3de 100644 --- a/operators/full_stokes/P2ElementwiseFullStokesIcosahedralShellMap_1_2.hpp +++ b/operators/full_stokes/P2ElementwiseFullStokesIcosahedralShellMap_1_2.hpp @@ -29,6 +29,7 @@ #include "core/DataTypes.h" #include "hyteg/LikwidWrapper.hpp" +#include "hyteg/boundary/BoundaryConditions.hpp" #include "hyteg/communication/Syncing.hpp" #include "hyteg/edgedofspace/EdgeDoFMacroCell.hpp" #include "hyteg/geometry/IcosahedralShellMap.hpp" @@ -36,6 +37,7 @@ #include "hyteg/p2functionspace/P2Function.hpp" #include "hyteg/primitivestorage/PrimitiveStorage.hpp" #include "hyteg/sparseassembly/SparseMatrixProxy.hpp" +#include "hyteg/types/types.hpp" #define FUNC_PREFIX @@ -93,87 +95,96 @@ class P2ElementwiseFullStokesIcosahedralShellMap_1_2 : public Operator< P2Functi protected: private: - /// Kernel type: apply + /// Integral: P2ElementwiseFullStokesIcosahedralShellMap_1_2 + /// - volume element: tetrahedron, dim: 3, vertices: 4, spacedim: 3 + /// - kernel type: apply + /// - loop strategy: SAWTOOTH /// - quadrature rule: Hammer-Marlowe-Stroud 3 | points: 5, degree: 3 + /// - blending map: IcosahedralShellMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 4283 5392 66 15 5 0 0 1 - void apply_macro_3D( real_t* RESTRICT _data_dstEdge, - real_t* RESTRICT _data_dstVertex, - real_t* RESTRICT _data_muEdge, - real_t* RESTRICT _data_muVertex, - real_t* RESTRICT _data_srcEdge, - real_t* RESTRICT _data_srcVertex, - real_t forVertex_0, - real_t forVertex_1, - real_t forVertex_2, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_0comp2, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_1comp2, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - real_t macro_vertex_coord_id_2comp2, - real_t macro_vertex_coord_id_3comp0, - real_t macro_vertex_coord_id_3comp1, - real_t macro_vertex_coord_id_3comp2, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float, - real_t radRayVertex, - real_t radRefVertex, - real_t rayVertex_0, - real_t rayVertex_1, - real_t rayVertex_2, - real_t refVertex_0, - real_t refVertex_1, - real_t refVertex_2, - real_t thrVertex_0, - real_t thrVertex_1, - real_t thrVertex_2 ) const; - /// Kernel type: toMatrix + void apply_P2ElementwiseFullStokesIcosahedralShellMap_1_2_macro_3D( real_t* RESTRICT _data_dstEdge, + real_t* RESTRICT _data_dstVertex, + real_t* RESTRICT _data_muEdge, + real_t* RESTRICT _data_muVertex, + real_t* RESTRICT _data_srcEdge, + real_t* RESTRICT _data_srcVertex, + real_t forVertex_0, + real_t forVertex_1, + real_t forVertex_2, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_0comp2, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_1comp2, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + real_t macro_vertex_coord_id_2comp2, + real_t macro_vertex_coord_id_3comp0, + real_t macro_vertex_coord_id_3comp1, + real_t macro_vertex_coord_id_3comp2, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float, + real_t radRayVertex, + real_t radRefVertex, + real_t rayVertex_0, + real_t rayVertex_1, + real_t rayVertex_2, + real_t refVertex_0, + real_t refVertex_1, + real_t refVertex_2, + real_t thrVertex_0, + real_t thrVertex_1, + real_t thrVertex_2 ) const; + + /// Integral: P2ElementwiseFullStokesIcosahedralShellMap_1_2 + /// - volume element: tetrahedron, dim: 3, vertices: 4, spacedim: 3 + /// - kernel type: toMatrix + /// - loop strategy: SAWTOOTH /// - quadrature rule: Hammer-Marlowe-Stroud 3 | points: 5, degree: 3 + /// - blending map: IcosahedralShellMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 4183 5292 66 15 5 0 0 4 - void toMatrix_macro_3D( idx_t* RESTRICT _data_dstEdge, - idx_t* RESTRICT _data_dstVertex, - real_t* RESTRICT _data_muEdge, - real_t* RESTRICT _data_muVertex, - idx_t* RESTRICT _data_srcEdge, - idx_t* RESTRICT _data_srcVertex, - real_t forVertex_0, - real_t forVertex_1, - real_t forVertex_2, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_0comp2, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_1comp2, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - real_t macro_vertex_coord_id_2comp2, - real_t macro_vertex_coord_id_3comp0, - real_t macro_vertex_coord_id_3comp1, - real_t macro_vertex_coord_id_3comp2, - std::shared_ptr< SparseMatrixProxy > mat, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float, - real_t radRayVertex, - real_t radRefVertex, - real_t rayVertex_0, - real_t rayVertex_1, - real_t rayVertex_2, - real_t refVertex_0, - real_t refVertex_1, - real_t refVertex_2, - real_t thrVertex_0, - real_t thrVertex_1, - real_t thrVertex_2 ) const; + void toMatrix_P2ElementwiseFullStokesIcosahedralShellMap_1_2_macro_3D( idx_t* RESTRICT _data_dstEdge, + idx_t* RESTRICT _data_dstVertex, + real_t* RESTRICT _data_muEdge, + real_t* RESTRICT _data_muVertex, + idx_t* RESTRICT _data_srcEdge, + idx_t* RESTRICT _data_srcVertex, + real_t forVertex_0, + real_t forVertex_1, + real_t forVertex_2, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_0comp2, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_1comp2, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + real_t macro_vertex_coord_id_2comp2, + real_t macro_vertex_coord_id_3comp0, + real_t macro_vertex_coord_id_3comp1, + real_t macro_vertex_coord_id_3comp2, + std::shared_ptr< SparseMatrixProxy > mat, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float, + real_t radRayVertex, + real_t radRefVertex, + real_t rayVertex_0, + real_t rayVertex_1, + real_t rayVertex_2, + real_t refVertex_0, + real_t refVertex_1, + real_t refVertex_2, + real_t thrVertex_0, + real_t thrVertex_1, + real_t thrVertex_2 ) const; P2Function< real_t > mu; }; diff --git a/operators/full_stokes/P2ElementwiseFullStokesIcosahedralShellMap_2_0.cpp b/operators/full_stokes/P2ElementwiseFullStokesIcosahedralShellMap_2_0.cpp index 9b9e077ced361c4a7e04bc2d18c0415e047063e9..2b12e9a48d8896c1ba16368a10c59a894eff497e 100644 --- a/operators/full_stokes/P2ElementwiseFullStokesIcosahedralShellMap_2_0.cpp +++ b/operators/full_stokes/P2ElementwiseFullStokesIcosahedralShellMap_2_0.cpp @@ -154,7 +154,7 @@ void P2ElementwiseFullStokesIcosahedralShellMap_2_0::apply( const P2Function< re this->timingTree_->start( "kernel" ); - apply_macro_3D( + apply_P2ElementwiseFullStokesIcosahedralShellMap_2_0_macro_3D( _data_dstEdge, _data_dstVertex, @@ -190,6 +190,7 @@ void P2ElementwiseFullStokesIcosahedralShellMap_2_0::apply( const P2Function< re thrVertex_0, thrVertex_1, thrVertex_2 ); + this->timingTree_->stop( "kernel" ); } @@ -285,7 +286,7 @@ void P2ElementwiseFullStokesIcosahedralShellMap_2_0::toMatrix( const std::shared this->timingTree_->start( "kernel" ); - toMatrix_macro_3D( + toMatrix_P2ElementwiseFullStokesIcosahedralShellMap_2_0_macro_3D( _data_dstEdge, _data_dstVertex, @@ -322,6 +323,7 @@ void P2ElementwiseFullStokesIcosahedralShellMap_2_0::toMatrix( const std::shared thrVertex_0, thrVertex_1, thrVertex_2 ); + this->timingTree_->stop( "kernel" ); } } diff --git a/operators/full_stokes/P2ElementwiseFullStokesIcosahedralShellMap_2_0.hpp b/operators/full_stokes/P2ElementwiseFullStokesIcosahedralShellMap_2_0.hpp index e7f89bd7f13a51202f43489132eff55763fe1455..66b9c4643c6d0c883dfa7f6ee445a65b85581809 100644 --- a/operators/full_stokes/P2ElementwiseFullStokesIcosahedralShellMap_2_0.hpp +++ b/operators/full_stokes/P2ElementwiseFullStokesIcosahedralShellMap_2_0.hpp @@ -29,6 +29,7 @@ #include "core/DataTypes.h" #include "hyteg/LikwidWrapper.hpp" +#include "hyteg/boundary/BoundaryConditions.hpp" #include "hyteg/communication/Syncing.hpp" #include "hyteg/edgedofspace/EdgeDoFMacroCell.hpp" #include "hyteg/geometry/IcosahedralShellMap.hpp" @@ -36,6 +37,7 @@ #include "hyteg/p2functionspace/P2Function.hpp" #include "hyteg/primitivestorage/PrimitiveStorage.hpp" #include "hyteg/sparseassembly/SparseMatrixProxy.hpp" +#include "hyteg/types/types.hpp" #define FUNC_PREFIX @@ -93,87 +95,96 @@ class P2ElementwiseFullStokesIcosahedralShellMap_2_0 : public Operator< P2Functi protected: private: - /// Kernel type: apply + /// Integral: P2ElementwiseFullStokesIcosahedralShellMap_2_0 + /// - volume element: tetrahedron, dim: 3, vertices: 4, spacedim: 3 + /// - kernel type: apply + /// - loop strategy: SAWTOOTH /// - quadrature rule: Hammer-Marlowe-Stroud 3 | points: 5, degree: 3 + /// - blending map: IcosahedralShellMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 4283 5387 66 15 5 0 0 1 - void apply_macro_3D( real_t* RESTRICT _data_dstEdge, - real_t* RESTRICT _data_dstVertex, - real_t* RESTRICT _data_muEdge, - real_t* RESTRICT _data_muVertex, - real_t* RESTRICT _data_srcEdge, - real_t* RESTRICT _data_srcVertex, - real_t forVertex_0, - real_t forVertex_1, - real_t forVertex_2, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_0comp2, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_1comp2, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - real_t macro_vertex_coord_id_2comp2, - real_t macro_vertex_coord_id_3comp0, - real_t macro_vertex_coord_id_3comp1, - real_t macro_vertex_coord_id_3comp2, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float, - real_t radRayVertex, - real_t radRefVertex, - real_t rayVertex_0, - real_t rayVertex_1, - real_t rayVertex_2, - real_t refVertex_0, - real_t refVertex_1, - real_t refVertex_2, - real_t thrVertex_0, - real_t thrVertex_1, - real_t thrVertex_2 ) const; - /// Kernel type: toMatrix + void apply_P2ElementwiseFullStokesIcosahedralShellMap_2_0_macro_3D( real_t* RESTRICT _data_dstEdge, + real_t* RESTRICT _data_dstVertex, + real_t* RESTRICT _data_muEdge, + real_t* RESTRICT _data_muVertex, + real_t* RESTRICT _data_srcEdge, + real_t* RESTRICT _data_srcVertex, + real_t forVertex_0, + real_t forVertex_1, + real_t forVertex_2, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_0comp2, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_1comp2, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + real_t macro_vertex_coord_id_2comp2, + real_t macro_vertex_coord_id_3comp0, + real_t macro_vertex_coord_id_3comp1, + real_t macro_vertex_coord_id_3comp2, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float, + real_t radRayVertex, + real_t radRefVertex, + real_t rayVertex_0, + real_t rayVertex_1, + real_t rayVertex_2, + real_t refVertex_0, + real_t refVertex_1, + real_t refVertex_2, + real_t thrVertex_0, + real_t thrVertex_1, + real_t thrVertex_2 ) const; + + /// Integral: P2ElementwiseFullStokesIcosahedralShellMap_2_0 + /// - volume element: tetrahedron, dim: 3, vertices: 4, spacedim: 3 + /// - kernel type: toMatrix + /// - loop strategy: SAWTOOTH /// - quadrature rule: Hammer-Marlowe-Stroud 3 | points: 5, degree: 3 + /// - blending map: IcosahedralShellMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 4183 5287 66 15 5 0 0 4 - void toMatrix_macro_3D( idx_t* RESTRICT _data_dstEdge, - idx_t* RESTRICT _data_dstVertex, - real_t* RESTRICT _data_muEdge, - real_t* RESTRICT _data_muVertex, - idx_t* RESTRICT _data_srcEdge, - idx_t* RESTRICT _data_srcVertex, - real_t forVertex_0, - real_t forVertex_1, - real_t forVertex_2, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_0comp2, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_1comp2, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - real_t macro_vertex_coord_id_2comp2, - real_t macro_vertex_coord_id_3comp0, - real_t macro_vertex_coord_id_3comp1, - real_t macro_vertex_coord_id_3comp2, - std::shared_ptr< SparseMatrixProxy > mat, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float, - real_t radRayVertex, - real_t radRefVertex, - real_t rayVertex_0, - real_t rayVertex_1, - real_t rayVertex_2, - real_t refVertex_0, - real_t refVertex_1, - real_t refVertex_2, - real_t thrVertex_0, - real_t thrVertex_1, - real_t thrVertex_2 ) const; + void toMatrix_P2ElementwiseFullStokesIcosahedralShellMap_2_0_macro_3D( idx_t* RESTRICT _data_dstEdge, + idx_t* RESTRICT _data_dstVertex, + real_t* RESTRICT _data_muEdge, + real_t* RESTRICT _data_muVertex, + idx_t* RESTRICT _data_srcEdge, + idx_t* RESTRICT _data_srcVertex, + real_t forVertex_0, + real_t forVertex_1, + real_t forVertex_2, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_0comp2, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_1comp2, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + real_t macro_vertex_coord_id_2comp2, + real_t macro_vertex_coord_id_3comp0, + real_t macro_vertex_coord_id_3comp1, + real_t macro_vertex_coord_id_3comp2, + std::shared_ptr< SparseMatrixProxy > mat, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float, + real_t radRayVertex, + real_t radRefVertex, + real_t rayVertex_0, + real_t rayVertex_1, + real_t rayVertex_2, + real_t refVertex_0, + real_t refVertex_1, + real_t refVertex_2, + real_t thrVertex_0, + real_t thrVertex_1, + real_t thrVertex_2 ) const; P2Function< real_t > mu; }; diff --git a/operators/full_stokes/P2ElementwiseFullStokesIcosahedralShellMap_2_1.cpp b/operators/full_stokes/P2ElementwiseFullStokesIcosahedralShellMap_2_1.cpp index 33789ed1d114c7df64783b875af1c7663ac1de7b..b8c0e5e9e37f3f85f7bfff36e2cc7970291db3f1 100644 --- a/operators/full_stokes/P2ElementwiseFullStokesIcosahedralShellMap_2_1.cpp +++ b/operators/full_stokes/P2ElementwiseFullStokesIcosahedralShellMap_2_1.cpp @@ -154,7 +154,7 @@ void P2ElementwiseFullStokesIcosahedralShellMap_2_1::apply( const P2Function< re this->timingTree_->start( "kernel" ); - apply_macro_3D( + apply_P2ElementwiseFullStokesIcosahedralShellMap_2_1_macro_3D( _data_dstEdge, _data_dstVertex, @@ -190,6 +190,7 @@ void P2ElementwiseFullStokesIcosahedralShellMap_2_1::apply( const P2Function< re thrVertex_0, thrVertex_1, thrVertex_2 ); + this->timingTree_->stop( "kernel" ); } @@ -285,7 +286,7 @@ void P2ElementwiseFullStokesIcosahedralShellMap_2_1::toMatrix( const std::shared this->timingTree_->start( "kernel" ); - toMatrix_macro_3D( + toMatrix_P2ElementwiseFullStokesIcosahedralShellMap_2_1_macro_3D( _data_dstEdge, _data_dstVertex, @@ -322,6 +323,7 @@ void P2ElementwiseFullStokesIcosahedralShellMap_2_1::toMatrix( const std::shared thrVertex_0, thrVertex_1, thrVertex_2 ); + this->timingTree_->stop( "kernel" ); } } diff --git a/operators/full_stokes/P2ElementwiseFullStokesIcosahedralShellMap_2_1.hpp b/operators/full_stokes/P2ElementwiseFullStokesIcosahedralShellMap_2_1.hpp index d758b38248ad8321228bf1f722cfd6490f107a94..d584d34cae7011b79c001ec43c82aa4cb1eb449a 100644 --- a/operators/full_stokes/P2ElementwiseFullStokesIcosahedralShellMap_2_1.hpp +++ b/operators/full_stokes/P2ElementwiseFullStokesIcosahedralShellMap_2_1.hpp @@ -29,6 +29,7 @@ #include "core/DataTypes.h" #include "hyteg/LikwidWrapper.hpp" +#include "hyteg/boundary/BoundaryConditions.hpp" #include "hyteg/communication/Syncing.hpp" #include "hyteg/edgedofspace/EdgeDoFMacroCell.hpp" #include "hyteg/geometry/IcosahedralShellMap.hpp" @@ -36,6 +37,7 @@ #include "hyteg/p2functionspace/P2Function.hpp" #include "hyteg/primitivestorage/PrimitiveStorage.hpp" #include "hyteg/sparseassembly/SparseMatrixProxy.hpp" +#include "hyteg/types/types.hpp" #define FUNC_PREFIX @@ -93,87 +95,96 @@ class P2ElementwiseFullStokesIcosahedralShellMap_2_1 : public Operator< P2Functi protected: private: - /// Kernel type: apply + /// Integral: P2ElementwiseFullStokesIcosahedralShellMap_2_1 + /// - volume element: tetrahedron, dim: 3, vertices: 4, spacedim: 3 + /// - kernel type: apply + /// - loop strategy: SAWTOOTH /// - quadrature rule: Hammer-Marlowe-Stroud 3 | points: 5, degree: 3 + /// - blending map: IcosahedralShellMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 4283 5387 66 15 5 0 0 1 - void apply_macro_3D( real_t* RESTRICT _data_dstEdge, - real_t* RESTRICT _data_dstVertex, - real_t* RESTRICT _data_muEdge, - real_t* RESTRICT _data_muVertex, - real_t* RESTRICT _data_srcEdge, - real_t* RESTRICT _data_srcVertex, - real_t forVertex_0, - real_t forVertex_1, - real_t forVertex_2, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_0comp2, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_1comp2, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - real_t macro_vertex_coord_id_2comp2, - real_t macro_vertex_coord_id_3comp0, - real_t macro_vertex_coord_id_3comp1, - real_t macro_vertex_coord_id_3comp2, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float, - real_t radRayVertex, - real_t radRefVertex, - real_t rayVertex_0, - real_t rayVertex_1, - real_t rayVertex_2, - real_t refVertex_0, - real_t refVertex_1, - real_t refVertex_2, - real_t thrVertex_0, - real_t thrVertex_1, - real_t thrVertex_2 ) const; - /// Kernel type: toMatrix + void apply_P2ElementwiseFullStokesIcosahedralShellMap_2_1_macro_3D( real_t* RESTRICT _data_dstEdge, + real_t* RESTRICT _data_dstVertex, + real_t* RESTRICT _data_muEdge, + real_t* RESTRICT _data_muVertex, + real_t* RESTRICT _data_srcEdge, + real_t* RESTRICT _data_srcVertex, + real_t forVertex_0, + real_t forVertex_1, + real_t forVertex_2, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_0comp2, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_1comp2, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + real_t macro_vertex_coord_id_2comp2, + real_t macro_vertex_coord_id_3comp0, + real_t macro_vertex_coord_id_3comp1, + real_t macro_vertex_coord_id_3comp2, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float, + real_t radRayVertex, + real_t radRefVertex, + real_t rayVertex_0, + real_t rayVertex_1, + real_t rayVertex_2, + real_t refVertex_0, + real_t refVertex_1, + real_t refVertex_2, + real_t thrVertex_0, + real_t thrVertex_1, + real_t thrVertex_2 ) const; + + /// Integral: P2ElementwiseFullStokesIcosahedralShellMap_2_1 + /// - volume element: tetrahedron, dim: 3, vertices: 4, spacedim: 3 + /// - kernel type: toMatrix + /// - loop strategy: SAWTOOTH /// - quadrature rule: Hammer-Marlowe-Stroud 3 | points: 5, degree: 3 + /// - blending map: IcosahedralShellMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 4183 5287 66 15 5 0 0 4 - void toMatrix_macro_3D( idx_t* RESTRICT _data_dstEdge, - idx_t* RESTRICT _data_dstVertex, - real_t* RESTRICT _data_muEdge, - real_t* RESTRICT _data_muVertex, - idx_t* RESTRICT _data_srcEdge, - idx_t* RESTRICT _data_srcVertex, - real_t forVertex_0, - real_t forVertex_1, - real_t forVertex_2, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_0comp2, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_1comp2, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - real_t macro_vertex_coord_id_2comp2, - real_t macro_vertex_coord_id_3comp0, - real_t macro_vertex_coord_id_3comp1, - real_t macro_vertex_coord_id_3comp2, - std::shared_ptr< SparseMatrixProxy > mat, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float, - real_t radRayVertex, - real_t radRefVertex, - real_t rayVertex_0, - real_t rayVertex_1, - real_t rayVertex_2, - real_t refVertex_0, - real_t refVertex_1, - real_t refVertex_2, - real_t thrVertex_0, - real_t thrVertex_1, - real_t thrVertex_2 ) const; + void toMatrix_P2ElementwiseFullStokesIcosahedralShellMap_2_1_macro_3D( idx_t* RESTRICT _data_dstEdge, + idx_t* RESTRICT _data_dstVertex, + real_t* RESTRICT _data_muEdge, + real_t* RESTRICT _data_muVertex, + idx_t* RESTRICT _data_srcEdge, + idx_t* RESTRICT _data_srcVertex, + real_t forVertex_0, + real_t forVertex_1, + real_t forVertex_2, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_0comp2, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_1comp2, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + real_t macro_vertex_coord_id_2comp2, + real_t macro_vertex_coord_id_3comp0, + real_t macro_vertex_coord_id_3comp1, + real_t macro_vertex_coord_id_3comp2, + std::shared_ptr< SparseMatrixProxy > mat, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float, + real_t radRayVertex, + real_t radRefVertex, + real_t rayVertex_0, + real_t rayVertex_1, + real_t rayVertex_2, + real_t refVertex_0, + real_t refVertex_1, + real_t refVertex_2, + real_t thrVertex_0, + real_t thrVertex_1, + real_t thrVertex_2 ) const; P2Function< real_t > mu; }; diff --git a/operators/full_stokes/P2ElementwiseFullStokesIcosahedralShellMap_2_2.cpp b/operators/full_stokes/P2ElementwiseFullStokesIcosahedralShellMap_2_2.cpp index 1ed1ee4be16207f69a978a37e61d8d47d92f6ea1..f3b67bf070107f470fac0fb22ec244f0b2a7c7f0 100644 --- a/operators/full_stokes/P2ElementwiseFullStokesIcosahedralShellMap_2_2.cpp +++ b/operators/full_stokes/P2ElementwiseFullStokesIcosahedralShellMap_2_2.cpp @@ -154,7 +154,7 @@ void P2ElementwiseFullStokesIcosahedralShellMap_2_2::apply( const P2Function< re this->timingTree_->start( "kernel" ); - apply_macro_3D( + apply_P2ElementwiseFullStokesIcosahedralShellMap_2_2_macro_3D( _data_dstEdge, _data_dstVertex, @@ -190,6 +190,7 @@ void P2ElementwiseFullStokesIcosahedralShellMap_2_2::apply( const P2Function< re thrVertex_0, thrVertex_1, thrVertex_2 ); + this->timingTree_->stop( "kernel" ); } @@ -285,7 +286,7 @@ void P2ElementwiseFullStokesIcosahedralShellMap_2_2::toMatrix( const std::shared this->timingTree_->start( "kernel" ); - toMatrix_macro_3D( + toMatrix_P2ElementwiseFullStokesIcosahedralShellMap_2_2_macro_3D( _data_dstEdge, _data_dstVertex, @@ -322,6 +323,7 @@ void P2ElementwiseFullStokesIcosahedralShellMap_2_2::toMatrix( const std::shared thrVertex_0, thrVertex_1, thrVertex_2 ); + this->timingTree_->stop( "kernel" ); } } @@ -401,7 +403,7 @@ void P2ElementwiseFullStokesIcosahedralShellMap_2_2::computeInverseDiagonalOpera this->timingTree_->start( "kernel" ); - computeInverseDiagonalOperatorValues_macro_3D( + computeInverseDiagonalOperatorValues_P2ElementwiseFullStokesIcosahedralShellMap_2_2_macro_3D( _data_invDiag_Edge, _data_invDiag_Vertex, @@ -435,6 +437,7 @@ void P2ElementwiseFullStokesIcosahedralShellMap_2_2::computeInverseDiagonalOpera thrVertex_0, thrVertex_1, thrVertex_2 ); + this->timingTree_->stop( "kernel" ); } diff --git a/operators/full_stokes/P2ElementwiseFullStokesIcosahedralShellMap_2_2.hpp b/operators/full_stokes/P2ElementwiseFullStokesIcosahedralShellMap_2_2.hpp index 56e9fc5b6e8104b3fe0ffcd80a0f5ad2f56d2bbd..0288c626c6f5f44c163a8edea302b2f0dfd1f74b 100644 --- a/operators/full_stokes/P2ElementwiseFullStokesIcosahedralShellMap_2_2.hpp +++ b/operators/full_stokes/P2ElementwiseFullStokesIcosahedralShellMap_2_2.hpp @@ -29,6 +29,7 @@ #include "core/DataTypes.h" #include "hyteg/LikwidWrapper.hpp" +#include "hyteg/boundary/BoundaryConditions.hpp" #include "hyteg/communication/Syncing.hpp" #include "hyteg/edgedofspace/EdgeDoFMacroCell.hpp" #include "hyteg/geometry/IcosahedralShellMap.hpp" @@ -37,6 +38,7 @@ #include "hyteg/primitivestorage/PrimitiveStorage.hpp" #include "hyteg/solvers/Smoothables.hpp" #include "hyteg/sparseassembly/SparseMatrixProxy.hpp" +#include "hyteg/types/types.hpp" #define FUNC_PREFIX @@ -99,125 +101,140 @@ class P2ElementwiseFullStokesIcosahedralShellMap_2_2 : public Operator< P2Functi protected: private: - /// Kernel type: apply + /// Integral: P2ElementwiseFullStokesIcosahedralShellMap_2_2 + /// - volume element: tetrahedron, dim: 3, vertices: 4, spacedim: 3 + /// - kernel type: apply + /// - loop strategy: SAWTOOTH /// - quadrature rule: Hammer-Marlowe-Stroud 3 | points: 5, degree: 3 + /// - blending map: IcosahedralShellMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 3513 5182 66 15 5 0 0 1 - void apply_macro_3D( real_t* RESTRICT _data_dstEdge, - real_t* RESTRICT _data_dstVertex, - real_t* RESTRICT _data_muEdge, - real_t* RESTRICT _data_muVertex, - real_t* RESTRICT _data_srcEdge, - real_t* RESTRICT _data_srcVertex, - real_t forVertex_0, - real_t forVertex_1, - real_t forVertex_2, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_0comp2, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_1comp2, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - real_t macro_vertex_coord_id_2comp2, - real_t macro_vertex_coord_id_3comp0, - real_t macro_vertex_coord_id_3comp1, - real_t macro_vertex_coord_id_3comp2, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float, - real_t radRayVertex, - real_t radRefVertex, - real_t rayVertex_0, - real_t rayVertex_1, - real_t rayVertex_2, - real_t refVertex_0, - real_t refVertex_1, - real_t refVertex_2, - real_t thrVertex_0, - real_t thrVertex_1, - real_t thrVertex_2 ) const; - /// Kernel type: toMatrix + void apply_P2ElementwiseFullStokesIcosahedralShellMap_2_2_macro_3D( real_t* RESTRICT _data_dstEdge, + real_t* RESTRICT _data_dstVertex, + real_t* RESTRICT _data_muEdge, + real_t* RESTRICT _data_muVertex, + real_t* RESTRICT _data_srcEdge, + real_t* RESTRICT _data_srcVertex, + real_t forVertex_0, + real_t forVertex_1, + real_t forVertex_2, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_0comp2, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_1comp2, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + real_t macro_vertex_coord_id_2comp2, + real_t macro_vertex_coord_id_3comp0, + real_t macro_vertex_coord_id_3comp1, + real_t macro_vertex_coord_id_3comp2, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float, + real_t radRayVertex, + real_t radRefVertex, + real_t rayVertex_0, + real_t rayVertex_1, + real_t rayVertex_2, + real_t refVertex_0, + real_t refVertex_1, + real_t refVertex_2, + real_t thrVertex_0, + real_t thrVertex_1, + real_t thrVertex_2 ) const; + + /// Integral: P2ElementwiseFullStokesIcosahedralShellMap_2_2 + /// - volume element: tetrahedron, dim: 3, vertices: 4, spacedim: 3 + /// - kernel type: toMatrix + /// - loop strategy: SAWTOOTH /// - quadrature rule: Hammer-Marlowe-Stroud 3 | points: 5, degree: 3 + /// - blending map: IcosahedralShellMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 3413 5082 66 15 5 0 0 4 - void toMatrix_macro_3D( idx_t* RESTRICT _data_dstEdge, - idx_t* RESTRICT _data_dstVertex, - real_t* RESTRICT _data_muEdge, - real_t* RESTRICT _data_muVertex, - idx_t* RESTRICT _data_srcEdge, - idx_t* RESTRICT _data_srcVertex, - real_t forVertex_0, - real_t forVertex_1, - real_t forVertex_2, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_0comp2, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_1comp2, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - real_t macro_vertex_coord_id_2comp2, - real_t macro_vertex_coord_id_3comp0, - real_t macro_vertex_coord_id_3comp1, - real_t macro_vertex_coord_id_3comp2, - std::shared_ptr< SparseMatrixProxy > mat, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float, - real_t radRayVertex, - real_t radRefVertex, - real_t rayVertex_0, - real_t rayVertex_1, - real_t rayVertex_2, - real_t refVertex_0, - real_t refVertex_1, - real_t refVertex_2, - real_t thrVertex_0, - real_t thrVertex_1, - real_t thrVertex_2 ) const; - /// Kernel type: computeInverseDiagonalOperatorValues + void toMatrix_P2ElementwiseFullStokesIcosahedralShellMap_2_2_macro_3D( idx_t* RESTRICT _data_dstEdge, + idx_t* RESTRICT _data_dstVertex, + real_t* RESTRICT _data_muEdge, + real_t* RESTRICT _data_muVertex, + idx_t* RESTRICT _data_srcEdge, + idx_t* RESTRICT _data_srcVertex, + real_t forVertex_0, + real_t forVertex_1, + real_t forVertex_2, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_0comp2, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_1comp2, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + real_t macro_vertex_coord_id_2comp2, + real_t macro_vertex_coord_id_3comp0, + real_t macro_vertex_coord_id_3comp1, + real_t macro_vertex_coord_id_3comp2, + std::shared_ptr< SparseMatrixProxy > mat, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float, + real_t radRayVertex, + real_t radRefVertex, + real_t rayVertex_0, + real_t rayVertex_1, + real_t rayVertex_2, + real_t refVertex_0, + real_t refVertex_1, + real_t refVertex_2, + real_t thrVertex_0, + real_t thrVertex_1, + real_t thrVertex_2 ) const; + + /// Integral: P2ElementwiseFullStokesIcosahedralShellMap_2_2 + /// - volume element: tetrahedron, dim: 3, vertices: 4, spacedim: 3 + /// - kernel type: computeInverseDiagonalOperatorValues + /// - loop strategy: SAWTOOTH /// - quadrature rule: Hammer-Marlowe-Stroud 3 | points: 5, degree: 3 + /// - blending map: IcosahedralShellMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 2298 3597 66 15 5 0 0 1 - void computeInverseDiagonalOperatorValues_macro_3D( real_t* RESTRICT _data_invDiag_Edge, - real_t* RESTRICT _data_invDiag_Vertex, - real_t* RESTRICT _data_muEdge, - real_t* RESTRICT _data_muVertex, - real_t forVertex_0, - real_t forVertex_1, - real_t forVertex_2, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_0comp2, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_1comp2, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - real_t macro_vertex_coord_id_2comp2, - real_t macro_vertex_coord_id_3comp0, - real_t macro_vertex_coord_id_3comp1, - real_t macro_vertex_coord_id_3comp2, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float, - real_t radRayVertex, - real_t radRefVertex, - real_t rayVertex_0, - real_t rayVertex_1, - real_t rayVertex_2, - real_t refVertex_0, - real_t refVertex_1, - real_t refVertex_2, - real_t thrVertex_0, - real_t thrVertex_1, - real_t thrVertex_2 ) const; + void computeInverseDiagonalOperatorValues_P2ElementwiseFullStokesIcosahedralShellMap_2_2_macro_3D( + real_t* RESTRICT _data_invDiag_Edge, + real_t* RESTRICT _data_invDiag_Vertex, + real_t* RESTRICT _data_muEdge, + real_t* RESTRICT _data_muVertex, + real_t forVertex_0, + real_t forVertex_1, + real_t forVertex_2, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_0comp2, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_1comp2, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + real_t macro_vertex_coord_id_2comp2, + real_t macro_vertex_coord_id_3comp0, + real_t macro_vertex_coord_id_3comp1, + real_t macro_vertex_coord_id_3comp2, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float, + real_t radRayVertex, + real_t radRefVertex, + real_t rayVertex_0, + real_t rayVertex_1, + real_t rayVertex_2, + real_t refVertex_0, + real_t refVertex_1, + real_t refVertex_2, + real_t thrVertex_0, + real_t thrVertex_1, + real_t thrVertex_2 ) const; std::shared_ptr< P2Function< real_t > > invDiag_; P2Function< real_t > mu; diff --git a/operators/full_stokes/P2ElementwiseFullStokes_0_0.cpp b/operators/full_stokes/P2ElementwiseFullStokes_0_0.cpp index e1c987820fda51afa63310d6227db530024db0df..928dd7c8a31e3425b48284a4e1e36636f5126007 100644 --- a/operators/full_stokes/P2ElementwiseFullStokes_0_0.cpp +++ b/operators/full_stokes/P2ElementwiseFullStokes_0_0.cpp @@ -137,7 +137,7 @@ void P2ElementwiseFullStokes_0_0::apply( const P2Function< real_t >& src, this->timingTree_->start( "kernel" ); - apply_macro_3D( + apply_P2ElementwiseFullStokes_0_0_macro_3D( _data_dstEdge, _data_dstVertex, @@ -159,6 +159,7 @@ void P2ElementwiseFullStokes_0_0::apply( const P2Function< real_t >& src, macro_vertex_coord_id_3comp2, micro_edges_per_macro_edge, micro_edges_per_macro_edge_float ); + this->timingTree_->stop( "kernel" ); } @@ -228,7 +229,7 @@ void P2ElementwiseFullStokes_0_0::apply( const P2Function< real_t >& src, this->timingTree_->start( "kernel" ); - apply_macro_2D( + apply_P2ElementwiseFullStokes_0_0_macro_2D( _data_dstEdge, _data_dstVertex, @@ -244,6 +245,7 @@ void P2ElementwiseFullStokes_0_0::apply( const P2Function< real_t >& src, macro_vertex_coord_id_2comp1, micro_edges_per_macro_edge, micro_edges_per_macro_edge_float ); + this->timingTree_->stop( "kernel" ); } @@ -314,7 +316,7 @@ void P2ElementwiseFullStokes_0_0::toMatrix( const std::shared_ptr< SparseMatrixP this->timingTree_->start( "kernel" ); - toMatrix_macro_3D( + toMatrix_P2ElementwiseFullStokes_0_0_macro_3D( _data_dstEdge, _data_dstVertex, @@ -337,6 +339,7 @@ void P2ElementwiseFullStokes_0_0::toMatrix( const std::shared_ptr< SparseMatrixP mat, micro_edges_per_macro_edge, micro_edges_per_macro_edge_float ); + this->timingTree_->stop( "kernel" ); } } @@ -369,7 +372,7 @@ void P2ElementwiseFullStokes_0_0::toMatrix( const std::shared_ptr< SparseMatrixP this->timingTree_->start( "kernel" ); - toMatrix_macro_2D( + toMatrix_P2ElementwiseFullStokes_0_0_macro_2D( _data_dstEdge, _data_dstVertex, @@ -386,6 +389,7 @@ void P2ElementwiseFullStokes_0_0::toMatrix( const std::shared_ptr< SparseMatrixP mat, micro_edges_per_macro_edge, micro_edges_per_macro_edge_float ); + this->timingTree_->stop( "kernel" ); } } @@ -440,7 +444,7 @@ void P2ElementwiseFullStokes_0_0::computeInverseDiagonalOperatorValues() this->timingTree_->start( "kernel" ); - computeInverseDiagonalOperatorValues_macro_3D( + computeInverseDiagonalOperatorValues_P2ElementwiseFullStokes_0_0_macro_3D( _data_invDiag_Edge, _data_invDiag_Vertex, @@ -460,6 +464,7 @@ void P2ElementwiseFullStokes_0_0::computeInverseDiagonalOperatorValues() macro_vertex_coord_id_3comp2, micro_edges_per_macro_edge, micro_edges_per_macro_edge_float ); + this->timingTree_->stop( "kernel" ); } @@ -504,7 +509,7 @@ void P2ElementwiseFullStokes_0_0::computeInverseDiagonalOperatorValues() this->timingTree_->start( "kernel" ); - computeInverseDiagonalOperatorValues_macro_2D( + computeInverseDiagonalOperatorValues_P2ElementwiseFullStokes_0_0_macro_2D( _data_invDiag_Edge, _data_invDiag_Vertex, @@ -518,6 +523,7 @@ void P2ElementwiseFullStokes_0_0::computeInverseDiagonalOperatorValues() macro_vertex_coord_id_2comp1, micro_edges_per_macro_edge, micro_edges_per_macro_edge_float ); + this->timingTree_->stop( "kernel" ); } diff --git a/operators/full_stokes/P2ElementwiseFullStokes_0_0.hpp b/operators/full_stokes/P2ElementwiseFullStokes_0_0.hpp index 63e4e09a74137ae1d56c175eb41fa20cc3d2c236..df1b1b4fe5d0008abb7f530a1486df29b8d716e2 100644 --- a/operators/full_stokes/P2ElementwiseFullStokes_0_0.hpp +++ b/operators/full_stokes/P2ElementwiseFullStokes_0_0.hpp @@ -29,6 +29,7 @@ #include "core/DataTypes.h" #include "hyteg/LikwidWrapper.hpp" +#include "hyteg/boundary/BoundaryConditions.hpp" #include "hyteg/communication/Syncing.hpp" #include "hyteg/edgedofspace/EdgeDoFMacroCell.hpp" #include "hyteg/operators/Operator.hpp" @@ -36,6 +37,7 @@ #include "hyteg/primitivestorage/PrimitiveStorage.hpp" #include "hyteg/solvers/Smoothables.hpp" #include "hyteg/sparseassembly/SparseMatrixProxy.hpp" +#include "hyteg/types/types.hpp" #define FUNC_PREFIX @@ -98,142 +100,173 @@ class P2ElementwiseFullStokes_0_0 : public Operator< P2Function< real_t >, P2Fun protected: private: - /// Kernel type: apply + /// Integral: P2ElementwiseFullStokes_0_0 + /// - volume element: triangle, dim: 2, vertices: 3, spacedim: 2 + /// - kernel type: apply + /// - loop strategy: SAWTOOTH /// - quadrature rule: Dunavant 2 | points: 3, degree: 2 + /// - blending map: IdentityMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 146 144 12 0 0 0 0 1 - void apply_macro_2D( real_t* RESTRICT _data_dstEdge, - real_t* RESTRICT _data_dstVertex, - real_t* RESTRICT _data_muEdge, - real_t* RESTRICT _data_muVertex, - real_t* RESTRICT _data_srcEdge, - real_t* RESTRICT _data_srcVertex, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float ) const; - /// Kernel type: apply + void apply_P2ElementwiseFullStokes_0_0_macro_2D( real_t* RESTRICT _data_dstEdge, + real_t* RESTRICT _data_dstVertex, + real_t* RESTRICT _data_muEdge, + real_t* RESTRICT _data_muVertex, + real_t* RESTRICT _data_srcEdge, + real_t* RESTRICT _data_srcVertex, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float ) const; + + /// Integral: P2ElementwiseFullStokes_0_0 + /// - volume element: tetrahedron, dim: 3, vertices: 4, spacedim: 3 + /// - kernel type: apply + /// - loop strategy: SAWTOOTH /// - quadrature rule: Hammer-Marlowe-Stroud 1 | points: 4, degree: 2 + /// - blending map: IdentityMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 449 436 36 0 0 0 0 1 - void apply_macro_3D( real_t* RESTRICT _data_dstEdge, - real_t* RESTRICT _data_dstVertex, - real_t* RESTRICT _data_muEdge, - real_t* RESTRICT _data_muVertex, - real_t* RESTRICT _data_srcEdge, - real_t* RESTRICT _data_srcVertex, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_0comp2, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_1comp2, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - real_t macro_vertex_coord_id_2comp2, - real_t macro_vertex_coord_id_3comp0, - real_t macro_vertex_coord_id_3comp1, - real_t macro_vertex_coord_id_3comp2, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float ) const; - /// Kernel type: toMatrix + void apply_P2ElementwiseFullStokes_0_0_macro_3D( real_t* RESTRICT _data_dstEdge, + real_t* RESTRICT _data_dstVertex, + real_t* RESTRICT _data_muEdge, + real_t* RESTRICT _data_muVertex, + real_t* RESTRICT _data_srcEdge, + real_t* RESTRICT _data_srcVertex, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_0comp2, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_1comp2, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + real_t macro_vertex_coord_id_2comp2, + real_t macro_vertex_coord_id_3comp0, + real_t macro_vertex_coord_id_3comp1, + real_t macro_vertex_coord_id_3comp2, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float ) const; + + /// Integral: P2ElementwiseFullStokes_0_0 + /// - volume element: triangle, dim: 2, vertices: 3, spacedim: 2 + /// - kernel type: toMatrix + /// - loop strategy: SAWTOOTH /// - quadrature rule: Dunavant 2 | points: 3, degree: 2 + /// - blending map: IdentityMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 110 108 12 0 0 0 0 4 - void toMatrix_macro_2D( idx_t* RESTRICT _data_dstEdge, - idx_t* RESTRICT _data_dstVertex, - real_t* RESTRICT _data_muEdge, - real_t* RESTRICT _data_muVertex, - idx_t* RESTRICT _data_srcEdge, - idx_t* RESTRICT _data_srcVertex, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - std::shared_ptr< SparseMatrixProxy > mat, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float ) const; - /// Kernel type: toMatrix + void toMatrix_P2ElementwiseFullStokes_0_0_macro_2D( idx_t* RESTRICT _data_dstEdge, + idx_t* RESTRICT _data_dstVertex, + real_t* RESTRICT _data_muEdge, + real_t* RESTRICT _data_muVertex, + idx_t* RESTRICT _data_srcEdge, + idx_t* RESTRICT _data_srcVertex, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + std::shared_ptr< SparseMatrixProxy > mat, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float ) const; + + /// Integral: P2ElementwiseFullStokes_0_0 + /// - volume element: tetrahedron, dim: 3, vertices: 4, spacedim: 3 + /// - kernel type: toMatrix + /// - loop strategy: SAWTOOTH /// - quadrature rule: Hammer-Marlowe-Stroud 1 | points: 4, degree: 2 + /// - blending map: IdentityMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 349 336 36 0 0 0 0 4 - void toMatrix_macro_3D( idx_t* RESTRICT _data_dstEdge, - idx_t* RESTRICT _data_dstVertex, - real_t* RESTRICT _data_muEdge, - real_t* RESTRICT _data_muVertex, - idx_t* RESTRICT _data_srcEdge, - idx_t* RESTRICT _data_srcVertex, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_0comp2, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_1comp2, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - real_t macro_vertex_coord_id_2comp2, - real_t macro_vertex_coord_id_3comp0, - real_t macro_vertex_coord_id_3comp1, - real_t macro_vertex_coord_id_3comp2, - std::shared_ptr< SparseMatrixProxy > mat, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float ) const; - /// Kernel type: computeInverseDiagonalOperatorValues + void toMatrix_P2ElementwiseFullStokes_0_0_macro_3D( idx_t* RESTRICT _data_dstEdge, + idx_t* RESTRICT _data_dstVertex, + real_t* RESTRICT _data_muEdge, + real_t* RESTRICT _data_muVertex, + idx_t* RESTRICT _data_srcEdge, + idx_t* RESTRICT _data_srcVertex, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_0comp2, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_1comp2, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + real_t macro_vertex_coord_id_2comp2, + real_t macro_vertex_coord_id_3comp0, + real_t macro_vertex_coord_id_3comp1, + real_t macro_vertex_coord_id_3comp2, + std::shared_ptr< SparseMatrixProxy > mat, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float ) const; + + /// Integral: P2ElementwiseFullStokes_0_0 + /// - volume element: triangle, dim: 2, vertices: 3, spacedim: 2 + /// - kernel type: computeInverseDiagonalOperatorValues + /// - loop strategy: SAWTOOTH /// - quadrature rule: Dunavant 2 | points: 3, degree: 2 + /// - blending map: IdentityMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 71 63 12 0 0 0 0 1 - void computeInverseDiagonalOperatorValues_macro_2D( real_t* RESTRICT _data_invDiag_Edge, - real_t* RESTRICT _data_invDiag_Vertex, - real_t* RESTRICT _data_muEdge, - real_t* RESTRICT _data_muVertex, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float ) const; - /// Kernel type: computeInverseDiagonalOperatorValues + void + computeInverseDiagonalOperatorValues_P2ElementwiseFullStokes_0_0_macro_2D( real_t* RESTRICT _data_invDiag_Edge, + real_t* RESTRICT _data_invDiag_Vertex, + real_t* RESTRICT _data_muEdge, + real_t* RESTRICT _data_muVertex, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float ) const; + + /// Integral: P2ElementwiseFullStokes_0_0 + /// - volume element: tetrahedron, dim: 3, vertices: 4, spacedim: 3 + /// - kernel type: computeInverseDiagonalOperatorValues + /// - loop strategy: SAWTOOTH /// - quadrature rule: Hammer-Marlowe-Stroud 1 | points: 4, degree: 2 + /// - blending map: IdentityMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 179 156 36 0 0 0 0 1 - void computeInverseDiagonalOperatorValues_macro_3D( real_t* RESTRICT _data_invDiag_Edge, - real_t* RESTRICT _data_invDiag_Vertex, - real_t* RESTRICT _data_muEdge, - real_t* RESTRICT _data_muVertex, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_0comp2, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_1comp2, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - real_t macro_vertex_coord_id_2comp2, - real_t macro_vertex_coord_id_3comp0, - real_t macro_vertex_coord_id_3comp1, - real_t macro_vertex_coord_id_3comp2, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float ) const; + void + computeInverseDiagonalOperatorValues_P2ElementwiseFullStokes_0_0_macro_3D( real_t* RESTRICT _data_invDiag_Edge, + real_t* RESTRICT _data_invDiag_Vertex, + real_t* RESTRICT _data_muEdge, + real_t* RESTRICT _data_muVertex, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_0comp2, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_1comp2, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + real_t macro_vertex_coord_id_2comp2, + real_t macro_vertex_coord_id_3comp0, + real_t macro_vertex_coord_id_3comp1, + real_t macro_vertex_coord_id_3comp2, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float ) const; std::shared_ptr< P2Function< real_t > > invDiag_; P2Function< real_t > mu; diff --git a/operators/full_stokes/P2ElementwiseFullStokes_0_1.cpp b/operators/full_stokes/P2ElementwiseFullStokes_0_1.cpp index a9cc13814a529214a636f3a3aea06e2bb5f18825..8d55c414c83fea55e1b1c9ebf502f17924eda307 100644 --- a/operators/full_stokes/P2ElementwiseFullStokes_0_1.cpp +++ b/operators/full_stokes/P2ElementwiseFullStokes_0_1.cpp @@ -137,7 +137,7 @@ void P2ElementwiseFullStokes_0_1::apply( const P2Function< real_t >& src, this->timingTree_->start( "kernel" ); - apply_macro_3D( + apply_P2ElementwiseFullStokes_0_1_macro_3D( _data_dstEdge, _data_dstVertex, @@ -159,6 +159,7 @@ void P2ElementwiseFullStokes_0_1::apply( const P2Function< real_t >& src, macro_vertex_coord_id_3comp2, micro_edges_per_macro_edge, micro_edges_per_macro_edge_float ); + this->timingTree_->stop( "kernel" ); } @@ -228,7 +229,7 @@ void P2ElementwiseFullStokes_0_1::apply( const P2Function< real_t >& src, this->timingTree_->start( "kernel" ); - apply_macro_2D( + apply_P2ElementwiseFullStokes_0_1_macro_2D( _data_dstEdge, _data_dstVertex, @@ -244,6 +245,7 @@ void P2ElementwiseFullStokes_0_1::apply( const P2Function< real_t >& src, macro_vertex_coord_id_2comp1, micro_edges_per_macro_edge, micro_edges_per_macro_edge_float ); + this->timingTree_->stop( "kernel" ); } @@ -314,7 +316,7 @@ void P2ElementwiseFullStokes_0_1::toMatrix( const std::shared_ptr< SparseMatrixP this->timingTree_->start( "kernel" ); - toMatrix_macro_3D( + toMatrix_P2ElementwiseFullStokes_0_1_macro_3D( _data_dstEdge, _data_dstVertex, @@ -337,6 +339,7 @@ void P2ElementwiseFullStokes_0_1::toMatrix( const std::shared_ptr< SparseMatrixP mat, micro_edges_per_macro_edge, micro_edges_per_macro_edge_float ); + this->timingTree_->stop( "kernel" ); } } @@ -369,7 +372,7 @@ void P2ElementwiseFullStokes_0_1::toMatrix( const std::shared_ptr< SparseMatrixP this->timingTree_->start( "kernel" ); - toMatrix_macro_2D( + toMatrix_P2ElementwiseFullStokes_0_1_macro_2D( _data_dstEdge, _data_dstVertex, @@ -386,6 +389,7 @@ void P2ElementwiseFullStokes_0_1::toMatrix( const std::shared_ptr< SparseMatrixP mat, micro_edges_per_macro_edge, micro_edges_per_macro_edge_float ); + this->timingTree_->stop( "kernel" ); } } diff --git a/operators/full_stokes/P2ElementwiseFullStokes_0_1.hpp b/operators/full_stokes/P2ElementwiseFullStokes_0_1.hpp index b626c4d7beb2e8b76e7817307fb9f7a99563ada1..2383334deb7d7c67f2ef6280e0f2a600273bed30 100644 --- a/operators/full_stokes/P2ElementwiseFullStokes_0_1.hpp +++ b/operators/full_stokes/P2ElementwiseFullStokes_0_1.hpp @@ -29,12 +29,14 @@ #include "core/DataTypes.h" #include "hyteg/LikwidWrapper.hpp" +#include "hyteg/boundary/BoundaryConditions.hpp" #include "hyteg/communication/Syncing.hpp" #include "hyteg/edgedofspace/EdgeDoFMacroCell.hpp" #include "hyteg/operators/Operator.hpp" #include "hyteg/p2functionspace/P2Function.hpp" #include "hyteg/primitivestorage/PrimitiveStorage.hpp" #include "hyteg/sparseassembly/SparseMatrixProxy.hpp" +#include "hyteg/types/types.hpp" #define FUNC_PREFIX @@ -92,100 +94,119 @@ class P2ElementwiseFullStokes_0_1 : public Operator< P2Function< real_t >, P2Fun protected: private: - /// Kernel type: apply + /// Integral: P2ElementwiseFullStokes_0_1 + /// - volume element: triangle, dim: 2, vertices: 3, spacedim: 2 + /// - kernel type: apply + /// - loop strategy: SAWTOOTH /// - quadrature rule: Dunavant 2 | points: 3, degree: 2 + /// - blending map: IdentityMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 191 189 12 0 0 0 0 1 - void apply_macro_2D( real_t* RESTRICT _data_dstEdge, - real_t* RESTRICT _data_dstVertex, - real_t* RESTRICT _data_muEdge, - real_t* RESTRICT _data_muVertex, - real_t* RESTRICT _data_srcEdge, - real_t* RESTRICT _data_srcVertex, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float ) const; - /// Kernel type: apply + void apply_P2ElementwiseFullStokes_0_1_macro_2D( real_t* RESTRICT _data_dstEdge, + real_t* RESTRICT _data_dstVertex, + real_t* RESTRICT _data_muEdge, + real_t* RESTRICT _data_muVertex, + real_t* RESTRICT _data_srcEdge, + real_t* RESTRICT _data_srcVertex, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float ) const; + + /// Integral: P2ElementwiseFullStokes_0_1 + /// - volume element: tetrahedron, dim: 3, vertices: 4, spacedim: 3 + /// - kernel type: apply + /// - loop strategy: SAWTOOTH /// - quadrature rule: Hammer-Marlowe-Stroud 1 | points: 4, degree: 2 + /// - blending map: IdentityMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 629 616 36 0 0 0 0 1 - void apply_macro_3D( real_t* RESTRICT _data_dstEdge, - real_t* RESTRICT _data_dstVertex, - real_t* RESTRICT _data_muEdge, - real_t* RESTRICT _data_muVertex, - real_t* RESTRICT _data_srcEdge, - real_t* RESTRICT _data_srcVertex, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_0comp2, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_1comp2, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - real_t macro_vertex_coord_id_2comp2, - real_t macro_vertex_coord_id_3comp0, - real_t macro_vertex_coord_id_3comp1, - real_t macro_vertex_coord_id_3comp2, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float ) const; - /// Kernel type: toMatrix + void apply_P2ElementwiseFullStokes_0_1_macro_3D( real_t* RESTRICT _data_dstEdge, + real_t* RESTRICT _data_dstVertex, + real_t* RESTRICT _data_muEdge, + real_t* RESTRICT _data_muVertex, + real_t* RESTRICT _data_srcEdge, + real_t* RESTRICT _data_srcVertex, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_0comp2, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_1comp2, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + real_t macro_vertex_coord_id_2comp2, + real_t macro_vertex_coord_id_3comp0, + real_t macro_vertex_coord_id_3comp1, + real_t macro_vertex_coord_id_3comp2, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float ) const; + + /// Integral: P2ElementwiseFullStokes_0_1 + /// - volume element: triangle, dim: 2, vertices: 3, spacedim: 2 + /// - kernel type: toMatrix + /// - loop strategy: SAWTOOTH /// - quadrature rule: Dunavant 2 | points: 3, degree: 2 + /// - blending map: IdentityMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 155 153 12 0 0 0 0 4 - void toMatrix_macro_2D( idx_t* RESTRICT _data_dstEdge, - idx_t* RESTRICT _data_dstVertex, - real_t* RESTRICT _data_muEdge, - real_t* RESTRICT _data_muVertex, - idx_t* RESTRICT _data_srcEdge, - idx_t* RESTRICT _data_srcVertex, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - std::shared_ptr< SparseMatrixProxy > mat, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float ) const; - /// Kernel type: toMatrix + void toMatrix_P2ElementwiseFullStokes_0_1_macro_2D( idx_t* RESTRICT _data_dstEdge, + idx_t* RESTRICT _data_dstVertex, + real_t* RESTRICT _data_muEdge, + real_t* RESTRICT _data_muVertex, + idx_t* RESTRICT _data_srcEdge, + idx_t* RESTRICT _data_srcVertex, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + std::shared_ptr< SparseMatrixProxy > mat, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float ) const; + + /// Integral: P2ElementwiseFullStokes_0_1 + /// - volume element: tetrahedron, dim: 3, vertices: 4, spacedim: 3 + /// - kernel type: toMatrix + /// - loop strategy: SAWTOOTH /// - quadrature rule: Hammer-Marlowe-Stroud 1 | points: 4, degree: 2 + /// - blending map: IdentityMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 529 516 36 0 0 0 0 4 - void toMatrix_macro_3D( idx_t* RESTRICT _data_dstEdge, - idx_t* RESTRICT _data_dstVertex, - real_t* RESTRICT _data_muEdge, - real_t* RESTRICT _data_muVertex, - idx_t* RESTRICT _data_srcEdge, - idx_t* RESTRICT _data_srcVertex, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_0comp2, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_1comp2, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - real_t macro_vertex_coord_id_2comp2, - real_t macro_vertex_coord_id_3comp0, - real_t macro_vertex_coord_id_3comp1, - real_t macro_vertex_coord_id_3comp2, - std::shared_ptr< SparseMatrixProxy > mat, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float ) const; + void toMatrix_P2ElementwiseFullStokes_0_1_macro_3D( idx_t* RESTRICT _data_dstEdge, + idx_t* RESTRICT _data_dstVertex, + real_t* RESTRICT _data_muEdge, + real_t* RESTRICT _data_muVertex, + idx_t* RESTRICT _data_srcEdge, + idx_t* RESTRICT _data_srcVertex, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_0comp2, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_1comp2, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + real_t macro_vertex_coord_id_2comp2, + real_t macro_vertex_coord_id_3comp0, + real_t macro_vertex_coord_id_3comp1, + real_t macro_vertex_coord_id_3comp2, + std::shared_ptr< SparseMatrixProxy > mat, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float ) const; P2Function< real_t > mu; }; diff --git a/operators/full_stokes/P2ElementwiseFullStokes_0_2.cpp b/operators/full_stokes/P2ElementwiseFullStokes_0_2.cpp index ddfb13ea70415f531fb82fac78f48fcc4b83f180..9f8044a734b8c6c055353567b3c3953270f1610e 100644 --- a/operators/full_stokes/P2ElementwiseFullStokes_0_2.cpp +++ b/operators/full_stokes/P2ElementwiseFullStokes_0_2.cpp @@ -136,7 +136,7 @@ void P2ElementwiseFullStokes_0_2::apply( const P2Function< real_t >& src, this->timingTree_->start( "kernel" ); - apply_macro_3D( + apply_P2ElementwiseFullStokes_0_2_macro_3D( _data_dstEdge, _data_dstVertex, @@ -158,6 +158,7 @@ void P2ElementwiseFullStokes_0_2::apply( const P2Function< real_t >& src, macro_vertex_coord_id_3comp2, micro_edges_per_macro_edge, micro_edges_per_macro_edge_float ); + this->timingTree_->stop( "kernel" ); } @@ -236,7 +237,7 @@ void P2ElementwiseFullStokes_0_2::toMatrix( const std::shared_ptr< SparseMatrixP this->timingTree_->start( "kernel" ); - toMatrix_macro_3D( + toMatrix_P2ElementwiseFullStokes_0_2_macro_3D( _data_dstEdge, _data_dstVertex, @@ -259,6 +260,7 @@ void P2ElementwiseFullStokes_0_2::toMatrix( const std::shared_ptr< SparseMatrixP mat, micro_edges_per_macro_edge, micro_edges_per_macro_edge_float ); + this->timingTree_->stop( "kernel" ); } } diff --git a/operators/full_stokes/P2ElementwiseFullStokes_0_2.hpp b/operators/full_stokes/P2ElementwiseFullStokes_0_2.hpp index 37c3f570a65a15f5b80c99b2529a068621ed8685..bcd3a4acbb781866e0750ba5d84da9642f4a5627 100644 --- a/operators/full_stokes/P2ElementwiseFullStokes_0_2.hpp +++ b/operators/full_stokes/P2ElementwiseFullStokes_0_2.hpp @@ -29,12 +29,14 @@ #include "core/DataTypes.h" #include "hyteg/LikwidWrapper.hpp" +#include "hyteg/boundary/BoundaryConditions.hpp" #include "hyteg/communication/Syncing.hpp" #include "hyteg/edgedofspace/EdgeDoFMacroCell.hpp" #include "hyteg/operators/Operator.hpp" #include "hyteg/p2functionspace/P2Function.hpp" #include "hyteg/primitivestorage/PrimitiveStorage.hpp" #include "hyteg/sparseassembly/SparseMatrixProxy.hpp" +#include "hyteg/types/types.hpp" #define FUNC_PREFIX @@ -92,59 +94,68 @@ class P2ElementwiseFullStokes_0_2 : public Operator< P2Function< real_t >, P2Fun protected: private: - /// Kernel type: apply + /// Integral: P2ElementwiseFullStokes_0_2 + /// - volume element: tetrahedron, dim: 3, vertices: 4, spacedim: 3 + /// - kernel type: apply + /// - loop strategy: SAWTOOTH /// - quadrature rule: Hammer-Marlowe-Stroud 1 | points: 4, degree: 2 + /// - blending map: IdentityMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 629 616 36 0 0 0 0 1 - void apply_macro_3D( real_t* RESTRICT _data_dstEdge, - real_t* RESTRICT _data_dstVertex, - real_t* RESTRICT _data_muEdge, - real_t* RESTRICT _data_muVertex, - real_t* RESTRICT _data_srcEdge, - real_t* RESTRICT _data_srcVertex, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_0comp2, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_1comp2, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - real_t macro_vertex_coord_id_2comp2, - real_t macro_vertex_coord_id_3comp0, - real_t macro_vertex_coord_id_3comp1, - real_t macro_vertex_coord_id_3comp2, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float ) const; - /// Kernel type: toMatrix + void apply_P2ElementwiseFullStokes_0_2_macro_3D( real_t* RESTRICT _data_dstEdge, + real_t* RESTRICT _data_dstVertex, + real_t* RESTRICT _data_muEdge, + real_t* RESTRICT _data_muVertex, + real_t* RESTRICT _data_srcEdge, + real_t* RESTRICT _data_srcVertex, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_0comp2, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_1comp2, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + real_t macro_vertex_coord_id_2comp2, + real_t macro_vertex_coord_id_3comp0, + real_t macro_vertex_coord_id_3comp1, + real_t macro_vertex_coord_id_3comp2, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float ) const; + + /// Integral: P2ElementwiseFullStokes_0_2 + /// - volume element: tetrahedron, dim: 3, vertices: 4, spacedim: 3 + /// - kernel type: toMatrix + /// - loop strategy: SAWTOOTH /// - quadrature rule: Hammer-Marlowe-Stroud 1 | points: 4, degree: 2 + /// - blending map: IdentityMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 529 516 36 0 0 0 0 4 - void toMatrix_macro_3D( idx_t* RESTRICT _data_dstEdge, - idx_t* RESTRICT _data_dstVertex, - real_t* RESTRICT _data_muEdge, - real_t* RESTRICT _data_muVertex, - idx_t* RESTRICT _data_srcEdge, - idx_t* RESTRICT _data_srcVertex, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_0comp2, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_1comp2, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - real_t macro_vertex_coord_id_2comp2, - real_t macro_vertex_coord_id_3comp0, - real_t macro_vertex_coord_id_3comp1, - real_t macro_vertex_coord_id_3comp2, - std::shared_ptr< SparseMatrixProxy > mat, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float ) const; + void toMatrix_P2ElementwiseFullStokes_0_2_macro_3D( idx_t* RESTRICT _data_dstEdge, + idx_t* RESTRICT _data_dstVertex, + real_t* RESTRICT _data_muEdge, + real_t* RESTRICT _data_muVertex, + idx_t* RESTRICT _data_srcEdge, + idx_t* RESTRICT _data_srcVertex, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_0comp2, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_1comp2, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + real_t macro_vertex_coord_id_2comp2, + real_t macro_vertex_coord_id_3comp0, + real_t macro_vertex_coord_id_3comp1, + real_t macro_vertex_coord_id_3comp2, + std::shared_ptr< SparseMatrixProxy > mat, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float ) const; P2Function< real_t > mu; }; diff --git a/operators/full_stokes/P2ElementwiseFullStokes_1_0.cpp b/operators/full_stokes/P2ElementwiseFullStokes_1_0.cpp index 951259bcfb9c8122cd733955e415cd2301b2cb58..044e64af28552905584e9198202b5625e0863d0c 100644 --- a/operators/full_stokes/P2ElementwiseFullStokes_1_0.cpp +++ b/operators/full_stokes/P2ElementwiseFullStokes_1_0.cpp @@ -137,7 +137,7 @@ void P2ElementwiseFullStokes_1_0::apply( const P2Function< real_t >& src, this->timingTree_->start( "kernel" ); - apply_macro_3D( + apply_P2ElementwiseFullStokes_1_0_macro_3D( _data_dstEdge, _data_dstVertex, @@ -159,6 +159,7 @@ void P2ElementwiseFullStokes_1_0::apply( const P2Function< real_t >& src, macro_vertex_coord_id_3comp2, micro_edges_per_macro_edge, micro_edges_per_macro_edge_float ); + this->timingTree_->stop( "kernel" ); } @@ -228,7 +229,7 @@ void P2ElementwiseFullStokes_1_0::apply( const P2Function< real_t >& src, this->timingTree_->start( "kernel" ); - apply_macro_2D( + apply_P2ElementwiseFullStokes_1_0_macro_2D( _data_dstEdge, _data_dstVertex, @@ -244,6 +245,7 @@ void P2ElementwiseFullStokes_1_0::apply( const P2Function< real_t >& src, macro_vertex_coord_id_2comp1, micro_edges_per_macro_edge, micro_edges_per_macro_edge_float ); + this->timingTree_->stop( "kernel" ); } @@ -314,7 +316,7 @@ void P2ElementwiseFullStokes_1_0::toMatrix( const std::shared_ptr< SparseMatrixP this->timingTree_->start( "kernel" ); - toMatrix_macro_3D( + toMatrix_P2ElementwiseFullStokes_1_0_macro_3D( _data_dstEdge, _data_dstVertex, @@ -337,6 +339,7 @@ void P2ElementwiseFullStokes_1_0::toMatrix( const std::shared_ptr< SparseMatrixP mat, micro_edges_per_macro_edge, micro_edges_per_macro_edge_float ); + this->timingTree_->stop( "kernel" ); } } @@ -369,7 +372,7 @@ void P2ElementwiseFullStokes_1_0::toMatrix( const std::shared_ptr< SparseMatrixP this->timingTree_->start( "kernel" ); - toMatrix_macro_2D( + toMatrix_P2ElementwiseFullStokes_1_0_macro_2D( _data_dstEdge, _data_dstVertex, @@ -386,6 +389,7 @@ void P2ElementwiseFullStokes_1_0::toMatrix( const std::shared_ptr< SparseMatrixP mat, micro_edges_per_macro_edge, micro_edges_per_macro_edge_float ); + this->timingTree_->stop( "kernel" ); } } diff --git a/operators/full_stokes/P2ElementwiseFullStokes_1_0.hpp b/operators/full_stokes/P2ElementwiseFullStokes_1_0.hpp index 318f05d4e9ac016d84f26b2dc61c73f7833a00ab..201040680c782043f38da4c2dfcca6127798ab65 100644 --- a/operators/full_stokes/P2ElementwiseFullStokes_1_0.hpp +++ b/operators/full_stokes/P2ElementwiseFullStokes_1_0.hpp @@ -29,12 +29,14 @@ #include "core/DataTypes.h" #include "hyteg/LikwidWrapper.hpp" +#include "hyteg/boundary/BoundaryConditions.hpp" #include "hyteg/communication/Syncing.hpp" #include "hyteg/edgedofspace/EdgeDoFMacroCell.hpp" #include "hyteg/operators/Operator.hpp" #include "hyteg/p2functionspace/P2Function.hpp" #include "hyteg/primitivestorage/PrimitiveStorage.hpp" #include "hyteg/sparseassembly/SparseMatrixProxy.hpp" +#include "hyteg/types/types.hpp" #define FUNC_PREFIX @@ -92,100 +94,119 @@ class P2ElementwiseFullStokes_1_0 : public Operator< P2Function< real_t >, P2Fun protected: private: - /// Kernel type: apply + /// Integral: P2ElementwiseFullStokes_1_0 + /// - volume element: triangle, dim: 2, vertices: 3, spacedim: 2 + /// - kernel type: apply + /// - loop strategy: SAWTOOTH /// - quadrature rule: Dunavant 2 | points: 3, degree: 2 + /// - blending map: IdentityMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 191 189 12 0 0 0 0 1 - void apply_macro_2D( real_t* RESTRICT _data_dstEdge, - real_t* RESTRICT _data_dstVertex, - real_t* RESTRICT _data_muEdge, - real_t* RESTRICT _data_muVertex, - real_t* RESTRICT _data_srcEdge, - real_t* RESTRICT _data_srcVertex, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float ) const; - /// Kernel type: apply + void apply_P2ElementwiseFullStokes_1_0_macro_2D( real_t* RESTRICT _data_dstEdge, + real_t* RESTRICT _data_dstVertex, + real_t* RESTRICT _data_muEdge, + real_t* RESTRICT _data_muVertex, + real_t* RESTRICT _data_srcEdge, + real_t* RESTRICT _data_srcVertex, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float ) const; + + /// Integral: P2ElementwiseFullStokes_1_0 + /// - volume element: tetrahedron, dim: 3, vertices: 4, spacedim: 3 + /// - kernel type: apply + /// - loop strategy: SAWTOOTH /// - quadrature rule: Hammer-Marlowe-Stroud 1 | points: 4, degree: 2 + /// - blending map: IdentityMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 629 616 36 0 0 0 0 1 - void apply_macro_3D( real_t* RESTRICT _data_dstEdge, - real_t* RESTRICT _data_dstVertex, - real_t* RESTRICT _data_muEdge, - real_t* RESTRICT _data_muVertex, - real_t* RESTRICT _data_srcEdge, - real_t* RESTRICT _data_srcVertex, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_0comp2, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_1comp2, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - real_t macro_vertex_coord_id_2comp2, - real_t macro_vertex_coord_id_3comp0, - real_t macro_vertex_coord_id_3comp1, - real_t macro_vertex_coord_id_3comp2, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float ) const; - /// Kernel type: toMatrix + void apply_P2ElementwiseFullStokes_1_0_macro_3D( real_t* RESTRICT _data_dstEdge, + real_t* RESTRICT _data_dstVertex, + real_t* RESTRICT _data_muEdge, + real_t* RESTRICT _data_muVertex, + real_t* RESTRICT _data_srcEdge, + real_t* RESTRICT _data_srcVertex, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_0comp2, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_1comp2, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + real_t macro_vertex_coord_id_2comp2, + real_t macro_vertex_coord_id_3comp0, + real_t macro_vertex_coord_id_3comp1, + real_t macro_vertex_coord_id_3comp2, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float ) const; + + /// Integral: P2ElementwiseFullStokes_1_0 + /// - volume element: triangle, dim: 2, vertices: 3, spacedim: 2 + /// - kernel type: toMatrix + /// - loop strategy: SAWTOOTH /// - quadrature rule: Dunavant 2 | points: 3, degree: 2 + /// - blending map: IdentityMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 155 153 12 0 0 0 0 4 - void toMatrix_macro_2D( idx_t* RESTRICT _data_dstEdge, - idx_t* RESTRICT _data_dstVertex, - real_t* RESTRICT _data_muEdge, - real_t* RESTRICT _data_muVertex, - idx_t* RESTRICT _data_srcEdge, - idx_t* RESTRICT _data_srcVertex, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - std::shared_ptr< SparseMatrixProxy > mat, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float ) const; - /// Kernel type: toMatrix + void toMatrix_P2ElementwiseFullStokes_1_0_macro_2D( idx_t* RESTRICT _data_dstEdge, + idx_t* RESTRICT _data_dstVertex, + real_t* RESTRICT _data_muEdge, + real_t* RESTRICT _data_muVertex, + idx_t* RESTRICT _data_srcEdge, + idx_t* RESTRICT _data_srcVertex, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + std::shared_ptr< SparseMatrixProxy > mat, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float ) const; + + /// Integral: P2ElementwiseFullStokes_1_0 + /// - volume element: tetrahedron, dim: 3, vertices: 4, spacedim: 3 + /// - kernel type: toMatrix + /// - loop strategy: SAWTOOTH /// - quadrature rule: Hammer-Marlowe-Stroud 1 | points: 4, degree: 2 + /// - blending map: IdentityMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 529 516 36 0 0 0 0 4 - void toMatrix_macro_3D( idx_t* RESTRICT _data_dstEdge, - idx_t* RESTRICT _data_dstVertex, - real_t* RESTRICT _data_muEdge, - real_t* RESTRICT _data_muVertex, - idx_t* RESTRICT _data_srcEdge, - idx_t* RESTRICT _data_srcVertex, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_0comp2, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_1comp2, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - real_t macro_vertex_coord_id_2comp2, - real_t macro_vertex_coord_id_3comp0, - real_t macro_vertex_coord_id_3comp1, - real_t macro_vertex_coord_id_3comp2, - std::shared_ptr< SparseMatrixProxy > mat, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float ) const; + void toMatrix_P2ElementwiseFullStokes_1_0_macro_3D( idx_t* RESTRICT _data_dstEdge, + idx_t* RESTRICT _data_dstVertex, + real_t* RESTRICT _data_muEdge, + real_t* RESTRICT _data_muVertex, + idx_t* RESTRICT _data_srcEdge, + idx_t* RESTRICT _data_srcVertex, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_0comp2, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_1comp2, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + real_t macro_vertex_coord_id_2comp2, + real_t macro_vertex_coord_id_3comp0, + real_t macro_vertex_coord_id_3comp1, + real_t macro_vertex_coord_id_3comp2, + std::shared_ptr< SparseMatrixProxy > mat, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float ) const; P2Function< real_t > mu; }; diff --git a/operators/full_stokes/P2ElementwiseFullStokes_1_1.cpp b/operators/full_stokes/P2ElementwiseFullStokes_1_1.cpp index 7b031440705357e7697d345dc4dcf03921b9a3fc..a2c8882cba2200f6cd9c6d71748f14eae179a904 100644 --- a/operators/full_stokes/P2ElementwiseFullStokes_1_1.cpp +++ b/operators/full_stokes/P2ElementwiseFullStokes_1_1.cpp @@ -137,7 +137,7 @@ void P2ElementwiseFullStokes_1_1::apply( const P2Function< real_t >& src, this->timingTree_->start( "kernel" ); - apply_macro_3D( + apply_P2ElementwiseFullStokes_1_1_macro_3D( _data_dstEdge, _data_dstVertex, @@ -159,6 +159,7 @@ void P2ElementwiseFullStokes_1_1::apply( const P2Function< real_t >& src, macro_vertex_coord_id_3comp2, micro_edges_per_macro_edge, micro_edges_per_macro_edge_float ); + this->timingTree_->stop( "kernel" ); } @@ -228,7 +229,7 @@ void P2ElementwiseFullStokes_1_1::apply( const P2Function< real_t >& src, this->timingTree_->start( "kernel" ); - apply_macro_2D( + apply_P2ElementwiseFullStokes_1_1_macro_2D( _data_dstEdge, _data_dstVertex, @@ -244,6 +245,7 @@ void P2ElementwiseFullStokes_1_1::apply( const P2Function< real_t >& src, macro_vertex_coord_id_2comp1, micro_edges_per_macro_edge, micro_edges_per_macro_edge_float ); + this->timingTree_->stop( "kernel" ); } @@ -314,7 +316,7 @@ void P2ElementwiseFullStokes_1_1::toMatrix( const std::shared_ptr< SparseMatrixP this->timingTree_->start( "kernel" ); - toMatrix_macro_3D( + toMatrix_P2ElementwiseFullStokes_1_1_macro_3D( _data_dstEdge, _data_dstVertex, @@ -337,6 +339,7 @@ void P2ElementwiseFullStokes_1_1::toMatrix( const std::shared_ptr< SparseMatrixP mat, micro_edges_per_macro_edge, micro_edges_per_macro_edge_float ); + this->timingTree_->stop( "kernel" ); } } @@ -369,7 +372,7 @@ void P2ElementwiseFullStokes_1_1::toMatrix( const std::shared_ptr< SparseMatrixP this->timingTree_->start( "kernel" ); - toMatrix_macro_2D( + toMatrix_P2ElementwiseFullStokes_1_1_macro_2D( _data_dstEdge, _data_dstVertex, @@ -386,6 +389,7 @@ void P2ElementwiseFullStokes_1_1::toMatrix( const std::shared_ptr< SparseMatrixP mat, micro_edges_per_macro_edge, micro_edges_per_macro_edge_float ); + this->timingTree_->stop( "kernel" ); } } @@ -440,7 +444,7 @@ void P2ElementwiseFullStokes_1_1::computeInverseDiagonalOperatorValues() this->timingTree_->start( "kernel" ); - computeInverseDiagonalOperatorValues_macro_3D( + computeInverseDiagonalOperatorValues_P2ElementwiseFullStokes_1_1_macro_3D( _data_invDiag_Edge, _data_invDiag_Vertex, @@ -460,6 +464,7 @@ void P2ElementwiseFullStokes_1_1::computeInverseDiagonalOperatorValues() macro_vertex_coord_id_3comp2, micro_edges_per_macro_edge, micro_edges_per_macro_edge_float ); + this->timingTree_->stop( "kernel" ); } @@ -504,7 +509,7 @@ void P2ElementwiseFullStokes_1_1::computeInverseDiagonalOperatorValues() this->timingTree_->start( "kernel" ); - computeInverseDiagonalOperatorValues_macro_2D( + computeInverseDiagonalOperatorValues_P2ElementwiseFullStokes_1_1_macro_2D( _data_invDiag_Edge, _data_invDiag_Vertex, @@ -518,6 +523,7 @@ void P2ElementwiseFullStokes_1_1::computeInverseDiagonalOperatorValues() macro_vertex_coord_id_2comp1, micro_edges_per_macro_edge, micro_edges_per_macro_edge_float ); + this->timingTree_->stop( "kernel" ); } diff --git a/operators/full_stokes/P2ElementwiseFullStokes_1_1.hpp b/operators/full_stokes/P2ElementwiseFullStokes_1_1.hpp index 44988bf2257e7073543751bef1bd3fbb694ab7aa..820ab0cc75a935d7c2c3c16710152bd17c7be5cb 100644 --- a/operators/full_stokes/P2ElementwiseFullStokes_1_1.hpp +++ b/operators/full_stokes/P2ElementwiseFullStokes_1_1.hpp @@ -29,6 +29,7 @@ #include "core/DataTypes.h" #include "hyteg/LikwidWrapper.hpp" +#include "hyteg/boundary/BoundaryConditions.hpp" #include "hyteg/communication/Syncing.hpp" #include "hyteg/edgedofspace/EdgeDoFMacroCell.hpp" #include "hyteg/operators/Operator.hpp" @@ -36,6 +37,7 @@ #include "hyteg/primitivestorage/PrimitiveStorage.hpp" #include "hyteg/solvers/Smoothables.hpp" #include "hyteg/sparseassembly/SparseMatrixProxy.hpp" +#include "hyteg/types/types.hpp" #define FUNC_PREFIX @@ -98,142 +100,173 @@ class P2ElementwiseFullStokes_1_1 : public Operator< P2Function< real_t >, P2Fun protected: private: - /// Kernel type: apply + /// Integral: P2ElementwiseFullStokes_1_1 + /// - volume element: triangle, dim: 2, vertices: 3, spacedim: 2 + /// - kernel type: apply + /// - loop strategy: SAWTOOTH /// - quadrature rule: Dunavant 2 | points: 3, degree: 2 + /// - blending map: IdentityMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 146 144 12 0 0 0 0 1 - void apply_macro_2D( real_t* RESTRICT _data_dstEdge, - real_t* RESTRICT _data_dstVertex, - real_t* RESTRICT _data_muEdge, - real_t* RESTRICT _data_muVertex, - real_t* RESTRICT _data_srcEdge, - real_t* RESTRICT _data_srcVertex, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float ) const; - /// Kernel type: apply + void apply_P2ElementwiseFullStokes_1_1_macro_2D( real_t* RESTRICT _data_dstEdge, + real_t* RESTRICT _data_dstVertex, + real_t* RESTRICT _data_muEdge, + real_t* RESTRICT _data_muVertex, + real_t* RESTRICT _data_srcEdge, + real_t* RESTRICT _data_srcVertex, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float ) const; + + /// Integral: P2ElementwiseFullStokes_1_1 + /// - volume element: tetrahedron, dim: 3, vertices: 4, spacedim: 3 + /// - kernel type: apply + /// - loop strategy: SAWTOOTH /// - quadrature rule: Hammer-Marlowe-Stroud 1 | points: 4, degree: 2 + /// - blending map: IdentityMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 449 436 36 0 0 0 0 1 - void apply_macro_3D( real_t* RESTRICT _data_dstEdge, - real_t* RESTRICT _data_dstVertex, - real_t* RESTRICT _data_muEdge, - real_t* RESTRICT _data_muVertex, - real_t* RESTRICT _data_srcEdge, - real_t* RESTRICT _data_srcVertex, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_0comp2, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_1comp2, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - real_t macro_vertex_coord_id_2comp2, - real_t macro_vertex_coord_id_3comp0, - real_t macro_vertex_coord_id_3comp1, - real_t macro_vertex_coord_id_3comp2, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float ) const; - /// Kernel type: toMatrix + void apply_P2ElementwiseFullStokes_1_1_macro_3D( real_t* RESTRICT _data_dstEdge, + real_t* RESTRICT _data_dstVertex, + real_t* RESTRICT _data_muEdge, + real_t* RESTRICT _data_muVertex, + real_t* RESTRICT _data_srcEdge, + real_t* RESTRICT _data_srcVertex, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_0comp2, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_1comp2, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + real_t macro_vertex_coord_id_2comp2, + real_t macro_vertex_coord_id_3comp0, + real_t macro_vertex_coord_id_3comp1, + real_t macro_vertex_coord_id_3comp2, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float ) const; + + /// Integral: P2ElementwiseFullStokes_1_1 + /// - volume element: triangle, dim: 2, vertices: 3, spacedim: 2 + /// - kernel type: toMatrix + /// - loop strategy: SAWTOOTH /// - quadrature rule: Dunavant 2 | points: 3, degree: 2 + /// - blending map: IdentityMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 110 108 12 0 0 0 0 4 - void toMatrix_macro_2D( idx_t* RESTRICT _data_dstEdge, - idx_t* RESTRICT _data_dstVertex, - real_t* RESTRICT _data_muEdge, - real_t* RESTRICT _data_muVertex, - idx_t* RESTRICT _data_srcEdge, - idx_t* RESTRICT _data_srcVertex, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - std::shared_ptr< SparseMatrixProxy > mat, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float ) const; - /// Kernel type: toMatrix + void toMatrix_P2ElementwiseFullStokes_1_1_macro_2D( idx_t* RESTRICT _data_dstEdge, + idx_t* RESTRICT _data_dstVertex, + real_t* RESTRICT _data_muEdge, + real_t* RESTRICT _data_muVertex, + idx_t* RESTRICT _data_srcEdge, + idx_t* RESTRICT _data_srcVertex, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + std::shared_ptr< SparseMatrixProxy > mat, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float ) const; + + /// Integral: P2ElementwiseFullStokes_1_1 + /// - volume element: tetrahedron, dim: 3, vertices: 4, spacedim: 3 + /// - kernel type: toMatrix + /// - loop strategy: SAWTOOTH /// - quadrature rule: Hammer-Marlowe-Stroud 1 | points: 4, degree: 2 + /// - blending map: IdentityMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 349 336 36 0 0 0 0 4 - void toMatrix_macro_3D( idx_t* RESTRICT _data_dstEdge, - idx_t* RESTRICT _data_dstVertex, - real_t* RESTRICT _data_muEdge, - real_t* RESTRICT _data_muVertex, - idx_t* RESTRICT _data_srcEdge, - idx_t* RESTRICT _data_srcVertex, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_0comp2, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_1comp2, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - real_t macro_vertex_coord_id_2comp2, - real_t macro_vertex_coord_id_3comp0, - real_t macro_vertex_coord_id_3comp1, - real_t macro_vertex_coord_id_3comp2, - std::shared_ptr< SparseMatrixProxy > mat, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float ) const; - /// Kernel type: computeInverseDiagonalOperatorValues + void toMatrix_P2ElementwiseFullStokes_1_1_macro_3D( idx_t* RESTRICT _data_dstEdge, + idx_t* RESTRICT _data_dstVertex, + real_t* RESTRICT _data_muEdge, + real_t* RESTRICT _data_muVertex, + idx_t* RESTRICT _data_srcEdge, + idx_t* RESTRICT _data_srcVertex, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_0comp2, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_1comp2, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + real_t macro_vertex_coord_id_2comp2, + real_t macro_vertex_coord_id_3comp0, + real_t macro_vertex_coord_id_3comp1, + real_t macro_vertex_coord_id_3comp2, + std::shared_ptr< SparseMatrixProxy > mat, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float ) const; + + /// Integral: P2ElementwiseFullStokes_1_1 + /// - volume element: triangle, dim: 2, vertices: 3, spacedim: 2 + /// - kernel type: computeInverseDiagonalOperatorValues + /// - loop strategy: SAWTOOTH /// - quadrature rule: Dunavant 2 | points: 3, degree: 2 + /// - blending map: IdentityMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 71 63 12 0 0 0 0 1 - void computeInverseDiagonalOperatorValues_macro_2D( real_t* RESTRICT _data_invDiag_Edge, - real_t* RESTRICT _data_invDiag_Vertex, - real_t* RESTRICT _data_muEdge, - real_t* RESTRICT _data_muVertex, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float ) const; - /// Kernel type: computeInverseDiagonalOperatorValues + void + computeInverseDiagonalOperatorValues_P2ElementwiseFullStokes_1_1_macro_2D( real_t* RESTRICT _data_invDiag_Edge, + real_t* RESTRICT _data_invDiag_Vertex, + real_t* RESTRICT _data_muEdge, + real_t* RESTRICT _data_muVertex, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float ) const; + + /// Integral: P2ElementwiseFullStokes_1_1 + /// - volume element: tetrahedron, dim: 3, vertices: 4, spacedim: 3 + /// - kernel type: computeInverseDiagonalOperatorValues + /// - loop strategy: SAWTOOTH /// - quadrature rule: Hammer-Marlowe-Stroud 1 | points: 4, degree: 2 + /// - blending map: IdentityMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 179 156 36 0 0 0 0 1 - void computeInverseDiagonalOperatorValues_macro_3D( real_t* RESTRICT _data_invDiag_Edge, - real_t* RESTRICT _data_invDiag_Vertex, - real_t* RESTRICT _data_muEdge, - real_t* RESTRICT _data_muVertex, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_0comp2, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_1comp2, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - real_t macro_vertex_coord_id_2comp2, - real_t macro_vertex_coord_id_3comp0, - real_t macro_vertex_coord_id_3comp1, - real_t macro_vertex_coord_id_3comp2, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float ) const; + void + computeInverseDiagonalOperatorValues_P2ElementwiseFullStokes_1_1_macro_3D( real_t* RESTRICT _data_invDiag_Edge, + real_t* RESTRICT _data_invDiag_Vertex, + real_t* RESTRICT _data_muEdge, + real_t* RESTRICT _data_muVertex, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_0comp2, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_1comp2, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + real_t macro_vertex_coord_id_2comp2, + real_t macro_vertex_coord_id_3comp0, + real_t macro_vertex_coord_id_3comp1, + real_t macro_vertex_coord_id_3comp2, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float ) const; std::shared_ptr< P2Function< real_t > > invDiag_; P2Function< real_t > mu; diff --git a/operators/full_stokes/P2ElementwiseFullStokes_1_2.cpp b/operators/full_stokes/P2ElementwiseFullStokes_1_2.cpp index bd7685ec728ceba0f9bf792549a9a7086e9a79c6..b25512084b2d1bc6341b9a49a4770793909a2425 100644 --- a/operators/full_stokes/P2ElementwiseFullStokes_1_2.cpp +++ b/operators/full_stokes/P2ElementwiseFullStokes_1_2.cpp @@ -136,7 +136,7 @@ void P2ElementwiseFullStokes_1_2::apply( const P2Function< real_t >& src, this->timingTree_->start( "kernel" ); - apply_macro_3D( + apply_P2ElementwiseFullStokes_1_2_macro_3D( _data_dstEdge, _data_dstVertex, @@ -158,6 +158,7 @@ void P2ElementwiseFullStokes_1_2::apply( const P2Function< real_t >& src, macro_vertex_coord_id_3comp2, micro_edges_per_macro_edge, micro_edges_per_macro_edge_float ); + this->timingTree_->stop( "kernel" ); } @@ -236,7 +237,7 @@ void P2ElementwiseFullStokes_1_2::toMatrix( const std::shared_ptr< SparseMatrixP this->timingTree_->start( "kernel" ); - toMatrix_macro_3D( + toMatrix_P2ElementwiseFullStokes_1_2_macro_3D( _data_dstEdge, _data_dstVertex, @@ -259,6 +260,7 @@ void P2ElementwiseFullStokes_1_2::toMatrix( const std::shared_ptr< SparseMatrixP mat, micro_edges_per_macro_edge, micro_edges_per_macro_edge_float ); + this->timingTree_->stop( "kernel" ); } } diff --git a/operators/full_stokes/P2ElementwiseFullStokes_1_2.hpp b/operators/full_stokes/P2ElementwiseFullStokes_1_2.hpp index 84e25265f2e79c80ad7e616389009a1a57f703e2..e92d814570c1f59a12e74a11b436995f0da42a3b 100644 --- a/operators/full_stokes/P2ElementwiseFullStokes_1_2.hpp +++ b/operators/full_stokes/P2ElementwiseFullStokes_1_2.hpp @@ -29,12 +29,14 @@ #include "core/DataTypes.h" #include "hyteg/LikwidWrapper.hpp" +#include "hyteg/boundary/BoundaryConditions.hpp" #include "hyteg/communication/Syncing.hpp" #include "hyteg/edgedofspace/EdgeDoFMacroCell.hpp" #include "hyteg/operators/Operator.hpp" #include "hyteg/p2functionspace/P2Function.hpp" #include "hyteg/primitivestorage/PrimitiveStorage.hpp" #include "hyteg/sparseassembly/SparseMatrixProxy.hpp" +#include "hyteg/types/types.hpp" #define FUNC_PREFIX @@ -92,59 +94,68 @@ class P2ElementwiseFullStokes_1_2 : public Operator< P2Function< real_t >, P2Fun protected: private: - /// Kernel type: apply + /// Integral: P2ElementwiseFullStokes_1_2 + /// - volume element: tetrahedron, dim: 3, vertices: 4, spacedim: 3 + /// - kernel type: apply + /// - loop strategy: SAWTOOTH /// - quadrature rule: Hammer-Marlowe-Stroud 1 | points: 4, degree: 2 + /// - blending map: IdentityMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 629 616 36 0 0 0 0 1 - void apply_macro_3D( real_t* RESTRICT _data_dstEdge, - real_t* RESTRICT _data_dstVertex, - real_t* RESTRICT _data_muEdge, - real_t* RESTRICT _data_muVertex, - real_t* RESTRICT _data_srcEdge, - real_t* RESTRICT _data_srcVertex, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_0comp2, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_1comp2, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - real_t macro_vertex_coord_id_2comp2, - real_t macro_vertex_coord_id_3comp0, - real_t macro_vertex_coord_id_3comp1, - real_t macro_vertex_coord_id_3comp2, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float ) const; - /// Kernel type: toMatrix + void apply_P2ElementwiseFullStokes_1_2_macro_3D( real_t* RESTRICT _data_dstEdge, + real_t* RESTRICT _data_dstVertex, + real_t* RESTRICT _data_muEdge, + real_t* RESTRICT _data_muVertex, + real_t* RESTRICT _data_srcEdge, + real_t* RESTRICT _data_srcVertex, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_0comp2, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_1comp2, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + real_t macro_vertex_coord_id_2comp2, + real_t macro_vertex_coord_id_3comp0, + real_t macro_vertex_coord_id_3comp1, + real_t macro_vertex_coord_id_3comp2, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float ) const; + + /// Integral: P2ElementwiseFullStokes_1_2 + /// - volume element: tetrahedron, dim: 3, vertices: 4, spacedim: 3 + /// - kernel type: toMatrix + /// - loop strategy: SAWTOOTH /// - quadrature rule: Hammer-Marlowe-Stroud 1 | points: 4, degree: 2 + /// - blending map: IdentityMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 529 516 36 0 0 0 0 4 - void toMatrix_macro_3D( idx_t* RESTRICT _data_dstEdge, - idx_t* RESTRICT _data_dstVertex, - real_t* RESTRICT _data_muEdge, - real_t* RESTRICT _data_muVertex, - idx_t* RESTRICT _data_srcEdge, - idx_t* RESTRICT _data_srcVertex, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_0comp2, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_1comp2, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - real_t macro_vertex_coord_id_2comp2, - real_t macro_vertex_coord_id_3comp0, - real_t macro_vertex_coord_id_3comp1, - real_t macro_vertex_coord_id_3comp2, - std::shared_ptr< SparseMatrixProxy > mat, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float ) const; + void toMatrix_P2ElementwiseFullStokes_1_2_macro_3D( idx_t* RESTRICT _data_dstEdge, + idx_t* RESTRICT _data_dstVertex, + real_t* RESTRICT _data_muEdge, + real_t* RESTRICT _data_muVertex, + idx_t* RESTRICT _data_srcEdge, + idx_t* RESTRICT _data_srcVertex, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_0comp2, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_1comp2, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + real_t macro_vertex_coord_id_2comp2, + real_t macro_vertex_coord_id_3comp0, + real_t macro_vertex_coord_id_3comp1, + real_t macro_vertex_coord_id_3comp2, + std::shared_ptr< SparseMatrixProxy > mat, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float ) const; P2Function< real_t > mu; }; diff --git a/operators/full_stokes/P2ElementwiseFullStokes_2_0.cpp b/operators/full_stokes/P2ElementwiseFullStokes_2_0.cpp index b61da148df7b14aab21801e981ad9803cb55198a..7bbbb1004c4fad9ba14ccb747f019bb5b71332f3 100644 --- a/operators/full_stokes/P2ElementwiseFullStokes_2_0.cpp +++ b/operators/full_stokes/P2ElementwiseFullStokes_2_0.cpp @@ -136,7 +136,7 @@ void P2ElementwiseFullStokes_2_0::apply( const P2Function< real_t >& src, this->timingTree_->start( "kernel" ); - apply_macro_3D( + apply_P2ElementwiseFullStokes_2_0_macro_3D( _data_dstEdge, _data_dstVertex, @@ -158,6 +158,7 @@ void P2ElementwiseFullStokes_2_0::apply( const P2Function< real_t >& src, macro_vertex_coord_id_3comp2, micro_edges_per_macro_edge, micro_edges_per_macro_edge_float ); + this->timingTree_->stop( "kernel" ); } @@ -236,7 +237,7 @@ void P2ElementwiseFullStokes_2_0::toMatrix( const std::shared_ptr< SparseMatrixP this->timingTree_->start( "kernel" ); - toMatrix_macro_3D( + toMatrix_P2ElementwiseFullStokes_2_0_macro_3D( _data_dstEdge, _data_dstVertex, @@ -259,6 +260,7 @@ void P2ElementwiseFullStokes_2_0::toMatrix( const std::shared_ptr< SparseMatrixP mat, micro_edges_per_macro_edge, micro_edges_per_macro_edge_float ); + this->timingTree_->stop( "kernel" ); } } diff --git a/operators/full_stokes/P2ElementwiseFullStokes_2_0.hpp b/operators/full_stokes/P2ElementwiseFullStokes_2_0.hpp index 2eaff7a7322e3a2ee865cfc0ac43de6ce57ebf92..f0fac0c02d9a400c3052dc5c043599c37cf21f10 100644 --- a/operators/full_stokes/P2ElementwiseFullStokes_2_0.hpp +++ b/operators/full_stokes/P2ElementwiseFullStokes_2_0.hpp @@ -29,12 +29,14 @@ #include "core/DataTypes.h" #include "hyteg/LikwidWrapper.hpp" +#include "hyteg/boundary/BoundaryConditions.hpp" #include "hyteg/communication/Syncing.hpp" #include "hyteg/edgedofspace/EdgeDoFMacroCell.hpp" #include "hyteg/operators/Operator.hpp" #include "hyteg/p2functionspace/P2Function.hpp" #include "hyteg/primitivestorage/PrimitiveStorage.hpp" #include "hyteg/sparseassembly/SparseMatrixProxy.hpp" +#include "hyteg/types/types.hpp" #define FUNC_PREFIX @@ -92,59 +94,68 @@ class P2ElementwiseFullStokes_2_0 : public Operator< P2Function< real_t >, P2Fun protected: private: - /// Kernel type: apply + /// Integral: P2ElementwiseFullStokes_2_0 + /// - volume element: tetrahedron, dim: 3, vertices: 4, spacedim: 3 + /// - kernel type: apply + /// - loop strategy: SAWTOOTH /// - quadrature rule: Hammer-Marlowe-Stroud 1 | points: 4, degree: 2 + /// - blending map: IdentityMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 629 616 36 0 0 0 0 1 - void apply_macro_3D( real_t* RESTRICT _data_dstEdge, - real_t* RESTRICT _data_dstVertex, - real_t* RESTRICT _data_muEdge, - real_t* RESTRICT _data_muVertex, - real_t* RESTRICT _data_srcEdge, - real_t* RESTRICT _data_srcVertex, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_0comp2, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_1comp2, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - real_t macro_vertex_coord_id_2comp2, - real_t macro_vertex_coord_id_3comp0, - real_t macro_vertex_coord_id_3comp1, - real_t macro_vertex_coord_id_3comp2, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float ) const; - /// Kernel type: toMatrix + void apply_P2ElementwiseFullStokes_2_0_macro_3D( real_t* RESTRICT _data_dstEdge, + real_t* RESTRICT _data_dstVertex, + real_t* RESTRICT _data_muEdge, + real_t* RESTRICT _data_muVertex, + real_t* RESTRICT _data_srcEdge, + real_t* RESTRICT _data_srcVertex, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_0comp2, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_1comp2, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + real_t macro_vertex_coord_id_2comp2, + real_t macro_vertex_coord_id_3comp0, + real_t macro_vertex_coord_id_3comp1, + real_t macro_vertex_coord_id_3comp2, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float ) const; + + /// Integral: P2ElementwiseFullStokes_2_0 + /// - volume element: tetrahedron, dim: 3, vertices: 4, spacedim: 3 + /// - kernel type: toMatrix + /// - loop strategy: SAWTOOTH /// - quadrature rule: Hammer-Marlowe-Stroud 1 | points: 4, degree: 2 + /// - blending map: IdentityMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 529 516 36 0 0 0 0 4 - void toMatrix_macro_3D( idx_t* RESTRICT _data_dstEdge, - idx_t* RESTRICT _data_dstVertex, - real_t* RESTRICT _data_muEdge, - real_t* RESTRICT _data_muVertex, - idx_t* RESTRICT _data_srcEdge, - idx_t* RESTRICT _data_srcVertex, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_0comp2, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_1comp2, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - real_t macro_vertex_coord_id_2comp2, - real_t macro_vertex_coord_id_3comp0, - real_t macro_vertex_coord_id_3comp1, - real_t macro_vertex_coord_id_3comp2, - std::shared_ptr< SparseMatrixProxy > mat, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float ) const; + void toMatrix_P2ElementwiseFullStokes_2_0_macro_3D( idx_t* RESTRICT _data_dstEdge, + idx_t* RESTRICT _data_dstVertex, + real_t* RESTRICT _data_muEdge, + real_t* RESTRICT _data_muVertex, + idx_t* RESTRICT _data_srcEdge, + idx_t* RESTRICT _data_srcVertex, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_0comp2, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_1comp2, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + real_t macro_vertex_coord_id_2comp2, + real_t macro_vertex_coord_id_3comp0, + real_t macro_vertex_coord_id_3comp1, + real_t macro_vertex_coord_id_3comp2, + std::shared_ptr< SparseMatrixProxy > mat, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float ) const; P2Function< real_t > mu; }; diff --git a/operators/full_stokes/P2ElementwiseFullStokes_2_1.cpp b/operators/full_stokes/P2ElementwiseFullStokes_2_1.cpp index 700635454e684d7af3daa5fbdca5e243b4f511f2..dba8cdb87d36d467fa28e8bb60f6cb22019ffd24 100644 --- a/operators/full_stokes/P2ElementwiseFullStokes_2_1.cpp +++ b/operators/full_stokes/P2ElementwiseFullStokes_2_1.cpp @@ -136,7 +136,7 @@ void P2ElementwiseFullStokes_2_1::apply( const P2Function< real_t >& src, this->timingTree_->start( "kernel" ); - apply_macro_3D( + apply_P2ElementwiseFullStokes_2_1_macro_3D( _data_dstEdge, _data_dstVertex, @@ -158,6 +158,7 @@ void P2ElementwiseFullStokes_2_1::apply( const P2Function< real_t >& src, macro_vertex_coord_id_3comp2, micro_edges_per_macro_edge, micro_edges_per_macro_edge_float ); + this->timingTree_->stop( "kernel" ); } @@ -236,7 +237,7 @@ void P2ElementwiseFullStokes_2_1::toMatrix( const std::shared_ptr< SparseMatrixP this->timingTree_->start( "kernel" ); - toMatrix_macro_3D( + toMatrix_P2ElementwiseFullStokes_2_1_macro_3D( _data_dstEdge, _data_dstVertex, @@ -259,6 +260,7 @@ void P2ElementwiseFullStokes_2_1::toMatrix( const std::shared_ptr< SparseMatrixP mat, micro_edges_per_macro_edge, micro_edges_per_macro_edge_float ); + this->timingTree_->stop( "kernel" ); } } diff --git a/operators/full_stokes/P2ElementwiseFullStokes_2_1.hpp b/operators/full_stokes/P2ElementwiseFullStokes_2_1.hpp index 6903a7f921a31af94708788dae4bdb83a48a111c..5cb09896ccf6ad01970c8f1439c2ec7576400373 100644 --- a/operators/full_stokes/P2ElementwiseFullStokes_2_1.hpp +++ b/operators/full_stokes/P2ElementwiseFullStokes_2_1.hpp @@ -29,12 +29,14 @@ #include "core/DataTypes.h" #include "hyteg/LikwidWrapper.hpp" +#include "hyteg/boundary/BoundaryConditions.hpp" #include "hyteg/communication/Syncing.hpp" #include "hyteg/edgedofspace/EdgeDoFMacroCell.hpp" #include "hyteg/operators/Operator.hpp" #include "hyteg/p2functionspace/P2Function.hpp" #include "hyteg/primitivestorage/PrimitiveStorage.hpp" #include "hyteg/sparseassembly/SparseMatrixProxy.hpp" +#include "hyteg/types/types.hpp" #define FUNC_PREFIX @@ -92,59 +94,68 @@ class P2ElementwiseFullStokes_2_1 : public Operator< P2Function< real_t >, P2Fun protected: private: - /// Kernel type: apply + /// Integral: P2ElementwiseFullStokes_2_1 + /// - volume element: tetrahedron, dim: 3, vertices: 4, spacedim: 3 + /// - kernel type: apply + /// - loop strategy: SAWTOOTH /// - quadrature rule: Hammer-Marlowe-Stroud 1 | points: 4, degree: 2 + /// - blending map: IdentityMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 629 616 36 0 0 0 0 1 - void apply_macro_3D( real_t* RESTRICT _data_dstEdge, - real_t* RESTRICT _data_dstVertex, - real_t* RESTRICT _data_muEdge, - real_t* RESTRICT _data_muVertex, - real_t* RESTRICT _data_srcEdge, - real_t* RESTRICT _data_srcVertex, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_0comp2, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_1comp2, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - real_t macro_vertex_coord_id_2comp2, - real_t macro_vertex_coord_id_3comp0, - real_t macro_vertex_coord_id_3comp1, - real_t macro_vertex_coord_id_3comp2, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float ) const; - /// Kernel type: toMatrix + void apply_P2ElementwiseFullStokes_2_1_macro_3D( real_t* RESTRICT _data_dstEdge, + real_t* RESTRICT _data_dstVertex, + real_t* RESTRICT _data_muEdge, + real_t* RESTRICT _data_muVertex, + real_t* RESTRICT _data_srcEdge, + real_t* RESTRICT _data_srcVertex, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_0comp2, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_1comp2, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + real_t macro_vertex_coord_id_2comp2, + real_t macro_vertex_coord_id_3comp0, + real_t macro_vertex_coord_id_3comp1, + real_t macro_vertex_coord_id_3comp2, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float ) const; + + /// Integral: P2ElementwiseFullStokes_2_1 + /// - volume element: tetrahedron, dim: 3, vertices: 4, spacedim: 3 + /// - kernel type: toMatrix + /// - loop strategy: SAWTOOTH /// - quadrature rule: Hammer-Marlowe-Stroud 1 | points: 4, degree: 2 + /// - blending map: IdentityMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 529 516 36 0 0 0 0 4 - void toMatrix_macro_3D( idx_t* RESTRICT _data_dstEdge, - idx_t* RESTRICT _data_dstVertex, - real_t* RESTRICT _data_muEdge, - real_t* RESTRICT _data_muVertex, - idx_t* RESTRICT _data_srcEdge, - idx_t* RESTRICT _data_srcVertex, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_0comp2, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_1comp2, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - real_t macro_vertex_coord_id_2comp2, - real_t macro_vertex_coord_id_3comp0, - real_t macro_vertex_coord_id_3comp1, - real_t macro_vertex_coord_id_3comp2, - std::shared_ptr< SparseMatrixProxy > mat, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float ) const; + void toMatrix_P2ElementwiseFullStokes_2_1_macro_3D( idx_t* RESTRICT _data_dstEdge, + idx_t* RESTRICT _data_dstVertex, + real_t* RESTRICT _data_muEdge, + real_t* RESTRICT _data_muVertex, + idx_t* RESTRICT _data_srcEdge, + idx_t* RESTRICT _data_srcVertex, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_0comp2, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_1comp2, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + real_t macro_vertex_coord_id_2comp2, + real_t macro_vertex_coord_id_3comp0, + real_t macro_vertex_coord_id_3comp1, + real_t macro_vertex_coord_id_3comp2, + std::shared_ptr< SparseMatrixProxy > mat, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float ) const; P2Function< real_t > mu; }; diff --git a/operators/full_stokes/P2ElementwiseFullStokes_2_2.cpp b/operators/full_stokes/P2ElementwiseFullStokes_2_2.cpp index 4d0715c171332a7fe692353df358d18070a972c3..1a7be77d109a912fe302b1e12a31b43f51becb9e 100644 --- a/operators/full_stokes/P2ElementwiseFullStokes_2_2.cpp +++ b/operators/full_stokes/P2ElementwiseFullStokes_2_2.cpp @@ -136,7 +136,7 @@ void P2ElementwiseFullStokes_2_2::apply( const P2Function< real_t >& src, this->timingTree_->start( "kernel" ); - apply_macro_3D( + apply_P2ElementwiseFullStokes_2_2_macro_3D( _data_dstEdge, _data_dstVertex, @@ -158,6 +158,7 @@ void P2ElementwiseFullStokes_2_2::apply( const P2Function< real_t >& src, macro_vertex_coord_id_3comp2, micro_edges_per_macro_edge, micro_edges_per_macro_edge_float ); + this->timingTree_->stop( "kernel" ); } @@ -236,7 +237,7 @@ void P2ElementwiseFullStokes_2_2::toMatrix( const std::shared_ptr< SparseMatrixP this->timingTree_->start( "kernel" ); - toMatrix_macro_3D( + toMatrix_P2ElementwiseFullStokes_2_2_macro_3D( _data_dstEdge, _data_dstVertex, @@ -259,6 +260,7 @@ void P2ElementwiseFullStokes_2_2::toMatrix( const std::shared_ptr< SparseMatrixP mat, micro_edges_per_macro_edge, micro_edges_per_macro_edge_float ); + this->timingTree_->stop( "kernel" ); } } @@ -321,7 +323,7 @@ void P2ElementwiseFullStokes_2_2::computeInverseDiagonalOperatorValues() this->timingTree_->start( "kernel" ); - computeInverseDiagonalOperatorValues_macro_3D( + computeInverseDiagonalOperatorValues_P2ElementwiseFullStokes_2_2_macro_3D( _data_invDiag_Edge, _data_invDiag_Vertex, @@ -341,6 +343,7 @@ void P2ElementwiseFullStokes_2_2::computeInverseDiagonalOperatorValues() macro_vertex_coord_id_3comp2, micro_edges_per_macro_edge, micro_edges_per_macro_edge_float ); + this->timingTree_->stop( "kernel" ); } diff --git a/operators/full_stokes/P2ElementwiseFullStokes_2_2.hpp b/operators/full_stokes/P2ElementwiseFullStokes_2_2.hpp index 521c219f2afbac08294f029f6e1ad1f9ceb8cb7c..015385dc16ef44a8ec7338de7782dd5dec57657a 100644 --- a/operators/full_stokes/P2ElementwiseFullStokes_2_2.hpp +++ b/operators/full_stokes/P2ElementwiseFullStokes_2_2.hpp @@ -29,6 +29,7 @@ #include "core/DataTypes.h" #include "hyteg/LikwidWrapper.hpp" +#include "hyteg/boundary/BoundaryConditions.hpp" #include "hyteg/communication/Syncing.hpp" #include "hyteg/edgedofspace/EdgeDoFMacroCell.hpp" #include "hyteg/operators/Operator.hpp" @@ -36,6 +37,7 @@ #include "hyteg/primitivestorage/PrimitiveStorage.hpp" #include "hyteg/solvers/Smoothables.hpp" #include "hyteg/sparseassembly/SparseMatrixProxy.hpp" +#include "hyteg/types/types.hpp" #define FUNC_PREFIX @@ -98,83 +100,98 @@ class P2ElementwiseFullStokes_2_2 : public Operator< P2Function< real_t >, P2Fun protected: private: - /// Kernel type: apply + /// Integral: P2ElementwiseFullStokes_2_2 + /// - volume element: tetrahedron, dim: 3, vertices: 4, spacedim: 3 + /// - kernel type: apply + /// - loop strategy: SAWTOOTH /// - quadrature rule: Hammer-Marlowe-Stroud 1 | points: 4, degree: 2 + /// - blending map: IdentityMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 449 436 36 0 0 0 0 1 - void apply_macro_3D( real_t* RESTRICT _data_dstEdge, - real_t* RESTRICT _data_dstVertex, - real_t* RESTRICT _data_muEdge, - real_t* RESTRICT _data_muVertex, - real_t* RESTRICT _data_srcEdge, - real_t* RESTRICT _data_srcVertex, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_0comp2, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_1comp2, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - real_t macro_vertex_coord_id_2comp2, - real_t macro_vertex_coord_id_3comp0, - real_t macro_vertex_coord_id_3comp1, - real_t macro_vertex_coord_id_3comp2, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float ) const; - /// Kernel type: toMatrix + void apply_P2ElementwiseFullStokes_2_2_macro_3D( real_t* RESTRICT _data_dstEdge, + real_t* RESTRICT _data_dstVertex, + real_t* RESTRICT _data_muEdge, + real_t* RESTRICT _data_muVertex, + real_t* RESTRICT _data_srcEdge, + real_t* RESTRICT _data_srcVertex, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_0comp2, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_1comp2, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + real_t macro_vertex_coord_id_2comp2, + real_t macro_vertex_coord_id_3comp0, + real_t macro_vertex_coord_id_3comp1, + real_t macro_vertex_coord_id_3comp2, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float ) const; + + /// Integral: P2ElementwiseFullStokes_2_2 + /// - volume element: tetrahedron, dim: 3, vertices: 4, spacedim: 3 + /// - kernel type: toMatrix + /// - loop strategy: SAWTOOTH /// - quadrature rule: Hammer-Marlowe-Stroud 1 | points: 4, degree: 2 + /// - blending map: IdentityMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 349 336 36 0 0 0 0 4 - void toMatrix_macro_3D( idx_t* RESTRICT _data_dstEdge, - idx_t* RESTRICT _data_dstVertex, - real_t* RESTRICT _data_muEdge, - real_t* RESTRICT _data_muVertex, - idx_t* RESTRICT _data_srcEdge, - idx_t* RESTRICT _data_srcVertex, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_0comp2, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_1comp2, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - real_t macro_vertex_coord_id_2comp2, - real_t macro_vertex_coord_id_3comp0, - real_t macro_vertex_coord_id_3comp1, - real_t macro_vertex_coord_id_3comp2, - std::shared_ptr< SparseMatrixProxy > mat, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float ) const; - /// Kernel type: computeInverseDiagonalOperatorValues + void toMatrix_P2ElementwiseFullStokes_2_2_macro_3D( idx_t* RESTRICT _data_dstEdge, + idx_t* RESTRICT _data_dstVertex, + real_t* RESTRICT _data_muEdge, + real_t* RESTRICT _data_muVertex, + idx_t* RESTRICT _data_srcEdge, + idx_t* RESTRICT _data_srcVertex, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_0comp2, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_1comp2, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + real_t macro_vertex_coord_id_2comp2, + real_t macro_vertex_coord_id_3comp0, + real_t macro_vertex_coord_id_3comp1, + real_t macro_vertex_coord_id_3comp2, + std::shared_ptr< SparseMatrixProxy > mat, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float ) const; + + /// Integral: P2ElementwiseFullStokes_2_2 + /// - volume element: tetrahedron, dim: 3, vertices: 4, spacedim: 3 + /// - kernel type: computeInverseDiagonalOperatorValues + /// - loop strategy: SAWTOOTH /// - quadrature rule: Hammer-Marlowe-Stroud 1 | points: 4, degree: 2 + /// - blending map: IdentityMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 179 156 36 0 0 0 0 1 - void computeInverseDiagonalOperatorValues_macro_3D( real_t* RESTRICT _data_invDiag_Edge, - real_t* RESTRICT _data_invDiag_Vertex, - real_t* RESTRICT _data_muEdge, - real_t* RESTRICT _data_muVertex, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_0comp2, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_1comp2, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - real_t macro_vertex_coord_id_2comp2, - real_t macro_vertex_coord_id_3comp0, - real_t macro_vertex_coord_id_3comp1, - real_t macro_vertex_coord_id_3comp2, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float ) const; + void + computeInverseDiagonalOperatorValues_P2ElementwiseFullStokes_2_2_macro_3D( real_t* RESTRICT _data_invDiag_Edge, + real_t* RESTRICT _data_invDiag_Vertex, + real_t* RESTRICT _data_muEdge, + real_t* RESTRICT _data_muVertex, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_0comp2, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_1comp2, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + real_t macro_vertex_coord_id_2comp2, + real_t macro_vertex_coord_id_3comp0, + real_t macro_vertex_coord_id_3comp1, + real_t macro_vertex_coord_id_3comp2, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float ) const; std::shared_ptr< P2Function< real_t > > invDiag_; P2Function< real_t > mu; diff --git a/operators/full_stokes/avx/P2ElementwiseFullStokesAnnulusMap_0_0_apply_macro_2D.cpp b/operators/full_stokes/avx/P2ElementwiseFullStokesAnnulusMap_0_0_apply_P2ElementwiseFullStokesAnnulusMap_0_0_macro_2D.cpp similarity index 99% rename from operators/full_stokes/avx/P2ElementwiseFullStokesAnnulusMap_0_0_apply_macro_2D.cpp rename to operators/full_stokes/avx/P2ElementwiseFullStokesAnnulusMap_0_0_apply_P2ElementwiseFullStokesAnnulusMap_0_0_macro_2D.cpp index 39f53c13266970e737adc7d4f3edd935e1bbe026..4dbc0bc1caf9eb3136ea44806c0859f966509549 100644 --- a/operators/full_stokes/avx/P2ElementwiseFullStokesAnnulusMap_0_0_apply_macro_2D.cpp +++ b/operators/full_stokes/avx/P2ElementwiseFullStokesAnnulusMap_0_0_apply_P2ElementwiseFullStokesAnnulusMap_0_0_macro_2D.cpp @@ -45,6 +45,10 @@ + + + + @@ -58,7 +62,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseFullStokesAnnulusMap_0_0::apply_macro_2D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const +void P2ElementwiseFullStokesAnnulusMap_0_0::apply_P2ElementwiseFullStokesAnnulusMap_0_0_macro_2D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const { { const real_t _data_q_w [] = {-0.28125, 0.26041666666666669, 0.26041666666666669, 0.26041666666666669}; diff --git a/operators/full_stokes/avx/P2ElementwiseFullStokesAnnulusMap_0_0_computeInverseDiagonalOperatorValues_macro_2D.cpp b/operators/full_stokes/avx/P2ElementwiseFullStokesAnnulusMap_0_0_computeInverseDiagonalOperatorValues_P2ElementwiseFullStokesAnnulusMap_0_0_macro_2D.cpp similarity index 99% rename from operators/full_stokes/avx/P2ElementwiseFullStokesAnnulusMap_0_0_computeInverseDiagonalOperatorValues_macro_2D.cpp rename to operators/full_stokes/avx/P2ElementwiseFullStokesAnnulusMap_0_0_computeInverseDiagonalOperatorValues_P2ElementwiseFullStokesAnnulusMap_0_0_macro_2D.cpp index 1460316dbc9614771d466e8fbff26c3ebaa18aa2..bb6db6bc4af56f9a5ca9eb5954e524839f02024c 100644 --- a/operators/full_stokes/avx/P2ElementwiseFullStokesAnnulusMap_0_0_computeInverseDiagonalOperatorValues_macro_2D.cpp +++ b/operators/full_stokes/avx/P2ElementwiseFullStokesAnnulusMap_0_0_computeInverseDiagonalOperatorValues_P2ElementwiseFullStokesAnnulusMap_0_0_macro_2D.cpp @@ -45,6 +45,10 @@ + + + + @@ -58,7 +62,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseFullStokesAnnulusMap_0_0::computeInverseDiagonalOperatorValues_macro_2D( real_t * RESTRICT _data_invDiag_Edge, real_t * RESTRICT _data_invDiag_Vertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const +void P2ElementwiseFullStokesAnnulusMap_0_0::computeInverseDiagonalOperatorValues_P2ElementwiseFullStokesAnnulusMap_0_0_macro_2D( real_t * RESTRICT _data_invDiag_Edge, real_t * RESTRICT _data_invDiag_Vertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const { { const real_t _data_q_w [] = {-0.28125, 0.26041666666666669, 0.26041666666666669, 0.26041666666666669}; diff --git a/operators/full_stokes/avx/P2ElementwiseFullStokesAnnulusMap_0_1_apply_macro_2D.cpp b/operators/full_stokes/avx/P2ElementwiseFullStokesAnnulusMap_0_1_apply_P2ElementwiseFullStokesAnnulusMap_0_1_macro_2D.cpp similarity index 99% rename from operators/full_stokes/avx/P2ElementwiseFullStokesAnnulusMap_0_1_apply_macro_2D.cpp rename to operators/full_stokes/avx/P2ElementwiseFullStokesAnnulusMap_0_1_apply_P2ElementwiseFullStokesAnnulusMap_0_1_macro_2D.cpp index 3b856520c8367de5cc29cb4c7f5c56b73f6676ea..9018004f3f58369c7df78149f57c4f7f28042a23 100644 --- a/operators/full_stokes/avx/P2ElementwiseFullStokesAnnulusMap_0_1_apply_macro_2D.cpp +++ b/operators/full_stokes/avx/P2ElementwiseFullStokesAnnulusMap_0_1_apply_P2ElementwiseFullStokesAnnulusMap_0_1_macro_2D.cpp @@ -43,6 +43,10 @@ + + + + @@ -56,7 +60,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseFullStokesAnnulusMap_0_1::apply_macro_2D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const +void P2ElementwiseFullStokesAnnulusMap_0_1::apply_P2ElementwiseFullStokesAnnulusMap_0_1_macro_2D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const { { const real_t _data_q_w [] = {-0.28125, 0.26041666666666669, 0.26041666666666669, 0.26041666666666669}; diff --git a/operators/full_stokes/avx/P2ElementwiseFullStokesAnnulusMap_1_0_apply_macro_2D.cpp b/operators/full_stokes/avx/P2ElementwiseFullStokesAnnulusMap_1_0_apply_P2ElementwiseFullStokesAnnulusMap_1_0_macro_2D.cpp similarity index 99% rename from operators/full_stokes/avx/P2ElementwiseFullStokesAnnulusMap_1_0_apply_macro_2D.cpp rename to operators/full_stokes/avx/P2ElementwiseFullStokesAnnulusMap_1_0_apply_P2ElementwiseFullStokesAnnulusMap_1_0_macro_2D.cpp index 2ef8ff10d74956d71edfe405f353f533e83eca87..5681bd3fe5d26a654b171f2c818a745aef010319 100644 --- a/operators/full_stokes/avx/P2ElementwiseFullStokesAnnulusMap_1_0_apply_macro_2D.cpp +++ b/operators/full_stokes/avx/P2ElementwiseFullStokesAnnulusMap_1_0_apply_P2ElementwiseFullStokesAnnulusMap_1_0_macro_2D.cpp @@ -43,6 +43,10 @@ + + + + @@ -56,7 +60,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseFullStokesAnnulusMap_1_0::apply_macro_2D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const +void P2ElementwiseFullStokesAnnulusMap_1_0::apply_P2ElementwiseFullStokesAnnulusMap_1_0_macro_2D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const { { const real_t _data_q_w [] = {-0.28125, 0.26041666666666669, 0.26041666666666669, 0.26041666666666669}; diff --git a/operators/full_stokes/avx/P2ElementwiseFullStokesAnnulusMap_1_1_apply_macro_2D.cpp b/operators/full_stokes/avx/P2ElementwiseFullStokesAnnulusMap_1_1_apply_P2ElementwiseFullStokesAnnulusMap_1_1_macro_2D.cpp similarity index 99% rename from operators/full_stokes/avx/P2ElementwiseFullStokesAnnulusMap_1_1_apply_macro_2D.cpp rename to operators/full_stokes/avx/P2ElementwiseFullStokesAnnulusMap_1_1_apply_P2ElementwiseFullStokesAnnulusMap_1_1_macro_2D.cpp index 6211a35b3e4acd83cc6e6016c989818b1706c5ff..4efc8a1db043caaf6914c2252a7b38a92e5ee9ad 100644 --- a/operators/full_stokes/avx/P2ElementwiseFullStokesAnnulusMap_1_1_apply_macro_2D.cpp +++ b/operators/full_stokes/avx/P2ElementwiseFullStokesAnnulusMap_1_1_apply_P2ElementwiseFullStokesAnnulusMap_1_1_macro_2D.cpp @@ -45,6 +45,10 @@ + + + + @@ -58,7 +62,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseFullStokesAnnulusMap_1_1::apply_macro_2D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const +void P2ElementwiseFullStokesAnnulusMap_1_1::apply_P2ElementwiseFullStokesAnnulusMap_1_1_macro_2D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const { { const real_t _data_q_w [] = {-0.28125, 0.26041666666666669, 0.26041666666666669, 0.26041666666666669}; diff --git a/operators/full_stokes/avx/P2ElementwiseFullStokesAnnulusMap_1_1_computeInverseDiagonalOperatorValues_macro_2D.cpp b/operators/full_stokes/avx/P2ElementwiseFullStokesAnnulusMap_1_1_computeInverseDiagonalOperatorValues_P2ElementwiseFullStokesAnnulusMap_1_1_macro_2D.cpp similarity index 99% rename from operators/full_stokes/avx/P2ElementwiseFullStokesAnnulusMap_1_1_computeInverseDiagonalOperatorValues_macro_2D.cpp rename to operators/full_stokes/avx/P2ElementwiseFullStokesAnnulusMap_1_1_computeInverseDiagonalOperatorValues_P2ElementwiseFullStokesAnnulusMap_1_1_macro_2D.cpp index 8738303307d6e467b7611ceeba8e75cfb4389245..6240b9a1bd095aae1eef2ba341c36c8f8eba221f 100644 --- a/operators/full_stokes/avx/P2ElementwiseFullStokesAnnulusMap_1_1_computeInverseDiagonalOperatorValues_macro_2D.cpp +++ b/operators/full_stokes/avx/P2ElementwiseFullStokesAnnulusMap_1_1_computeInverseDiagonalOperatorValues_P2ElementwiseFullStokesAnnulusMap_1_1_macro_2D.cpp @@ -45,6 +45,10 @@ + + + + @@ -58,7 +62,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseFullStokesAnnulusMap_1_1::computeInverseDiagonalOperatorValues_macro_2D( real_t * RESTRICT _data_invDiag_Edge, real_t * RESTRICT _data_invDiag_Vertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const +void P2ElementwiseFullStokesAnnulusMap_1_1::computeInverseDiagonalOperatorValues_P2ElementwiseFullStokesAnnulusMap_1_1_macro_2D( real_t * RESTRICT _data_invDiag_Edge, real_t * RESTRICT _data_invDiag_Vertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const { { const real_t _data_q_w [] = {-0.28125, 0.26041666666666669, 0.26041666666666669, 0.26041666666666669}; diff --git a/operators/full_stokes/avx/P2ElementwiseFullStokesIcosahedralShellMap_0_0_apply_macro_3D.cpp b/operators/full_stokes/avx/P2ElementwiseFullStokesIcosahedralShellMap_0_0_apply_P2ElementwiseFullStokesIcosahedralShellMap_0_0_macro_3D.cpp similarity index 99% rename from operators/full_stokes/avx/P2ElementwiseFullStokesIcosahedralShellMap_0_0_apply_macro_3D.cpp rename to operators/full_stokes/avx/P2ElementwiseFullStokesIcosahedralShellMap_0_0_apply_P2ElementwiseFullStokesIcosahedralShellMap_0_0_macro_3D.cpp index d1d11acff7e2ae6fdcb5c38cbbb1af820060312d..bd795eced7995ea6676cb0c8393161fe932cd618 100644 --- a/operators/full_stokes/avx/P2ElementwiseFullStokesIcosahedralShellMap_0_0_apply_macro_3D.cpp +++ b/operators/full_stokes/avx/P2ElementwiseFullStokesIcosahedralShellMap_0_0_apply_P2ElementwiseFullStokesIcosahedralShellMap_0_0_macro_3D.cpp @@ -45,6 +45,10 @@ + + + + @@ -58,7 +62,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseFullStokesIcosahedralShellMap_0_0::apply_macro_3D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const +void P2ElementwiseFullStokesIcosahedralShellMap_0_0::apply_P2ElementwiseFullStokesIcosahedralShellMap_0_0_macro_3D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const { { const real_t _data_q_w [] = {-0.1333333333333333, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983}; diff --git a/operators/full_stokes/avx/P2ElementwiseFullStokesIcosahedralShellMap_0_0_computeInverseDiagonalOperatorValues_macro_3D.cpp b/operators/full_stokes/avx/P2ElementwiseFullStokesIcosahedralShellMap_0_0_computeInverseDiagonalOperatorValues_P2ElementwiseFullStokesIcosahedralShellMap_0_0_macro_3D.cpp similarity index 99% rename from operators/full_stokes/avx/P2ElementwiseFullStokesIcosahedralShellMap_0_0_computeInverseDiagonalOperatorValues_macro_3D.cpp rename to operators/full_stokes/avx/P2ElementwiseFullStokesIcosahedralShellMap_0_0_computeInverseDiagonalOperatorValues_P2ElementwiseFullStokesIcosahedralShellMap_0_0_macro_3D.cpp index a16c53293e05bac7168648c6bf8ebfecd5509276..340f7d6469380829ee755aa2d03b38de7bc91051 100644 --- a/operators/full_stokes/avx/P2ElementwiseFullStokesIcosahedralShellMap_0_0_computeInverseDiagonalOperatorValues_macro_3D.cpp +++ b/operators/full_stokes/avx/P2ElementwiseFullStokesIcosahedralShellMap_0_0_computeInverseDiagonalOperatorValues_P2ElementwiseFullStokesIcosahedralShellMap_0_0_macro_3D.cpp @@ -45,6 +45,10 @@ + + + + @@ -58,7 +62,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseFullStokesIcosahedralShellMap_0_0::computeInverseDiagonalOperatorValues_macro_3D( real_t * RESTRICT _data_invDiag_Edge, real_t * RESTRICT _data_invDiag_Vertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const +void P2ElementwiseFullStokesIcosahedralShellMap_0_0::computeInverseDiagonalOperatorValues_P2ElementwiseFullStokesIcosahedralShellMap_0_0_macro_3D( real_t * RESTRICT _data_invDiag_Edge, real_t * RESTRICT _data_invDiag_Vertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const { { const real_t _data_q_w [] = {-0.1333333333333333, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983}; diff --git a/operators/full_stokes/avx/P2ElementwiseFullStokesIcosahedralShellMap_0_1_apply_macro_3D.cpp b/operators/full_stokes/avx/P2ElementwiseFullStokesIcosahedralShellMap_0_1_apply_P2ElementwiseFullStokesIcosahedralShellMap_0_1_macro_3D.cpp similarity index 99% rename from operators/full_stokes/avx/P2ElementwiseFullStokesIcosahedralShellMap_0_1_apply_macro_3D.cpp rename to operators/full_stokes/avx/P2ElementwiseFullStokesIcosahedralShellMap_0_1_apply_P2ElementwiseFullStokesIcosahedralShellMap_0_1_macro_3D.cpp index 8a7cf2ab9625b88c1dc48e08757c0b3f04e56189..8f0f80144d6c15c5e777ef3661ecf912c5032e6e 100644 --- a/operators/full_stokes/avx/P2ElementwiseFullStokesIcosahedralShellMap_0_1_apply_macro_3D.cpp +++ b/operators/full_stokes/avx/P2ElementwiseFullStokesIcosahedralShellMap_0_1_apply_P2ElementwiseFullStokesIcosahedralShellMap_0_1_macro_3D.cpp @@ -43,6 +43,10 @@ + + + + @@ -56,7 +60,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseFullStokesIcosahedralShellMap_0_1::apply_macro_3D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const +void P2ElementwiseFullStokesIcosahedralShellMap_0_1::apply_P2ElementwiseFullStokesIcosahedralShellMap_0_1_macro_3D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const { { const real_t _data_q_w [] = {-0.1333333333333333, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983}; diff --git a/operators/full_stokes/avx/P2ElementwiseFullStokesIcosahedralShellMap_0_2_apply_macro_3D.cpp b/operators/full_stokes/avx/P2ElementwiseFullStokesIcosahedralShellMap_0_2_apply_P2ElementwiseFullStokesIcosahedralShellMap_0_2_macro_3D.cpp similarity index 99% rename from operators/full_stokes/avx/P2ElementwiseFullStokesIcosahedralShellMap_0_2_apply_macro_3D.cpp rename to operators/full_stokes/avx/P2ElementwiseFullStokesIcosahedralShellMap_0_2_apply_P2ElementwiseFullStokesIcosahedralShellMap_0_2_macro_3D.cpp index 8baecf92b03f093673ec3303d20528b523844743..46887de2fb77af3bf062f68b0409f55660e2f3ce 100644 --- a/operators/full_stokes/avx/P2ElementwiseFullStokesIcosahedralShellMap_0_2_apply_macro_3D.cpp +++ b/operators/full_stokes/avx/P2ElementwiseFullStokesIcosahedralShellMap_0_2_apply_P2ElementwiseFullStokesIcosahedralShellMap_0_2_macro_3D.cpp @@ -43,6 +43,10 @@ + + + + @@ -56,7 +60,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseFullStokesIcosahedralShellMap_0_2::apply_macro_3D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const +void P2ElementwiseFullStokesIcosahedralShellMap_0_2::apply_P2ElementwiseFullStokesIcosahedralShellMap_0_2_macro_3D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const { { const real_t _data_q_w [] = {-0.1333333333333333, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983}; diff --git a/operators/full_stokes/avx/P2ElementwiseFullStokesIcosahedralShellMap_1_0_apply_macro_3D.cpp b/operators/full_stokes/avx/P2ElementwiseFullStokesIcosahedralShellMap_1_0_apply_P2ElementwiseFullStokesIcosahedralShellMap_1_0_macro_3D.cpp similarity index 99% rename from operators/full_stokes/avx/P2ElementwiseFullStokesIcosahedralShellMap_1_0_apply_macro_3D.cpp rename to operators/full_stokes/avx/P2ElementwiseFullStokesIcosahedralShellMap_1_0_apply_P2ElementwiseFullStokesIcosahedralShellMap_1_0_macro_3D.cpp index 3992a8cd4a65cb652bec4e257dbf5d6037c0b87b..8811dc993459bf474f4988da18990f2a2309ba7f 100644 --- a/operators/full_stokes/avx/P2ElementwiseFullStokesIcosahedralShellMap_1_0_apply_macro_3D.cpp +++ b/operators/full_stokes/avx/P2ElementwiseFullStokesIcosahedralShellMap_1_0_apply_P2ElementwiseFullStokesIcosahedralShellMap_1_0_macro_3D.cpp @@ -43,6 +43,10 @@ + + + + @@ -56,7 +60,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseFullStokesIcosahedralShellMap_1_0::apply_macro_3D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const +void P2ElementwiseFullStokesIcosahedralShellMap_1_0::apply_P2ElementwiseFullStokesIcosahedralShellMap_1_0_macro_3D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const { { const real_t _data_q_w [] = {-0.1333333333333333, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983}; diff --git a/operators/full_stokes/avx/P2ElementwiseFullStokesIcosahedralShellMap_1_1_apply_macro_3D.cpp b/operators/full_stokes/avx/P2ElementwiseFullStokesIcosahedralShellMap_1_1_apply_P2ElementwiseFullStokesIcosahedralShellMap_1_1_macro_3D.cpp similarity index 99% rename from operators/full_stokes/avx/P2ElementwiseFullStokesIcosahedralShellMap_1_1_apply_macro_3D.cpp rename to operators/full_stokes/avx/P2ElementwiseFullStokesIcosahedralShellMap_1_1_apply_P2ElementwiseFullStokesIcosahedralShellMap_1_1_macro_3D.cpp index f8a51d8156f53df20a92494f618569e3772e6b99..394fcd42b8a8c867a1c848e52d47b6083623e1d5 100644 --- a/operators/full_stokes/avx/P2ElementwiseFullStokesIcosahedralShellMap_1_1_apply_macro_3D.cpp +++ b/operators/full_stokes/avx/P2ElementwiseFullStokesIcosahedralShellMap_1_1_apply_P2ElementwiseFullStokesIcosahedralShellMap_1_1_macro_3D.cpp @@ -45,6 +45,10 @@ + + + + @@ -58,7 +62,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseFullStokesIcosahedralShellMap_1_1::apply_macro_3D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const +void P2ElementwiseFullStokesIcosahedralShellMap_1_1::apply_P2ElementwiseFullStokesIcosahedralShellMap_1_1_macro_3D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const { { const real_t _data_q_w [] = {-0.1333333333333333, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983}; diff --git a/operators/full_stokes/avx/P2ElementwiseFullStokesIcosahedralShellMap_1_1_computeInverseDiagonalOperatorValues_macro_3D.cpp b/operators/full_stokes/avx/P2ElementwiseFullStokesIcosahedralShellMap_1_1_computeInverseDiagonalOperatorValues_P2ElementwiseFullStokesIcosahedralShellMap_1_1_macro_3D.cpp similarity index 99% rename from operators/full_stokes/avx/P2ElementwiseFullStokesIcosahedralShellMap_1_1_computeInverseDiagonalOperatorValues_macro_3D.cpp rename to operators/full_stokes/avx/P2ElementwiseFullStokesIcosahedralShellMap_1_1_computeInverseDiagonalOperatorValues_P2ElementwiseFullStokesIcosahedralShellMap_1_1_macro_3D.cpp index 4f6e4f5435d858a4c7309b61810939a875189769..9527aac23c77b27a4135a4e58b84058a1e8c7c43 100644 --- a/operators/full_stokes/avx/P2ElementwiseFullStokesIcosahedralShellMap_1_1_computeInverseDiagonalOperatorValues_macro_3D.cpp +++ b/operators/full_stokes/avx/P2ElementwiseFullStokesIcosahedralShellMap_1_1_computeInverseDiagonalOperatorValues_P2ElementwiseFullStokesIcosahedralShellMap_1_1_macro_3D.cpp @@ -45,6 +45,10 @@ + + + + @@ -58,7 +62,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseFullStokesIcosahedralShellMap_1_1::computeInverseDiagonalOperatorValues_macro_3D( real_t * RESTRICT _data_invDiag_Edge, real_t * RESTRICT _data_invDiag_Vertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const +void P2ElementwiseFullStokesIcosahedralShellMap_1_1::computeInverseDiagonalOperatorValues_P2ElementwiseFullStokesIcosahedralShellMap_1_1_macro_3D( real_t * RESTRICT _data_invDiag_Edge, real_t * RESTRICT _data_invDiag_Vertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const { { const real_t _data_q_w [] = {-0.1333333333333333, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983}; diff --git a/operators/full_stokes/avx/P2ElementwiseFullStokesIcosahedralShellMap_1_2_apply_macro_3D.cpp b/operators/full_stokes/avx/P2ElementwiseFullStokesIcosahedralShellMap_1_2_apply_P2ElementwiseFullStokesIcosahedralShellMap_1_2_macro_3D.cpp similarity index 99% rename from operators/full_stokes/avx/P2ElementwiseFullStokesIcosahedralShellMap_1_2_apply_macro_3D.cpp rename to operators/full_stokes/avx/P2ElementwiseFullStokesIcosahedralShellMap_1_2_apply_P2ElementwiseFullStokesIcosahedralShellMap_1_2_macro_3D.cpp index 4a7346e9e55066d93da8d12d924ceb373f1ec9a6..3db3326fa3d8491ed04084d20614300793521ff0 100644 --- a/operators/full_stokes/avx/P2ElementwiseFullStokesIcosahedralShellMap_1_2_apply_macro_3D.cpp +++ b/operators/full_stokes/avx/P2ElementwiseFullStokesIcosahedralShellMap_1_2_apply_P2ElementwiseFullStokesIcosahedralShellMap_1_2_macro_3D.cpp @@ -43,6 +43,10 @@ + + + + @@ -56,7 +60,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseFullStokesIcosahedralShellMap_1_2::apply_macro_3D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const +void P2ElementwiseFullStokesIcosahedralShellMap_1_2::apply_P2ElementwiseFullStokesIcosahedralShellMap_1_2_macro_3D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const { { const real_t _data_q_w [] = {-0.1333333333333333, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983}; diff --git a/operators/full_stokes/avx/P2ElementwiseFullStokesIcosahedralShellMap_2_0_apply_macro_3D.cpp b/operators/full_stokes/avx/P2ElementwiseFullStokesIcosahedralShellMap_2_0_apply_P2ElementwiseFullStokesIcosahedralShellMap_2_0_macro_3D.cpp similarity index 99% rename from operators/full_stokes/avx/P2ElementwiseFullStokesIcosahedralShellMap_2_0_apply_macro_3D.cpp rename to operators/full_stokes/avx/P2ElementwiseFullStokesIcosahedralShellMap_2_0_apply_P2ElementwiseFullStokesIcosahedralShellMap_2_0_macro_3D.cpp index 98e38c67e42fa2a44a6bd547228a91aec5985422..b4f9412f0ad576832cd9cf252ca685206baef1bc 100644 --- a/operators/full_stokes/avx/P2ElementwiseFullStokesIcosahedralShellMap_2_0_apply_macro_3D.cpp +++ b/operators/full_stokes/avx/P2ElementwiseFullStokesIcosahedralShellMap_2_0_apply_P2ElementwiseFullStokesIcosahedralShellMap_2_0_macro_3D.cpp @@ -43,6 +43,10 @@ + + + + @@ -56,7 +60,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseFullStokesIcosahedralShellMap_2_0::apply_macro_3D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const +void P2ElementwiseFullStokesIcosahedralShellMap_2_0::apply_P2ElementwiseFullStokesIcosahedralShellMap_2_0_macro_3D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const { { const real_t _data_q_w [] = {-0.1333333333333333, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983}; diff --git a/operators/full_stokes/avx/P2ElementwiseFullStokesIcosahedralShellMap_2_1_apply_macro_3D.cpp b/operators/full_stokes/avx/P2ElementwiseFullStokesIcosahedralShellMap_2_1_apply_P2ElementwiseFullStokesIcosahedralShellMap_2_1_macro_3D.cpp similarity index 99% rename from operators/full_stokes/avx/P2ElementwiseFullStokesIcosahedralShellMap_2_1_apply_macro_3D.cpp rename to operators/full_stokes/avx/P2ElementwiseFullStokesIcosahedralShellMap_2_1_apply_P2ElementwiseFullStokesIcosahedralShellMap_2_1_macro_3D.cpp index 77dfb79cbd3830bc4341ea55389548949d7a2e4c..401e9cafd21c546f602d0a9d09b531fbe9f4c601 100644 --- a/operators/full_stokes/avx/P2ElementwiseFullStokesIcosahedralShellMap_2_1_apply_macro_3D.cpp +++ b/operators/full_stokes/avx/P2ElementwiseFullStokesIcosahedralShellMap_2_1_apply_P2ElementwiseFullStokesIcosahedralShellMap_2_1_macro_3D.cpp @@ -43,6 +43,10 @@ + + + + @@ -56,7 +60,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseFullStokesIcosahedralShellMap_2_1::apply_macro_3D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const +void P2ElementwiseFullStokesIcosahedralShellMap_2_1::apply_P2ElementwiseFullStokesIcosahedralShellMap_2_1_macro_3D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const { { const real_t _data_q_w [] = {-0.1333333333333333, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983}; diff --git a/operators/full_stokes/avx/P2ElementwiseFullStokesIcosahedralShellMap_2_2_apply_macro_3D.cpp b/operators/full_stokes/avx/P2ElementwiseFullStokesIcosahedralShellMap_2_2_apply_P2ElementwiseFullStokesIcosahedralShellMap_2_2_macro_3D.cpp similarity index 99% rename from operators/full_stokes/avx/P2ElementwiseFullStokesIcosahedralShellMap_2_2_apply_macro_3D.cpp rename to operators/full_stokes/avx/P2ElementwiseFullStokesIcosahedralShellMap_2_2_apply_P2ElementwiseFullStokesIcosahedralShellMap_2_2_macro_3D.cpp index bca7d6acade16165e80722738df994793024a86a..34bba0e9d0c6916b8e036ca7fa359b758cacf5e1 100644 --- a/operators/full_stokes/avx/P2ElementwiseFullStokesIcosahedralShellMap_2_2_apply_macro_3D.cpp +++ b/operators/full_stokes/avx/P2ElementwiseFullStokesIcosahedralShellMap_2_2_apply_P2ElementwiseFullStokesIcosahedralShellMap_2_2_macro_3D.cpp @@ -45,6 +45,10 @@ + + + + @@ -58,7 +62,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseFullStokesIcosahedralShellMap_2_2::apply_macro_3D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const +void P2ElementwiseFullStokesIcosahedralShellMap_2_2::apply_P2ElementwiseFullStokesIcosahedralShellMap_2_2_macro_3D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const { { const real_t _data_q_w [] = {-0.1333333333333333, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983}; diff --git a/operators/full_stokes/avx/P2ElementwiseFullStokesIcosahedralShellMap_2_2_computeInverseDiagonalOperatorValues_macro_3D.cpp b/operators/full_stokes/avx/P2ElementwiseFullStokesIcosahedralShellMap_2_2_computeInverseDiagonalOperatorValues_P2ElementwiseFullStokesIcosahedralShellMap_2_2_macro_3D.cpp similarity index 99% rename from operators/full_stokes/avx/P2ElementwiseFullStokesIcosahedralShellMap_2_2_computeInverseDiagonalOperatorValues_macro_3D.cpp rename to operators/full_stokes/avx/P2ElementwiseFullStokesIcosahedralShellMap_2_2_computeInverseDiagonalOperatorValues_P2ElementwiseFullStokesIcosahedralShellMap_2_2_macro_3D.cpp index fee58ccdb7943cb27ddbc5ffd13dce78af8bd54c..cc95c034c0365dc842a8ea409c80a5b72a66153f 100644 --- a/operators/full_stokes/avx/P2ElementwiseFullStokesIcosahedralShellMap_2_2_computeInverseDiagonalOperatorValues_macro_3D.cpp +++ b/operators/full_stokes/avx/P2ElementwiseFullStokesIcosahedralShellMap_2_2_computeInverseDiagonalOperatorValues_P2ElementwiseFullStokesIcosahedralShellMap_2_2_macro_3D.cpp @@ -45,6 +45,10 @@ + + + + @@ -58,7 +62,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseFullStokesIcosahedralShellMap_2_2::computeInverseDiagonalOperatorValues_macro_3D( real_t * RESTRICT _data_invDiag_Edge, real_t * RESTRICT _data_invDiag_Vertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const +void P2ElementwiseFullStokesIcosahedralShellMap_2_2::computeInverseDiagonalOperatorValues_P2ElementwiseFullStokesIcosahedralShellMap_2_2_macro_3D( real_t * RESTRICT _data_invDiag_Edge, real_t * RESTRICT _data_invDiag_Vertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const { { const real_t _data_q_w [] = {-0.1333333333333333, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983}; diff --git a/operators/full_stokes/avx/P2ElementwiseFullStokes_0_0_apply_macro_2D.cpp b/operators/full_stokes/avx/P2ElementwiseFullStokes_0_0_apply_P2ElementwiseFullStokes_0_0_macro_2D.cpp similarity index 99% rename from operators/full_stokes/avx/P2ElementwiseFullStokes_0_0_apply_macro_2D.cpp rename to operators/full_stokes/avx/P2ElementwiseFullStokes_0_0_apply_P2ElementwiseFullStokes_0_0_macro_2D.cpp index 6f9d6544bd1287a70601e333bac676f6f67b7de9..100047eb68a46fc19233c0def2f7a75b4b15252d 100644 --- a/operators/full_stokes/avx/P2ElementwiseFullStokes_0_0_apply_macro_2D.cpp +++ b/operators/full_stokes/avx/P2ElementwiseFullStokes_0_0_apply_P2ElementwiseFullStokes_0_0_macro_2D.cpp @@ -43,6 +43,10 @@ + + + + @@ -56,7 +60,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseFullStokes_0_0::apply_macro_2D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P2ElementwiseFullStokes_0_0::apply_P2ElementwiseFullStokes_0_0_macro_2D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t _data_q_w [] = {0.16666666666666666, 0.16666666666666666, 0.16666666666666666}; diff --git a/operators/full_stokes/avx/P2ElementwiseFullStokes_0_0_apply_macro_3D.cpp b/operators/full_stokes/avx/P2ElementwiseFullStokes_0_0_apply_P2ElementwiseFullStokes_0_0_macro_3D.cpp similarity index 99% rename from operators/full_stokes/avx/P2ElementwiseFullStokes_0_0_apply_macro_3D.cpp rename to operators/full_stokes/avx/P2ElementwiseFullStokes_0_0_apply_P2ElementwiseFullStokes_0_0_macro_3D.cpp index 7935e1ece119f4d88495b99595ceb766d8b132b3..37a69d18f83313e123074e8c35984735e11dca52 100644 --- a/operators/full_stokes/avx/P2ElementwiseFullStokes_0_0_apply_macro_3D.cpp +++ b/operators/full_stokes/avx/P2ElementwiseFullStokes_0_0_apply_P2ElementwiseFullStokes_0_0_macro_3D.cpp @@ -43,6 +43,10 @@ + + + + @@ -56,7 +60,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseFullStokes_0_0::apply_macro_3D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P2ElementwiseFullStokes_0_0::apply_P2ElementwiseFullStokes_0_0_macro_3D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t _data_q_w [] = {0.041666666666666657, 0.041666666666666657, 0.041666666666666657, 0.041666666666666657}; diff --git a/operators/full_stokes/avx/P2ElementwiseFullStokes_0_0_computeInverseDiagonalOperatorValues_macro_2D.cpp b/operators/full_stokes/avx/P2ElementwiseFullStokes_0_0_computeInverseDiagonalOperatorValues_P2ElementwiseFullStokes_0_0_macro_2D.cpp similarity index 99% rename from operators/full_stokes/avx/P2ElementwiseFullStokes_0_0_computeInverseDiagonalOperatorValues_macro_2D.cpp rename to operators/full_stokes/avx/P2ElementwiseFullStokes_0_0_computeInverseDiagonalOperatorValues_P2ElementwiseFullStokes_0_0_macro_2D.cpp index 7533c51aa6c0573d4e977191c01c12dc21010f3a..7f40a662da1713a047c3217645efc61aef32c19c 100644 --- a/operators/full_stokes/avx/P2ElementwiseFullStokes_0_0_computeInverseDiagonalOperatorValues_macro_2D.cpp +++ b/operators/full_stokes/avx/P2ElementwiseFullStokes_0_0_computeInverseDiagonalOperatorValues_P2ElementwiseFullStokes_0_0_macro_2D.cpp @@ -43,6 +43,10 @@ + + + + @@ -56,7 +60,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseFullStokes_0_0::computeInverseDiagonalOperatorValues_macro_2D( real_t * RESTRICT _data_invDiag_Edge, real_t * RESTRICT _data_invDiag_Vertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P2ElementwiseFullStokes_0_0::computeInverseDiagonalOperatorValues_P2ElementwiseFullStokes_0_0_macro_2D( real_t * RESTRICT _data_invDiag_Edge, real_t * RESTRICT _data_invDiag_Vertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t _data_q_w [] = {0.16666666666666666, 0.16666666666666666, 0.16666666666666666}; diff --git a/operators/full_stokes/avx/P2ElementwiseFullStokes_0_0_computeInverseDiagonalOperatorValues_macro_3D.cpp b/operators/full_stokes/avx/P2ElementwiseFullStokes_0_0_computeInverseDiagonalOperatorValues_P2ElementwiseFullStokes_0_0_macro_3D.cpp similarity index 99% rename from operators/full_stokes/avx/P2ElementwiseFullStokes_0_0_computeInverseDiagonalOperatorValues_macro_3D.cpp rename to operators/full_stokes/avx/P2ElementwiseFullStokes_0_0_computeInverseDiagonalOperatorValues_P2ElementwiseFullStokes_0_0_macro_3D.cpp index 36278a1b2a5f1cb55e14480a4e00b8ab37d38d44..26ae96beca51d9de61e2ff625663d62dde5e0f03 100644 --- a/operators/full_stokes/avx/P2ElementwiseFullStokes_0_0_computeInverseDiagonalOperatorValues_macro_3D.cpp +++ b/operators/full_stokes/avx/P2ElementwiseFullStokes_0_0_computeInverseDiagonalOperatorValues_P2ElementwiseFullStokes_0_0_macro_3D.cpp @@ -43,6 +43,10 @@ + + + + @@ -56,7 +60,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseFullStokes_0_0::computeInverseDiagonalOperatorValues_macro_3D( real_t * RESTRICT _data_invDiag_Edge, real_t * RESTRICT _data_invDiag_Vertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P2ElementwiseFullStokes_0_0::computeInverseDiagonalOperatorValues_P2ElementwiseFullStokes_0_0_macro_3D( real_t * RESTRICT _data_invDiag_Edge, real_t * RESTRICT _data_invDiag_Vertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t _data_q_w [] = {0.041666666666666657, 0.041666666666666657, 0.041666666666666657, 0.041666666666666657}; diff --git a/operators/full_stokes/avx/P2ElementwiseFullStokes_0_1_apply_macro_2D.cpp b/operators/full_stokes/avx/P2ElementwiseFullStokes_0_1_apply_P2ElementwiseFullStokes_0_1_macro_2D.cpp similarity index 99% rename from operators/full_stokes/avx/P2ElementwiseFullStokes_0_1_apply_macro_2D.cpp rename to operators/full_stokes/avx/P2ElementwiseFullStokes_0_1_apply_P2ElementwiseFullStokes_0_1_macro_2D.cpp index 477da792c9bc335b9bcfa70066ebb984e0b8bd0f..146c957753f75f6713a87bda9cee8f51c8701964 100644 --- a/operators/full_stokes/avx/P2ElementwiseFullStokes_0_1_apply_macro_2D.cpp +++ b/operators/full_stokes/avx/P2ElementwiseFullStokes_0_1_apply_P2ElementwiseFullStokes_0_1_macro_2D.cpp @@ -41,6 +41,10 @@ + + + + @@ -54,7 +58,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseFullStokes_0_1::apply_macro_2D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P2ElementwiseFullStokes_0_1::apply_P2ElementwiseFullStokes_0_1_macro_2D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t _data_q_w [] = {0.16666666666666666, 0.16666666666666666, 0.16666666666666666}; diff --git a/operators/full_stokes/avx/P2ElementwiseFullStokes_0_1_apply_macro_3D.cpp b/operators/full_stokes/avx/P2ElementwiseFullStokes_0_1_apply_P2ElementwiseFullStokes_0_1_macro_3D.cpp similarity index 99% rename from operators/full_stokes/avx/P2ElementwiseFullStokes_0_1_apply_macro_3D.cpp rename to operators/full_stokes/avx/P2ElementwiseFullStokes_0_1_apply_P2ElementwiseFullStokes_0_1_macro_3D.cpp index 83689cd751dfd1846737178678a3b6bc62f5c922..f9f95d88aca0b031a645b6c431a3d809f1a81609 100644 --- a/operators/full_stokes/avx/P2ElementwiseFullStokes_0_1_apply_macro_3D.cpp +++ b/operators/full_stokes/avx/P2ElementwiseFullStokes_0_1_apply_P2ElementwiseFullStokes_0_1_macro_3D.cpp @@ -41,6 +41,10 @@ + + + + @@ -54,7 +58,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseFullStokes_0_1::apply_macro_3D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P2ElementwiseFullStokes_0_1::apply_P2ElementwiseFullStokes_0_1_macro_3D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t _data_q_w [] = {0.041666666666666657, 0.041666666666666657, 0.041666666666666657, 0.041666666666666657}; diff --git a/operators/full_stokes/avx/P2ElementwiseFullStokes_0_2_apply_macro_3D.cpp b/operators/full_stokes/avx/P2ElementwiseFullStokes_0_2_apply_P2ElementwiseFullStokes_0_2_macro_3D.cpp similarity index 99% rename from operators/full_stokes/avx/P2ElementwiseFullStokes_0_2_apply_macro_3D.cpp rename to operators/full_stokes/avx/P2ElementwiseFullStokes_0_2_apply_P2ElementwiseFullStokes_0_2_macro_3D.cpp index f91882ddc77f71f7725c3bd00b2d0d89941a20cd..c4fda7ce949fbc9000517fee62e94ca5c4bb113c 100644 --- a/operators/full_stokes/avx/P2ElementwiseFullStokes_0_2_apply_macro_3D.cpp +++ b/operators/full_stokes/avx/P2ElementwiseFullStokes_0_2_apply_P2ElementwiseFullStokes_0_2_macro_3D.cpp @@ -41,6 +41,10 @@ + + + + @@ -54,7 +58,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseFullStokes_0_2::apply_macro_3D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P2ElementwiseFullStokes_0_2::apply_P2ElementwiseFullStokes_0_2_macro_3D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t _data_q_w [] = {0.041666666666666657, 0.041666666666666657, 0.041666666666666657, 0.041666666666666657}; diff --git a/operators/full_stokes/avx/P2ElementwiseFullStokes_1_0_apply_macro_2D.cpp b/operators/full_stokes/avx/P2ElementwiseFullStokes_1_0_apply_P2ElementwiseFullStokes_1_0_macro_2D.cpp similarity index 99% rename from operators/full_stokes/avx/P2ElementwiseFullStokes_1_0_apply_macro_2D.cpp rename to operators/full_stokes/avx/P2ElementwiseFullStokes_1_0_apply_P2ElementwiseFullStokes_1_0_macro_2D.cpp index 197bdd294db54fbc3e9a165e8e0856b6f0c42856..b51195bb4f7b8d905f06136e6f8781a00fc41477 100644 --- a/operators/full_stokes/avx/P2ElementwiseFullStokes_1_0_apply_macro_2D.cpp +++ b/operators/full_stokes/avx/P2ElementwiseFullStokes_1_0_apply_P2ElementwiseFullStokes_1_0_macro_2D.cpp @@ -41,6 +41,10 @@ + + + + @@ -54,7 +58,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseFullStokes_1_0::apply_macro_2D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P2ElementwiseFullStokes_1_0::apply_P2ElementwiseFullStokes_1_0_macro_2D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t _data_q_w [] = {0.16666666666666666, 0.16666666666666666, 0.16666666666666666}; diff --git a/operators/full_stokes/avx/P2ElementwiseFullStokes_1_0_apply_macro_3D.cpp b/operators/full_stokes/avx/P2ElementwiseFullStokes_1_0_apply_P2ElementwiseFullStokes_1_0_macro_3D.cpp similarity index 99% rename from operators/full_stokes/avx/P2ElementwiseFullStokes_1_0_apply_macro_3D.cpp rename to operators/full_stokes/avx/P2ElementwiseFullStokes_1_0_apply_P2ElementwiseFullStokes_1_0_macro_3D.cpp index 1a4835c0457aaa85e41206a1cc38ee7b7844598e..f4aa4d2b3330da32cb48728738ca988c24759b63 100644 --- a/operators/full_stokes/avx/P2ElementwiseFullStokes_1_0_apply_macro_3D.cpp +++ b/operators/full_stokes/avx/P2ElementwiseFullStokes_1_0_apply_P2ElementwiseFullStokes_1_0_macro_3D.cpp @@ -41,6 +41,10 @@ + + + + @@ -54,7 +58,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseFullStokes_1_0::apply_macro_3D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P2ElementwiseFullStokes_1_0::apply_P2ElementwiseFullStokes_1_0_macro_3D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t _data_q_w [] = {0.041666666666666657, 0.041666666666666657, 0.041666666666666657, 0.041666666666666657}; diff --git a/operators/full_stokes/avx/P2ElementwiseFullStokes_1_1_apply_macro_2D.cpp b/operators/full_stokes/avx/P2ElementwiseFullStokes_1_1_apply_P2ElementwiseFullStokes_1_1_macro_2D.cpp similarity index 99% rename from operators/full_stokes/avx/P2ElementwiseFullStokes_1_1_apply_macro_2D.cpp rename to operators/full_stokes/avx/P2ElementwiseFullStokes_1_1_apply_P2ElementwiseFullStokes_1_1_macro_2D.cpp index 921bacac1e12959d6420bcf12797759387b49519..4af8f0317880d5580d8424cc3f202eb9d7be3822 100644 --- a/operators/full_stokes/avx/P2ElementwiseFullStokes_1_1_apply_macro_2D.cpp +++ b/operators/full_stokes/avx/P2ElementwiseFullStokes_1_1_apply_P2ElementwiseFullStokes_1_1_macro_2D.cpp @@ -43,6 +43,10 @@ + + + + @@ -56,7 +60,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseFullStokes_1_1::apply_macro_2D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P2ElementwiseFullStokes_1_1::apply_P2ElementwiseFullStokes_1_1_macro_2D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t _data_q_w [] = {0.16666666666666666, 0.16666666666666666, 0.16666666666666666}; diff --git a/operators/full_stokes/avx/P2ElementwiseFullStokes_1_1_apply_macro_3D.cpp b/operators/full_stokes/avx/P2ElementwiseFullStokes_1_1_apply_P2ElementwiseFullStokes_1_1_macro_3D.cpp similarity index 99% rename from operators/full_stokes/avx/P2ElementwiseFullStokes_1_1_apply_macro_3D.cpp rename to operators/full_stokes/avx/P2ElementwiseFullStokes_1_1_apply_P2ElementwiseFullStokes_1_1_macro_3D.cpp index ca6d5eb5f53cdfcb097828ec3a965b874e20e7c5..91b223eb36808f2bf1d58608135fb86749253cb6 100644 --- a/operators/full_stokes/avx/P2ElementwiseFullStokes_1_1_apply_macro_3D.cpp +++ b/operators/full_stokes/avx/P2ElementwiseFullStokes_1_1_apply_P2ElementwiseFullStokes_1_1_macro_3D.cpp @@ -43,6 +43,10 @@ + + + + @@ -56,7 +60,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseFullStokes_1_1::apply_macro_3D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P2ElementwiseFullStokes_1_1::apply_P2ElementwiseFullStokes_1_1_macro_3D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t _data_q_w [] = {0.041666666666666657, 0.041666666666666657, 0.041666666666666657, 0.041666666666666657}; diff --git a/operators/full_stokes/avx/P2ElementwiseFullStokes_1_1_computeInverseDiagonalOperatorValues_macro_2D.cpp b/operators/full_stokes/avx/P2ElementwiseFullStokes_1_1_computeInverseDiagonalOperatorValues_P2ElementwiseFullStokes_1_1_macro_2D.cpp similarity index 99% rename from operators/full_stokes/avx/P2ElementwiseFullStokes_1_1_computeInverseDiagonalOperatorValues_macro_2D.cpp rename to operators/full_stokes/avx/P2ElementwiseFullStokes_1_1_computeInverseDiagonalOperatorValues_P2ElementwiseFullStokes_1_1_macro_2D.cpp index 6e71a66970f88cbcd4d755c0f6dee5e10c15df47..8d9b3df222f13a39ef0c7eea70d542eb202a9193 100644 --- a/operators/full_stokes/avx/P2ElementwiseFullStokes_1_1_computeInverseDiagonalOperatorValues_macro_2D.cpp +++ b/operators/full_stokes/avx/P2ElementwiseFullStokes_1_1_computeInverseDiagonalOperatorValues_P2ElementwiseFullStokes_1_1_macro_2D.cpp @@ -43,6 +43,10 @@ + + + + @@ -56,7 +60,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseFullStokes_1_1::computeInverseDiagonalOperatorValues_macro_2D( real_t * RESTRICT _data_invDiag_Edge, real_t * RESTRICT _data_invDiag_Vertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P2ElementwiseFullStokes_1_1::computeInverseDiagonalOperatorValues_P2ElementwiseFullStokes_1_1_macro_2D( real_t * RESTRICT _data_invDiag_Edge, real_t * RESTRICT _data_invDiag_Vertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t _data_q_w [] = {0.16666666666666666, 0.16666666666666666, 0.16666666666666666}; diff --git a/operators/full_stokes/avx/P2ElementwiseFullStokes_1_1_computeInverseDiagonalOperatorValues_macro_3D.cpp b/operators/full_stokes/avx/P2ElementwiseFullStokes_1_1_computeInverseDiagonalOperatorValues_P2ElementwiseFullStokes_1_1_macro_3D.cpp similarity index 99% rename from operators/full_stokes/avx/P2ElementwiseFullStokes_1_1_computeInverseDiagonalOperatorValues_macro_3D.cpp rename to operators/full_stokes/avx/P2ElementwiseFullStokes_1_1_computeInverseDiagonalOperatorValues_P2ElementwiseFullStokes_1_1_macro_3D.cpp index 7931c82b6a142cf2e0f61339d49b7ffb7314af5f..729b593c92904e7b9f53a71e011d33cc452c801d 100644 --- a/operators/full_stokes/avx/P2ElementwiseFullStokes_1_1_computeInverseDiagonalOperatorValues_macro_3D.cpp +++ b/operators/full_stokes/avx/P2ElementwiseFullStokes_1_1_computeInverseDiagonalOperatorValues_P2ElementwiseFullStokes_1_1_macro_3D.cpp @@ -43,6 +43,10 @@ + + + + @@ -56,7 +60,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseFullStokes_1_1::computeInverseDiagonalOperatorValues_macro_3D( real_t * RESTRICT _data_invDiag_Edge, real_t * RESTRICT _data_invDiag_Vertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P2ElementwiseFullStokes_1_1::computeInverseDiagonalOperatorValues_P2ElementwiseFullStokes_1_1_macro_3D( real_t * RESTRICT _data_invDiag_Edge, real_t * RESTRICT _data_invDiag_Vertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t _data_q_w [] = {0.041666666666666657, 0.041666666666666657, 0.041666666666666657, 0.041666666666666657}; diff --git a/operators/full_stokes/avx/P2ElementwiseFullStokes_1_2_apply_macro_3D.cpp b/operators/full_stokes/avx/P2ElementwiseFullStokes_1_2_apply_P2ElementwiseFullStokes_1_2_macro_3D.cpp similarity index 99% rename from operators/full_stokes/avx/P2ElementwiseFullStokes_1_2_apply_macro_3D.cpp rename to operators/full_stokes/avx/P2ElementwiseFullStokes_1_2_apply_P2ElementwiseFullStokes_1_2_macro_3D.cpp index a0835f608578dce1f1a44366e6e1467750dffaf8..b6c061684bef4152637d345118e93e98015c252d 100644 --- a/operators/full_stokes/avx/P2ElementwiseFullStokes_1_2_apply_macro_3D.cpp +++ b/operators/full_stokes/avx/P2ElementwiseFullStokes_1_2_apply_P2ElementwiseFullStokes_1_2_macro_3D.cpp @@ -41,6 +41,10 @@ + + + + @@ -54,7 +58,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseFullStokes_1_2::apply_macro_3D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P2ElementwiseFullStokes_1_2::apply_P2ElementwiseFullStokes_1_2_macro_3D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t _data_q_w [] = {0.041666666666666657, 0.041666666666666657, 0.041666666666666657, 0.041666666666666657}; diff --git a/operators/full_stokes/avx/P2ElementwiseFullStokes_2_0_apply_macro_3D.cpp b/operators/full_stokes/avx/P2ElementwiseFullStokes_2_0_apply_P2ElementwiseFullStokes_2_0_macro_3D.cpp similarity index 99% rename from operators/full_stokes/avx/P2ElementwiseFullStokes_2_0_apply_macro_3D.cpp rename to operators/full_stokes/avx/P2ElementwiseFullStokes_2_0_apply_P2ElementwiseFullStokes_2_0_macro_3D.cpp index 2f099c327c7577ddd4d0643c9b33dbfdaf0a0938..de5791d1e46ffc27c4d262a9d7ab3a3d3c968766 100644 --- a/operators/full_stokes/avx/P2ElementwiseFullStokes_2_0_apply_macro_3D.cpp +++ b/operators/full_stokes/avx/P2ElementwiseFullStokes_2_0_apply_P2ElementwiseFullStokes_2_0_macro_3D.cpp @@ -41,6 +41,10 @@ + + + + @@ -54,7 +58,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseFullStokes_2_0::apply_macro_3D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P2ElementwiseFullStokes_2_0::apply_P2ElementwiseFullStokes_2_0_macro_3D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t _data_q_w [] = {0.041666666666666657, 0.041666666666666657, 0.041666666666666657, 0.041666666666666657}; diff --git a/operators/full_stokes/avx/P2ElementwiseFullStokes_2_1_apply_macro_3D.cpp b/operators/full_stokes/avx/P2ElementwiseFullStokes_2_1_apply_P2ElementwiseFullStokes_2_1_macro_3D.cpp similarity index 99% rename from operators/full_stokes/avx/P2ElementwiseFullStokes_2_1_apply_macro_3D.cpp rename to operators/full_stokes/avx/P2ElementwiseFullStokes_2_1_apply_P2ElementwiseFullStokes_2_1_macro_3D.cpp index 2c3c6d45f45f728f11ee34caee894743db3e8f75..3d8e9bfb24f8f7ba9d321b86f75e279bec223218 100644 --- a/operators/full_stokes/avx/P2ElementwiseFullStokes_2_1_apply_macro_3D.cpp +++ b/operators/full_stokes/avx/P2ElementwiseFullStokes_2_1_apply_P2ElementwiseFullStokes_2_1_macro_3D.cpp @@ -41,6 +41,10 @@ + + + + @@ -54,7 +58,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseFullStokes_2_1::apply_macro_3D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P2ElementwiseFullStokes_2_1::apply_P2ElementwiseFullStokes_2_1_macro_3D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t _data_q_w [] = {0.041666666666666657, 0.041666666666666657, 0.041666666666666657, 0.041666666666666657}; diff --git a/operators/full_stokes/avx/P2ElementwiseFullStokes_2_2_apply_macro_3D.cpp b/operators/full_stokes/avx/P2ElementwiseFullStokes_2_2_apply_P2ElementwiseFullStokes_2_2_macro_3D.cpp similarity index 99% rename from operators/full_stokes/avx/P2ElementwiseFullStokes_2_2_apply_macro_3D.cpp rename to operators/full_stokes/avx/P2ElementwiseFullStokes_2_2_apply_P2ElementwiseFullStokes_2_2_macro_3D.cpp index 7d065898651b49e0cf0882fa2c7c8727c282146a..aa9338f2b2f741619cea4423c7b07d76d7a68860 100644 --- a/operators/full_stokes/avx/P2ElementwiseFullStokes_2_2_apply_macro_3D.cpp +++ b/operators/full_stokes/avx/P2ElementwiseFullStokes_2_2_apply_P2ElementwiseFullStokes_2_2_macro_3D.cpp @@ -43,6 +43,10 @@ + + + + @@ -56,7 +60,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseFullStokes_2_2::apply_macro_3D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P2ElementwiseFullStokes_2_2::apply_P2ElementwiseFullStokes_2_2_macro_3D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t _data_q_w [] = {0.041666666666666657, 0.041666666666666657, 0.041666666666666657, 0.041666666666666657}; diff --git a/operators/full_stokes/avx/P2ElementwiseFullStokes_2_2_computeInverseDiagonalOperatorValues_macro_3D.cpp b/operators/full_stokes/avx/P2ElementwiseFullStokes_2_2_computeInverseDiagonalOperatorValues_P2ElementwiseFullStokes_2_2_macro_3D.cpp similarity index 99% rename from operators/full_stokes/avx/P2ElementwiseFullStokes_2_2_computeInverseDiagonalOperatorValues_macro_3D.cpp rename to operators/full_stokes/avx/P2ElementwiseFullStokes_2_2_computeInverseDiagonalOperatorValues_P2ElementwiseFullStokes_2_2_macro_3D.cpp index 926febf0063212d0c7b6e0b056938a6aa089ba1e..10a45775c3b3bfecca2f88b6c9a24e4313a123b8 100644 --- a/operators/full_stokes/avx/P2ElementwiseFullStokes_2_2_computeInverseDiagonalOperatorValues_macro_3D.cpp +++ b/operators/full_stokes/avx/P2ElementwiseFullStokes_2_2_computeInverseDiagonalOperatorValues_P2ElementwiseFullStokes_2_2_macro_3D.cpp @@ -43,6 +43,10 @@ + + + + @@ -56,7 +60,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseFullStokes_2_2::computeInverseDiagonalOperatorValues_macro_3D( real_t * RESTRICT _data_invDiag_Edge, real_t * RESTRICT _data_invDiag_Vertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P2ElementwiseFullStokes_2_2::computeInverseDiagonalOperatorValues_P2ElementwiseFullStokes_2_2_macro_3D( real_t * RESTRICT _data_invDiag_Edge, real_t * RESTRICT _data_invDiag_Vertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t _data_q_w [] = {0.041666666666666657, 0.041666666666666657, 0.041666666666666657, 0.041666666666666657}; diff --git a/operators/full_stokes/noarch/P2ElementwiseFullStokesAnnulusMap_0_0_apply_macro_2D.cpp b/operators/full_stokes/noarch/P2ElementwiseFullStokesAnnulusMap_0_0_apply_P2ElementwiseFullStokesAnnulusMap_0_0_macro_2D.cpp similarity index 99% rename from operators/full_stokes/noarch/P2ElementwiseFullStokesAnnulusMap_0_0_apply_macro_2D.cpp rename to operators/full_stokes/noarch/P2ElementwiseFullStokesAnnulusMap_0_0_apply_P2ElementwiseFullStokesAnnulusMap_0_0_macro_2D.cpp index fcfdb4541c247978469af636f77deb5ef2be6d79..9b6ea9f3b69f73fe691b7d52d6a1257290c21748 100644 --- a/operators/full_stokes/noarch/P2ElementwiseFullStokesAnnulusMap_0_0_apply_macro_2D.cpp +++ b/operators/full_stokes/noarch/P2ElementwiseFullStokesAnnulusMap_0_0_apply_P2ElementwiseFullStokesAnnulusMap_0_0_macro_2D.cpp @@ -45,6 +45,10 @@ + + + + @@ -58,7 +62,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseFullStokesAnnulusMap_0_0::apply_macro_2D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const +void P2ElementwiseFullStokesAnnulusMap_0_0::apply_P2ElementwiseFullStokesAnnulusMap_0_0_macro_2D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const { { const real_t _data_q_w [] = {-0.28125, 0.26041666666666669, 0.26041666666666669, 0.26041666666666669}; diff --git a/operators/full_stokes/noarch/P2ElementwiseFullStokesAnnulusMap_0_0_computeInverseDiagonalOperatorValues_macro_2D.cpp b/operators/full_stokes/noarch/P2ElementwiseFullStokesAnnulusMap_0_0_computeInverseDiagonalOperatorValues_P2ElementwiseFullStokesAnnulusMap_0_0_macro_2D.cpp similarity index 98% rename from operators/full_stokes/noarch/P2ElementwiseFullStokesAnnulusMap_0_0_computeInverseDiagonalOperatorValues_macro_2D.cpp rename to operators/full_stokes/noarch/P2ElementwiseFullStokesAnnulusMap_0_0_computeInverseDiagonalOperatorValues_P2ElementwiseFullStokesAnnulusMap_0_0_macro_2D.cpp index 9eb6d67f69cda5d8d78174defbebd5f8fa1a5c58..ec6820a3b4e65a0787161d643ef1b90573470d2e 100644 --- a/operators/full_stokes/noarch/P2ElementwiseFullStokesAnnulusMap_0_0_computeInverseDiagonalOperatorValues_macro_2D.cpp +++ b/operators/full_stokes/noarch/P2ElementwiseFullStokesAnnulusMap_0_0_computeInverseDiagonalOperatorValues_P2ElementwiseFullStokesAnnulusMap_0_0_macro_2D.cpp @@ -45,6 +45,10 @@ + + + + @@ -58,7 +62,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseFullStokesAnnulusMap_0_0::computeInverseDiagonalOperatorValues_macro_2D( real_t * RESTRICT _data_invDiag_Edge, real_t * RESTRICT _data_invDiag_Vertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const +void P2ElementwiseFullStokesAnnulusMap_0_0::computeInverseDiagonalOperatorValues_P2ElementwiseFullStokesAnnulusMap_0_0_macro_2D( real_t * RESTRICT _data_invDiag_Edge, real_t * RESTRICT _data_invDiag_Vertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const { { const real_t _data_q_w [] = {-0.28125, 0.26041666666666669, 0.26041666666666669, 0.26041666666666669}; diff --git a/operators/full_stokes/noarch/P2ElementwiseFullStokesAnnulusMap_0_0_toMatrix_macro_2D.cpp b/operators/full_stokes/noarch/P2ElementwiseFullStokesAnnulusMap_0_0_toMatrix_P2ElementwiseFullStokesAnnulusMap_0_0_macro_2D.cpp similarity index 99% rename from operators/full_stokes/noarch/P2ElementwiseFullStokesAnnulusMap_0_0_toMatrix_macro_2D.cpp rename to operators/full_stokes/noarch/P2ElementwiseFullStokesAnnulusMap_0_0_toMatrix_P2ElementwiseFullStokesAnnulusMap_0_0_macro_2D.cpp index 3f9a5785418d0e9e6f74b7917c73a780d117a1ab..ad06d8872e499dc96ef7439f682317a5b8301c0f 100644 --- a/operators/full_stokes/noarch/P2ElementwiseFullStokesAnnulusMap_0_0_toMatrix_macro_2D.cpp +++ b/operators/full_stokes/noarch/P2ElementwiseFullStokesAnnulusMap_0_0_toMatrix_P2ElementwiseFullStokesAnnulusMap_0_0_macro_2D.cpp @@ -45,6 +45,10 @@ + + + + @@ -58,7 +62,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseFullStokesAnnulusMap_0_0::toMatrix_macro_2D( idx_t * RESTRICT _data_dstEdge, idx_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, idx_t * RESTRICT _data_srcEdge, idx_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const +void P2ElementwiseFullStokesAnnulusMap_0_0::toMatrix_P2ElementwiseFullStokesAnnulusMap_0_0_macro_2D( idx_t * RESTRICT _data_dstEdge, idx_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, idx_t * RESTRICT _data_srcEdge, idx_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const { { const real_t _data_q_w [] = {-0.28125, 0.26041666666666669, 0.26041666666666669, 0.26041666666666669}; diff --git a/operators/full_stokes/noarch/P2ElementwiseFullStokesAnnulusMap_0_1_apply_macro_2D.cpp b/operators/full_stokes/noarch/P2ElementwiseFullStokesAnnulusMap_0_1_apply_P2ElementwiseFullStokesAnnulusMap_0_1_macro_2D.cpp similarity index 99% rename from operators/full_stokes/noarch/P2ElementwiseFullStokesAnnulusMap_0_1_apply_macro_2D.cpp rename to operators/full_stokes/noarch/P2ElementwiseFullStokesAnnulusMap_0_1_apply_P2ElementwiseFullStokesAnnulusMap_0_1_macro_2D.cpp index 6682842242d023a98069d777ee3a438d61db3054..e0d4c03577cbec5d3896adaf6cfc7f17ddbf9669 100644 --- a/operators/full_stokes/noarch/P2ElementwiseFullStokesAnnulusMap_0_1_apply_macro_2D.cpp +++ b/operators/full_stokes/noarch/P2ElementwiseFullStokesAnnulusMap_0_1_apply_P2ElementwiseFullStokesAnnulusMap_0_1_macro_2D.cpp @@ -43,6 +43,10 @@ + + + + @@ -56,7 +60,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseFullStokesAnnulusMap_0_1::apply_macro_2D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const +void P2ElementwiseFullStokesAnnulusMap_0_1::apply_P2ElementwiseFullStokesAnnulusMap_0_1_macro_2D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const { { const real_t _data_q_w [] = {-0.28125, 0.26041666666666669, 0.26041666666666669, 0.26041666666666669}; diff --git a/operators/full_stokes/noarch/P2ElementwiseFullStokesAnnulusMap_0_1_toMatrix_macro_2D.cpp b/operators/full_stokes/noarch/P2ElementwiseFullStokesAnnulusMap_0_1_toMatrix_P2ElementwiseFullStokesAnnulusMap_0_1_macro_2D.cpp similarity index 99% rename from operators/full_stokes/noarch/P2ElementwiseFullStokesAnnulusMap_0_1_toMatrix_macro_2D.cpp rename to operators/full_stokes/noarch/P2ElementwiseFullStokesAnnulusMap_0_1_toMatrix_P2ElementwiseFullStokesAnnulusMap_0_1_macro_2D.cpp index 14582315acb51abd88f290e826490d5970b64ced..f942f57dc2dd935c4d8c426780cbb0d5a636b38f 100644 --- a/operators/full_stokes/noarch/P2ElementwiseFullStokesAnnulusMap_0_1_toMatrix_macro_2D.cpp +++ b/operators/full_stokes/noarch/P2ElementwiseFullStokesAnnulusMap_0_1_toMatrix_P2ElementwiseFullStokesAnnulusMap_0_1_macro_2D.cpp @@ -43,6 +43,10 @@ + + + + @@ -56,7 +60,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseFullStokesAnnulusMap_0_1::toMatrix_macro_2D( idx_t * RESTRICT _data_dstEdge, idx_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, idx_t * RESTRICT _data_srcEdge, idx_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const +void P2ElementwiseFullStokesAnnulusMap_0_1::toMatrix_P2ElementwiseFullStokesAnnulusMap_0_1_macro_2D( idx_t * RESTRICT _data_dstEdge, idx_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, idx_t * RESTRICT _data_srcEdge, idx_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const { { const real_t _data_q_w [] = {-0.28125, 0.26041666666666669, 0.26041666666666669, 0.26041666666666669}; diff --git a/operators/full_stokes/noarch/P2ElementwiseFullStokesAnnulusMap_1_0_apply_macro_2D.cpp b/operators/full_stokes/noarch/P2ElementwiseFullStokesAnnulusMap_1_0_apply_P2ElementwiseFullStokesAnnulusMap_1_0_macro_2D.cpp similarity index 99% rename from operators/full_stokes/noarch/P2ElementwiseFullStokesAnnulusMap_1_0_apply_macro_2D.cpp rename to operators/full_stokes/noarch/P2ElementwiseFullStokesAnnulusMap_1_0_apply_P2ElementwiseFullStokesAnnulusMap_1_0_macro_2D.cpp index 9b0d17934e47040a1bef1bd2c8430aee5b2fb854..cad742bdf57c8e8a9e53cd732dd10a0b1fb9323b 100644 --- a/operators/full_stokes/noarch/P2ElementwiseFullStokesAnnulusMap_1_0_apply_macro_2D.cpp +++ b/operators/full_stokes/noarch/P2ElementwiseFullStokesAnnulusMap_1_0_apply_P2ElementwiseFullStokesAnnulusMap_1_0_macro_2D.cpp @@ -43,6 +43,10 @@ + + + + @@ -56,7 +60,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseFullStokesAnnulusMap_1_0::apply_macro_2D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const +void P2ElementwiseFullStokesAnnulusMap_1_0::apply_P2ElementwiseFullStokesAnnulusMap_1_0_macro_2D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const { { const real_t _data_q_w [] = {-0.28125, 0.26041666666666669, 0.26041666666666669, 0.26041666666666669}; diff --git a/operators/full_stokes/noarch/P2ElementwiseFullStokesAnnulusMap_1_0_toMatrix_macro_2D.cpp b/operators/full_stokes/noarch/P2ElementwiseFullStokesAnnulusMap_1_0_toMatrix_P2ElementwiseFullStokesAnnulusMap_1_0_macro_2D.cpp similarity index 99% rename from operators/full_stokes/noarch/P2ElementwiseFullStokesAnnulusMap_1_0_toMatrix_macro_2D.cpp rename to operators/full_stokes/noarch/P2ElementwiseFullStokesAnnulusMap_1_0_toMatrix_P2ElementwiseFullStokesAnnulusMap_1_0_macro_2D.cpp index d0bebd24ee6010198a47db60ed107fe2bbb36c82..1e9c19f65946c1d5afd671b8e11606bbaf986ae0 100644 --- a/operators/full_stokes/noarch/P2ElementwiseFullStokesAnnulusMap_1_0_toMatrix_macro_2D.cpp +++ b/operators/full_stokes/noarch/P2ElementwiseFullStokesAnnulusMap_1_0_toMatrix_P2ElementwiseFullStokesAnnulusMap_1_0_macro_2D.cpp @@ -43,6 +43,10 @@ + + + + @@ -56,7 +60,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseFullStokesAnnulusMap_1_0::toMatrix_macro_2D( idx_t * RESTRICT _data_dstEdge, idx_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, idx_t * RESTRICT _data_srcEdge, idx_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const +void P2ElementwiseFullStokesAnnulusMap_1_0::toMatrix_P2ElementwiseFullStokesAnnulusMap_1_0_macro_2D( idx_t * RESTRICT _data_dstEdge, idx_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, idx_t * RESTRICT _data_srcEdge, idx_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const { { const real_t _data_q_w [] = {-0.28125, 0.26041666666666669, 0.26041666666666669, 0.26041666666666669}; diff --git a/operators/full_stokes/noarch/P2ElementwiseFullStokesAnnulusMap_1_1_apply_macro_2D.cpp b/operators/full_stokes/noarch/P2ElementwiseFullStokesAnnulusMap_1_1_apply_P2ElementwiseFullStokesAnnulusMap_1_1_macro_2D.cpp similarity index 99% rename from operators/full_stokes/noarch/P2ElementwiseFullStokesAnnulusMap_1_1_apply_macro_2D.cpp rename to operators/full_stokes/noarch/P2ElementwiseFullStokesAnnulusMap_1_1_apply_P2ElementwiseFullStokesAnnulusMap_1_1_macro_2D.cpp index 979ed833d139a3fb4ebc79042472d91b1d13d6af..6d453b8036a08117a87b72f279d62095bcc057d2 100644 --- a/operators/full_stokes/noarch/P2ElementwiseFullStokesAnnulusMap_1_1_apply_macro_2D.cpp +++ b/operators/full_stokes/noarch/P2ElementwiseFullStokesAnnulusMap_1_1_apply_P2ElementwiseFullStokesAnnulusMap_1_1_macro_2D.cpp @@ -45,6 +45,10 @@ + + + + @@ -58,7 +62,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseFullStokesAnnulusMap_1_1::apply_macro_2D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const +void P2ElementwiseFullStokesAnnulusMap_1_1::apply_P2ElementwiseFullStokesAnnulusMap_1_1_macro_2D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const { { const real_t _data_q_w [] = {-0.28125, 0.26041666666666669, 0.26041666666666669, 0.26041666666666669}; diff --git a/operators/full_stokes/noarch/P2ElementwiseFullStokesAnnulusMap_1_1_computeInverseDiagonalOperatorValues_macro_2D.cpp b/operators/full_stokes/noarch/P2ElementwiseFullStokesAnnulusMap_1_1_computeInverseDiagonalOperatorValues_P2ElementwiseFullStokesAnnulusMap_1_1_macro_2D.cpp similarity index 98% rename from operators/full_stokes/noarch/P2ElementwiseFullStokesAnnulusMap_1_1_computeInverseDiagonalOperatorValues_macro_2D.cpp rename to operators/full_stokes/noarch/P2ElementwiseFullStokesAnnulusMap_1_1_computeInverseDiagonalOperatorValues_P2ElementwiseFullStokesAnnulusMap_1_1_macro_2D.cpp index 76943090721d2fda108eb5ee12c1ad4b3c088f6f..f2039f443e2b5fb6b304ee28e1ce7fb197704928 100644 --- a/operators/full_stokes/noarch/P2ElementwiseFullStokesAnnulusMap_1_1_computeInverseDiagonalOperatorValues_macro_2D.cpp +++ b/operators/full_stokes/noarch/P2ElementwiseFullStokesAnnulusMap_1_1_computeInverseDiagonalOperatorValues_P2ElementwiseFullStokesAnnulusMap_1_1_macro_2D.cpp @@ -45,6 +45,10 @@ + + + + @@ -58,7 +62,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseFullStokesAnnulusMap_1_1::computeInverseDiagonalOperatorValues_macro_2D( real_t * RESTRICT _data_invDiag_Edge, real_t * RESTRICT _data_invDiag_Vertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const +void P2ElementwiseFullStokesAnnulusMap_1_1::computeInverseDiagonalOperatorValues_P2ElementwiseFullStokesAnnulusMap_1_1_macro_2D( real_t * RESTRICT _data_invDiag_Edge, real_t * RESTRICT _data_invDiag_Vertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const { { const real_t _data_q_w [] = {-0.28125, 0.26041666666666669, 0.26041666666666669, 0.26041666666666669}; diff --git a/operators/full_stokes/noarch/P2ElementwiseFullStokesAnnulusMap_1_1_toMatrix_macro_2D.cpp b/operators/full_stokes/noarch/P2ElementwiseFullStokesAnnulusMap_1_1_toMatrix_P2ElementwiseFullStokesAnnulusMap_1_1_macro_2D.cpp similarity index 99% rename from operators/full_stokes/noarch/P2ElementwiseFullStokesAnnulusMap_1_1_toMatrix_macro_2D.cpp rename to operators/full_stokes/noarch/P2ElementwiseFullStokesAnnulusMap_1_1_toMatrix_P2ElementwiseFullStokesAnnulusMap_1_1_macro_2D.cpp index 2a2aa192445b7b1f20b8a4ca0b870f7e6d7a789a..21eb217791f4d7ea34e2da9323b28be6065b7136 100644 --- a/operators/full_stokes/noarch/P2ElementwiseFullStokesAnnulusMap_1_1_toMatrix_macro_2D.cpp +++ b/operators/full_stokes/noarch/P2ElementwiseFullStokesAnnulusMap_1_1_toMatrix_P2ElementwiseFullStokesAnnulusMap_1_1_macro_2D.cpp @@ -45,6 +45,10 @@ + + + + @@ -58,7 +62,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseFullStokesAnnulusMap_1_1::toMatrix_macro_2D( idx_t * RESTRICT _data_dstEdge, idx_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, idx_t * RESTRICT _data_srcEdge, idx_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const +void P2ElementwiseFullStokesAnnulusMap_1_1::toMatrix_P2ElementwiseFullStokesAnnulusMap_1_1_macro_2D( idx_t * RESTRICT _data_dstEdge, idx_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, idx_t * RESTRICT _data_srcEdge, idx_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const { { const real_t _data_q_w [] = {-0.28125, 0.26041666666666669, 0.26041666666666669, 0.26041666666666669}; diff --git a/operators/full_stokes/noarch/P2ElementwiseFullStokesIcosahedralShellMap_0_0_apply_macro_3D.cpp b/operators/full_stokes/noarch/P2ElementwiseFullStokesIcosahedralShellMap_0_0_apply_P2ElementwiseFullStokesIcosahedralShellMap_0_0_macro_3D.cpp similarity index 99% rename from operators/full_stokes/noarch/P2ElementwiseFullStokesIcosahedralShellMap_0_0_apply_macro_3D.cpp rename to operators/full_stokes/noarch/P2ElementwiseFullStokesIcosahedralShellMap_0_0_apply_P2ElementwiseFullStokesIcosahedralShellMap_0_0_macro_3D.cpp index 81f22134b57aa71477b381493733484bcaae6291..c8a4bbe1d754d21c26b74c2191028dff0e160668 100644 --- a/operators/full_stokes/noarch/P2ElementwiseFullStokesIcosahedralShellMap_0_0_apply_macro_3D.cpp +++ b/operators/full_stokes/noarch/P2ElementwiseFullStokesIcosahedralShellMap_0_0_apply_P2ElementwiseFullStokesIcosahedralShellMap_0_0_macro_3D.cpp @@ -45,6 +45,10 @@ + + + + @@ -58,7 +62,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseFullStokesIcosahedralShellMap_0_0::apply_macro_3D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const +void P2ElementwiseFullStokesIcosahedralShellMap_0_0::apply_P2ElementwiseFullStokesIcosahedralShellMap_0_0_macro_3D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const { { const real_t _data_q_w [] = {-0.1333333333333333, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983}; diff --git a/operators/full_stokes/noarch/P2ElementwiseFullStokesIcosahedralShellMap_0_0_computeInverseDiagonalOperatorValues_macro_3D.cpp b/operators/full_stokes/noarch/P2ElementwiseFullStokesIcosahedralShellMap_0_0_computeInverseDiagonalOperatorValues_P2ElementwiseFullStokesIcosahedralShellMap_0_0_macro_3D.cpp similarity index 99% rename from operators/full_stokes/noarch/P2ElementwiseFullStokesIcosahedralShellMap_0_0_computeInverseDiagonalOperatorValues_macro_3D.cpp rename to operators/full_stokes/noarch/P2ElementwiseFullStokesIcosahedralShellMap_0_0_computeInverseDiagonalOperatorValues_P2ElementwiseFullStokesIcosahedralShellMap_0_0_macro_3D.cpp index f3df1c0955aa9988384ccda4cbd694bc6f3c67df..acccfd6fdaa606c3f7b0215d6b356ba15fdc1177 100644 --- a/operators/full_stokes/noarch/P2ElementwiseFullStokesIcosahedralShellMap_0_0_computeInverseDiagonalOperatorValues_macro_3D.cpp +++ b/operators/full_stokes/noarch/P2ElementwiseFullStokesIcosahedralShellMap_0_0_computeInverseDiagonalOperatorValues_P2ElementwiseFullStokesIcosahedralShellMap_0_0_macro_3D.cpp @@ -45,6 +45,10 @@ + + + + @@ -58,7 +62,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseFullStokesIcosahedralShellMap_0_0::computeInverseDiagonalOperatorValues_macro_3D( real_t * RESTRICT _data_invDiag_Edge, real_t * RESTRICT _data_invDiag_Vertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const +void P2ElementwiseFullStokesIcosahedralShellMap_0_0::computeInverseDiagonalOperatorValues_P2ElementwiseFullStokesIcosahedralShellMap_0_0_macro_3D( real_t * RESTRICT _data_invDiag_Edge, real_t * RESTRICT _data_invDiag_Vertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const { { const real_t _data_q_w [] = {-0.1333333333333333, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983}; diff --git a/operators/full_stokes/noarch/P2ElementwiseFullStokesIcosahedralShellMap_0_0_toMatrix_macro_3D.cpp b/operators/full_stokes/noarch/P2ElementwiseFullStokesIcosahedralShellMap_0_0_toMatrix_P2ElementwiseFullStokesIcosahedralShellMap_0_0_macro_3D.cpp similarity index 99% rename from operators/full_stokes/noarch/P2ElementwiseFullStokesIcosahedralShellMap_0_0_toMatrix_macro_3D.cpp rename to operators/full_stokes/noarch/P2ElementwiseFullStokesIcosahedralShellMap_0_0_toMatrix_P2ElementwiseFullStokesIcosahedralShellMap_0_0_macro_3D.cpp index a9d9a1f562e205318b8aaa3efd16862319de855a..d5ad8eb1579a7ed48366a15af153e7e403bc094a 100644 --- a/operators/full_stokes/noarch/P2ElementwiseFullStokesIcosahedralShellMap_0_0_toMatrix_macro_3D.cpp +++ b/operators/full_stokes/noarch/P2ElementwiseFullStokesIcosahedralShellMap_0_0_toMatrix_P2ElementwiseFullStokesIcosahedralShellMap_0_0_macro_3D.cpp @@ -45,6 +45,10 @@ + + + + @@ -58,7 +62,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseFullStokesIcosahedralShellMap_0_0::toMatrix_macro_3D( idx_t * RESTRICT _data_dstEdge, idx_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, idx_t * RESTRICT _data_srcEdge, idx_t * RESTRICT _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const +void P2ElementwiseFullStokesIcosahedralShellMap_0_0::toMatrix_P2ElementwiseFullStokesIcosahedralShellMap_0_0_macro_3D( idx_t * RESTRICT _data_dstEdge, idx_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, idx_t * RESTRICT _data_srcEdge, idx_t * RESTRICT _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const { { const real_t _data_q_w [] = {-0.1333333333333333, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983}; diff --git a/operators/full_stokes/noarch/P2ElementwiseFullStokesIcosahedralShellMap_0_1_apply_macro_3D.cpp b/operators/full_stokes/noarch/P2ElementwiseFullStokesIcosahedralShellMap_0_1_apply_P2ElementwiseFullStokesIcosahedralShellMap_0_1_macro_3D.cpp similarity index 99% rename from operators/full_stokes/noarch/P2ElementwiseFullStokesIcosahedralShellMap_0_1_apply_macro_3D.cpp rename to operators/full_stokes/noarch/P2ElementwiseFullStokesIcosahedralShellMap_0_1_apply_P2ElementwiseFullStokesIcosahedralShellMap_0_1_macro_3D.cpp index f044c0ae752bc54935ae8fd49f812d6688a034a1..04de9f7267c3b2c6c47a50c94679a5654ead258d 100644 --- a/operators/full_stokes/noarch/P2ElementwiseFullStokesIcosahedralShellMap_0_1_apply_macro_3D.cpp +++ b/operators/full_stokes/noarch/P2ElementwiseFullStokesIcosahedralShellMap_0_1_apply_P2ElementwiseFullStokesIcosahedralShellMap_0_1_macro_3D.cpp @@ -43,6 +43,10 @@ + + + + @@ -56,7 +60,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseFullStokesIcosahedralShellMap_0_1::apply_macro_3D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const +void P2ElementwiseFullStokesIcosahedralShellMap_0_1::apply_P2ElementwiseFullStokesIcosahedralShellMap_0_1_macro_3D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const { { const real_t _data_q_w [] = {-0.1333333333333333, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983}; diff --git a/operators/full_stokes/noarch/P2ElementwiseFullStokesIcosahedralShellMap_0_1_toMatrix_macro_3D.cpp b/operators/full_stokes/noarch/P2ElementwiseFullStokesIcosahedralShellMap_0_1_toMatrix_P2ElementwiseFullStokesIcosahedralShellMap_0_1_macro_3D.cpp similarity index 99% rename from operators/full_stokes/noarch/P2ElementwiseFullStokesIcosahedralShellMap_0_1_toMatrix_macro_3D.cpp rename to operators/full_stokes/noarch/P2ElementwiseFullStokesIcosahedralShellMap_0_1_toMatrix_P2ElementwiseFullStokesIcosahedralShellMap_0_1_macro_3D.cpp index 70b5b051994da973d3ba8171dd4cdff7f8325bf2..3f23db9774de9815b052c82f45623f01e07156a6 100644 --- a/operators/full_stokes/noarch/P2ElementwiseFullStokesIcosahedralShellMap_0_1_toMatrix_macro_3D.cpp +++ b/operators/full_stokes/noarch/P2ElementwiseFullStokesIcosahedralShellMap_0_1_toMatrix_P2ElementwiseFullStokesIcosahedralShellMap_0_1_macro_3D.cpp @@ -43,6 +43,10 @@ + + + + @@ -56,7 +60,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseFullStokesIcosahedralShellMap_0_1::toMatrix_macro_3D( idx_t * RESTRICT _data_dstEdge, idx_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, idx_t * RESTRICT _data_srcEdge, idx_t * RESTRICT _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const +void P2ElementwiseFullStokesIcosahedralShellMap_0_1::toMatrix_P2ElementwiseFullStokesIcosahedralShellMap_0_1_macro_3D( idx_t * RESTRICT _data_dstEdge, idx_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, idx_t * RESTRICT _data_srcEdge, idx_t * RESTRICT _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const { { const real_t _data_q_w [] = {-0.1333333333333333, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983}; diff --git a/operators/full_stokes/noarch/P2ElementwiseFullStokesIcosahedralShellMap_0_2_apply_macro_3D.cpp b/operators/full_stokes/noarch/P2ElementwiseFullStokesIcosahedralShellMap_0_2_apply_P2ElementwiseFullStokesIcosahedralShellMap_0_2_macro_3D.cpp similarity index 99% rename from operators/full_stokes/noarch/P2ElementwiseFullStokesIcosahedralShellMap_0_2_apply_macro_3D.cpp rename to operators/full_stokes/noarch/P2ElementwiseFullStokesIcosahedralShellMap_0_2_apply_P2ElementwiseFullStokesIcosahedralShellMap_0_2_macro_3D.cpp index 1e0cc9b70b4e8b7afa9f5ce834994749968b6956..b19089c47d790bcf826c0afb63888df96be37f61 100644 --- a/operators/full_stokes/noarch/P2ElementwiseFullStokesIcosahedralShellMap_0_2_apply_macro_3D.cpp +++ b/operators/full_stokes/noarch/P2ElementwiseFullStokesIcosahedralShellMap_0_2_apply_P2ElementwiseFullStokesIcosahedralShellMap_0_2_macro_3D.cpp @@ -43,6 +43,10 @@ + + + + @@ -56,7 +60,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseFullStokesIcosahedralShellMap_0_2::apply_macro_3D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const +void P2ElementwiseFullStokesIcosahedralShellMap_0_2::apply_P2ElementwiseFullStokesIcosahedralShellMap_0_2_macro_3D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const { { const real_t _data_q_w [] = {-0.1333333333333333, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983}; diff --git a/operators/full_stokes/noarch/P2ElementwiseFullStokesIcosahedralShellMap_0_2_toMatrix_macro_3D.cpp b/operators/full_stokes/noarch/P2ElementwiseFullStokesIcosahedralShellMap_0_2_toMatrix_P2ElementwiseFullStokesIcosahedralShellMap_0_2_macro_3D.cpp similarity index 99% rename from operators/full_stokes/noarch/P2ElementwiseFullStokesIcosahedralShellMap_0_2_toMatrix_macro_3D.cpp rename to operators/full_stokes/noarch/P2ElementwiseFullStokesIcosahedralShellMap_0_2_toMatrix_P2ElementwiseFullStokesIcosahedralShellMap_0_2_macro_3D.cpp index b5b1070e1487162f59ec4cc2fde4b3a2e2998fbd..aa32fff8fd2835dde37f6544fab6709e480195a9 100644 --- a/operators/full_stokes/noarch/P2ElementwiseFullStokesIcosahedralShellMap_0_2_toMatrix_macro_3D.cpp +++ b/operators/full_stokes/noarch/P2ElementwiseFullStokesIcosahedralShellMap_0_2_toMatrix_P2ElementwiseFullStokesIcosahedralShellMap_0_2_macro_3D.cpp @@ -43,6 +43,10 @@ + + + + @@ -56,7 +60,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseFullStokesIcosahedralShellMap_0_2::toMatrix_macro_3D( idx_t * RESTRICT _data_dstEdge, idx_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, idx_t * RESTRICT _data_srcEdge, idx_t * RESTRICT _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const +void P2ElementwiseFullStokesIcosahedralShellMap_0_2::toMatrix_P2ElementwiseFullStokesIcosahedralShellMap_0_2_macro_3D( idx_t * RESTRICT _data_dstEdge, idx_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, idx_t * RESTRICT _data_srcEdge, idx_t * RESTRICT _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const { { const real_t _data_q_w [] = {-0.1333333333333333, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983}; diff --git a/operators/full_stokes/noarch/P2ElementwiseFullStokesIcosahedralShellMap_1_0_apply_macro_3D.cpp b/operators/full_stokes/noarch/P2ElementwiseFullStokesIcosahedralShellMap_1_0_apply_P2ElementwiseFullStokesIcosahedralShellMap_1_0_macro_3D.cpp similarity index 99% rename from operators/full_stokes/noarch/P2ElementwiseFullStokesIcosahedralShellMap_1_0_apply_macro_3D.cpp rename to operators/full_stokes/noarch/P2ElementwiseFullStokesIcosahedralShellMap_1_0_apply_P2ElementwiseFullStokesIcosahedralShellMap_1_0_macro_3D.cpp index 333fd1e87bdc689c0c616a44f9d784a1fbfb6bf2..74b56c629a0b03d3d92186110996d43db5098e46 100644 --- a/operators/full_stokes/noarch/P2ElementwiseFullStokesIcosahedralShellMap_1_0_apply_macro_3D.cpp +++ b/operators/full_stokes/noarch/P2ElementwiseFullStokesIcosahedralShellMap_1_0_apply_P2ElementwiseFullStokesIcosahedralShellMap_1_0_macro_3D.cpp @@ -43,6 +43,10 @@ + + + + @@ -56,7 +60,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseFullStokesIcosahedralShellMap_1_0::apply_macro_3D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const +void P2ElementwiseFullStokesIcosahedralShellMap_1_0::apply_P2ElementwiseFullStokesIcosahedralShellMap_1_0_macro_3D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const { { const real_t _data_q_w [] = {-0.1333333333333333, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983}; diff --git a/operators/full_stokes/noarch/P2ElementwiseFullStokesIcosahedralShellMap_1_0_toMatrix_macro_3D.cpp b/operators/full_stokes/noarch/P2ElementwiseFullStokesIcosahedralShellMap_1_0_toMatrix_P2ElementwiseFullStokesIcosahedralShellMap_1_0_macro_3D.cpp similarity index 99% rename from operators/full_stokes/noarch/P2ElementwiseFullStokesIcosahedralShellMap_1_0_toMatrix_macro_3D.cpp rename to operators/full_stokes/noarch/P2ElementwiseFullStokesIcosahedralShellMap_1_0_toMatrix_P2ElementwiseFullStokesIcosahedralShellMap_1_0_macro_3D.cpp index d72855f6bece3ed2bc318075a32cb9adfd6398e7..a202968d53a76e65b59d5a84b7f1ae6e80349b70 100644 --- a/operators/full_stokes/noarch/P2ElementwiseFullStokesIcosahedralShellMap_1_0_toMatrix_macro_3D.cpp +++ b/operators/full_stokes/noarch/P2ElementwiseFullStokesIcosahedralShellMap_1_0_toMatrix_P2ElementwiseFullStokesIcosahedralShellMap_1_0_macro_3D.cpp @@ -43,6 +43,10 @@ + + + + @@ -56,7 +60,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseFullStokesIcosahedralShellMap_1_0::toMatrix_macro_3D( idx_t * RESTRICT _data_dstEdge, idx_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, idx_t * RESTRICT _data_srcEdge, idx_t * RESTRICT _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const +void P2ElementwiseFullStokesIcosahedralShellMap_1_0::toMatrix_P2ElementwiseFullStokesIcosahedralShellMap_1_0_macro_3D( idx_t * RESTRICT _data_dstEdge, idx_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, idx_t * RESTRICT _data_srcEdge, idx_t * RESTRICT _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const { { const real_t _data_q_w [] = {-0.1333333333333333, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983}; diff --git a/operators/full_stokes/noarch/P2ElementwiseFullStokesIcosahedralShellMap_1_1_apply_macro_3D.cpp b/operators/full_stokes/noarch/P2ElementwiseFullStokesIcosahedralShellMap_1_1_apply_P2ElementwiseFullStokesIcosahedralShellMap_1_1_macro_3D.cpp similarity index 99% rename from operators/full_stokes/noarch/P2ElementwiseFullStokesIcosahedralShellMap_1_1_apply_macro_3D.cpp rename to operators/full_stokes/noarch/P2ElementwiseFullStokesIcosahedralShellMap_1_1_apply_P2ElementwiseFullStokesIcosahedralShellMap_1_1_macro_3D.cpp index a57a3fd2fb9c52acd344355b92962294fb3000b8..772db896ff55ef9bf73ec4013fad418a5a6ed214 100644 --- a/operators/full_stokes/noarch/P2ElementwiseFullStokesIcosahedralShellMap_1_1_apply_macro_3D.cpp +++ b/operators/full_stokes/noarch/P2ElementwiseFullStokesIcosahedralShellMap_1_1_apply_P2ElementwiseFullStokesIcosahedralShellMap_1_1_macro_3D.cpp @@ -45,6 +45,10 @@ + + + + @@ -58,7 +62,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseFullStokesIcosahedralShellMap_1_1::apply_macro_3D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const +void P2ElementwiseFullStokesIcosahedralShellMap_1_1::apply_P2ElementwiseFullStokesIcosahedralShellMap_1_1_macro_3D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const { { const real_t _data_q_w [] = {-0.1333333333333333, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983}; diff --git a/operators/full_stokes/noarch/P2ElementwiseFullStokesIcosahedralShellMap_1_1_computeInverseDiagonalOperatorValues_macro_3D.cpp b/operators/full_stokes/noarch/P2ElementwiseFullStokesIcosahedralShellMap_1_1_computeInverseDiagonalOperatorValues_P2ElementwiseFullStokesIcosahedralShellMap_1_1_macro_3D.cpp similarity index 99% rename from operators/full_stokes/noarch/P2ElementwiseFullStokesIcosahedralShellMap_1_1_computeInverseDiagonalOperatorValues_macro_3D.cpp rename to operators/full_stokes/noarch/P2ElementwiseFullStokesIcosahedralShellMap_1_1_computeInverseDiagonalOperatorValues_P2ElementwiseFullStokesIcosahedralShellMap_1_1_macro_3D.cpp index bb0882fa0af6d060083205f6de3fd77b9ea35202..662bd4a7817a3f9a97beb3009cc6f1d10daead92 100644 --- a/operators/full_stokes/noarch/P2ElementwiseFullStokesIcosahedralShellMap_1_1_computeInverseDiagonalOperatorValues_macro_3D.cpp +++ b/operators/full_stokes/noarch/P2ElementwiseFullStokesIcosahedralShellMap_1_1_computeInverseDiagonalOperatorValues_P2ElementwiseFullStokesIcosahedralShellMap_1_1_macro_3D.cpp @@ -45,6 +45,10 @@ + + + + @@ -58,7 +62,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseFullStokesIcosahedralShellMap_1_1::computeInverseDiagonalOperatorValues_macro_3D( real_t * RESTRICT _data_invDiag_Edge, real_t * RESTRICT _data_invDiag_Vertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const +void P2ElementwiseFullStokesIcosahedralShellMap_1_1::computeInverseDiagonalOperatorValues_P2ElementwiseFullStokesIcosahedralShellMap_1_1_macro_3D( real_t * RESTRICT _data_invDiag_Edge, real_t * RESTRICT _data_invDiag_Vertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const { { const real_t _data_q_w [] = {-0.1333333333333333, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983}; diff --git a/operators/full_stokes/noarch/P2ElementwiseFullStokesIcosahedralShellMap_1_1_toMatrix_macro_3D.cpp b/operators/full_stokes/noarch/P2ElementwiseFullStokesIcosahedralShellMap_1_1_toMatrix_P2ElementwiseFullStokesIcosahedralShellMap_1_1_macro_3D.cpp similarity index 99% rename from operators/full_stokes/noarch/P2ElementwiseFullStokesIcosahedralShellMap_1_1_toMatrix_macro_3D.cpp rename to operators/full_stokes/noarch/P2ElementwiseFullStokesIcosahedralShellMap_1_1_toMatrix_P2ElementwiseFullStokesIcosahedralShellMap_1_1_macro_3D.cpp index 434add40e7008aa51ae2ea19c45f137fe190989d..b8eb8580a1a5abb3f658948b6ce4751ff9c1baef 100644 --- a/operators/full_stokes/noarch/P2ElementwiseFullStokesIcosahedralShellMap_1_1_toMatrix_macro_3D.cpp +++ b/operators/full_stokes/noarch/P2ElementwiseFullStokesIcosahedralShellMap_1_1_toMatrix_P2ElementwiseFullStokesIcosahedralShellMap_1_1_macro_3D.cpp @@ -45,6 +45,10 @@ + + + + @@ -58,7 +62,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseFullStokesIcosahedralShellMap_1_1::toMatrix_macro_3D( idx_t * RESTRICT _data_dstEdge, idx_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, idx_t * RESTRICT _data_srcEdge, idx_t * RESTRICT _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const +void P2ElementwiseFullStokesIcosahedralShellMap_1_1::toMatrix_P2ElementwiseFullStokesIcosahedralShellMap_1_1_macro_3D( idx_t * RESTRICT _data_dstEdge, idx_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, idx_t * RESTRICT _data_srcEdge, idx_t * RESTRICT _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const { { const real_t _data_q_w [] = {-0.1333333333333333, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983}; diff --git a/operators/full_stokes/noarch/P2ElementwiseFullStokesIcosahedralShellMap_1_2_apply_macro_3D.cpp b/operators/full_stokes/noarch/P2ElementwiseFullStokesIcosahedralShellMap_1_2_apply_P2ElementwiseFullStokesIcosahedralShellMap_1_2_macro_3D.cpp similarity index 99% rename from operators/full_stokes/noarch/P2ElementwiseFullStokesIcosahedralShellMap_1_2_apply_macro_3D.cpp rename to operators/full_stokes/noarch/P2ElementwiseFullStokesIcosahedralShellMap_1_2_apply_P2ElementwiseFullStokesIcosahedralShellMap_1_2_macro_3D.cpp index 8b23ea410c4f07dc2b9741261513a2e04dc5a7ee..648491d4bc280e14df5510c2aaf5b01d2c70d7eb 100644 --- a/operators/full_stokes/noarch/P2ElementwiseFullStokesIcosahedralShellMap_1_2_apply_macro_3D.cpp +++ b/operators/full_stokes/noarch/P2ElementwiseFullStokesIcosahedralShellMap_1_2_apply_P2ElementwiseFullStokesIcosahedralShellMap_1_2_macro_3D.cpp @@ -43,6 +43,10 @@ + + + + @@ -56,7 +60,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseFullStokesIcosahedralShellMap_1_2::apply_macro_3D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const +void P2ElementwiseFullStokesIcosahedralShellMap_1_2::apply_P2ElementwiseFullStokesIcosahedralShellMap_1_2_macro_3D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const { { const real_t _data_q_w [] = {-0.1333333333333333, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983}; diff --git a/operators/full_stokes/noarch/P2ElementwiseFullStokesIcosahedralShellMap_1_2_toMatrix_macro_3D.cpp b/operators/full_stokes/noarch/P2ElementwiseFullStokesIcosahedralShellMap_1_2_toMatrix_P2ElementwiseFullStokesIcosahedralShellMap_1_2_macro_3D.cpp similarity index 99% rename from operators/full_stokes/noarch/P2ElementwiseFullStokesIcosahedralShellMap_1_2_toMatrix_macro_3D.cpp rename to operators/full_stokes/noarch/P2ElementwiseFullStokesIcosahedralShellMap_1_2_toMatrix_P2ElementwiseFullStokesIcosahedralShellMap_1_2_macro_3D.cpp index fd776b0a808b2364cddd48f07b4d2d5c0e56fe48..2cef19c38f2db27a3a9b0d11a6d51296151b00eb 100644 --- a/operators/full_stokes/noarch/P2ElementwiseFullStokesIcosahedralShellMap_1_2_toMatrix_macro_3D.cpp +++ b/operators/full_stokes/noarch/P2ElementwiseFullStokesIcosahedralShellMap_1_2_toMatrix_P2ElementwiseFullStokesIcosahedralShellMap_1_2_macro_3D.cpp @@ -43,6 +43,10 @@ + + + + @@ -56,7 +60,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseFullStokesIcosahedralShellMap_1_2::toMatrix_macro_3D( idx_t * RESTRICT _data_dstEdge, idx_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, idx_t * RESTRICT _data_srcEdge, idx_t * RESTRICT _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const +void P2ElementwiseFullStokesIcosahedralShellMap_1_2::toMatrix_P2ElementwiseFullStokesIcosahedralShellMap_1_2_macro_3D( idx_t * RESTRICT _data_dstEdge, idx_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, idx_t * RESTRICT _data_srcEdge, idx_t * RESTRICT _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const { { const real_t _data_q_w [] = {-0.1333333333333333, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983}; diff --git a/operators/full_stokes/noarch/P2ElementwiseFullStokesIcosahedralShellMap_2_0_apply_macro_3D.cpp b/operators/full_stokes/noarch/P2ElementwiseFullStokesIcosahedralShellMap_2_0_apply_P2ElementwiseFullStokesIcosahedralShellMap_2_0_macro_3D.cpp similarity index 99% rename from operators/full_stokes/noarch/P2ElementwiseFullStokesIcosahedralShellMap_2_0_apply_macro_3D.cpp rename to operators/full_stokes/noarch/P2ElementwiseFullStokesIcosahedralShellMap_2_0_apply_P2ElementwiseFullStokesIcosahedralShellMap_2_0_macro_3D.cpp index 2434f82c04ed0d7dbab8c36b219d3dab61a3b11d..b92b8c5b1ccb1d10db7e4697238fc1619a7c1345 100644 --- a/operators/full_stokes/noarch/P2ElementwiseFullStokesIcosahedralShellMap_2_0_apply_macro_3D.cpp +++ b/operators/full_stokes/noarch/P2ElementwiseFullStokesIcosahedralShellMap_2_0_apply_P2ElementwiseFullStokesIcosahedralShellMap_2_0_macro_3D.cpp @@ -43,6 +43,10 @@ + + + + @@ -56,7 +60,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseFullStokesIcosahedralShellMap_2_0::apply_macro_3D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const +void P2ElementwiseFullStokesIcosahedralShellMap_2_0::apply_P2ElementwiseFullStokesIcosahedralShellMap_2_0_macro_3D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const { { const real_t _data_q_w [] = {-0.1333333333333333, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983}; diff --git a/operators/full_stokes/noarch/P2ElementwiseFullStokesIcosahedralShellMap_2_0_toMatrix_macro_3D.cpp b/operators/full_stokes/noarch/P2ElementwiseFullStokesIcosahedralShellMap_2_0_toMatrix_P2ElementwiseFullStokesIcosahedralShellMap_2_0_macro_3D.cpp similarity index 99% rename from operators/full_stokes/noarch/P2ElementwiseFullStokesIcosahedralShellMap_2_0_toMatrix_macro_3D.cpp rename to operators/full_stokes/noarch/P2ElementwiseFullStokesIcosahedralShellMap_2_0_toMatrix_P2ElementwiseFullStokesIcosahedralShellMap_2_0_macro_3D.cpp index 2e9034d41b88ed61fd20433104c328c72b5839d0..9cbc1b0cb0bc60b8f025a2b4b915ba8e5b07ef2c 100644 --- a/operators/full_stokes/noarch/P2ElementwiseFullStokesIcosahedralShellMap_2_0_toMatrix_macro_3D.cpp +++ b/operators/full_stokes/noarch/P2ElementwiseFullStokesIcosahedralShellMap_2_0_toMatrix_P2ElementwiseFullStokesIcosahedralShellMap_2_0_macro_3D.cpp @@ -43,6 +43,10 @@ + + + + @@ -56,7 +60,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseFullStokesIcosahedralShellMap_2_0::toMatrix_macro_3D( idx_t * RESTRICT _data_dstEdge, idx_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, idx_t * RESTRICT _data_srcEdge, idx_t * RESTRICT _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const +void P2ElementwiseFullStokesIcosahedralShellMap_2_0::toMatrix_P2ElementwiseFullStokesIcosahedralShellMap_2_0_macro_3D( idx_t * RESTRICT _data_dstEdge, idx_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, idx_t * RESTRICT _data_srcEdge, idx_t * RESTRICT _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const { { const real_t _data_q_w [] = {-0.1333333333333333, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983}; diff --git a/operators/full_stokes/noarch/P2ElementwiseFullStokesIcosahedralShellMap_2_1_apply_macro_3D.cpp b/operators/full_stokes/noarch/P2ElementwiseFullStokesIcosahedralShellMap_2_1_apply_P2ElementwiseFullStokesIcosahedralShellMap_2_1_macro_3D.cpp similarity index 99% rename from operators/full_stokes/noarch/P2ElementwiseFullStokesIcosahedralShellMap_2_1_apply_macro_3D.cpp rename to operators/full_stokes/noarch/P2ElementwiseFullStokesIcosahedralShellMap_2_1_apply_P2ElementwiseFullStokesIcosahedralShellMap_2_1_macro_3D.cpp index 5755a5ae38d278823514654ba99f39a8c0b3295d..3d4c22356cef02b5a25b8b1a328a89fe31d1692f 100644 --- a/operators/full_stokes/noarch/P2ElementwiseFullStokesIcosahedralShellMap_2_1_apply_macro_3D.cpp +++ b/operators/full_stokes/noarch/P2ElementwiseFullStokesIcosahedralShellMap_2_1_apply_P2ElementwiseFullStokesIcosahedralShellMap_2_1_macro_3D.cpp @@ -43,6 +43,10 @@ + + + + @@ -56,7 +60,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseFullStokesIcosahedralShellMap_2_1::apply_macro_3D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const +void P2ElementwiseFullStokesIcosahedralShellMap_2_1::apply_P2ElementwiseFullStokesIcosahedralShellMap_2_1_macro_3D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const { { const real_t _data_q_w [] = {-0.1333333333333333, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983}; diff --git a/operators/full_stokes/noarch/P2ElementwiseFullStokesIcosahedralShellMap_2_1_toMatrix_macro_3D.cpp b/operators/full_stokes/noarch/P2ElementwiseFullStokesIcosahedralShellMap_2_1_toMatrix_P2ElementwiseFullStokesIcosahedralShellMap_2_1_macro_3D.cpp similarity index 99% rename from operators/full_stokes/noarch/P2ElementwiseFullStokesIcosahedralShellMap_2_1_toMatrix_macro_3D.cpp rename to operators/full_stokes/noarch/P2ElementwiseFullStokesIcosahedralShellMap_2_1_toMatrix_P2ElementwiseFullStokesIcosahedralShellMap_2_1_macro_3D.cpp index 2eb5019f44baf58fc4be40aa155e92b54e7ae657..8ef485bcc470f21be75a1026eba6d1bf2d4626c3 100644 --- a/operators/full_stokes/noarch/P2ElementwiseFullStokesIcosahedralShellMap_2_1_toMatrix_macro_3D.cpp +++ b/operators/full_stokes/noarch/P2ElementwiseFullStokesIcosahedralShellMap_2_1_toMatrix_P2ElementwiseFullStokesIcosahedralShellMap_2_1_macro_3D.cpp @@ -43,6 +43,10 @@ + + + + @@ -56,7 +60,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseFullStokesIcosahedralShellMap_2_1::toMatrix_macro_3D( idx_t * RESTRICT _data_dstEdge, idx_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, idx_t * RESTRICT _data_srcEdge, idx_t * RESTRICT _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const +void P2ElementwiseFullStokesIcosahedralShellMap_2_1::toMatrix_P2ElementwiseFullStokesIcosahedralShellMap_2_1_macro_3D( idx_t * RESTRICT _data_dstEdge, idx_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, idx_t * RESTRICT _data_srcEdge, idx_t * RESTRICT _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const { { const real_t _data_q_w [] = {-0.1333333333333333, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983}; diff --git a/operators/full_stokes/noarch/P2ElementwiseFullStokesIcosahedralShellMap_2_2_apply_macro_3D.cpp b/operators/full_stokes/noarch/P2ElementwiseFullStokesIcosahedralShellMap_2_2_apply_P2ElementwiseFullStokesIcosahedralShellMap_2_2_macro_3D.cpp similarity index 99% rename from operators/full_stokes/noarch/P2ElementwiseFullStokesIcosahedralShellMap_2_2_apply_macro_3D.cpp rename to operators/full_stokes/noarch/P2ElementwiseFullStokesIcosahedralShellMap_2_2_apply_P2ElementwiseFullStokesIcosahedralShellMap_2_2_macro_3D.cpp index e84963efae29316b1a1f09f1078b0bb27663accb..b9cd02ced81880c03f47f952c60427b32b78cfc3 100644 --- a/operators/full_stokes/noarch/P2ElementwiseFullStokesIcosahedralShellMap_2_2_apply_macro_3D.cpp +++ b/operators/full_stokes/noarch/P2ElementwiseFullStokesIcosahedralShellMap_2_2_apply_P2ElementwiseFullStokesIcosahedralShellMap_2_2_macro_3D.cpp @@ -45,6 +45,10 @@ + + + + @@ -58,7 +62,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseFullStokesIcosahedralShellMap_2_2::apply_macro_3D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const +void P2ElementwiseFullStokesIcosahedralShellMap_2_2::apply_P2ElementwiseFullStokesIcosahedralShellMap_2_2_macro_3D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const { { const real_t _data_q_w [] = {-0.1333333333333333, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983}; diff --git a/operators/full_stokes/noarch/P2ElementwiseFullStokesIcosahedralShellMap_2_2_computeInverseDiagonalOperatorValues_macro_3D.cpp b/operators/full_stokes/noarch/P2ElementwiseFullStokesIcosahedralShellMap_2_2_computeInverseDiagonalOperatorValues_P2ElementwiseFullStokesIcosahedralShellMap_2_2_macro_3D.cpp similarity index 99% rename from operators/full_stokes/noarch/P2ElementwiseFullStokesIcosahedralShellMap_2_2_computeInverseDiagonalOperatorValues_macro_3D.cpp rename to operators/full_stokes/noarch/P2ElementwiseFullStokesIcosahedralShellMap_2_2_computeInverseDiagonalOperatorValues_P2ElementwiseFullStokesIcosahedralShellMap_2_2_macro_3D.cpp index 520de317678c5dfffc3f670bb68a29541e014dd9..65a69f3efcb7e43c5d8929ea9da7c0de22604412 100644 --- a/operators/full_stokes/noarch/P2ElementwiseFullStokesIcosahedralShellMap_2_2_computeInverseDiagonalOperatorValues_macro_3D.cpp +++ b/operators/full_stokes/noarch/P2ElementwiseFullStokesIcosahedralShellMap_2_2_computeInverseDiagonalOperatorValues_P2ElementwiseFullStokesIcosahedralShellMap_2_2_macro_3D.cpp @@ -45,6 +45,10 @@ + + + + @@ -58,7 +62,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseFullStokesIcosahedralShellMap_2_2::computeInverseDiagonalOperatorValues_macro_3D( real_t * RESTRICT _data_invDiag_Edge, real_t * RESTRICT _data_invDiag_Vertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const +void P2ElementwiseFullStokesIcosahedralShellMap_2_2::computeInverseDiagonalOperatorValues_P2ElementwiseFullStokesIcosahedralShellMap_2_2_macro_3D( real_t * RESTRICT _data_invDiag_Edge, real_t * RESTRICT _data_invDiag_Vertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const { { const real_t _data_q_w [] = {-0.1333333333333333, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983}; diff --git a/operators/full_stokes/noarch/P2ElementwiseFullStokesIcosahedralShellMap_2_2_toMatrix_macro_3D.cpp b/operators/full_stokes/noarch/P2ElementwiseFullStokesIcosahedralShellMap_2_2_toMatrix_P2ElementwiseFullStokesIcosahedralShellMap_2_2_macro_3D.cpp similarity index 99% rename from operators/full_stokes/noarch/P2ElementwiseFullStokesIcosahedralShellMap_2_2_toMatrix_macro_3D.cpp rename to operators/full_stokes/noarch/P2ElementwiseFullStokesIcosahedralShellMap_2_2_toMatrix_P2ElementwiseFullStokesIcosahedralShellMap_2_2_macro_3D.cpp index 86ee979c26f007f4de70708af70d33a2df17248a..3946b76207cd3ad1797eab215d251b7510070591 100644 --- a/operators/full_stokes/noarch/P2ElementwiseFullStokesIcosahedralShellMap_2_2_toMatrix_macro_3D.cpp +++ b/operators/full_stokes/noarch/P2ElementwiseFullStokesIcosahedralShellMap_2_2_toMatrix_P2ElementwiseFullStokesIcosahedralShellMap_2_2_macro_3D.cpp @@ -45,6 +45,10 @@ + + + + @@ -58,7 +62,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseFullStokesIcosahedralShellMap_2_2::toMatrix_macro_3D( idx_t * RESTRICT _data_dstEdge, idx_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, idx_t * RESTRICT _data_srcEdge, idx_t * RESTRICT _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const +void P2ElementwiseFullStokesIcosahedralShellMap_2_2::toMatrix_P2ElementwiseFullStokesIcosahedralShellMap_2_2_macro_3D( idx_t * RESTRICT _data_dstEdge, idx_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, idx_t * RESTRICT _data_srcEdge, idx_t * RESTRICT _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const { { const real_t _data_q_w [] = {-0.1333333333333333, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983}; diff --git a/operators/full_stokes/noarch/P2ElementwiseFullStokes_0_0_apply_macro_2D.cpp b/operators/full_stokes/noarch/P2ElementwiseFullStokes_0_0_apply_P2ElementwiseFullStokes_0_0_macro_2D.cpp similarity index 99% rename from operators/full_stokes/noarch/P2ElementwiseFullStokes_0_0_apply_macro_2D.cpp rename to operators/full_stokes/noarch/P2ElementwiseFullStokes_0_0_apply_P2ElementwiseFullStokes_0_0_macro_2D.cpp index 6b4d7d52d182124481d778364879e53e9fbd6528..61bacabfa2460cfb0c33831a2a6cf06ee76b1a95 100644 --- a/operators/full_stokes/noarch/P2ElementwiseFullStokes_0_0_apply_macro_2D.cpp +++ b/operators/full_stokes/noarch/P2ElementwiseFullStokes_0_0_apply_P2ElementwiseFullStokes_0_0_macro_2D.cpp @@ -43,6 +43,10 @@ + + + + @@ -56,7 +60,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseFullStokes_0_0::apply_macro_2D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P2ElementwiseFullStokes_0_0::apply_P2ElementwiseFullStokes_0_0_macro_2D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t _data_q_w [] = {0.16666666666666666, 0.16666666666666666, 0.16666666666666666}; diff --git a/operators/full_stokes/noarch/P2ElementwiseFullStokes_0_0_apply_macro_3D.cpp b/operators/full_stokes/noarch/P2ElementwiseFullStokes_0_0_apply_P2ElementwiseFullStokes_0_0_macro_3D.cpp similarity index 99% rename from operators/full_stokes/noarch/P2ElementwiseFullStokes_0_0_apply_macro_3D.cpp rename to operators/full_stokes/noarch/P2ElementwiseFullStokes_0_0_apply_P2ElementwiseFullStokes_0_0_macro_3D.cpp index 1aff67912ca4242353f7f485a4583aa1a545ffed..4a0288c3ab7c7890c581f0b1241fb4086d043e79 100644 --- a/operators/full_stokes/noarch/P2ElementwiseFullStokes_0_0_apply_macro_3D.cpp +++ b/operators/full_stokes/noarch/P2ElementwiseFullStokes_0_0_apply_P2ElementwiseFullStokes_0_0_macro_3D.cpp @@ -43,6 +43,10 @@ + + + + @@ -56,7 +60,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseFullStokes_0_0::apply_macro_3D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P2ElementwiseFullStokes_0_0::apply_P2ElementwiseFullStokes_0_0_macro_3D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t _data_q_w [] = {0.041666666666666657, 0.041666666666666657, 0.041666666666666657, 0.041666666666666657}; diff --git a/operators/full_stokes/noarch/P2ElementwiseFullStokes_0_0_computeInverseDiagonalOperatorValues_macro_2D.cpp b/operators/full_stokes/noarch/P2ElementwiseFullStokes_0_0_computeInverseDiagonalOperatorValues_P2ElementwiseFullStokes_0_0_macro_2D.cpp similarity index 99% rename from operators/full_stokes/noarch/P2ElementwiseFullStokes_0_0_computeInverseDiagonalOperatorValues_macro_2D.cpp rename to operators/full_stokes/noarch/P2ElementwiseFullStokes_0_0_computeInverseDiagonalOperatorValues_P2ElementwiseFullStokes_0_0_macro_2D.cpp index c9ff205f6bba43eac51f46e7d2db0a5eb7195894..06a99e0d3ed443a50da4b8b7bf25b02c4043cf5d 100644 --- a/operators/full_stokes/noarch/P2ElementwiseFullStokes_0_0_computeInverseDiagonalOperatorValues_macro_2D.cpp +++ b/operators/full_stokes/noarch/P2ElementwiseFullStokes_0_0_computeInverseDiagonalOperatorValues_P2ElementwiseFullStokes_0_0_macro_2D.cpp @@ -43,6 +43,10 @@ + + + + @@ -56,7 +60,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseFullStokes_0_0::computeInverseDiagonalOperatorValues_macro_2D( real_t * RESTRICT _data_invDiag_Edge, real_t * RESTRICT _data_invDiag_Vertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P2ElementwiseFullStokes_0_0::computeInverseDiagonalOperatorValues_P2ElementwiseFullStokes_0_0_macro_2D( real_t * RESTRICT _data_invDiag_Edge, real_t * RESTRICT _data_invDiag_Vertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t _data_q_w [] = {0.16666666666666666, 0.16666666666666666, 0.16666666666666666}; diff --git a/operators/full_stokes/noarch/P2ElementwiseFullStokes_0_0_computeInverseDiagonalOperatorValues_macro_3D.cpp b/operators/full_stokes/noarch/P2ElementwiseFullStokes_0_0_computeInverseDiagonalOperatorValues_P2ElementwiseFullStokes_0_0_macro_3D.cpp similarity index 99% rename from operators/full_stokes/noarch/P2ElementwiseFullStokes_0_0_computeInverseDiagonalOperatorValues_macro_3D.cpp rename to operators/full_stokes/noarch/P2ElementwiseFullStokes_0_0_computeInverseDiagonalOperatorValues_P2ElementwiseFullStokes_0_0_macro_3D.cpp index 32cc6f0d33e987f9be661d3972300ff8a49de8d7..449bf14834a99845ab8d55a55f1397b68d6c7352 100644 --- a/operators/full_stokes/noarch/P2ElementwiseFullStokes_0_0_computeInverseDiagonalOperatorValues_macro_3D.cpp +++ b/operators/full_stokes/noarch/P2ElementwiseFullStokes_0_0_computeInverseDiagonalOperatorValues_P2ElementwiseFullStokes_0_0_macro_3D.cpp @@ -43,6 +43,10 @@ + + + + @@ -56,7 +60,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseFullStokes_0_0::computeInverseDiagonalOperatorValues_macro_3D( real_t * RESTRICT _data_invDiag_Edge, real_t * RESTRICT _data_invDiag_Vertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P2ElementwiseFullStokes_0_0::computeInverseDiagonalOperatorValues_P2ElementwiseFullStokes_0_0_macro_3D( real_t * RESTRICT _data_invDiag_Edge, real_t * RESTRICT _data_invDiag_Vertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t _data_q_w [] = {0.041666666666666657, 0.041666666666666657, 0.041666666666666657, 0.041666666666666657}; diff --git a/operators/full_stokes/noarch/P2ElementwiseFullStokes_0_0_toMatrix_macro_2D.cpp b/operators/full_stokes/noarch/P2ElementwiseFullStokes_0_0_toMatrix_P2ElementwiseFullStokes_0_0_macro_2D.cpp similarity index 99% rename from operators/full_stokes/noarch/P2ElementwiseFullStokes_0_0_toMatrix_macro_2D.cpp rename to operators/full_stokes/noarch/P2ElementwiseFullStokes_0_0_toMatrix_P2ElementwiseFullStokes_0_0_macro_2D.cpp index a4527e17267539474f5f77c0b7752c6f41fda16b..95ab9e882485116bf6cb4b1609835521bde15ad5 100644 --- a/operators/full_stokes/noarch/P2ElementwiseFullStokes_0_0_toMatrix_macro_2D.cpp +++ b/operators/full_stokes/noarch/P2ElementwiseFullStokes_0_0_toMatrix_P2ElementwiseFullStokes_0_0_macro_2D.cpp @@ -43,6 +43,10 @@ + + + + @@ -56,7 +60,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseFullStokes_0_0::toMatrix_macro_2D( idx_t * RESTRICT _data_dstEdge, idx_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, idx_t * RESTRICT _data_srcEdge, idx_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P2ElementwiseFullStokes_0_0::toMatrix_P2ElementwiseFullStokes_0_0_macro_2D( idx_t * RESTRICT _data_dstEdge, idx_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, idx_t * RESTRICT _data_srcEdge, idx_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t _data_q_w [] = {0.16666666666666666, 0.16666666666666666, 0.16666666666666666}; diff --git a/operators/full_stokes/noarch/P2ElementwiseFullStokes_0_0_toMatrix_macro_3D.cpp b/operators/full_stokes/noarch/P2ElementwiseFullStokes_0_0_toMatrix_P2ElementwiseFullStokes_0_0_macro_3D.cpp similarity index 99% rename from operators/full_stokes/noarch/P2ElementwiseFullStokes_0_0_toMatrix_macro_3D.cpp rename to operators/full_stokes/noarch/P2ElementwiseFullStokes_0_0_toMatrix_P2ElementwiseFullStokes_0_0_macro_3D.cpp index 89a2608ecf048c94e9075947086f9c75177142e1..79fc6e1b4b9c008211240cfd4d711b6f4fab8c07 100644 --- a/operators/full_stokes/noarch/P2ElementwiseFullStokes_0_0_toMatrix_macro_3D.cpp +++ b/operators/full_stokes/noarch/P2ElementwiseFullStokes_0_0_toMatrix_P2ElementwiseFullStokes_0_0_macro_3D.cpp @@ -43,6 +43,10 @@ + + + + @@ -56,7 +60,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseFullStokes_0_0::toMatrix_macro_3D( idx_t * RESTRICT _data_dstEdge, idx_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, idx_t * RESTRICT _data_srcEdge, idx_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P2ElementwiseFullStokes_0_0::toMatrix_P2ElementwiseFullStokes_0_0_macro_3D( idx_t * RESTRICT _data_dstEdge, idx_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, idx_t * RESTRICT _data_srcEdge, idx_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t _data_q_w [] = {0.041666666666666657, 0.041666666666666657, 0.041666666666666657, 0.041666666666666657}; diff --git a/operators/full_stokes/noarch/P2ElementwiseFullStokes_0_1_apply_macro_2D.cpp b/operators/full_stokes/noarch/P2ElementwiseFullStokes_0_1_apply_P2ElementwiseFullStokes_0_1_macro_2D.cpp similarity index 99% rename from operators/full_stokes/noarch/P2ElementwiseFullStokes_0_1_apply_macro_2D.cpp rename to operators/full_stokes/noarch/P2ElementwiseFullStokes_0_1_apply_P2ElementwiseFullStokes_0_1_macro_2D.cpp index 60d01a6e2ce397cfe40e8b52a07debc323f88d69..d7f1666ee96ef22facd96b6dc5929f4536d4d314 100644 --- a/operators/full_stokes/noarch/P2ElementwiseFullStokes_0_1_apply_macro_2D.cpp +++ b/operators/full_stokes/noarch/P2ElementwiseFullStokes_0_1_apply_P2ElementwiseFullStokes_0_1_macro_2D.cpp @@ -41,6 +41,10 @@ + + + + @@ -54,7 +58,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseFullStokes_0_1::apply_macro_2D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P2ElementwiseFullStokes_0_1::apply_P2ElementwiseFullStokes_0_1_macro_2D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t _data_q_w [] = {0.16666666666666666, 0.16666666666666666, 0.16666666666666666}; diff --git a/operators/full_stokes/noarch/P2ElementwiseFullStokes_0_1_apply_macro_3D.cpp b/operators/full_stokes/noarch/P2ElementwiseFullStokes_0_1_apply_P2ElementwiseFullStokes_0_1_macro_3D.cpp similarity index 99% rename from operators/full_stokes/noarch/P2ElementwiseFullStokes_0_1_apply_macro_3D.cpp rename to operators/full_stokes/noarch/P2ElementwiseFullStokes_0_1_apply_P2ElementwiseFullStokes_0_1_macro_3D.cpp index 270ad16cc00576bd2247978e984852a25639f7e2..bfdfc243372bcf08907f3674604b9d6dbaeab6c8 100644 --- a/operators/full_stokes/noarch/P2ElementwiseFullStokes_0_1_apply_macro_3D.cpp +++ b/operators/full_stokes/noarch/P2ElementwiseFullStokes_0_1_apply_P2ElementwiseFullStokes_0_1_macro_3D.cpp @@ -41,6 +41,10 @@ + + + + @@ -54,7 +58,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseFullStokes_0_1::apply_macro_3D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P2ElementwiseFullStokes_0_1::apply_P2ElementwiseFullStokes_0_1_macro_3D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t _data_q_w [] = {0.041666666666666657, 0.041666666666666657, 0.041666666666666657, 0.041666666666666657}; diff --git a/operators/full_stokes/noarch/P2ElementwiseFullStokes_0_1_toMatrix_macro_2D.cpp b/operators/full_stokes/noarch/P2ElementwiseFullStokes_0_1_toMatrix_P2ElementwiseFullStokes_0_1_macro_2D.cpp similarity index 99% rename from operators/full_stokes/noarch/P2ElementwiseFullStokes_0_1_toMatrix_macro_2D.cpp rename to operators/full_stokes/noarch/P2ElementwiseFullStokes_0_1_toMatrix_P2ElementwiseFullStokes_0_1_macro_2D.cpp index 9c47d96349f6337f145f2760d4d18cf08e4f0052..d01b881da6606699e0ede030c4e2587584306825 100644 --- a/operators/full_stokes/noarch/P2ElementwiseFullStokes_0_1_toMatrix_macro_2D.cpp +++ b/operators/full_stokes/noarch/P2ElementwiseFullStokes_0_1_toMatrix_P2ElementwiseFullStokes_0_1_macro_2D.cpp @@ -41,6 +41,10 @@ + + + + @@ -54,7 +58,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseFullStokes_0_1::toMatrix_macro_2D( idx_t * RESTRICT _data_dstEdge, idx_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, idx_t * RESTRICT _data_srcEdge, idx_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P2ElementwiseFullStokes_0_1::toMatrix_P2ElementwiseFullStokes_0_1_macro_2D( idx_t * RESTRICT _data_dstEdge, idx_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, idx_t * RESTRICT _data_srcEdge, idx_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t _data_q_w [] = {0.16666666666666666, 0.16666666666666666, 0.16666666666666666}; diff --git a/operators/full_stokes/noarch/P2ElementwiseFullStokes_0_1_toMatrix_macro_3D.cpp b/operators/full_stokes/noarch/P2ElementwiseFullStokes_0_1_toMatrix_P2ElementwiseFullStokes_0_1_macro_3D.cpp similarity index 99% rename from operators/full_stokes/noarch/P2ElementwiseFullStokes_0_1_toMatrix_macro_3D.cpp rename to operators/full_stokes/noarch/P2ElementwiseFullStokes_0_1_toMatrix_P2ElementwiseFullStokes_0_1_macro_3D.cpp index 03de4a31f856892a25661d9354a4176459eadad1..f787ed05f8cb80d9d5d32b977520672d80da2d93 100644 --- a/operators/full_stokes/noarch/P2ElementwiseFullStokes_0_1_toMatrix_macro_3D.cpp +++ b/operators/full_stokes/noarch/P2ElementwiseFullStokes_0_1_toMatrix_P2ElementwiseFullStokes_0_1_macro_3D.cpp @@ -41,6 +41,10 @@ + + + + @@ -54,7 +58,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseFullStokes_0_1::toMatrix_macro_3D( idx_t * RESTRICT _data_dstEdge, idx_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, idx_t * RESTRICT _data_srcEdge, idx_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P2ElementwiseFullStokes_0_1::toMatrix_P2ElementwiseFullStokes_0_1_macro_3D( idx_t * RESTRICT _data_dstEdge, idx_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, idx_t * RESTRICT _data_srcEdge, idx_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t _data_q_w [] = {0.041666666666666657, 0.041666666666666657, 0.041666666666666657, 0.041666666666666657}; diff --git a/operators/full_stokes/noarch/P2ElementwiseFullStokes_0_2_apply_macro_3D.cpp b/operators/full_stokes/noarch/P2ElementwiseFullStokes_0_2_apply_P2ElementwiseFullStokes_0_2_macro_3D.cpp similarity index 99% rename from operators/full_stokes/noarch/P2ElementwiseFullStokes_0_2_apply_macro_3D.cpp rename to operators/full_stokes/noarch/P2ElementwiseFullStokes_0_2_apply_P2ElementwiseFullStokes_0_2_macro_3D.cpp index 119aef26a2bb225a6bcba84dc28d8cfc5694427d..e6eb52a20d1445f285ce9c936dee08c7589ed66a 100644 --- a/operators/full_stokes/noarch/P2ElementwiseFullStokes_0_2_apply_macro_3D.cpp +++ b/operators/full_stokes/noarch/P2ElementwiseFullStokes_0_2_apply_P2ElementwiseFullStokes_0_2_macro_3D.cpp @@ -41,6 +41,10 @@ + + + + @@ -54,7 +58,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseFullStokes_0_2::apply_macro_3D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P2ElementwiseFullStokes_0_2::apply_P2ElementwiseFullStokes_0_2_macro_3D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t _data_q_w [] = {0.041666666666666657, 0.041666666666666657, 0.041666666666666657, 0.041666666666666657}; diff --git a/operators/full_stokes/noarch/P2ElementwiseFullStokes_0_2_toMatrix_macro_3D.cpp b/operators/full_stokes/noarch/P2ElementwiseFullStokes_0_2_toMatrix_P2ElementwiseFullStokes_0_2_macro_3D.cpp similarity index 99% rename from operators/full_stokes/noarch/P2ElementwiseFullStokes_0_2_toMatrix_macro_3D.cpp rename to operators/full_stokes/noarch/P2ElementwiseFullStokes_0_2_toMatrix_P2ElementwiseFullStokes_0_2_macro_3D.cpp index 4db54ae731ad05dc5b8ddf6ad3534622723695c2..9b37c45970ca9b8ee49c3101c0b5e62a6c23b6f4 100644 --- a/operators/full_stokes/noarch/P2ElementwiseFullStokes_0_2_toMatrix_macro_3D.cpp +++ b/operators/full_stokes/noarch/P2ElementwiseFullStokes_0_2_toMatrix_P2ElementwiseFullStokes_0_2_macro_3D.cpp @@ -41,6 +41,10 @@ + + + + @@ -54,7 +58,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseFullStokes_0_2::toMatrix_macro_3D( idx_t * RESTRICT _data_dstEdge, idx_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, idx_t * RESTRICT _data_srcEdge, idx_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P2ElementwiseFullStokes_0_2::toMatrix_P2ElementwiseFullStokes_0_2_macro_3D( idx_t * RESTRICT _data_dstEdge, idx_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, idx_t * RESTRICT _data_srcEdge, idx_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t _data_q_w [] = {0.041666666666666657, 0.041666666666666657, 0.041666666666666657, 0.041666666666666657}; diff --git a/operators/full_stokes/noarch/P2ElementwiseFullStokes_1_0_apply_macro_2D.cpp b/operators/full_stokes/noarch/P2ElementwiseFullStokes_1_0_apply_P2ElementwiseFullStokes_1_0_macro_2D.cpp similarity index 99% rename from operators/full_stokes/noarch/P2ElementwiseFullStokes_1_0_apply_macro_2D.cpp rename to operators/full_stokes/noarch/P2ElementwiseFullStokes_1_0_apply_P2ElementwiseFullStokes_1_0_macro_2D.cpp index b592dcf87a84ce4928d0f25aca41db5d88cb7acb..7ac7272aca3f0de9df6e16e286ba721ea1f0efab 100644 --- a/operators/full_stokes/noarch/P2ElementwiseFullStokes_1_0_apply_macro_2D.cpp +++ b/operators/full_stokes/noarch/P2ElementwiseFullStokes_1_0_apply_P2ElementwiseFullStokes_1_0_macro_2D.cpp @@ -41,6 +41,10 @@ + + + + @@ -54,7 +58,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseFullStokes_1_0::apply_macro_2D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P2ElementwiseFullStokes_1_0::apply_P2ElementwiseFullStokes_1_0_macro_2D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t _data_q_w [] = {0.16666666666666666, 0.16666666666666666, 0.16666666666666666}; diff --git a/operators/full_stokes/noarch/P2ElementwiseFullStokes_1_0_apply_macro_3D.cpp b/operators/full_stokes/noarch/P2ElementwiseFullStokes_1_0_apply_P2ElementwiseFullStokes_1_0_macro_3D.cpp similarity index 99% rename from operators/full_stokes/noarch/P2ElementwiseFullStokes_1_0_apply_macro_3D.cpp rename to operators/full_stokes/noarch/P2ElementwiseFullStokes_1_0_apply_P2ElementwiseFullStokes_1_0_macro_3D.cpp index c41d6827c92f37adeba7112028fbc874d3bee183..292a80791b1a11759e5edf2b0af8927e382be25e 100644 --- a/operators/full_stokes/noarch/P2ElementwiseFullStokes_1_0_apply_macro_3D.cpp +++ b/operators/full_stokes/noarch/P2ElementwiseFullStokes_1_0_apply_P2ElementwiseFullStokes_1_0_macro_3D.cpp @@ -41,6 +41,10 @@ + + + + @@ -54,7 +58,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseFullStokes_1_0::apply_macro_3D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P2ElementwiseFullStokes_1_0::apply_P2ElementwiseFullStokes_1_0_macro_3D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t _data_q_w [] = {0.041666666666666657, 0.041666666666666657, 0.041666666666666657, 0.041666666666666657}; diff --git a/operators/full_stokes/noarch/P2ElementwiseFullStokes_1_0_toMatrix_macro_2D.cpp b/operators/full_stokes/noarch/P2ElementwiseFullStokes_1_0_toMatrix_P2ElementwiseFullStokes_1_0_macro_2D.cpp similarity index 99% rename from operators/full_stokes/noarch/P2ElementwiseFullStokes_1_0_toMatrix_macro_2D.cpp rename to operators/full_stokes/noarch/P2ElementwiseFullStokes_1_0_toMatrix_P2ElementwiseFullStokes_1_0_macro_2D.cpp index 3a614c4db1f831e076c07f06dc26caa36c4b58a8..3f8e5d1c0f90e4a55512236324ae48c0782b45fd 100644 --- a/operators/full_stokes/noarch/P2ElementwiseFullStokes_1_0_toMatrix_macro_2D.cpp +++ b/operators/full_stokes/noarch/P2ElementwiseFullStokes_1_0_toMatrix_P2ElementwiseFullStokes_1_0_macro_2D.cpp @@ -41,6 +41,10 @@ + + + + @@ -54,7 +58,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseFullStokes_1_0::toMatrix_macro_2D( idx_t * RESTRICT _data_dstEdge, idx_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, idx_t * RESTRICT _data_srcEdge, idx_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P2ElementwiseFullStokes_1_0::toMatrix_P2ElementwiseFullStokes_1_0_macro_2D( idx_t * RESTRICT _data_dstEdge, idx_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, idx_t * RESTRICT _data_srcEdge, idx_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t _data_q_w [] = {0.16666666666666666, 0.16666666666666666, 0.16666666666666666}; diff --git a/operators/full_stokes/noarch/P2ElementwiseFullStokes_1_0_toMatrix_macro_3D.cpp b/operators/full_stokes/noarch/P2ElementwiseFullStokes_1_0_toMatrix_P2ElementwiseFullStokes_1_0_macro_3D.cpp similarity index 99% rename from operators/full_stokes/noarch/P2ElementwiseFullStokes_1_0_toMatrix_macro_3D.cpp rename to operators/full_stokes/noarch/P2ElementwiseFullStokes_1_0_toMatrix_P2ElementwiseFullStokes_1_0_macro_3D.cpp index 8fe55dc17dbd3e84d92c21f75413eb492a2f98a0..67f69136410767cbdff8f3a9a5b96105caeab8bf 100644 --- a/operators/full_stokes/noarch/P2ElementwiseFullStokes_1_0_toMatrix_macro_3D.cpp +++ b/operators/full_stokes/noarch/P2ElementwiseFullStokes_1_0_toMatrix_P2ElementwiseFullStokes_1_0_macro_3D.cpp @@ -41,6 +41,10 @@ + + + + @@ -54,7 +58,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseFullStokes_1_0::toMatrix_macro_3D( idx_t * RESTRICT _data_dstEdge, idx_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, idx_t * RESTRICT _data_srcEdge, idx_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P2ElementwiseFullStokes_1_0::toMatrix_P2ElementwiseFullStokes_1_0_macro_3D( idx_t * RESTRICT _data_dstEdge, idx_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, idx_t * RESTRICT _data_srcEdge, idx_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t _data_q_w [] = {0.041666666666666657, 0.041666666666666657, 0.041666666666666657, 0.041666666666666657}; diff --git a/operators/full_stokes/noarch/P2ElementwiseFullStokes_1_1_apply_macro_2D.cpp b/operators/full_stokes/noarch/P2ElementwiseFullStokes_1_1_apply_P2ElementwiseFullStokes_1_1_macro_2D.cpp similarity index 99% rename from operators/full_stokes/noarch/P2ElementwiseFullStokes_1_1_apply_macro_2D.cpp rename to operators/full_stokes/noarch/P2ElementwiseFullStokes_1_1_apply_P2ElementwiseFullStokes_1_1_macro_2D.cpp index 09aaf0e830c8931fef719bc348e0158124432ded..18733b28ded08598b77e31c0b2f80b76acb58249 100644 --- a/operators/full_stokes/noarch/P2ElementwiseFullStokes_1_1_apply_macro_2D.cpp +++ b/operators/full_stokes/noarch/P2ElementwiseFullStokes_1_1_apply_P2ElementwiseFullStokes_1_1_macro_2D.cpp @@ -43,6 +43,10 @@ + + + + @@ -56,7 +60,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseFullStokes_1_1::apply_macro_2D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P2ElementwiseFullStokes_1_1::apply_P2ElementwiseFullStokes_1_1_macro_2D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t _data_q_w [] = {0.16666666666666666, 0.16666666666666666, 0.16666666666666666}; diff --git a/operators/full_stokes/noarch/P2ElementwiseFullStokes_1_1_apply_macro_3D.cpp b/operators/full_stokes/noarch/P2ElementwiseFullStokes_1_1_apply_P2ElementwiseFullStokes_1_1_macro_3D.cpp similarity index 99% rename from operators/full_stokes/noarch/P2ElementwiseFullStokes_1_1_apply_macro_3D.cpp rename to operators/full_stokes/noarch/P2ElementwiseFullStokes_1_1_apply_P2ElementwiseFullStokes_1_1_macro_3D.cpp index 17f5451f5d8d5d14c92070fbad5fbf04f05189ed..c53209c5fb0c29ffa83a2bd9f89ddd26b7db04a5 100644 --- a/operators/full_stokes/noarch/P2ElementwiseFullStokes_1_1_apply_macro_3D.cpp +++ b/operators/full_stokes/noarch/P2ElementwiseFullStokes_1_1_apply_P2ElementwiseFullStokes_1_1_macro_3D.cpp @@ -43,6 +43,10 @@ + + + + @@ -56,7 +60,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseFullStokes_1_1::apply_macro_3D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P2ElementwiseFullStokes_1_1::apply_P2ElementwiseFullStokes_1_1_macro_3D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t _data_q_w [] = {0.041666666666666657, 0.041666666666666657, 0.041666666666666657, 0.041666666666666657}; diff --git a/operators/full_stokes/noarch/P2ElementwiseFullStokes_1_1_computeInverseDiagonalOperatorValues_macro_2D.cpp b/operators/full_stokes/noarch/P2ElementwiseFullStokes_1_1_computeInverseDiagonalOperatorValues_P2ElementwiseFullStokes_1_1_macro_2D.cpp similarity index 99% rename from operators/full_stokes/noarch/P2ElementwiseFullStokes_1_1_computeInverseDiagonalOperatorValues_macro_2D.cpp rename to operators/full_stokes/noarch/P2ElementwiseFullStokes_1_1_computeInverseDiagonalOperatorValues_P2ElementwiseFullStokes_1_1_macro_2D.cpp index 8bde675a98dd2300675a3309be077b9f5db68fa1..7ab2a66bcd4f6545e0a25e766765ea2bbc6e0645 100644 --- a/operators/full_stokes/noarch/P2ElementwiseFullStokes_1_1_computeInverseDiagonalOperatorValues_macro_2D.cpp +++ b/operators/full_stokes/noarch/P2ElementwiseFullStokes_1_1_computeInverseDiagonalOperatorValues_P2ElementwiseFullStokes_1_1_macro_2D.cpp @@ -43,6 +43,10 @@ + + + + @@ -56,7 +60,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseFullStokes_1_1::computeInverseDiagonalOperatorValues_macro_2D( real_t * RESTRICT _data_invDiag_Edge, real_t * RESTRICT _data_invDiag_Vertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P2ElementwiseFullStokes_1_1::computeInverseDiagonalOperatorValues_P2ElementwiseFullStokes_1_1_macro_2D( real_t * RESTRICT _data_invDiag_Edge, real_t * RESTRICT _data_invDiag_Vertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t _data_q_w [] = {0.16666666666666666, 0.16666666666666666, 0.16666666666666666}; diff --git a/operators/full_stokes/noarch/P2ElementwiseFullStokes_1_1_computeInverseDiagonalOperatorValues_macro_3D.cpp b/operators/full_stokes/noarch/P2ElementwiseFullStokes_1_1_computeInverseDiagonalOperatorValues_P2ElementwiseFullStokes_1_1_macro_3D.cpp similarity index 99% rename from operators/full_stokes/noarch/P2ElementwiseFullStokes_1_1_computeInverseDiagonalOperatorValues_macro_3D.cpp rename to operators/full_stokes/noarch/P2ElementwiseFullStokes_1_1_computeInverseDiagonalOperatorValues_P2ElementwiseFullStokes_1_1_macro_3D.cpp index d7f4d343e493405e637f1a9dd323539f9f394b3e..5083d9028d72f21aabd387bc54e2a9d0d83cde3f 100644 --- a/operators/full_stokes/noarch/P2ElementwiseFullStokes_1_1_computeInverseDiagonalOperatorValues_macro_3D.cpp +++ b/operators/full_stokes/noarch/P2ElementwiseFullStokes_1_1_computeInverseDiagonalOperatorValues_P2ElementwiseFullStokes_1_1_macro_3D.cpp @@ -43,6 +43,10 @@ + + + + @@ -56,7 +60,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseFullStokes_1_1::computeInverseDiagonalOperatorValues_macro_3D( real_t * RESTRICT _data_invDiag_Edge, real_t * RESTRICT _data_invDiag_Vertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P2ElementwiseFullStokes_1_1::computeInverseDiagonalOperatorValues_P2ElementwiseFullStokes_1_1_macro_3D( real_t * RESTRICT _data_invDiag_Edge, real_t * RESTRICT _data_invDiag_Vertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t _data_q_w [] = {0.041666666666666657, 0.041666666666666657, 0.041666666666666657, 0.041666666666666657}; diff --git a/operators/full_stokes/noarch/P2ElementwiseFullStokes_1_1_toMatrix_macro_2D.cpp b/operators/full_stokes/noarch/P2ElementwiseFullStokes_1_1_toMatrix_P2ElementwiseFullStokes_1_1_macro_2D.cpp similarity index 99% rename from operators/full_stokes/noarch/P2ElementwiseFullStokes_1_1_toMatrix_macro_2D.cpp rename to operators/full_stokes/noarch/P2ElementwiseFullStokes_1_1_toMatrix_P2ElementwiseFullStokes_1_1_macro_2D.cpp index 350f2fee12a3c77d9aee739c55c33fb4bca760d8..e91096a42f4dc26abcb7abf7dc15c3928d3ec4d5 100644 --- a/operators/full_stokes/noarch/P2ElementwiseFullStokes_1_1_toMatrix_macro_2D.cpp +++ b/operators/full_stokes/noarch/P2ElementwiseFullStokes_1_1_toMatrix_P2ElementwiseFullStokes_1_1_macro_2D.cpp @@ -43,6 +43,10 @@ + + + + @@ -56,7 +60,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseFullStokes_1_1::toMatrix_macro_2D( idx_t * RESTRICT _data_dstEdge, idx_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, idx_t * RESTRICT _data_srcEdge, idx_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P2ElementwiseFullStokes_1_1::toMatrix_P2ElementwiseFullStokes_1_1_macro_2D( idx_t * RESTRICT _data_dstEdge, idx_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, idx_t * RESTRICT _data_srcEdge, idx_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t _data_q_w [] = {0.16666666666666666, 0.16666666666666666, 0.16666666666666666}; diff --git a/operators/full_stokes/noarch/P2ElementwiseFullStokes_1_1_toMatrix_macro_3D.cpp b/operators/full_stokes/noarch/P2ElementwiseFullStokes_1_1_toMatrix_P2ElementwiseFullStokes_1_1_macro_3D.cpp similarity index 99% rename from operators/full_stokes/noarch/P2ElementwiseFullStokes_1_1_toMatrix_macro_3D.cpp rename to operators/full_stokes/noarch/P2ElementwiseFullStokes_1_1_toMatrix_P2ElementwiseFullStokes_1_1_macro_3D.cpp index 99e17bcef188f7ad6a615e2679642ec8bbb5465b..461c648c79903eacb1c5b2fb3bef6744345a0577 100644 --- a/operators/full_stokes/noarch/P2ElementwiseFullStokes_1_1_toMatrix_macro_3D.cpp +++ b/operators/full_stokes/noarch/P2ElementwiseFullStokes_1_1_toMatrix_P2ElementwiseFullStokes_1_1_macro_3D.cpp @@ -43,6 +43,10 @@ + + + + @@ -56,7 +60,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseFullStokes_1_1::toMatrix_macro_3D( idx_t * RESTRICT _data_dstEdge, idx_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, idx_t * RESTRICT _data_srcEdge, idx_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P2ElementwiseFullStokes_1_1::toMatrix_P2ElementwiseFullStokes_1_1_macro_3D( idx_t * RESTRICT _data_dstEdge, idx_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, idx_t * RESTRICT _data_srcEdge, idx_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t _data_q_w [] = {0.041666666666666657, 0.041666666666666657, 0.041666666666666657, 0.041666666666666657}; diff --git a/operators/full_stokes/noarch/P2ElementwiseFullStokes_1_2_apply_macro_3D.cpp b/operators/full_stokes/noarch/P2ElementwiseFullStokes_1_2_apply_P2ElementwiseFullStokes_1_2_macro_3D.cpp similarity index 99% rename from operators/full_stokes/noarch/P2ElementwiseFullStokes_1_2_apply_macro_3D.cpp rename to operators/full_stokes/noarch/P2ElementwiseFullStokes_1_2_apply_P2ElementwiseFullStokes_1_2_macro_3D.cpp index 1a178923cb86e754273824a52fc5bc3f70bf7778..ccf6d3d2a0e8268a7bf1f6035543b25572b1fe8f 100644 --- a/operators/full_stokes/noarch/P2ElementwiseFullStokes_1_2_apply_macro_3D.cpp +++ b/operators/full_stokes/noarch/P2ElementwiseFullStokes_1_2_apply_P2ElementwiseFullStokes_1_2_macro_3D.cpp @@ -41,6 +41,10 @@ + + + + @@ -54,7 +58,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseFullStokes_1_2::apply_macro_3D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P2ElementwiseFullStokes_1_2::apply_P2ElementwiseFullStokes_1_2_macro_3D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t _data_q_w [] = {0.041666666666666657, 0.041666666666666657, 0.041666666666666657, 0.041666666666666657}; diff --git a/operators/full_stokes/noarch/P2ElementwiseFullStokes_1_2_toMatrix_macro_3D.cpp b/operators/full_stokes/noarch/P2ElementwiseFullStokes_1_2_toMatrix_P2ElementwiseFullStokes_1_2_macro_3D.cpp similarity index 99% rename from operators/full_stokes/noarch/P2ElementwiseFullStokes_1_2_toMatrix_macro_3D.cpp rename to operators/full_stokes/noarch/P2ElementwiseFullStokes_1_2_toMatrix_P2ElementwiseFullStokes_1_2_macro_3D.cpp index e8dc95d6fb17c3d5f2ef1227bfc7729c89f8ab11..c7a48f235db77f9b2d5d2425c8031e611e4c566b 100644 --- a/operators/full_stokes/noarch/P2ElementwiseFullStokes_1_2_toMatrix_macro_3D.cpp +++ b/operators/full_stokes/noarch/P2ElementwiseFullStokes_1_2_toMatrix_P2ElementwiseFullStokes_1_2_macro_3D.cpp @@ -41,6 +41,10 @@ + + + + @@ -54,7 +58,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseFullStokes_1_2::toMatrix_macro_3D( idx_t * RESTRICT _data_dstEdge, idx_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, idx_t * RESTRICT _data_srcEdge, idx_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P2ElementwiseFullStokes_1_2::toMatrix_P2ElementwiseFullStokes_1_2_macro_3D( idx_t * RESTRICT _data_dstEdge, idx_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, idx_t * RESTRICT _data_srcEdge, idx_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t _data_q_w [] = {0.041666666666666657, 0.041666666666666657, 0.041666666666666657, 0.041666666666666657}; diff --git a/operators/full_stokes/noarch/P2ElementwiseFullStokes_2_0_apply_macro_3D.cpp b/operators/full_stokes/noarch/P2ElementwiseFullStokes_2_0_apply_P2ElementwiseFullStokes_2_0_macro_3D.cpp similarity index 99% rename from operators/full_stokes/noarch/P2ElementwiseFullStokes_2_0_apply_macro_3D.cpp rename to operators/full_stokes/noarch/P2ElementwiseFullStokes_2_0_apply_P2ElementwiseFullStokes_2_0_macro_3D.cpp index bc456082086891eeaa9409977563cd0f42fa4998..1f1340c813bfbfef0227fcd26cd2aed8af55cf8b 100644 --- a/operators/full_stokes/noarch/P2ElementwiseFullStokes_2_0_apply_macro_3D.cpp +++ b/operators/full_stokes/noarch/P2ElementwiseFullStokes_2_0_apply_P2ElementwiseFullStokes_2_0_macro_3D.cpp @@ -41,6 +41,10 @@ + + + + @@ -54,7 +58,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseFullStokes_2_0::apply_macro_3D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P2ElementwiseFullStokes_2_0::apply_P2ElementwiseFullStokes_2_0_macro_3D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t _data_q_w [] = {0.041666666666666657, 0.041666666666666657, 0.041666666666666657, 0.041666666666666657}; diff --git a/operators/full_stokes/noarch/P2ElementwiseFullStokes_2_0_toMatrix_macro_3D.cpp b/operators/full_stokes/noarch/P2ElementwiseFullStokes_2_0_toMatrix_P2ElementwiseFullStokes_2_0_macro_3D.cpp similarity index 99% rename from operators/full_stokes/noarch/P2ElementwiseFullStokes_2_0_toMatrix_macro_3D.cpp rename to operators/full_stokes/noarch/P2ElementwiseFullStokes_2_0_toMatrix_P2ElementwiseFullStokes_2_0_macro_3D.cpp index f3ef73295c5d44a87f45a3eddf5781d64b1bb34a..8ae974b7e67ea206fef359746be5015512d9646d 100644 --- a/operators/full_stokes/noarch/P2ElementwiseFullStokes_2_0_toMatrix_macro_3D.cpp +++ b/operators/full_stokes/noarch/P2ElementwiseFullStokes_2_0_toMatrix_P2ElementwiseFullStokes_2_0_macro_3D.cpp @@ -41,6 +41,10 @@ + + + + @@ -54,7 +58,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseFullStokes_2_0::toMatrix_macro_3D( idx_t * RESTRICT _data_dstEdge, idx_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, idx_t * RESTRICT _data_srcEdge, idx_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P2ElementwiseFullStokes_2_0::toMatrix_P2ElementwiseFullStokes_2_0_macro_3D( idx_t * RESTRICT _data_dstEdge, idx_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, idx_t * RESTRICT _data_srcEdge, idx_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t _data_q_w [] = {0.041666666666666657, 0.041666666666666657, 0.041666666666666657, 0.041666666666666657}; diff --git a/operators/full_stokes/noarch/P2ElementwiseFullStokes_2_1_apply_macro_3D.cpp b/operators/full_stokes/noarch/P2ElementwiseFullStokes_2_1_apply_P2ElementwiseFullStokes_2_1_macro_3D.cpp similarity index 99% rename from operators/full_stokes/noarch/P2ElementwiseFullStokes_2_1_apply_macro_3D.cpp rename to operators/full_stokes/noarch/P2ElementwiseFullStokes_2_1_apply_P2ElementwiseFullStokes_2_1_macro_3D.cpp index 7246ec9f63ffada6f0f3b1d70243f60e904ac364..bda6c1e4dd0e0b38c111f50a065246bdb1fc61bd 100644 --- a/operators/full_stokes/noarch/P2ElementwiseFullStokes_2_1_apply_macro_3D.cpp +++ b/operators/full_stokes/noarch/P2ElementwiseFullStokes_2_1_apply_P2ElementwiseFullStokes_2_1_macro_3D.cpp @@ -41,6 +41,10 @@ + + + + @@ -54,7 +58,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseFullStokes_2_1::apply_macro_3D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P2ElementwiseFullStokes_2_1::apply_P2ElementwiseFullStokes_2_1_macro_3D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t _data_q_w [] = {0.041666666666666657, 0.041666666666666657, 0.041666666666666657, 0.041666666666666657}; diff --git a/operators/full_stokes/noarch/P2ElementwiseFullStokes_2_1_toMatrix_macro_3D.cpp b/operators/full_stokes/noarch/P2ElementwiseFullStokes_2_1_toMatrix_P2ElementwiseFullStokes_2_1_macro_3D.cpp similarity index 99% rename from operators/full_stokes/noarch/P2ElementwiseFullStokes_2_1_toMatrix_macro_3D.cpp rename to operators/full_stokes/noarch/P2ElementwiseFullStokes_2_1_toMatrix_P2ElementwiseFullStokes_2_1_macro_3D.cpp index 0f363c918657d8d628666a77cd1e6baf63baa4ed..8626314a85abaa3111407199dee6180afb0f748d 100644 --- a/operators/full_stokes/noarch/P2ElementwiseFullStokes_2_1_toMatrix_macro_3D.cpp +++ b/operators/full_stokes/noarch/P2ElementwiseFullStokes_2_1_toMatrix_P2ElementwiseFullStokes_2_1_macro_3D.cpp @@ -41,6 +41,10 @@ + + + + @@ -54,7 +58,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseFullStokes_2_1::toMatrix_macro_3D( idx_t * RESTRICT _data_dstEdge, idx_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, idx_t * RESTRICT _data_srcEdge, idx_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P2ElementwiseFullStokes_2_1::toMatrix_P2ElementwiseFullStokes_2_1_macro_3D( idx_t * RESTRICT _data_dstEdge, idx_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, idx_t * RESTRICT _data_srcEdge, idx_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t _data_q_w [] = {0.041666666666666657, 0.041666666666666657, 0.041666666666666657, 0.041666666666666657}; diff --git a/operators/full_stokes/noarch/P2ElementwiseFullStokes_2_2_apply_macro_3D.cpp b/operators/full_stokes/noarch/P2ElementwiseFullStokes_2_2_apply_P2ElementwiseFullStokes_2_2_macro_3D.cpp similarity index 99% rename from operators/full_stokes/noarch/P2ElementwiseFullStokes_2_2_apply_macro_3D.cpp rename to operators/full_stokes/noarch/P2ElementwiseFullStokes_2_2_apply_P2ElementwiseFullStokes_2_2_macro_3D.cpp index af6ddf52f67b2e5774a3882b0b5f9eeaae15f419..06c0ca22965da1340d5002de57a5914f65c17a8f 100644 --- a/operators/full_stokes/noarch/P2ElementwiseFullStokes_2_2_apply_macro_3D.cpp +++ b/operators/full_stokes/noarch/P2ElementwiseFullStokes_2_2_apply_P2ElementwiseFullStokes_2_2_macro_3D.cpp @@ -43,6 +43,10 @@ + + + + @@ -56,7 +60,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseFullStokes_2_2::apply_macro_3D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P2ElementwiseFullStokes_2_2::apply_P2ElementwiseFullStokes_2_2_macro_3D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t _data_q_w [] = {0.041666666666666657, 0.041666666666666657, 0.041666666666666657, 0.041666666666666657}; diff --git a/operators/full_stokes/noarch/P2ElementwiseFullStokes_2_2_computeInverseDiagonalOperatorValues_macro_3D.cpp b/operators/full_stokes/noarch/P2ElementwiseFullStokes_2_2_computeInverseDiagonalOperatorValues_P2ElementwiseFullStokes_2_2_macro_3D.cpp similarity index 99% rename from operators/full_stokes/noarch/P2ElementwiseFullStokes_2_2_computeInverseDiagonalOperatorValues_macro_3D.cpp rename to operators/full_stokes/noarch/P2ElementwiseFullStokes_2_2_computeInverseDiagonalOperatorValues_P2ElementwiseFullStokes_2_2_macro_3D.cpp index 927aa9d5653821c323759c89198f1957bbde8777..19d3c29467931230eb795c0605dea3c3f9c71301 100644 --- a/operators/full_stokes/noarch/P2ElementwiseFullStokes_2_2_computeInverseDiagonalOperatorValues_macro_3D.cpp +++ b/operators/full_stokes/noarch/P2ElementwiseFullStokes_2_2_computeInverseDiagonalOperatorValues_P2ElementwiseFullStokes_2_2_macro_3D.cpp @@ -43,6 +43,10 @@ + + + + @@ -56,7 +60,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseFullStokes_2_2::computeInverseDiagonalOperatorValues_macro_3D( real_t * RESTRICT _data_invDiag_Edge, real_t * RESTRICT _data_invDiag_Vertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P2ElementwiseFullStokes_2_2::computeInverseDiagonalOperatorValues_P2ElementwiseFullStokes_2_2_macro_3D( real_t * RESTRICT _data_invDiag_Edge, real_t * RESTRICT _data_invDiag_Vertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t _data_q_w [] = {0.041666666666666657, 0.041666666666666657, 0.041666666666666657, 0.041666666666666657}; diff --git a/operators/full_stokes/noarch/P2ElementwiseFullStokes_2_2_toMatrix_macro_3D.cpp b/operators/full_stokes/noarch/P2ElementwiseFullStokes_2_2_toMatrix_P2ElementwiseFullStokes_2_2_macro_3D.cpp similarity index 99% rename from operators/full_stokes/noarch/P2ElementwiseFullStokes_2_2_toMatrix_macro_3D.cpp rename to operators/full_stokes/noarch/P2ElementwiseFullStokes_2_2_toMatrix_P2ElementwiseFullStokes_2_2_macro_3D.cpp index b132392381fdc6f20205004437c498720cc2c5db..84eeba13285bf97cb5fb843a958959b84a4b669c 100644 --- a/operators/full_stokes/noarch/P2ElementwiseFullStokes_2_2_toMatrix_macro_3D.cpp +++ b/operators/full_stokes/noarch/P2ElementwiseFullStokes_2_2_toMatrix_P2ElementwiseFullStokes_2_2_macro_3D.cpp @@ -43,6 +43,10 @@ + + + + @@ -56,7 +60,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseFullStokes_2_2::toMatrix_macro_3D( idx_t * RESTRICT _data_dstEdge, idx_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, idx_t * RESTRICT _data_srcEdge, idx_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P2ElementwiseFullStokes_2_2::toMatrix_P2ElementwiseFullStokes_2_2_macro_3D( idx_t * RESTRICT _data_dstEdge, idx_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, idx_t * RESTRICT _data_srcEdge, idx_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t _data_q_w [] = {0.041666666666666657, 0.041666666666666657, 0.041666666666666657, 0.041666666666666657}; diff --git a/operators/grad_rho_by_rho_dot_u/CMakeLists.txt b/operators/grad_rho_by_rho_dot_u/CMakeLists.txt index 7660ec9092fcaf95a21c18924db74024e203ef9e..dcb22f8a9d6203a6427e155c272d4d92e1c22fda 100644 --- a/operators/grad_rho_by_rho_dot_u/CMakeLists.txt +++ b/operators/grad_rho_by_rho_dot_u/CMakeLists.txt @@ -8,17 +8,45 @@ add_library( opgen-grad_rho_by_rho_dot_u P2VectorToP1ElementwiseGradRhoByRhoDotUIcosahedralShellMap.hpp ) -target_sources(opgen-grad_rho_by_rho_dot_u PRIVATE - - noarch/P2VectorToP1ElementwiseGradRhoByRhoDotUAnnulusMap_apply_macro_2D.cpp - noarch/P2VectorToP1ElementwiseGradRhoByRhoDotUAnnulusMap_toMatrix_macro_2D.cpp - noarch/P2VectorToP1ElementwiseGradRhoByRhoDotUIcosahedralShellMap_apply_macro_3D.cpp - noarch/P2VectorToP1ElementwiseGradRhoByRhoDotUIcosahedralShellMap_toMatrix_macro_3D.cpp - noarch/P2VectorToP1ElementwiseGradRhoByRhoDotU_apply_macro_2D.cpp - noarch/P2VectorToP1ElementwiseGradRhoByRhoDotU_apply_macro_3D.cpp - noarch/P2VectorToP1ElementwiseGradRhoByRhoDotU_toMatrix_macro_2D.cpp - noarch/P2VectorToP1ElementwiseGradRhoByRhoDotU_toMatrix_macro_3D.cpp -) +if(HYTEG_BUILD_WITH_AVX AND WALBERLA_DOUBLE_ACCURACY) + target_sources(opgen-grad_rho_by_rho_dot_u PRIVATE + + avx/P2VectorToP1ElementwiseGradRhoByRhoDotUAnnulusMap_apply_P2VectorToP1ElementwiseGradRhoByRhoDotUAnnulusMap_macro_2D.cpp + avx/P2VectorToP1ElementwiseGradRhoByRhoDotUIcosahedralShellMap_apply_P2VectorToP1ElementwiseGradRhoByRhoDotUIcosahedralShellMap_macro_3D.cpp + avx/P2VectorToP1ElementwiseGradRhoByRhoDotU_apply_P2VectorToP1ElementwiseGradRhoByRhoDotU_macro_2D.cpp + avx/P2VectorToP1ElementwiseGradRhoByRhoDotU_apply_P2VectorToP1ElementwiseGradRhoByRhoDotU_macro_3D.cpp + noarch/P2VectorToP1ElementwiseGradRhoByRhoDotUAnnulusMap_toMatrix_P2VectorToP1ElementwiseGradRhoByRhoDotUAnnulusMap_macro_2D.cpp + noarch/P2VectorToP1ElementwiseGradRhoByRhoDotUIcosahedralShellMap_toMatrix_P2VectorToP1ElementwiseGradRhoByRhoDotUIcosahedralShellMap_macro_3D.cpp + noarch/P2VectorToP1ElementwiseGradRhoByRhoDotU_toMatrix_P2VectorToP1ElementwiseGradRhoByRhoDotU_macro_2D.cpp + noarch/P2VectorToP1ElementwiseGradRhoByRhoDotU_toMatrix_P2VectorToP1ElementwiseGradRhoByRhoDotU_macro_3D.cpp + ) + + set_source_files_properties( + + avx/P2VectorToP1ElementwiseGradRhoByRhoDotUAnnulusMap_apply_P2VectorToP1ElementwiseGradRhoByRhoDotUAnnulusMap_macro_2D.cpp + avx/P2VectorToP1ElementwiseGradRhoByRhoDotUIcosahedralShellMap_apply_P2VectorToP1ElementwiseGradRhoByRhoDotUIcosahedralShellMap_macro_3D.cpp + avx/P2VectorToP1ElementwiseGradRhoByRhoDotU_apply_P2VectorToP1ElementwiseGradRhoByRhoDotU_macro_2D.cpp + avx/P2VectorToP1ElementwiseGradRhoByRhoDotU_apply_P2VectorToP1ElementwiseGradRhoByRhoDotU_macro_3D.cpp + + PROPERTIES COMPILE_OPTIONS ${HYTEG_COMPILER_NATIVE_FLAGS} + ) +else() + if(HYTEG_BUILD_WITH_AVX AND NOT WALBERLA_DOUBLE_ACCURACY) + message(WARNING "AVX vectorization only available in double precision. Using scalar kernels.") + endif() + + target_sources(opgen-grad_rho_by_rho_dot_u PRIVATE + + noarch/P2VectorToP1ElementwiseGradRhoByRhoDotUAnnulusMap_apply_P2VectorToP1ElementwiseGradRhoByRhoDotUAnnulusMap_macro_2D.cpp + noarch/P2VectorToP1ElementwiseGradRhoByRhoDotUAnnulusMap_toMatrix_P2VectorToP1ElementwiseGradRhoByRhoDotUAnnulusMap_macro_2D.cpp + noarch/P2VectorToP1ElementwiseGradRhoByRhoDotUIcosahedralShellMap_apply_P2VectorToP1ElementwiseGradRhoByRhoDotUIcosahedralShellMap_macro_3D.cpp + noarch/P2VectorToP1ElementwiseGradRhoByRhoDotUIcosahedralShellMap_toMatrix_P2VectorToP1ElementwiseGradRhoByRhoDotUIcosahedralShellMap_macro_3D.cpp + noarch/P2VectorToP1ElementwiseGradRhoByRhoDotU_apply_P2VectorToP1ElementwiseGradRhoByRhoDotU_macro_2D.cpp + noarch/P2VectorToP1ElementwiseGradRhoByRhoDotU_apply_P2VectorToP1ElementwiseGradRhoByRhoDotU_macro_3D.cpp + noarch/P2VectorToP1ElementwiseGradRhoByRhoDotU_toMatrix_P2VectorToP1ElementwiseGradRhoByRhoDotU_macro_2D.cpp + noarch/P2VectorToP1ElementwiseGradRhoByRhoDotU_toMatrix_P2VectorToP1ElementwiseGradRhoByRhoDotU_macro_3D.cpp + ) +endif() if (HYTEG_BUILD_WITH_PETSC) target_link_libraries(opgen-grad_rho_by_rho_dot_u PUBLIC PETSc::PETSc) diff --git a/operators/grad_rho_by_rho_dot_u/P2VectorToP1ElementwiseGradRhoByRhoDotU.cpp b/operators/grad_rho_by_rho_dot_u/P2VectorToP1ElementwiseGradRhoByRhoDotU.cpp index 43cad945b2cf3c72d623df73321bc1d24a9dec36..130fed3646aff45cd6006fc4a31568d2defd1db2 100644 --- a/operators/grad_rho_by_rho_dot_u/P2VectorToP1ElementwiseGradRhoByRhoDotU.cpp +++ b/operators/grad_rho_by_rho_dot_u/P2VectorToP1ElementwiseGradRhoByRhoDotU.cpp @@ -147,7 +147,7 @@ void P2VectorToP1ElementwiseGradRhoByRhoDotU::apply( const P2VectorFunction< rea this->timingTree_->start( "kernel" ); - apply_macro_3D( + apply_P2VectorToP1ElementwiseGradRhoByRhoDotU_macro_3D( _data_dst, _data_rhoEdge, @@ -172,6 +172,7 @@ void P2VectorToP1ElementwiseGradRhoByRhoDotU::apply( const P2VectorFunction< rea macro_vertex_coord_id_3comp2, micro_edges_per_macro_edge, micro_edges_per_macro_edge_float ); + this->timingTree_->stop( "kernel" ); } @@ -225,7 +226,7 @@ void P2VectorToP1ElementwiseGradRhoByRhoDotU::apply( const P2VectorFunction< rea this->timingTree_->start( "kernel" ); - apply_macro_2D( + apply_P2VectorToP1ElementwiseGradRhoByRhoDotU_macro_2D( _data_dst, _data_rhoEdge, @@ -242,6 +243,7 @@ void P2VectorToP1ElementwiseGradRhoByRhoDotU::apply( const P2VectorFunction< rea macro_vertex_coord_id_2comp1, micro_edges_per_macro_edge, micro_edges_per_macro_edge_float ); + this->timingTree_->stop( "kernel" ); } @@ -312,7 +314,7 @@ void P2VectorToP1ElementwiseGradRhoByRhoDotU::toMatrix( const std::shared_ptr< S this->timingTree_->start( "kernel" ); - toMatrix_macro_3D( + toMatrix_P2VectorToP1ElementwiseGradRhoByRhoDotU_macro_3D( _data_dst, _data_rhoEdge, @@ -338,6 +340,7 @@ void P2VectorToP1ElementwiseGradRhoByRhoDotU::toMatrix( const std::shared_ptr< S mat, micro_edges_per_macro_edge, micro_edges_per_macro_edge_float ); + this->timingTree_->stop( "kernel" ); } } @@ -372,7 +375,7 @@ void P2VectorToP1ElementwiseGradRhoByRhoDotU::toMatrix( const std::shared_ptr< S this->timingTree_->start( "kernel" ); - toMatrix_macro_2D( + toMatrix_P2VectorToP1ElementwiseGradRhoByRhoDotU_macro_2D( _data_dst, _data_rhoEdge, @@ -390,6 +393,7 @@ void P2VectorToP1ElementwiseGradRhoByRhoDotU::toMatrix( const std::shared_ptr< S mat, micro_edges_per_macro_edge, micro_edges_per_macro_edge_float ); + this->timingTree_->stop( "kernel" ); } } diff --git a/operators/grad_rho_by_rho_dot_u/P2VectorToP1ElementwiseGradRhoByRhoDotU.hpp b/operators/grad_rho_by_rho_dot_u/P2VectorToP1ElementwiseGradRhoByRhoDotU.hpp index 6328e35f5892f03fa190f2546c187cba08ba3fcf..1b85895365f37283e00be0b5cdc8c95dc7bb5eea 100644 --- a/operators/grad_rho_by_rho_dot_u/P2VectorToP1ElementwiseGradRhoByRhoDotU.hpp +++ b/operators/grad_rho_by_rho_dot_u/P2VectorToP1ElementwiseGradRhoByRhoDotU.hpp @@ -29,6 +29,7 @@ #include "core/DataTypes.h" #include "hyteg/LikwidWrapper.hpp" +#include "hyteg/boundary/BoundaryConditions.hpp" #include "hyteg/communication/Syncing.hpp" #include "hyteg/edgedofspace/EdgeDoFMacroCell.hpp" #include "hyteg/operators/Operator.hpp" @@ -37,6 +38,7 @@ #include "hyteg/p2functionspace/P2VectorFunction.hpp" #include "hyteg/primitivestorage/PrimitiveStorage.hpp" #include "hyteg/sparseassembly/SparseMatrixProxy.hpp" +#include "hyteg/types/types.hpp" #define FUNC_PREFIX @@ -50,7 +52,7 @@ namespace operatorgeneration { /// /// Weak formulation /// -/// u: trial function (space: TensorialVectorSpace(Lagrange, degree: 2)) +/// u: trial function (vectorial space: TensorialVectorSpace(Lagrange, degree: 2)) /// v: test function (space: Lagrange, degree: 1) /// rho: coefficient (space: Lagrange, degree: 2) /// @@ -78,108 +80,127 @@ class P2VectorToP1ElementwiseGradRhoByRhoDotU : public Operator< P2VectorFunctio protected: private: - /// Kernel type: apply + /// Integral: P2VectorToP1ElementwiseGradRhoByRhoDotU + /// - volume element: triangle, dim: 2, vertices: 3, spacedim: 2 + /// - kernel type: apply + /// - loop strategy: SAWTOOTH /// - quadrature rule: Dunavant 3 | points: 4, degree: 3 + /// - blending map: IdentityMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- - /// 356 380 16 0 0 0 0 0 - void apply_macro_2D( real_t* RESTRICT _data_dst, - real_t* RESTRICT _data_rhoEdge, - real_t* RESTRICT _data_rhoVertex, - real_t* RESTRICT _data_src_edge_0, - real_t* RESTRICT _data_src_edge_1, - real_t* RESTRICT _data_src_vertex_0, - real_t* RESTRICT _data_src_vertex_1, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float ) const; - /// Kernel type: apply + /// 356 384 16 0 0 0 0 1 + void apply_P2VectorToP1ElementwiseGradRhoByRhoDotU_macro_2D( real_t* RESTRICT _data_dst, + real_t* RESTRICT _data_rhoEdge, + real_t* RESTRICT _data_rhoVertex, + real_t* RESTRICT _data_src_edge_0, + real_t* RESTRICT _data_src_edge_1, + real_t* RESTRICT _data_src_vertex_0, + real_t* RESTRICT _data_src_vertex_1, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float ) const; + + /// Integral: P2VectorToP1ElementwiseGradRhoByRhoDotU + /// - volume element: tetrahedron, dim: 3, vertices: 4, spacedim: 3 + /// - kernel type: apply + /// - loop strategy: SAWTOOTH /// - quadrature rule: Hammer-Marlowe-Stroud 3 | points: 5, degree: 3 + /// - blending map: IdentityMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- - /// 1153 1162 41 0 0 0 0 0 - void apply_macro_3D( real_t* RESTRICT _data_dst, - real_t* RESTRICT _data_rhoEdge, - real_t* RESTRICT _data_rhoVertex, - real_t* RESTRICT _data_src_edge_0, - real_t* RESTRICT _data_src_edge_1, - real_t* RESTRICT _data_src_edge_2, - real_t* RESTRICT _data_src_vertex_0, - real_t* RESTRICT _data_src_vertex_1, - real_t* RESTRICT _data_src_vertex_2, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_0comp2, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_1comp2, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - real_t macro_vertex_coord_id_2comp2, - real_t macro_vertex_coord_id_3comp0, - real_t macro_vertex_coord_id_3comp1, - real_t macro_vertex_coord_id_3comp2, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float ) const; - /// Kernel type: toMatrix + /// 1153 1167 41 0 0 0 0 1 + void apply_P2VectorToP1ElementwiseGradRhoByRhoDotU_macro_3D( real_t* RESTRICT _data_dst, + real_t* RESTRICT _data_rhoEdge, + real_t* RESTRICT _data_rhoVertex, + real_t* RESTRICT _data_src_edge_0, + real_t* RESTRICT _data_src_edge_1, + real_t* RESTRICT _data_src_edge_2, + real_t* RESTRICT _data_src_vertex_0, + real_t* RESTRICT _data_src_vertex_1, + real_t* RESTRICT _data_src_vertex_2, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_0comp2, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_1comp2, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + real_t macro_vertex_coord_id_2comp2, + real_t macro_vertex_coord_id_3comp0, + real_t macro_vertex_coord_id_3comp1, + real_t macro_vertex_coord_id_3comp2, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float ) const; + + /// Integral: P2VectorToP1ElementwiseGradRhoByRhoDotU + /// - volume element: triangle, dim: 2, vertices: 3, spacedim: 2 + /// - kernel type: toMatrix + /// - loop strategy: SAWTOOTH /// - quadrature rule: Dunavant 3 | points: 4, degree: 3 + /// - blending map: IdentityMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- - /// 320 344 16 0 0 0 0 3 - void toMatrix_macro_2D( idx_t* RESTRICT _data_dst, - real_t* RESTRICT _data_rhoEdge, - real_t* RESTRICT _data_rhoVertex, - idx_t* RESTRICT _data_src_edge_0, - idx_t* RESTRICT _data_src_edge_1, - idx_t* RESTRICT _data_src_vertex_0, - idx_t* RESTRICT _data_src_vertex_1, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - std::shared_ptr< SparseMatrixProxy > mat, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float ) const; - /// Kernel type: toMatrix + /// 320 348 16 0 0 0 0 4 + void toMatrix_P2VectorToP1ElementwiseGradRhoByRhoDotU_macro_2D( idx_t* RESTRICT _data_dst, + real_t* RESTRICT _data_rhoEdge, + real_t* RESTRICT _data_rhoVertex, + idx_t* RESTRICT _data_src_edge_0, + idx_t* RESTRICT _data_src_edge_1, + idx_t* RESTRICT _data_src_vertex_0, + idx_t* RESTRICT _data_src_vertex_1, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + std::shared_ptr< SparseMatrixProxy > mat, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float ) const; + + /// Integral: P2VectorToP1ElementwiseGradRhoByRhoDotU + /// - volume element: tetrahedron, dim: 3, vertices: 4, spacedim: 3 + /// - kernel type: toMatrix + /// - loop strategy: SAWTOOTH /// - quadrature rule: Hammer-Marlowe-Stroud 3 | points: 5, degree: 3 + /// - blending map: IdentityMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- - /// 1033 1042 41 0 0 0 0 3 - void toMatrix_macro_3D( idx_t* RESTRICT _data_dst, - real_t* RESTRICT _data_rhoEdge, - real_t* RESTRICT _data_rhoVertex, - idx_t* RESTRICT _data_src_edge_0, - idx_t* RESTRICT _data_src_edge_1, - idx_t* RESTRICT _data_src_edge_2, - idx_t* RESTRICT _data_src_vertex_0, - idx_t* RESTRICT _data_src_vertex_1, - idx_t* RESTRICT _data_src_vertex_2, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_0comp2, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_1comp2, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - real_t macro_vertex_coord_id_2comp2, - real_t macro_vertex_coord_id_3comp0, - real_t macro_vertex_coord_id_3comp1, - real_t macro_vertex_coord_id_3comp2, - std::shared_ptr< SparseMatrixProxy > mat, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float ) const; + /// 1033 1047 41 0 0 0 0 4 + void toMatrix_P2VectorToP1ElementwiseGradRhoByRhoDotU_macro_3D( idx_t* RESTRICT _data_dst, + real_t* RESTRICT _data_rhoEdge, + real_t* RESTRICT _data_rhoVertex, + idx_t* RESTRICT _data_src_edge_0, + idx_t* RESTRICT _data_src_edge_1, + idx_t* RESTRICT _data_src_edge_2, + idx_t* RESTRICT _data_src_vertex_0, + idx_t* RESTRICT _data_src_vertex_1, + idx_t* RESTRICT _data_src_vertex_2, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_0comp2, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_1comp2, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + real_t macro_vertex_coord_id_2comp2, + real_t macro_vertex_coord_id_3comp0, + real_t macro_vertex_coord_id_3comp1, + real_t macro_vertex_coord_id_3comp2, + std::shared_ptr< SparseMatrixProxy > mat, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float ) const; P2Function< real_t > rho; }; diff --git a/operators/grad_rho_by_rho_dot_u/P2VectorToP1ElementwiseGradRhoByRhoDotUAnnulusMap.cpp b/operators/grad_rho_by_rho_dot_u/P2VectorToP1ElementwiseGradRhoByRhoDotUAnnulusMap.cpp index 2571420005e230aa4c5a9d18f6fc44ca232d8ddb..882714052a7d631ebdfe10955a2f93f7e668a506 100644 --- a/operators/grad_rho_by_rho_dot_u/P2VectorToP1ElementwiseGradRhoByRhoDotUAnnulusMap.cpp +++ b/operators/grad_rho_by_rho_dot_u/P2VectorToP1ElementwiseGradRhoByRhoDotUAnnulusMap.cpp @@ -141,7 +141,7 @@ void P2VectorToP1ElementwiseGradRhoByRhoDotUAnnulusMap::apply( const P2VectorFun this->timingTree_->start( "kernel" ); - apply_macro_2D( + apply_P2VectorToP1ElementwiseGradRhoByRhoDotUAnnulusMap_macro_2D( _data_dst, _data_rhoEdge, @@ -166,6 +166,7 @@ void P2VectorToP1ElementwiseGradRhoByRhoDotUAnnulusMap::apply( const P2VectorFun refVertex_1, thrVertex_0, thrVertex_1 ); + this->timingTree_->stop( "kernel" ); } @@ -247,7 +248,7 @@ void P2VectorToP1ElementwiseGradRhoByRhoDotUAnnulusMap::toMatrix( const std::sha this->timingTree_->start( "kernel" ); - toMatrix_macro_2D( + toMatrix_P2VectorToP1ElementwiseGradRhoByRhoDotUAnnulusMap_macro_2D( _data_dst, _data_rhoEdge, @@ -273,6 +274,7 @@ void P2VectorToP1ElementwiseGradRhoByRhoDotUAnnulusMap::toMatrix( const std::sha refVertex_1, thrVertex_0, thrVertex_1 ); + this->timingTree_->stop( "kernel" ); } } diff --git a/operators/grad_rho_by_rho_dot_u/P2VectorToP1ElementwiseGradRhoByRhoDotUAnnulusMap.hpp b/operators/grad_rho_by_rho_dot_u/P2VectorToP1ElementwiseGradRhoByRhoDotUAnnulusMap.hpp index e94fb2651fd165189c847520e776772f55031a7b..64fadf55db7c1ff96d074293bcef80979d0ef5b7 100644 --- a/operators/grad_rho_by_rho_dot_u/P2VectorToP1ElementwiseGradRhoByRhoDotUAnnulusMap.hpp +++ b/operators/grad_rho_by_rho_dot_u/P2VectorToP1ElementwiseGradRhoByRhoDotUAnnulusMap.hpp @@ -29,6 +29,7 @@ #include "core/DataTypes.h" #include "hyteg/LikwidWrapper.hpp" +#include "hyteg/boundary/BoundaryConditions.hpp" #include "hyteg/communication/Syncing.hpp" #include "hyteg/edgedofspace/EdgeDoFMacroCell.hpp" #include "hyteg/geometry/AnnulusMap.hpp" @@ -38,6 +39,7 @@ #include "hyteg/p2functionspace/P2VectorFunction.hpp" #include "hyteg/primitivestorage/PrimitiveStorage.hpp" #include "hyteg/sparseassembly/SparseMatrixProxy.hpp" +#include "hyteg/types/types.hpp" #define FUNC_PREFIX @@ -51,7 +53,7 @@ namespace operatorgeneration { /// /// Weak formulation /// -/// u: trial function (space: TensorialVectorSpace(Lagrange, degree: 2)) +/// u: trial function (vectorial space: TensorialVectorSpace(Lagrange, degree: 2)) /// v: test function (space: Lagrange, degree: 1) /// rho: coefficient (space: Lagrange, degree: 2) /// @@ -79,65 +81,74 @@ class P2VectorToP1ElementwiseGradRhoByRhoDotUAnnulusMap : public Operator< P2Vec protected: private: - /// Kernel type: apply + /// Integral: P2VectorToP1ElementwiseGradRhoByRhoDotUAnnulusMap + /// - volume element: triangle, dim: 2, vertices: 3, spacedim: 2 + /// - kernel type: apply + /// - loop strategy: SAWTOOTH /// - quadrature rule: Dunavant 3 | points: 4, degree: 3 + /// - blending map: AnnulusMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- - /// 560 732 24 12 0 0 0 0 - void apply_macro_2D( real_t* RESTRICT _data_dst, - real_t* RESTRICT _data_rhoEdge, - real_t* RESTRICT _data_rhoVertex, - real_t* RESTRICT _data_src_edge_0, - real_t* RESTRICT _data_src_edge_1, - real_t* RESTRICT _data_src_vertex_0, - real_t* RESTRICT _data_src_vertex_1, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float, - real_t radRayVertex, - real_t radRefVertex, - real_t rayVertex_0, - real_t rayVertex_1, - real_t refVertex_0, - real_t refVertex_1, - real_t thrVertex_0, - real_t thrVertex_1 ) const; - /// Kernel type: toMatrix + /// 560 740 24 12 0 0 0 1 + void apply_P2VectorToP1ElementwiseGradRhoByRhoDotUAnnulusMap_macro_2D( real_t* RESTRICT _data_dst, + real_t* RESTRICT _data_rhoEdge, + real_t* RESTRICT _data_rhoVertex, + real_t* RESTRICT _data_src_edge_0, + real_t* RESTRICT _data_src_edge_1, + real_t* RESTRICT _data_src_vertex_0, + real_t* RESTRICT _data_src_vertex_1, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float, + real_t radRayVertex, + real_t radRefVertex, + real_t rayVertex_0, + real_t rayVertex_1, + real_t refVertex_0, + real_t refVertex_1, + real_t thrVertex_0, + real_t thrVertex_1 ) const; + + /// Integral: P2VectorToP1ElementwiseGradRhoByRhoDotUAnnulusMap + /// - volume element: triangle, dim: 2, vertices: 3, spacedim: 2 + /// - kernel type: toMatrix + /// - loop strategy: SAWTOOTH /// - quadrature rule: Dunavant 3 | points: 4, degree: 3 + /// - blending map: AnnulusMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- - /// 524 696 24 12 0 0 0 3 - void toMatrix_macro_2D( idx_t* RESTRICT _data_dst, - real_t* RESTRICT _data_rhoEdge, - real_t* RESTRICT _data_rhoVertex, - idx_t* RESTRICT _data_src_edge_0, - idx_t* RESTRICT _data_src_edge_1, - idx_t* RESTRICT _data_src_vertex_0, - idx_t* RESTRICT _data_src_vertex_1, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - std::shared_ptr< SparseMatrixProxy > mat, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float, - real_t radRayVertex, - real_t radRefVertex, - real_t rayVertex_0, - real_t rayVertex_1, - real_t refVertex_0, - real_t refVertex_1, - real_t thrVertex_0, - real_t thrVertex_1 ) const; + /// 524 704 24 12 0 0 0 4 + void toMatrix_P2VectorToP1ElementwiseGradRhoByRhoDotUAnnulusMap_macro_2D( idx_t* RESTRICT _data_dst, + real_t* RESTRICT _data_rhoEdge, + real_t* RESTRICT _data_rhoVertex, + idx_t* RESTRICT _data_src_edge_0, + idx_t* RESTRICT _data_src_edge_1, + idx_t* RESTRICT _data_src_vertex_0, + idx_t* RESTRICT _data_src_vertex_1, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + std::shared_ptr< SparseMatrixProxy > mat, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float, + real_t radRayVertex, + real_t radRefVertex, + real_t rayVertex_0, + real_t rayVertex_1, + real_t refVertex_0, + real_t refVertex_1, + real_t thrVertex_0, + real_t thrVertex_1 ) const; P2Function< real_t > rho; }; diff --git a/operators/grad_rho_by_rho_dot_u/P2VectorToP1ElementwiseGradRhoByRhoDotUIcosahedralShellMap.cpp b/operators/grad_rho_by_rho_dot_u/P2VectorToP1ElementwiseGradRhoByRhoDotUIcosahedralShellMap.cpp index 2a68954c8e4dd9640d90c9232b403c649fd3406c..7f7040c357b9ac0dd1d418cfd07493f512282273 100644 --- a/operators/grad_rho_by_rho_dot_u/P2VectorToP1ElementwiseGradRhoByRhoDotUIcosahedralShellMap.cpp +++ b/operators/grad_rho_by_rho_dot_u/P2VectorToP1ElementwiseGradRhoByRhoDotUIcosahedralShellMap.cpp @@ -163,7 +163,7 @@ void P2VectorToP1ElementwiseGradRhoByRhoDotUIcosahedralShellMap::apply( const P2 this->timingTree_->start( "kernel" ); - apply_macro_3D( + apply_P2VectorToP1ElementwiseGradRhoByRhoDotUIcosahedralShellMap_macro_3D( _data_dst, _data_rhoEdge, @@ -202,6 +202,7 @@ void P2VectorToP1ElementwiseGradRhoByRhoDotUIcosahedralShellMap::apply( const P2 thrVertex_0, thrVertex_1, thrVertex_2 ); + this->timingTree_->stop( "kernel" ); } @@ -294,7 +295,7 @@ void P2VectorToP1ElementwiseGradRhoByRhoDotUIcosahedralShellMap::toMatrix( const this->timingTree_->start( "kernel" ); - toMatrix_macro_3D( + toMatrix_P2VectorToP1ElementwiseGradRhoByRhoDotUIcosahedralShellMap_macro_3D( _data_dst, _data_rhoEdge, @@ -334,6 +335,7 @@ void P2VectorToP1ElementwiseGradRhoByRhoDotUIcosahedralShellMap::toMatrix( const thrVertex_0, thrVertex_1, thrVertex_2 ); + this->timingTree_->stop( "kernel" ); } } diff --git a/operators/grad_rho_by_rho_dot_u/P2VectorToP1ElementwiseGradRhoByRhoDotUIcosahedralShellMap.hpp b/operators/grad_rho_by_rho_dot_u/P2VectorToP1ElementwiseGradRhoByRhoDotUIcosahedralShellMap.hpp index 0166e62c825f7eec05ea27c3ad1974df03c2e458..a209370aaeaef4003a64c7f2314ffb46fd508f5e 100644 --- a/operators/grad_rho_by_rho_dot_u/P2VectorToP1ElementwiseGradRhoByRhoDotUIcosahedralShellMap.hpp +++ b/operators/grad_rho_by_rho_dot_u/P2VectorToP1ElementwiseGradRhoByRhoDotUIcosahedralShellMap.hpp @@ -29,6 +29,7 @@ #include "core/DataTypes.h" #include "hyteg/LikwidWrapper.hpp" +#include "hyteg/boundary/BoundaryConditions.hpp" #include "hyteg/communication/Syncing.hpp" #include "hyteg/edgedofspace/EdgeDoFMacroCell.hpp" #include "hyteg/geometry/IcosahedralShellMap.hpp" @@ -38,6 +39,7 @@ #include "hyteg/p2functionspace/P2VectorFunction.hpp" #include "hyteg/primitivestorage/PrimitiveStorage.hpp" #include "hyteg/sparseassembly/SparseMatrixProxy.hpp" +#include "hyteg/types/types.hpp" #define FUNC_PREFIX @@ -51,7 +53,7 @@ namespace operatorgeneration { /// /// Weak formulation /// -/// u: trial function (space: TensorialVectorSpace(Lagrange, degree: 2)) +/// u: trial function (vectorial space: TensorialVectorSpace(Lagrange, degree: 2)) /// v: test function (space: Lagrange, degree: 1) /// rho: coefficient (space: Lagrange, degree: 2) /// @@ -80,93 +82,102 @@ class P2VectorToP1ElementwiseGradRhoByRhoDotUIcosahedralShellMap protected: private: - /// Kernel type: apply + /// Integral: P2VectorToP1ElementwiseGradRhoByRhoDotUIcosahedralShellMap + /// - volume element: tetrahedron, dim: 3, vertices: 4, spacedim: 3 + /// - kernel type: apply + /// - loop strategy: SAWTOOTH /// - quadrature rule: Hammer-Marlowe-Stroud 3 | points: 5, degree: 3 + /// - blending map: IcosahedralShellMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- - /// 1938 2537 51 10 0 0 0 0 - void apply_macro_3D( real_t* RESTRICT _data_dst, - real_t* RESTRICT _data_rhoEdge, - real_t* RESTRICT _data_rhoVertex, - real_t* RESTRICT _data_src_edge_0, - real_t* RESTRICT _data_src_edge_1, - real_t* RESTRICT _data_src_edge_2, - real_t* RESTRICT _data_src_vertex_0, - real_t* RESTRICT _data_src_vertex_1, - real_t* RESTRICT _data_src_vertex_2, - real_t forVertex_0, - real_t forVertex_1, - real_t forVertex_2, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_0comp2, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_1comp2, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - real_t macro_vertex_coord_id_2comp2, - real_t macro_vertex_coord_id_3comp0, - real_t macro_vertex_coord_id_3comp1, - real_t macro_vertex_coord_id_3comp2, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float, - real_t radRayVertex, - real_t radRefVertex, - real_t rayVertex_0, - real_t rayVertex_1, - real_t rayVertex_2, - real_t refVertex_0, - real_t refVertex_1, - real_t refVertex_2, - real_t thrVertex_0, - real_t thrVertex_1, - real_t thrVertex_2 ) const; - /// Kernel type: toMatrix + /// 1938 2547 51 10 0 0 0 1 + void apply_P2VectorToP1ElementwiseGradRhoByRhoDotUIcosahedralShellMap_macro_3D( real_t* RESTRICT _data_dst, + real_t* RESTRICT _data_rhoEdge, + real_t* RESTRICT _data_rhoVertex, + real_t* RESTRICT _data_src_edge_0, + real_t* RESTRICT _data_src_edge_1, + real_t* RESTRICT _data_src_edge_2, + real_t* RESTRICT _data_src_vertex_0, + real_t* RESTRICT _data_src_vertex_1, + real_t* RESTRICT _data_src_vertex_2, + real_t forVertex_0, + real_t forVertex_1, + real_t forVertex_2, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_0comp2, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_1comp2, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + real_t macro_vertex_coord_id_2comp2, + real_t macro_vertex_coord_id_3comp0, + real_t macro_vertex_coord_id_3comp1, + real_t macro_vertex_coord_id_3comp2, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float, + real_t radRayVertex, + real_t radRefVertex, + real_t rayVertex_0, + real_t rayVertex_1, + real_t rayVertex_2, + real_t refVertex_0, + real_t refVertex_1, + real_t refVertex_2, + real_t thrVertex_0, + real_t thrVertex_1, + real_t thrVertex_2 ) const; + + /// Integral: P2VectorToP1ElementwiseGradRhoByRhoDotUIcosahedralShellMap + /// - volume element: tetrahedron, dim: 3, vertices: 4, spacedim: 3 + /// - kernel type: toMatrix + /// - loop strategy: SAWTOOTH /// - quadrature rule: Hammer-Marlowe-Stroud 3 | points: 5, degree: 3 + /// - blending map: IcosahedralShellMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- - /// 1818 2417 51 10 0 0 0 3 - void toMatrix_macro_3D( idx_t* RESTRICT _data_dst, - real_t* RESTRICT _data_rhoEdge, - real_t* RESTRICT _data_rhoVertex, - idx_t* RESTRICT _data_src_edge_0, - idx_t* RESTRICT _data_src_edge_1, - idx_t* RESTRICT _data_src_edge_2, - idx_t* RESTRICT _data_src_vertex_0, - idx_t* RESTRICT _data_src_vertex_1, - idx_t* RESTRICT _data_src_vertex_2, - real_t forVertex_0, - real_t forVertex_1, - real_t forVertex_2, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_0comp2, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_1comp2, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - real_t macro_vertex_coord_id_2comp2, - real_t macro_vertex_coord_id_3comp0, - real_t macro_vertex_coord_id_3comp1, - real_t macro_vertex_coord_id_3comp2, - std::shared_ptr< SparseMatrixProxy > mat, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float, - real_t radRayVertex, - real_t radRefVertex, - real_t rayVertex_0, - real_t rayVertex_1, - real_t rayVertex_2, - real_t refVertex_0, - real_t refVertex_1, - real_t refVertex_2, - real_t thrVertex_0, - real_t thrVertex_1, - real_t thrVertex_2 ) const; + /// 1818 2427 51 10 0 0 0 4 + void toMatrix_P2VectorToP1ElementwiseGradRhoByRhoDotUIcosahedralShellMap_macro_3D( idx_t* RESTRICT _data_dst, + real_t* RESTRICT _data_rhoEdge, + real_t* RESTRICT _data_rhoVertex, + idx_t* RESTRICT _data_src_edge_0, + idx_t* RESTRICT _data_src_edge_1, + idx_t* RESTRICT _data_src_edge_2, + idx_t* RESTRICT _data_src_vertex_0, + idx_t* RESTRICT _data_src_vertex_1, + idx_t* RESTRICT _data_src_vertex_2, + real_t forVertex_0, + real_t forVertex_1, + real_t forVertex_2, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_0comp2, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_1comp2, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + real_t macro_vertex_coord_id_2comp2, + real_t macro_vertex_coord_id_3comp0, + real_t macro_vertex_coord_id_3comp1, + real_t macro_vertex_coord_id_3comp2, + std::shared_ptr< SparseMatrixProxy > mat, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float, + real_t radRayVertex, + real_t radRefVertex, + real_t rayVertex_0, + real_t rayVertex_1, + real_t rayVertex_2, + real_t refVertex_0, + real_t refVertex_1, + real_t refVertex_2, + real_t thrVertex_0, + real_t thrVertex_1, + real_t thrVertex_2 ) const; P2Function< real_t > rho; }; diff --git a/operators/grad_rho_by_rho_dot_u/avx/P2VectorToP1ElementwiseGradRhoByRhoDotUAnnulusMap_apply_P2VectorToP1ElementwiseGradRhoByRhoDotUAnnulusMap_macro_2D.cpp b/operators/grad_rho_by_rho_dot_u/avx/P2VectorToP1ElementwiseGradRhoByRhoDotUAnnulusMap_apply_P2VectorToP1ElementwiseGradRhoByRhoDotUAnnulusMap_macro_2D.cpp new file mode 100644 index 0000000000000000000000000000000000000000..1afdcebe2e0506efcd3e86a266fa6f8f86b03e2c --- /dev/null +++ b/operators/grad_rho_by_rho_dot_u/avx/P2VectorToP1ElementwiseGradRhoByRhoDotUAnnulusMap_apply_P2VectorToP1ElementwiseGradRhoByRhoDotUAnnulusMap_macro_2D.cpp @@ -0,0 +1,1071 @@ +/* +* Copyright (c) 2017-2024 Nils Kohl, Daniel Bauer, Fabian Böhm. +* +* This file is part of HyTeG +* (see https://i10git.cs.fau.de/hyteg/hyteg). +* +* This program is free software: you can redistribute it and/or modify +* it under the terms of the GNU General Public License as published by +* the Free Software Foundation, either version 3 of the License, or +* (at your option) any later version. +* +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU General Public License for more details. +* +* You should have received a copy of the GNU General Public License +* along with this program. If not, see <http://www.gnu.org/licenses/>. +*/ + +/* +* The entire file was generated with the HyTeG Operator Generator. +* +* Avoid modifying this file. If buggy, consider fixing the generator itself. +*/ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +#include "../P2VectorToP1ElementwiseGradRhoByRhoDotUAnnulusMap.hpp" + +#define FUNC_PREFIX + +namespace hyteg { + +namespace operatorgeneration { + +void P2VectorToP1ElementwiseGradRhoByRhoDotUAnnulusMap::apply_P2VectorToP1ElementwiseGradRhoByRhoDotUAnnulusMap_macro_2D( real_t * RESTRICT _data_dst, real_t * RESTRICT _data_rhoEdge, real_t * RESTRICT _data_rhoVertex, real_t * RESTRICT _data_src_edge_0, real_t * RESTRICT _data_src_edge_1, real_t * RESTRICT _data_src_vertex_0, real_t * RESTRICT _data_src_vertex_1, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const +{ + { + const real_t _data_q_w [] = {-0.28125, 0.26041666666666669, 0.26041666666666669, 0.26041666666666669}; + + const real_t _data_q_p_0 [] = {0.33333333333333331, 0.20000000000000001, 0.59999999999999998, 0.20000000000000001}; + + const real_t _data_q_p_1 [] = {0.33333333333333331, 0.59999999999999998, 0.20000000000000001, 0.20000000000000001}; + + const real_t tmp_coords_jac_0_GRAY = 1.0 / (micro_edges_per_macro_edge_float)*1.0; + const real_t p_affine_const_0_0_GRAY = macro_vertex_coord_id_0comp0; + const real_t p_affine_const_0_1_GRAY = macro_vertex_coord_id_0comp1; + const real_t p_affine_const_1_0_GRAY = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0); + const real_t p_affine_const_1_1_GRAY = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1); + const real_t p_affine_const_2_0_GRAY = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0); + const real_t p_affine_const_2_1_GRAY = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1); + const real_t jac_affine_0_0_GRAY = -p_affine_const_0_0_GRAY + p_affine_const_1_0_GRAY; + const real_t jac_affine_0_1_GRAY = -p_affine_const_0_0_GRAY + p_affine_const_2_0_GRAY; + const real_t jac_affine_1_0_GRAY = -p_affine_const_0_1_GRAY + p_affine_const_1_1_GRAY; + const real_t jac_affine_1_1_GRAY = -p_affine_const_0_1_GRAY + p_affine_const_2_1_GRAY; + const real_t tmp_coords_jac_1_GRAY = jac_affine_0_0_GRAY*jac_affine_1_1_GRAY - jac_affine_0_1_GRAY*jac_affine_1_0_GRAY; + const real_t tmp_coords_jac_2_GRAY = 1.0 / (tmp_coords_jac_1_GRAY); + const real_t jac_affine_inv_0_0_GRAY = jac_affine_1_1_GRAY*tmp_coords_jac_2_GRAY; + const real_t jac_affine_inv_0_1_GRAY = -jac_affine_0_1_GRAY*tmp_coords_jac_2_GRAY; + const real_t jac_affine_inv_1_0_GRAY = -jac_affine_1_0_GRAY*tmp_coords_jac_2_GRAY; + const real_t jac_affine_inv_1_1_GRAY = jac_affine_0_0_GRAY*tmp_coords_jac_2_GRAY; + const real_t abs_det_jac_affine_GRAY = abs(tmp_coords_jac_1_GRAY); + const real_t tmp_qloop_1 = -rayVertex_1 + thrVertex_1; + const real_t tmp_qloop_7 = rayVertex_0 - thrVertex_0; + const real_t tmp_qloop_8 = -tmp_qloop_7; + const real_t tmp_qloop_9 = 1.0 / (tmp_qloop_1*(-rayVertex_0 + refVertex_0) - tmp_qloop_8*(-rayVertex_1 + refVertex_1)); + const real_t tmp_qloop_10 = -radRayVertex + radRefVertex; + const real_t tmp_qloop_11 = tmp_qloop_10*tmp_qloop_9; + const real_t tmp_qloop_12 = tmp_qloop_11*1.0; + { + /* FaceType.GRAY */ + for (int64_t ctr_1 = 0; ctr_1 < micro_edges_per_macro_edge; ctr_1 += 1) + { + for (int64_t ctr_0 = 0; ctr_0 < (int64_t)((-ctr_1 + micro_edges_per_macro_edge) / (4)) * (4); ctr_0 += 4) + { + + const int64_t phantom_ctr_0 = ctr_0; + real_t _data_float_loop_ctr_array_dim_0[4]; + _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0; + _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1; + _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2; + _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3; + real_t _data_float_loop_ctr_array_dim_1[4]; + _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1; + + const __m256d p_affine_0_0 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)); + const __m256d p_affine_0_1 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)); + const __m256d p_affine_1_0 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)); + const __m256d p_affine_1_1 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)); + const __m256d p_affine_2_0 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)); + const __m256d p_affine_2_1 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)); + const __m256d src_dof_0 = _mm256_loadu_pd(& _data_src_vertex_0[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))]); + const __m256d src_dof_1 = _mm256_loadu_pd(& _data_src_vertex_0[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]); + const __m256d src_dof_2 = _mm256_loadu_pd(& _data_src_vertex_0[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]); + const __m256d src_dof_3 = _mm256_loadu_pd(& _data_src_edge_0[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]); + const __m256d src_dof_4 = _mm256_loadu_pd(& _data_src_edge_0[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]); + const __m256d src_dof_5 = _mm256_loadu_pd(& _data_src_edge_0[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))]); + const __m256d src_dof_6 = _mm256_loadu_pd(& _data_src_vertex_1[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))]); + const __m256d src_dof_7 = _mm256_loadu_pd(& _data_src_vertex_1[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]); + const __m256d src_dof_8 = _mm256_loadu_pd(& _data_src_vertex_1[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]); + const __m256d src_dof_9 = _mm256_loadu_pd(& _data_src_edge_1[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]); + const __m256d src_dof_10 = _mm256_loadu_pd(& _data_src_edge_1[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]); + const __m256d src_dof_11 = _mm256_loadu_pd(& _data_src_edge_1[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))]); + const __m256d rho_dof_0 = _mm256_loadu_pd(& _data_rhoVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))]); + const __m256d rho_dof_1 = _mm256_loadu_pd(& _data_rhoVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]); + const __m256d rho_dof_2 = _mm256_loadu_pd(& _data_rhoVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]); + const __m256d rho_dof_3 = _mm256_loadu_pd(& _data_rhoEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]); + const __m256d rho_dof_4 = _mm256_loadu_pd(& _data_rhoEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]); + const __m256d rho_dof_5 = _mm256_loadu_pd(& _data_rhoEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))]); + __m256d q_acc_0_0 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_1 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_2 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_3 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_4 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_5 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_6 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_7 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_8 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_9 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_10 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_11 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_0 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_1 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_2 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_3 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_4 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_5 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_6 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_7 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_8 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_9 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_10 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_11 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_0 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_1 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_2 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_3 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_4 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_5 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_6 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_7 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_8 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_9 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_10 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_11 = _mm256_set_pd(0.0,0.0,0.0,0.0); + for (int64_t q = 0; q < 4; q += 1) + { + const __m256d tmp_qloop_0 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(p_affine_0_0,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),p_affine_1_0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(p_affine_0_0,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),p_affine_2_0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]))),p_affine_0_0); + const __m256d tmp_qloop_2 = _mm256_mul_pd(tmp_qloop_0,tmp_qloop_0); + const __m256d tmp_qloop_3 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(p_affine_0_1,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),p_affine_1_1),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(p_affine_0_1,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),p_affine_2_1),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]))),p_affine_0_1); + const __m256d tmp_qloop_4 = _mm256_mul_pd(tmp_qloop_3,tmp_qloop_3); + const __m256d tmp_qloop_5 = _mm256_add_pd(tmp_qloop_2,tmp_qloop_4); + const __m256d tmp_qloop_6 = _mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_sqrt_pd(tmp_qloop_5)); + const __m256d tmp_qloop_13 = _mm256_mul_pd(tmp_qloop_6,_mm256_set_pd(tmp_qloop_12,tmp_qloop_12,tmp_qloop_12,tmp_qloop_12)); + const __m256d tmp_qloop_14 = _mm256_mul_pd(tmp_qloop_13,_mm256_set_pd(tmp_qloop_1,tmp_qloop_1,tmp_qloop_1,tmp_qloop_1)); + const __m256d tmp_qloop_15 = _mm256_div_pd(_mm256_sqrt_pd(tmp_qloop_5),_mm256_mul_pd(tmp_qloop_5,tmp_qloop_5)); + const __m256d tmp_qloop_16 = _mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(rayVertex_0,rayVertex_0,rayVertex_0,rayVertex_0)),tmp_qloop_0),_mm256_set_pd(tmp_qloop_1,tmp_qloop_1,tmp_qloop_1,tmp_qloop_1)),_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(rayVertex_1,rayVertex_1,rayVertex_1,rayVertex_1)),tmp_qloop_3),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_qloop_8,tmp_qloop_8,tmp_qloop_8,tmp_qloop_8))),_mm256_set_pd(tmp_qloop_11,tmp_qloop_11,tmp_qloop_11,tmp_qloop_11)),_mm256_set_pd(radRayVertex,radRayVertex,radRayVertex,radRayVertex)); + const __m256d tmp_qloop_17 = _mm256_mul_pd(tmp_qloop_15,tmp_qloop_16); + const __m256d tmp_qloop_18 = _mm256_mul_pd(tmp_qloop_17,_mm256_set_pd(1.0,1.0,1.0,1.0)); + const __m256d tmp_qloop_19 = _mm256_mul_pd(tmp_qloop_13,_mm256_set_pd(tmp_qloop_8,tmp_qloop_8,tmp_qloop_8,tmp_qloop_8)); + const __m256d tmp_qloop_20 = _mm256_mul_pd(tmp_qloop_0,tmp_qloop_18); + const __m256d tmp_qloop_23 = _mm256_mul_pd(tmp_qloop_15,_mm256_set_pd(tmp_qloop_12,tmp_qloop_12,tmp_qloop_12,tmp_qloop_12)); + const __m256d tmp_qloop_24 = _mm256_mul_pd(tmp_qloop_23,_mm256_set_pd(tmp_qloop_1,tmp_qloop_1,tmp_qloop_1,tmp_qloop_1)); + const __m256d tmp_qloop_25 = _mm256_mul_pd(tmp_qloop_2,tmp_qloop_24); + const __m256d tmp_qloop_26 = _mm256_mul_pd(_mm256_mul_pd(tmp_qloop_16,_mm256_div_pd(_mm256_sqrt_pd(tmp_qloop_5),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_5,tmp_qloop_5),tmp_qloop_5))),_mm256_set_pd(3.0,3.0,3.0,3.0)); + const __m256d tmp_qloop_27 = _mm256_mul_pd(_mm256_mul_pd(tmp_qloop_0,tmp_qloop_26),tmp_qloop_4); + const __m256d tmp_qloop_28 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_27,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_24,tmp_qloop_4)); + const __m256d tmp_qloop_29 = _mm256_mul_pd(tmp_qloop_23,_mm256_set_pd(tmp_qloop_7,tmp_qloop_7,tmp_qloop_7,tmp_qloop_7)); + const __m256d tmp_qloop_30 = _mm256_mul_pd(tmp_qloop_0,tmp_qloop_3); + const __m256d tmp_qloop_31 = _mm256_mul_pd(tmp_qloop_24,tmp_qloop_30); + const __m256d tmp_qloop_32 = _mm256_mul_pd(tmp_qloop_17,_mm256_set_pd(2.0,2.0,2.0,2.0)); + const __m256d tmp_qloop_33 = _mm256_mul_pd(_mm256_mul_pd(tmp_qloop_2,tmp_qloop_26),tmp_qloop_3); + const __m256d tmp_qloop_34 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_33,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_18,tmp_qloop_3)); + const __m256d tmp_qloop_35 = _mm256_mul_pd(tmp_qloop_29,tmp_qloop_30); + const __m256d tmp_qloop_36 = _mm256_mul_pd(_mm256_mul_pd(tmp_qloop_23,tmp_qloop_30),_mm256_set_pd(tmp_qloop_8,tmp_qloop_8,tmp_qloop_8,tmp_qloop_8)); + const __m256d tmp_qloop_37 = _mm256_mul_pd(_mm256_set_pd(4.0,4.0,4.0,4.0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])); + const __m256d tmp_qloop_38 = _mm256_mul_pd(tmp_qloop_37,_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q])); + const __m256d tmp_qloop_39 = _mm256_mul_pd(_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q]),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])); + const __m256d tmp_qloop_40 = _mm256_mul_pd(tmp_qloop_39,_mm256_set_pd(2.0,2.0,2.0,2.0)); + const __m256d tmp_qloop_41 = _mm256_mul_pd(_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q])); + const __m256d tmp_qloop_42 = _mm256_mul_pd(tmp_qloop_41,_mm256_set_pd(2.0,2.0,2.0,2.0)); + const __m256d tmp_qloop_43 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-3.0,-3.0,-3.0,-3.0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),_mm256_mul_pd(_mm256_set_pd(-3.0,-3.0,-3.0,-3.0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),tmp_qloop_38),tmp_qloop_40),tmp_qloop_42); + const __m256d tmp_qloop_44 = _mm256_mul_pd(_mm256_set_pd(4.0,4.0,4.0,4.0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q])); + const __m256d tmp_qloop_45 = _mm256_mul_pd(rho_dof_0,_mm256_add_pd(_mm256_add_pd(_mm256_set_pd(-3.0,-3.0,-3.0,-3.0),tmp_qloop_37),tmp_qloop_44)); + const __m256d tmp_qloop_46 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(rho_dof_1,_mm256_add_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),tmp_qloop_37)),_mm256_mul_pd(rho_dof_5,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-8.0,-8.0,-8.0,-8.0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),_mm256_mul_pd(tmp_qloop_44,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_set_pd(4.0,4.0,4.0,4.0)))),_mm256_mul_pd(rho_dof_3,tmp_qloop_44)),_mm256_mul_pd(_mm256_mul_pd(rho_dof_4,tmp_qloop_44),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_45); + const __m256d tmp_qloop_47 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(rho_dof_2,_mm256_add_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),tmp_qloop_44)),_mm256_mul_pd(rho_dof_4,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-8.0,-8.0,-8.0,-8.0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q])),_mm256_mul_pd(tmp_qloop_37,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_set_pd(4.0,4.0,4.0,4.0)))),_mm256_mul_pd(rho_dof_3,tmp_qloop_37)),_mm256_mul_pd(_mm256_mul_pd(rho_dof_5,tmp_qloop_37),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_45); + const __m256d tmp_qloop_49 = _mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),tmp_qloop_40); + const __m256d tmp_qloop_50 = _mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q])),tmp_qloop_42); + const __m256d tmp_qloop_51 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_41,_mm256_set_pd(-4.0,-4.0,-4.0,-4.0)),_mm256_mul_pd(tmp_qloop_38,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_44); + const __m256d tmp_qloop_52 = _mm256_mul_pd(tmp_qloop_39,_mm256_set_pd(4.0,4.0,4.0,4.0)); + const __m256d tmp_qloop_53 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_38,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_52,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_37); + const __m256d jac_blending_0_0 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_0,tmp_qloop_14),_mm256_mul_pd(tmp_qloop_18,tmp_qloop_4)); + const __m256d jac_blending_0_1 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_0,tmp_qloop_19),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_0,tmp_qloop_15),tmp_qloop_16),tmp_qloop_3),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))); + const __m256d jac_blending_1_0 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_14,tmp_qloop_3),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_20,tmp_qloop_3),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))); + const __m256d jac_blending_1_1 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_19,tmp_qloop_3),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_15,tmp_qloop_16),tmp_qloop_2),_mm256_set_pd(1.0,1.0,1.0,1.0))); + const __m256d tmp_qloop_21 = _mm256_add_pd(_mm256_mul_pd(jac_blending_0_0,jac_blending_1_1),_mm256_mul_pd(_mm256_mul_pd(jac_blending_0_1,jac_blending_1_0),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))); + const __m256d tmp_qloop_22 = _mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),tmp_qloop_21); + const __m256d abs_det_jac_blending = tmp_qloop_21; + const __m256d tmp_qloop_54 = _mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(abs_det_jac_blending,_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(rho_dof_0,tmp_qloop_43),_mm256_mul_pd(rho_dof_1,tmp_qloop_49)),_mm256_mul_pd(rho_dof_2,tmp_qloop_50)),_mm256_mul_pd(rho_dof_3,tmp_qloop_38)),_mm256_mul_pd(rho_dof_4,tmp_qloop_51)),_mm256_mul_pd(rho_dof_5,tmp_qloop_53)))),_mm256_set_pd(_data_q_w[q],_data_q_w[q],_data_q_w[q],_data_q_w[q])),_mm256_set_pd(abs_det_jac_affine_GRAY,abs_det_jac_affine_GRAY,abs_det_jac_affine_GRAY,abs_det_jac_affine_GRAY)); + const __m256d tmp_qloop_55 = _mm256_mul_pd(tmp_qloop_54,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]))),_mm256_set_pd(1.0,1.0,1.0,1.0))); + const __m256d tmp_qloop_59 = _mm256_mul_pd(tmp_qloop_54,_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])); + const __m256d tmp_qloop_61 = _mm256_mul_pd(tmp_qloop_54,_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q])); + const __m256d tmp_qloop_65 = _mm256_mul_pd(_mm256_mul_pd(tmp_qloop_37,tmp_qloop_41),tmp_qloop_54); + const __m256d jac_blending_inv_0_0 = _mm256_mul_pd(jac_blending_1_1,tmp_qloop_22); + const __m256d jac_blending_inv_0_1 = _mm256_mul_pd(_mm256_mul_pd(jac_blending_0_1,tmp_qloop_22),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)); + const __m256d jac_blending_inv_1_0 = _mm256_mul_pd(_mm256_mul_pd(jac_blending_1_0,tmp_qloop_22),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)); + const __m256d tmp_qloop_48 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_46,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_0,_mm256_set_pd(jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY)),_mm256_mul_pd(jac_blending_inv_1_0,_mm256_set_pd(jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY)))),_mm256_mul_pd(tmp_qloop_47,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_0,_mm256_set_pd(jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY)),_mm256_mul_pd(jac_blending_inv_1_0,_mm256_set_pd(jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY))))); + const __m256d tmp_qloop_56 = _mm256_mul_pd(tmp_qloop_48,tmp_qloop_55); + const __m256d tmp_qloop_60 = _mm256_mul_pd(tmp_qloop_48,tmp_qloop_59); + const __m256d tmp_qloop_62 = _mm256_mul_pd(tmp_qloop_48,tmp_qloop_61); + const __m256d jac_blending_inv_1_1 = _mm256_mul_pd(jac_blending_0_0,tmp_qloop_22); + const __m256d tmp_qloop_57 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_46,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_1,_mm256_set_pd(jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY)),_mm256_mul_pd(jac_blending_inv_1_1,_mm256_set_pd(jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY)))),_mm256_mul_pd(tmp_qloop_47,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_1,_mm256_set_pd(jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY)),_mm256_mul_pd(jac_blending_inv_1_1,_mm256_set_pd(jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY))))); + const __m256d tmp_qloop_58 = _mm256_mul_pd(tmp_qloop_55,tmp_qloop_57); + const __m256d tmp_qloop_63 = _mm256_mul_pd(tmp_qloop_57,tmp_qloop_59); + const __m256d tmp_qloop_64 = _mm256_mul_pd(tmp_qloop_57,tmp_qloop_61); + const __m256d hessian_blending_0_0_0 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_25,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),tmp_qloop_14),tmp_qloop_28); + const __m256d hessian_blending_1_0_0 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_31,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_29,tmp_qloop_4)),_mm256_mul_pd(tmp_qloop_3,tmp_qloop_32)),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_26,_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_3,tmp_qloop_3),tmp_qloop_3)),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))); + const __m256d hessian_blending_0_0_1 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_34,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_15,tmp_qloop_30),_mm256_set_pd(-2.0,-2.0,-2.0,-2.0)),_mm256_set_pd(tmp_qloop_1,tmp_qloop_1,tmp_qloop_1,tmp_qloop_1)),_mm256_set_pd(tmp_qloop_11,tmp_qloop_11,tmp_qloop_11,tmp_qloop_11))); + const __m256d hessian_blending_1_0_1 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_20,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_28,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_mul_pd(tmp_qloop_35,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_6,_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_1,tmp_qloop_1,tmp_qloop_1,tmp_qloop_1)),_mm256_set_pd(tmp_qloop_10,tmp_qloop_10,tmp_qloop_10,tmp_qloop_10)),_mm256_set_pd(tmp_qloop_9,tmp_qloop_9,tmp_qloop_9,tmp_qloop_9))); + const __m256d hessian_blending_0_1_0 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_19,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_31,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_mul_pd(tmp_qloop_34,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_15,tmp_qloop_2),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_10,tmp_qloop_10,tmp_qloop_10,tmp_qloop_10)),_mm256_set_pd(tmp_qloop_8,tmp_qloop_8,tmp_qloop_8,tmp_qloop_8)),_mm256_set_pd(tmp_qloop_9,tmp_qloop_9,tmp_qloop_9,tmp_qloop_9))); + const __m256d hessian_blending_1_1_0 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_20,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_35,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_27),tmp_qloop_36); + const __m256d hessian_blending_0_1_1 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_0,tmp_qloop_32),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_0,tmp_qloop_0),tmp_qloop_0),tmp_qloop_26),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_25),tmp_qloop_36); + const __m256d hessian_blending_1_1_1 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_19,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_33,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_15,tmp_qloop_2),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_10,tmp_qloop_10,tmp_qloop_10,tmp_qloop_10)),_mm256_set_pd(tmp_qloop_7,tmp_qloop_7,tmp_qloop_7,tmp_qloop_7)),_mm256_set_pd(tmp_qloop_9,tmp_qloop_9,tmp_qloop_9,tmp_qloop_9))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_15,tmp_qloop_4),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_10,tmp_qloop_10,tmp_qloop_10,tmp_qloop_10)),_mm256_set_pd(tmp_qloop_8,tmp_qloop_8,tmp_qloop_8,tmp_qloop_8)),_mm256_set_pd(tmp_qloop_9,tmp_qloop_9,tmp_qloop_9,tmp_qloop_9))); + const __m256d q_tmp_0_0 = _mm256_mul_pd(tmp_qloop_43,tmp_qloop_56); + const __m256d q_tmp_0_1 = _mm256_mul_pd(tmp_qloop_49,tmp_qloop_56); + const __m256d q_tmp_0_2 = _mm256_mul_pd(tmp_qloop_50,tmp_qloop_56); + const __m256d q_tmp_0_3 = _mm256_mul_pd(tmp_qloop_38,tmp_qloop_56); + const __m256d q_tmp_0_4 = _mm256_mul_pd(tmp_qloop_51,tmp_qloop_56); + const __m256d q_tmp_0_5 = _mm256_mul_pd(tmp_qloop_53,tmp_qloop_56); + const __m256d q_tmp_0_6 = _mm256_mul_pd(tmp_qloop_43,tmp_qloop_58); + const __m256d q_tmp_0_7 = _mm256_mul_pd(tmp_qloop_49,tmp_qloop_58); + const __m256d q_tmp_0_8 = _mm256_mul_pd(tmp_qloop_50,tmp_qloop_58); + const __m256d q_tmp_0_9 = _mm256_mul_pd(tmp_qloop_38,tmp_qloop_58); + const __m256d q_tmp_0_10 = _mm256_mul_pd(tmp_qloop_51,tmp_qloop_58); + const __m256d q_tmp_0_11 = _mm256_mul_pd(tmp_qloop_53,tmp_qloop_58); + const __m256d q_tmp_1_0 = _mm256_mul_pd(tmp_qloop_43,tmp_qloop_60); + const __m256d q_tmp_1_1 = _mm256_mul_pd(tmp_qloop_49,tmp_qloop_60); + const __m256d q_tmp_1_2 = _mm256_mul_pd(tmp_qloop_50,tmp_qloop_60); + const __m256d q_tmp_1_3 = _mm256_mul_pd(tmp_qloop_52,tmp_qloop_62); + const __m256d q_tmp_1_4 = _mm256_mul_pd(tmp_qloop_51,tmp_qloop_60); + const __m256d q_tmp_1_5 = _mm256_mul_pd(tmp_qloop_53,tmp_qloop_60); + const __m256d q_tmp_1_6 = _mm256_mul_pd(tmp_qloop_43,tmp_qloop_63); + const __m256d q_tmp_1_7 = _mm256_mul_pd(tmp_qloop_49,tmp_qloop_63); + const __m256d q_tmp_1_8 = _mm256_mul_pd(tmp_qloop_50,tmp_qloop_63); + const __m256d q_tmp_1_9 = _mm256_mul_pd(tmp_qloop_52,tmp_qloop_64); + const __m256d q_tmp_1_10 = _mm256_mul_pd(tmp_qloop_51,tmp_qloop_63); + const __m256d q_tmp_1_11 = _mm256_mul_pd(tmp_qloop_53,tmp_qloop_63); + const __m256d q_tmp_2_0 = _mm256_mul_pd(tmp_qloop_43,tmp_qloop_62); + const __m256d q_tmp_2_1 = _mm256_mul_pd(tmp_qloop_49,tmp_qloop_62); + const __m256d q_tmp_2_2 = _mm256_mul_pd(tmp_qloop_50,tmp_qloop_62); + const __m256d q_tmp_2_3 = _mm256_mul_pd(tmp_qloop_48,tmp_qloop_65); + const __m256d q_tmp_2_4 = _mm256_mul_pd(tmp_qloop_51,tmp_qloop_62); + const __m256d q_tmp_2_5 = _mm256_mul_pd(tmp_qloop_53,tmp_qloop_62); + const __m256d q_tmp_2_6 = _mm256_mul_pd(tmp_qloop_43,tmp_qloop_64); + const __m256d q_tmp_2_7 = _mm256_mul_pd(tmp_qloop_49,tmp_qloop_64); + const __m256d q_tmp_2_8 = _mm256_mul_pd(tmp_qloop_50,tmp_qloop_64); + const __m256d q_tmp_2_9 = _mm256_mul_pd(tmp_qloop_57,tmp_qloop_65); + const __m256d q_tmp_2_10 = _mm256_mul_pd(tmp_qloop_51,tmp_qloop_64); + const __m256d q_tmp_2_11 = _mm256_mul_pd(tmp_qloop_53,tmp_qloop_64); + q_acc_0_0 = _mm256_add_pd(q_acc_0_0,q_tmp_0_0); + q_acc_0_1 = _mm256_add_pd(q_acc_0_1,q_tmp_0_1); + q_acc_0_2 = _mm256_add_pd(q_acc_0_2,q_tmp_0_2); + q_acc_0_3 = _mm256_add_pd(q_acc_0_3,q_tmp_0_3); + q_acc_0_4 = _mm256_add_pd(q_acc_0_4,q_tmp_0_4); + q_acc_0_5 = _mm256_add_pd(q_acc_0_5,q_tmp_0_5); + q_acc_0_6 = _mm256_add_pd(q_acc_0_6,q_tmp_0_6); + q_acc_0_7 = _mm256_add_pd(q_acc_0_7,q_tmp_0_7); + q_acc_0_8 = _mm256_add_pd(q_acc_0_8,q_tmp_0_8); + q_acc_0_9 = _mm256_add_pd(q_acc_0_9,q_tmp_0_9); + q_acc_0_10 = _mm256_add_pd(q_acc_0_10,q_tmp_0_10); + q_acc_0_11 = _mm256_add_pd(q_acc_0_11,q_tmp_0_11); + q_acc_1_0 = _mm256_add_pd(q_acc_1_0,q_tmp_1_0); + q_acc_1_1 = _mm256_add_pd(q_acc_1_1,q_tmp_1_1); + q_acc_1_2 = _mm256_add_pd(q_acc_1_2,q_tmp_1_2); + q_acc_1_3 = _mm256_add_pd(q_acc_1_3,q_tmp_1_3); + q_acc_1_4 = _mm256_add_pd(q_acc_1_4,q_tmp_1_4); + q_acc_1_5 = _mm256_add_pd(q_acc_1_5,q_tmp_1_5); + q_acc_1_6 = _mm256_add_pd(q_acc_1_6,q_tmp_1_6); + q_acc_1_7 = _mm256_add_pd(q_acc_1_7,q_tmp_1_7); + q_acc_1_8 = _mm256_add_pd(q_acc_1_8,q_tmp_1_8); + q_acc_1_9 = _mm256_add_pd(q_acc_1_9,q_tmp_1_9); + q_acc_1_10 = _mm256_add_pd(q_acc_1_10,q_tmp_1_10); + q_acc_1_11 = _mm256_add_pd(q_acc_1_11,q_tmp_1_11); + q_acc_2_0 = _mm256_add_pd(q_acc_2_0,q_tmp_2_0); + q_acc_2_1 = _mm256_add_pd(q_acc_2_1,q_tmp_2_1); + q_acc_2_2 = _mm256_add_pd(q_acc_2_2,q_tmp_2_2); + q_acc_2_3 = _mm256_add_pd(q_acc_2_3,q_tmp_2_3); + q_acc_2_4 = _mm256_add_pd(q_acc_2_4,q_tmp_2_4); + q_acc_2_5 = _mm256_add_pd(q_acc_2_5,q_tmp_2_5); + q_acc_2_6 = _mm256_add_pd(q_acc_2_6,q_tmp_2_6); + q_acc_2_7 = _mm256_add_pd(q_acc_2_7,q_tmp_2_7); + q_acc_2_8 = _mm256_add_pd(q_acc_2_8,q_tmp_2_8); + q_acc_2_9 = _mm256_add_pd(q_acc_2_9,q_tmp_2_9); + q_acc_2_10 = _mm256_add_pd(q_acc_2_10,q_tmp_2_10); + q_acc_2_11 = _mm256_add_pd(q_acc_2_11,q_tmp_2_11); + } + const __m256d elMatVec_0 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(q_acc_0_0,src_dof_0),_mm256_mul_pd(q_acc_0_1,src_dof_1)),_mm256_mul_pd(q_acc_0_10,src_dof_10)),_mm256_mul_pd(q_acc_0_11,src_dof_11)),_mm256_mul_pd(q_acc_0_2,src_dof_2)),_mm256_mul_pd(q_acc_0_3,src_dof_3)),_mm256_mul_pd(q_acc_0_4,src_dof_4)),_mm256_mul_pd(q_acc_0_5,src_dof_5)),_mm256_mul_pd(q_acc_0_6,src_dof_6)),_mm256_mul_pd(q_acc_0_7,src_dof_7)),_mm256_mul_pd(q_acc_0_8,src_dof_8)),_mm256_mul_pd(q_acc_0_9,src_dof_9)); + const __m256d elMatVec_1 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(q_acc_1_0,src_dof_0),_mm256_mul_pd(q_acc_1_1,src_dof_1)),_mm256_mul_pd(q_acc_1_10,src_dof_10)),_mm256_mul_pd(q_acc_1_11,src_dof_11)),_mm256_mul_pd(q_acc_1_2,src_dof_2)),_mm256_mul_pd(q_acc_1_3,src_dof_3)),_mm256_mul_pd(q_acc_1_4,src_dof_4)),_mm256_mul_pd(q_acc_1_5,src_dof_5)),_mm256_mul_pd(q_acc_1_6,src_dof_6)),_mm256_mul_pd(q_acc_1_7,src_dof_7)),_mm256_mul_pd(q_acc_1_8,src_dof_8)),_mm256_mul_pd(q_acc_1_9,src_dof_9)); + const __m256d elMatVec_2 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(q_acc_2_0,src_dof_0),_mm256_mul_pd(q_acc_2_1,src_dof_1)),_mm256_mul_pd(q_acc_2_10,src_dof_10)),_mm256_mul_pd(q_acc_2_11,src_dof_11)),_mm256_mul_pd(q_acc_2_2,src_dof_2)),_mm256_mul_pd(q_acc_2_3,src_dof_3)),_mm256_mul_pd(q_acc_2_4,src_dof_4)),_mm256_mul_pd(q_acc_2_5,src_dof_5)),_mm256_mul_pd(q_acc_2_6,src_dof_6)),_mm256_mul_pd(q_acc_2_7,src_dof_7)),_mm256_mul_pd(q_acc_2_8,src_dof_8)),_mm256_mul_pd(q_acc_2_9,src_dof_9)); + _mm256_storeu_pd(&_data_dst[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))],_mm256_add_pd(elMatVec_0,_mm256_loadu_pd(& _data_dst[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))]))); + _mm256_storeu_pd(&_data_dst[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1],_mm256_add_pd(elMatVec_1,_mm256_loadu_pd(& _data_dst[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]))); + _mm256_storeu_pd(&_data_dst[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))],_mm256_add_pd(elMatVec_2,_mm256_loadu_pd(& _data_dst[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]))); + } + for (int64_t ctr_0 = (int64_t)((-ctr_1 + micro_edges_per_macro_edge) / (4)) * (4); ctr_0 < -ctr_1 + micro_edges_per_macro_edge; ctr_0 += 1) + { + + const int64_t phantom_ctr_0 = ctr_0; + real_t _data_float_loop_ctr_array_dim_0[4]; + _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0; + _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1; + _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2; + _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3; + real_t _data_float_loop_ctr_array_dim_1[4]; + _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1; + + const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]; + const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]; + const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]; + const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]; + const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0; + const real_t src_dof_0 = _data_src_vertex_0[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))]; + const real_t src_dof_1 = _data_src_vertex_0[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]; + const real_t src_dof_2 = _data_src_vertex_0[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]; + const real_t src_dof_3 = _data_src_edge_0[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]; + const real_t src_dof_4 = _data_src_edge_0[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]; + const real_t src_dof_5 = _data_src_edge_0[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))]; + const real_t src_dof_6 = _data_src_vertex_1[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))]; + const real_t src_dof_7 = _data_src_vertex_1[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]; + const real_t src_dof_8 = _data_src_vertex_1[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]; + const real_t src_dof_9 = _data_src_edge_1[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]; + const real_t src_dof_10 = _data_src_edge_1[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]; + const real_t src_dof_11 = _data_src_edge_1[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))]; + const real_t rho_dof_0 = _data_rhoVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))]; + const real_t rho_dof_1 = _data_rhoVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]; + const real_t rho_dof_2 = _data_rhoVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]; + const real_t rho_dof_3 = _data_rhoEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]; + const real_t rho_dof_4 = _data_rhoEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]; + const real_t rho_dof_5 = _data_rhoEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))]; + real_t q_acc_0_0 = 0.0; + real_t q_acc_0_1 = 0.0; + real_t q_acc_0_2 = 0.0; + real_t q_acc_0_3 = 0.0; + real_t q_acc_0_4 = 0.0; + real_t q_acc_0_5 = 0.0; + real_t q_acc_0_6 = 0.0; + real_t q_acc_0_7 = 0.0; + real_t q_acc_0_8 = 0.0; + real_t q_acc_0_9 = 0.0; + real_t q_acc_0_10 = 0.0; + real_t q_acc_0_11 = 0.0; + real_t q_acc_1_0 = 0.0; + real_t q_acc_1_1 = 0.0; + real_t q_acc_1_2 = 0.0; + real_t q_acc_1_3 = 0.0; + real_t q_acc_1_4 = 0.0; + real_t q_acc_1_5 = 0.0; + real_t q_acc_1_6 = 0.0; + real_t q_acc_1_7 = 0.0; + real_t q_acc_1_8 = 0.0; + real_t q_acc_1_9 = 0.0; + real_t q_acc_1_10 = 0.0; + real_t q_acc_1_11 = 0.0; + real_t q_acc_2_0 = 0.0; + real_t q_acc_2_1 = 0.0; + real_t q_acc_2_2 = 0.0; + real_t q_acc_2_3 = 0.0; + real_t q_acc_2_4 = 0.0; + real_t q_acc_2_5 = 0.0; + real_t q_acc_2_6 = 0.0; + real_t q_acc_2_7 = 0.0; + real_t q_acc_2_8 = 0.0; + real_t q_acc_2_9 = 0.0; + real_t q_acc_2_10 = 0.0; + real_t q_acc_2_11 = 0.0; + for (int64_t q = 0; q < 4; q += 1) + { + const real_t tmp_qloop_0 = p_affine_0_0 + (-p_affine_0_0 + p_affine_1_0)*_data_q_p_0[q] + (-p_affine_0_0 + p_affine_2_0)*_data_q_p_1[q]; + const real_t tmp_qloop_2 = (tmp_qloop_0*tmp_qloop_0); + const real_t tmp_qloop_3 = p_affine_0_1 + (-p_affine_0_1 + p_affine_1_1)*_data_q_p_0[q] + (-p_affine_0_1 + p_affine_2_1)*_data_q_p_1[q]; + const real_t tmp_qloop_4 = (tmp_qloop_3*tmp_qloop_3); + const real_t tmp_qloop_5 = tmp_qloop_2 + tmp_qloop_4; + const real_t tmp_qloop_6 = pow(tmp_qloop_5, -0.50000000000000000); + const real_t tmp_qloop_13 = tmp_qloop_12*tmp_qloop_6; + const real_t tmp_qloop_14 = tmp_qloop_1*tmp_qloop_13; + const real_t tmp_qloop_15 = pow(tmp_qloop_5, -1.5000000000000000); + const real_t tmp_qloop_16 = radRayVertex + tmp_qloop_11*(tmp_qloop_1*(-rayVertex_0 + tmp_qloop_0) - tmp_qloop_8*(-rayVertex_1 + tmp_qloop_3)); + const real_t tmp_qloop_17 = tmp_qloop_15*tmp_qloop_16; + const real_t tmp_qloop_18 = tmp_qloop_17*1.0; + const real_t tmp_qloop_19 = tmp_qloop_13*tmp_qloop_8; + const real_t tmp_qloop_20 = tmp_qloop_0*tmp_qloop_18; + const real_t tmp_qloop_23 = tmp_qloop_12*tmp_qloop_15; + const real_t tmp_qloop_24 = tmp_qloop_1*tmp_qloop_23; + const real_t tmp_qloop_25 = tmp_qloop_2*tmp_qloop_24; + const real_t tmp_qloop_26 = tmp_qloop_16*pow(tmp_qloop_5, -2.5000000000000000)*3.0; + const real_t tmp_qloop_27 = tmp_qloop_0*tmp_qloop_26*tmp_qloop_4; + const real_t tmp_qloop_28 = tmp_qloop_24*tmp_qloop_4 - tmp_qloop_27; + const real_t tmp_qloop_29 = tmp_qloop_23*tmp_qloop_7; + const real_t tmp_qloop_30 = tmp_qloop_0*tmp_qloop_3; + const real_t tmp_qloop_31 = tmp_qloop_24*tmp_qloop_30; + const real_t tmp_qloop_32 = tmp_qloop_17*2.0; + const real_t tmp_qloop_33 = tmp_qloop_2*tmp_qloop_26*tmp_qloop_3; + const real_t tmp_qloop_34 = tmp_qloop_18*tmp_qloop_3 - tmp_qloop_33; + const real_t tmp_qloop_35 = tmp_qloop_29*tmp_qloop_30; + const real_t tmp_qloop_36 = tmp_qloop_23*tmp_qloop_30*tmp_qloop_8; + const real_t tmp_qloop_37 = 4.0*_data_q_p_0[q]; + const real_t tmp_qloop_38 = tmp_qloop_37*_data_q_p_1[q]; + const real_t tmp_qloop_39 = (_data_q_p_0[q]*_data_q_p_0[q]); + const real_t tmp_qloop_40 = tmp_qloop_39*2.0; + const real_t tmp_qloop_41 = (_data_q_p_1[q]*_data_q_p_1[q]); + const real_t tmp_qloop_42 = tmp_qloop_41*2.0; + const real_t tmp_qloop_43 = tmp_qloop_38 + tmp_qloop_40 + tmp_qloop_42 - 3.0*_data_q_p_0[q] - 3.0*_data_q_p_1[q] + 1.0; + const real_t tmp_qloop_44 = 4.0*_data_q_p_1[q]; + const real_t tmp_qloop_45 = rho_dof_0*(tmp_qloop_37 + tmp_qloop_44 - 3.0); + const real_t tmp_qloop_46 = rho_dof_1*(tmp_qloop_37 - 1.0) + rho_dof_3*tmp_qloop_44 - rho_dof_4*tmp_qloop_44 + rho_dof_5*(-tmp_qloop_44 - 8.0*_data_q_p_0[q] + 4.0) + tmp_qloop_45; + const real_t tmp_qloop_47 = rho_dof_2*(tmp_qloop_44 - 1.0) + rho_dof_3*tmp_qloop_37 + rho_dof_4*(-tmp_qloop_37 - 8.0*_data_q_p_1[q] + 4.0) - rho_dof_5*tmp_qloop_37 + tmp_qloop_45; + const real_t tmp_qloop_49 = tmp_qloop_40 - _data_q_p_0[q]; + const real_t tmp_qloop_50 = tmp_qloop_42 - _data_q_p_1[q]; + const real_t tmp_qloop_51 = -tmp_qloop_38 + tmp_qloop_41*-4.0 + tmp_qloop_44; + const real_t tmp_qloop_52 = tmp_qloop_39*4.0; + const real_t tmp_qloop_53 = tmp_qloop_37 - tmp_qloop_38 - tmp_qloop_52; + const real_t jac_blending_0_0 = tmp_qloop_0*tmp_qloop_14 + tmp_qloop_18*tmp_qloop_4; + const real_t jac_blending_0_1 = -tmp_qloop_0*tmp_qloop_15*tmp_qloop_16*tmp_qloop_3 - tmp_qloop_0*tmp_qloop_19; + const real_t jac_blending_1_0 = tmp_qloop_14*tmp_qloop_3 - tmp_qloop_20*tmp_qloop_3; + const real_t jac_blending_1_1 = tmp_qloop_15*tmp_qloop_16*tmp_qloop_2*1.0 - tmp_qloop_19*tmp_qloop_3; + const real_t tmp_qloop_21 = jac_blending_0_0*jac_blending_1_1 - jac_blending_0_1*jac_blending_1_0; + const real_t tmp_qloop_22 = 1.0 / (tmp_qloop_21); + const real_t abs_det_jac_blending = tmp_qloop_21; + const real_t tmp_qloop_54 = abs_det_jac_affine_GRAY*abs_det_jac_blending*1.0 / (rho_dof_0*tmp_qloop_43 + rho_dof_1*tmp_qloop_49 + rho_dof_2*tmp_qloop_50 + rho_dof_3*tmp_qloop_38 + rho_dof_4*tmp_qloop_51 + rho_dof_5*tmp_qloop_53)*_data_q_w[q]; + const real_t tmp_qloop_55 = tmp_qloop_54*(1.0 - _data_q_p_0[q] - _data_q_p_1[q]); + const real_t tmp_qloop_59 = tmp_qloop_54*_data_q_p_0[q]; + const real_t tmp_qloop_61 = tmp_qloop_54*_data_q_p_1[q]; + const real_t tmp_qloop_65 = tmp_qloop_37*tmp_qloop_41*tmp_qloop_54; + const real_t jac_blending_inv_0_0 = jac_blending_1_1*tmp_qloop_22; + const real_t jac_blending_inv_0_1 = -jac_blending_0_1*tmp_qloop_22; + const real_t jac_blending_inv_1_0 = -jac_blending_1_0*tmp_qloop_22; + const real_t tmp_qloop_48 = tmp_qloop_46*(jac_affine_inv_0_0_GRAY*jac_blending_inv_0_0 + jac_affine_inv_0_1_GRAY*jac_blending_inv_1_0) + tmp_qloop_47*(jac_affine_inv_1_0_GRAY*jac_blending_inv_0_0 + jac_affine_inv_1_1_GRAY*jac_blending_inv_1_0); + const real_t tmp_qloop_56 = tmp_qloop_48*tmp_qloop_55; + const real_t tmp_qloop_60 = tmp_qloop_48*tmp_qloop_59; + const real_t tmp_qloop_62 = tmp_qloop_48*tmp_qloop_61; + const real_t jac_blending_inv_1_1 = jac_blending_0_0*tmp_qloop_22; + const real_t tmp_qloop_57 = tmp_qloop_46*(jac_affine_inv_0_0_GRAY*jac_blending_inv_0_1 + jac_affine_inv_0_1_GRAY*jac_blending_inv_1_1) + tmp_qloop_47*(jac_affine_inv_1_0_GRAY*jac_blending_inv_0_1 + jac_affine_inv_1_1_GRAY*jac_blending_inv_1_1); + const real_t tmp_qloop_58 = tmp_qloop_55*tmp_qloop_57; + const real_t tmp_qloop_63 = tmp_qloop_57*tmp_qloop_59; + const real_t tmp_qloop_64 = tmp_qloop_57*tmp_qloop_61; + const real_t hessian_blending_0_0_0 = tmp_qloop_14 - tmp_qloop_25 + tmp_qloop_28; + const real_t hessian_blending_1_0_0 = -tmp_qloop_26*(tmp_qloop_3*tmp_qloop_3*tmp_qloop_3) + tmp_qloop_29*tmp_qloop_4 + tmp_qloop_3*tmp_qloop_32 - tmp_qloop_31; + const real_t hessian_blending_0_0_1 = tmp_qloop_1*tmp_qloop_11*tmp_qloop_15*tmp_qloop_30*-2.0 - tmp_qloop_34; + const real_t hessian_blending_1_0_1 = tmp_qloop_1*tmp_qloop_10*tmp_qloop_6*tmp_qloop_9*1.0 - tmp_qloop_20 - tmp_qloop_28 - tmp_qloop_35; + const real_t hessian_blending_0_1_0 = tmp_qloop_10*tmp_qloop_15*tmp_qloop_2*tmp_qloop_8*tmp_qloop_9*1.0 - tmp_qloop_19 - tmp_qloop_31 - tmp_qloop_34; + const real_t hessian_blending_1_1_0 = -tmp_qloop_20 + tmp_qloop_27 - tmp_qloop_35 + tmp_qloop_36; + const real_t hessian_blending_0_1_1 = tmp_qloop_0*tmp_qloop_32 - (tmp_qloop_0*tmp_qloop_0*tmp_qloop_0)*tmp_qloop_26 + tmp_qloop_25 + tmp_qloop_36; + const real_t hessian_blending_1_1_1 = tmp_qloop_10*tmp_qloop_15*tmp_qloop_2*tmp_qloop_7*tmp_qloop_9*1.0 + tmp_qloop_10*tmp_qloop_15*tmp_qloop_4*tmp_qloop_8*tmp_qloop_9*1.0 - tmp_qloop_19 - tmp_qloop_33; + const real_t q_tmp_0_0 = tmp_qloop_43*tmp_qloop_56; + const real_t q_tmp_0_1 = tmp_qloop_49*tmp_qloop_56; + const real_t q_tmp_0_2 = tmp_qloop_50*tmp_qloop_56; + const real_t q_tmp_0_3 = tmp_qloop_38*tmp_qloop_56; + const real_t q_tmp_0_4 = tmp_qloop_51*tmp_qloop_56; + const real_t q_tmp_0_5 = tmp_qloop_53*tmp_qloop_56; + const real_t q_tmp_0_6 = tmp_qloop_43*tmp_qloop_58; + const real_t q_tmp_0_7 = tmp_qloop_49*tmp_qloop_58; + const real_t q_tmp_0_8 = tmp_qloop_50*tmp_qloop_58; + const real_t q_tmp_0_9 = tmp_qloop_38*tmp_qloop_58; + const real_t q_tmp_0_10 = tmp_qloop_51*tmp_qloop_58; + const real_t q_tmp_0_11 = tmp_qloop_53*tmp_qloop_58; + const real_t q_tmp_1_0 = tmp_qloop_43*tmp_qloop_60; + const real_t q_tmp_1_1 = tmp_qloop_49*tmp_qloop_60; + const real_t q_tmp_1_2 = tmp_qloop_50*tmp_qloop_60; + const real_t q_tmp_1_3 = tmp_qloop_52*tmp_qloop_62; + const real_t q_tmp_1_4 = tmp_qloop_51*tmp_qloop_60; + const real_t q_tmp_1_5 = tmp_qloop_53*tmp_qloop_60; + const real_t q_tmp_1_6 = tmp_qloop_43*tmp_qloop_63; + const real_t q_tmp_1_7 = tmp_qloop_49*tmp_qloop_63; + const real_t q_tmp_1_8 = tmp_qloop_50*tmp_qloop_63; + const real_t q_tmp_1_9 = tmp_qloop_52*tmp_qloop_64; + const real_t q_tmp_1_10 = tmp_qloop_51*tmp_qloop_63; + const real_t q_tmp_1_11 = tmp_qloop_53*tmp_qloop_63; + const real_t q_tmp_2_0 = tmp_qloop_43*tmp_qloop_62; + const real_t q_tmp_2_1 = tmp_qloop_49*tmp_qloop_62; + const real_t q_tmp_2_2 = tmp_qloop_50*tmp_qloop_62; + const real_t q_tmp_2_3 = tmp_qloop_48*tmp_qloop_65; + const real_t q_tmp_2_4 = tmp_qloop_51*tmp_qloop_62; + const real_t q_tmp_2_5 = tmp_qloop_53*tmp_qloop_62; + const real_t q_tmp_2_6 = tmp_qloop_43*tmp_qloop_64; + const real_t q_tmp_2_7 = tmp_qloop_49*tmp_qloop_64; + const real_t q_tmp_2_8 = tmp_qloop_50*tmp_qloop_64; + const real_t q_tmp_2_9 = tmp_qloop_57*tmp_qloop_65; + const real_t q_tmp_2_10 = tmp_qloop_51*tmp_qloop_64; + const real_t q_tmp_2_11 = tmp_qloop_53*tmp_qloop_64; + q_acc_0_0 = q_acc_0_0 + q_tmp_0_0; + q_acc_0_1 = q_acc_0_1 + q_tmp_0_1; + q_acc_0_2 = q_acc_0_2 + q_tmp_0_2; + q_acc_0_3 = q_acc_0_3 + q_tmp_0_3; + q_acc_0_4 = q_acc_0_4 + q_tmp_0_4; + q_acc_0_5 = q_acc_0_5 + q_tmp_0_5; + q_acc_0_6 = q_acc_0_6 + q_tmp_0_6; + q_acc_0_7 = q_acc_0_7 + q_tmp_0_7; + q_acc_0_8 = q_acc_0_8 + q_tmp_0_8; + q_acc_0_9 = q_acc_0_9 + q_tmp_0_9; + q_acc_0_10 = q_acc_0_10 + q_tmp_0_10; + q_acc_0_11 = q_acc_0_11 + q_tmp_0_11; + q_acc_1_0 = q_acc_1_0 + q_tmp_1_0; + q_acc_1_1 = q_acc_1_1 + q_tmp_1_1; + q_acc_1_2 = q_acc_1_2 + q_tmp_1_2; + q_acc_1_3 = q_acc_1_3 + q_tmp_1_3; + q_acc_1_4 = q_acc_1_4 + q_tmp_1_4; + q_acc_1_5 = q_acc_1_5 + q_tmp_1_5; + q_acc_1_6 = q_acc_1_6 + q_tmp_1_6; + q_acc_1_7 = q_acc_1_7 + q_tmp_1_7; + q_acc_1_8 = q_acc_1_8 + q_tmp_1_8; + q_acc_1_9 = q_acc_1_9 + q_tmp_1_9; + q_acc_1_10 = q_acc_1_10 + q_tmp_1_10; + q_acc_1_11 = q_acc_1_11 + q_tmp_1_11; + q_acc_2_0 = q_acc_2_0 + q_tmp_2_0; + q_acc_2_1 = q_acc_2_1 + q_tmp_2_1; + q_acc_2_2 = q_acc_2_2 + q_tmp_2_2; + q_acc_2_3 = q_acc_2_3 + q_tmp_2_3; + q_acc_2_4 = q_acc_2_4 + q_tmp_2_4; + q_acc_2_5 = q_acc_2_5 + q_tmp_2_5; + q_acc_2_6 = q_acc_2_6 + q_tmp_2_6; + q_acc_2_7 = q_acc_2_7 + q_tmp_2_7; + q_acc_2_8 = q_acc_2_8 + q_tmp_2_8; + q_acc_2_9 = q_acc_2_9 + q_tmp_2_9; + q_acc_2_10 = q_acc_2_10 + q_tmp_2_10; + q_acc_2_11 = q_acc_2_11 + q_tmp_2_11; + } + const real_t elMatVec_0 = q_acc_0_0*src_dof_0 + q_acc_0_1*src_dof_1 + q_acc_0_10*src_dof_10 + q_acc_0_11*src_dof_11 + q_acc_0_2*src_dof_2 + q_acc_0_3*src_dof_3 + q_acc_0_4*src_dof_4 + q_acc_0_5*src_dof_5 + q_acc_0_6*src_dof_6 + q_acc_0_7*src_dof_7 + q_acc_0_8*src_dof_8 + q_acc_0_9*src_dof_9; + const real_t elMatVec_1 = q_acc_1_0*src_dof_0 + q_acc_1_1*src_dof_1 + q_acc_1_10*src_dof_10 + q_acc_1_11*src_dof_11 + q_acc_1_2*src_dof_2 + q_acc_1_3*src_dof_3 + q_acc_1_4*src_dof_4 + q_acc_1_5*src_dof_5 + q_acc_1_6*src_dof_6 + q_acc_1_7*src_dof_7 + q_acc_1_8*src_dof_8 + q_acc_1_9*src_dof_9; + const real_t elMatVec_2 = q_acc_2_0*src_dof_0 + q_acc_2_1*src_dof_1 + q_acc_2_10*src_dof_10 + q_acc_2_11*src_dof_11 + q_acc_2_2*src_dof_2 + q_acc_2_3*src_dof_3 + q_acc_2_4*src_dof_4 + q_acc_2_5*src_dof_5 + q_acc_2_6*src_dof_6 + q_acc_2_7*src_dof_7 + q_acc_2_8*src_dof_8 + q_acc_2_9*src_dof_9; + _data_dst[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))] = elMatVec_0 + _data_dst[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))]; + _data_dst[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1] = elMatVec_1 + _data_dst[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]; + _data_dst[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))] = elMatVec_2 + _data_dst[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]; + } + } + } + const real_t tmp_coords_jac_0_BLUE = 1.0 / (micro_edges_per_macro_edge_float)*1.0; + const real_t tmp_coords_jac_1_BLUE = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0); + const real_t tmp_coords_jac_2_BLUE = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1); + const real_t tmp_coords_jac_3_BLUE = tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0); + const real_t tmp_coords_jac_4_BLUE = tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1); + const real_t p_affine_const_0_0_BLUE = tmp_coords_jac_1_BLUE; + const real_t p_affine_const_0_1_BLUE = tmp_coords_jac_2_BLUE; + const real_t p_affine_const_1_0_BLUE = macro_vertex_coord_id_0comp0 + tmp_coords_jac_3_BLUE; + const real_t p_affine_const_1_1_BLUE = macro_vertex_coord_id_0comp1 + tmp_coords_jac_4_BLUE; + const real_t p_affine_const_2_0_BLUE = tmp_coords_jac_1_BLUE + tmp_coords_jac_3_BLUE; + const real_t p_affine_const_2_1_BLUE = tmp_coords_jac_2_BLUE + tmp_coords_jac_4_BLUE; + const real_t jac_affine_0_0_BLUE = -p_affine_const_0_0_BLUE + p_affine_const_1_0_BLUE; + const real_t jac_affine_0_1_BLUE = -p_affine_const_0_0_BLUE + p_affine_const_2_0_BLUE; + const real_t jac_affine_1_0_BLUE = -p_affine_const_0_1_BLUE + p_affine_const_1_1_BLUE; + const real_t jac_affine_1_1_BLUE = -p_affine_const_0_1_BLUE + p_affine_const_2_1_BLUE; + const real_t tmp_coords_jac_5_BLUE = jac_affine_0_0_BLUE*jac_affine_1_1_BLUE - jac_affine_0_1_BLUE*jac_affine_1_0_BLUE; + const real_t tmp_coords_jac_6_BLUE = 1.0 / (tmp_coords_jac_5_BLUE); + const real_t jac_affine_inv_0_0_BLUE = jac_affine_1_1_BLUE*tmp_coords_jac_6_BLUE; + const real_t jac_affine_inv_0_1_BLUE = -jac_affine_0_1_BLUE*tmp_coords_jac_6_BLUE; + const real_t jac_affine_inv_1_0_BLUE = -jac_affine_1_0_BLUE*tmp_coords_jac_6_BLUE; + const real_t jac_affine_inv_1_1_BLUE = jac_affine_0_0_BLUE*tmp_coords_jac_6_BLUE; + const real_t abs_det_jac_affine_BLUE = abs(tmp_coords_jac_5_BLUE); + { + /* FaceType.BLUE */ + for (int64_t ctr_1 = 0; ctr_1 < micro_edges_per_macro_edge; ctr_1 += 1) + { + for (int64_t ctr_0 = 0; ctr_0 < (int64_t)((-ctr_1 + micro_edges_per_macro_edge - 1) / (4)) * (4); ctr_0 += 4) + { + + const int64_t phantom_ctr_0 = ctr_0; + real_t _data_float_loop_ctr_array_dim_0[4]; + _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0; + _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1; + _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2; + _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3; + real_t _data_float_loop_ctr_array_dim_1[4]; + _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1; + + const __m256d p_affine_0_0 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)); + const __m256d p_affine_0_1 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)); + const __m256d p_affine_1_0 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)); + const __m256d p_affine_1_1 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)); + const __m256d p_affine_2_0 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)); + const __m256d p_affine_2_1 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)); + const __m256d src_dof_0 = _mm256_loadu_pd(& _data_src_vertex_0[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]); + const __m256d src_dof_1 = _mm256_loadu_pd(& _data_src_vertex_0[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]); + const __m256d src_dof_2 = _mm256_loadu_pd(& _data_src_vertex_0[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1]); + const __m256d src_dof_3 = _mm256_loadu_pd(& _data_src_edge_0[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]); + const __m256d src_dof_4 = _mm256_loadu_pd(& _data_src_edge_0[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1]); + const __m256d src_dof_5 = _mm256_loadu_pd(& _data_src_edge_0[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]); + const __m256d src_dof_6 = _mm256_loadu_pd(& _data_src_vertex_1[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]); + const __m256d src_dof_7 = _mm256_loadu_pd(& _data_src_vertex_1[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]); + const __m256d src_dof_8 = _mm256_loadu_pd(& _data_src_vertex_1[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1]); + const __m256d src_dof_9 = _mm256_loadu_pd(& _data_src_edge_1[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]); + const __m256d src_dof_10 = _mm256_loadu_pd(& _data_src_edge_1[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1]); + const __m256d src_dof_11 = _mm256_loadu_pd(& _data_src_edge_1[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]); + const __m256d rho_dof_0 = _mm256_loadu_pd(& _data_rhoVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]); + const __m256d rho_dof_1 = _mm256_loadu_pd(& _data_rhoVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]); + const __m256d rho_dof_2 = _mm256_loadu_pd(& _data_rhoVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1]); + const __m256d rho_dof_3 = _mm256_loadu_pd(& _data_rhoEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]); + const __m256d rho_dof_4 = _mm256_loadu_pd(& _data_rhoEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1]); + const __m256d rho_dof_5 = _mm256_loadu_pd(& _data_rhoEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]); + __m256d q_acc_0_0 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_1 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_2 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_3 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_4 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_5 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_6 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_7 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_8 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_9 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_10 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_11 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_0 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_1 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_2 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_3 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_4 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_5 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_6 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_7 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_8 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_9 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_10 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_11 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_0 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_1 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_2 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_3 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_4 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_5 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_6 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_7 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_8 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_9 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_10 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_11 = _mm256_set_pd(0.0,0.0,0.0,0.0); + for (int64_t q = 0; q < 4; q += 1) + { + const __m256d tmp_qloop_0 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(p_affine_0_0,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),p_affine_1_0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(p_affine_0_0,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),p_affine_2_0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]))),p_affine_0_0); + const __m256d tmp_qloop_2 = _mm256_mul_pd(tmp_qloop_0,tmp_qloop_0); + const __m256d tmp_qloop_3 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(p_affine_0_1,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),p_affine_1_1),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(p_affine_0_1,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),p_affine_2_1),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]))),p_affine_0_1); + const __m256d tmp_qloop_4 = _mm256_mul_pd(tmp_qloop_3,tmp_qloop_3); + const __m256d tmp_qloop_5 = _mm256_add_pd(tmp_qloop_2,tmp_qloop_4); + const __m256d tmp_qloop_6 = _mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_sqrt_pd(tmp_qloop_5)); + const __m256d tmp_qloop_13 = _mm256_mul_pd(tmp_qloop_6,_mm256_set_pd(tmp_qloop_12,tmp_qloop_12,tmp_qloop_12,tmp_qloop_12)); + const __m256d tmp_qloop_14 = _mm256_mul_pd(tmp_qloop_13,_mm256_set_pd(tmp_qloop_1,tmp_qloop_1,tmp_qloop_1,tmp_qloop_1)); + const __m256d tmp_qloop_15 = _mm256_div_pd(_mm256_sqrt_pd(tmp_qloop_5),_mm256_mul_pd(tmp_qloop_5,tmp_qloop_5)); + const __m256d tmp_qloop_16 = _mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(rayVertex_0,rayVertex_0,rayVertex_0,rayVertex_0)),tmp_qloop_0),_mm256_set_pd(tmp_qloop_1,tmp_qloop_1,tmp_qloop_1,tmp_qloop_1)),_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(rayVertex_1,rayVertex_1,rayVertex_1,rayVertex_1)),tmp_qloop_3),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_qloop_8,tmp_qloop_8,tmp_qloop_8,tmp_qloop_8))),_mm256_set_pd(tmp_qloop_11,tmp_qloop_11,tmp_qloop_11,tmp_qloop_11)),_mm256_set_pd(radRayVertex,radRayVertex,radRayVertex,radRayVertex)); + const __m256d tmp_qloop_17 = _mm256_mul_pd(tmp_qloop_15,tmp_qloop_16); + const __m256d tmp_qloop_18 = _mm256_mul_pd(tmp_qloop_17,_mm256_set_pd(1.0,1.0,1.0,1.0)); + const __m256d tmp_qloop_19 = _mm256_mul_pd(tmp_qloop_13,_mm256_set_pd(tmp_qloop_8,tmp_qloop_8,tmp_qloop_8,tmp_qloop_8)); + const __m256d tmp_qloop_20 = _mm256_mul_pd(tmp_qloop_0,tmp_qloop_18); + const __m256d tmp_qloop_23 = _mm256_mul_pd(tmp_qloop_15,_mm256_set_pd(tmp_qloop_12,tmp_qloop_12,tmp_qloop_12,tmp_qloop_12)); + const __m256d tmp_qloop_24 = _mm256_mul_pd(tmp_qloop_23,_mm256_set_pd(tmp_qloop_1,tmp_qloop_1,tmp_qloop_1,tmp_qloop_1)); + const __m256d tmp_qloop_25 = _mm256_mul_pd(tmp_qloop_2,tmp_qloop_24); + const __m256d tmp_qloop_26 = _mm256_mul_pd(_mm256_mul_pd(tmp_qloop_16,_mm256_div_pd(_mm256_sqrt_pd(tmp_qloop_5),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_5,tmp_qloop_5),tmp_qloop_5))),_mm256_set_pd(3.0,3.0,3.0,3.0)); + const __m256d tmp_qloop_27 = _mm256_mul_pd(_mm256_mul_pd(tmp_qloop_0,tmp_qloop_26),tmp_qloop_4); + const __m256d tmp_qloop_28 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_27,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_24,tmp_qloop_4)); + const __m256d tmp_qloop_29 = _mm256_mul_pd(tmp_qloop_23,_mm256_set_pd(tmp_qloop_7,tmp_qloop_7,tmp_qloop_7,tmp_qloop_7)); + const __m256d tmp_qloop_30 = _mm256_mul_pd(tmp_qloop_0,tmp_qloop_3); + const __m256d tmp_qloop_31 = _mm256_mul_pd(tmp_qloop_24,tmp_qloop_30); + const __m256d tmp_qloop_32 = _mm256_mul_pd(tmp_qloop_17,_mm256_set_pd(2.0,2.0,2.0,2.0)); + const __m256d tmp_qloop_33 = _mm256_mul_pd(_mm256_mul_pd(tmp_qloop_2,tmp_qloop_26),tmp_qloop_3); + const __m256d tmp_qloop_34 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_33,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_18,tmp_qloop_3)); + const __m256d tmp_qloop_35 = _mm256_mul_pd(tmp_qloop_29,tmp_qloop_30); + const __m256d tmp_qloop_36 = _mm256_mul_pd(_mm256_mul_pd(tmp_qloop_23,tmp_qloop_30),_mm256_set_pd(tmp_qloop_8,tmp_qloop_8,tmp_qloop_8,tmp_qloop_8)); + const __m256d tmp_qloop_37 = _mm256_mul_pd(_mm256_set_pd(4.0,4.0,4.0,4.0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])); + const __m256d tmp_qloop_38 = _mm256_mul_pd(tmp_qloop_37,_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q])); + const __m256d tmp_qloop_39 = _mm256_mul_pd(_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q]),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])); + const __m256d tmp_qloop_40 = _mm256_mul_pd(tmp_qloop_39,_mm256_set_pd(2.0,2.0,2.0,2.0)); + const __m256d tmp_qloop_41 = _mm256_mul_pd(_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q])); + const __m256d tmp_qloop_42 = _mm256_mul_pd(tmp_qloop_41,_mm256_set_pd(2.0,2.0,2.0,2.0)); + const __m256d tmp_qloop_43 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-3.0,-3.0,-3.0,-3.0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),_mm256_mul_pd(_mm256_set_pd(-3.0,-3.0,-3.0,-3.0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),tmp_qloop_38),tmp_qloop_40),tmp_qloop_42); + const __m256d tmp_qloop_44 = _mm256_mul_pd(_mm256_set_pd(4.0,4.0,4.0,4.0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q])); + const __m256d tmp_qloop_45 = _mm256_mul_pd(rho_dof_0,_mm256_add_pd(_mm256_add_pd(_mm256_set_pd(-3.0,-3.0,-3.0,-3.0),tmp_qloop_37),tmp_qloop_44)); + const __m256d tmp_qloop_46 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(rho_dof_1,_mm256_add_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),tmp_qloop_37)),_mm256_mul_pd(rho_dof_5,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-8.0,-8.0,-8.0,-8.0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),_mm256_mul_pd(tmp_qloop_44,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_set_pd(4.0,4.0,4.0,4.0)))),_mm256_mul_pd(rho_dof_3,tmp_qloop_44)),_mm256_mul_pd(_mm256_mul_pd(rho_dof_4,tmp_qloop_44),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_45); + const __m256d tmp_qloop_47 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(rho_dof_2,_mm256_add_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),tmp_qloop_44)),_mm256_mul_pd(rho_dof_4,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-8.0,-8.0,-8.0,-8.0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q])),_mm256_mul_pd(tmp_qloop_37,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_set_pd(4.0,4.0,4.0,4.0)))),_mm256_mul_pd(rho_dof_3,tmp_qloop_37)),_mm256_mul_pd(_mm256_mul_pd(rho_dof_5,tmp_qloop_37),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_45); + const __m256d tmp_qloop_49 = _mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),tmp_qloop_40); + const __m256d tmp_qloop_50 = _mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q])),tmp_qloop_42); + const __m256d tmp_qloop_51 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_41,_mm256_set_pd(-4.0,-4.0,-4.0,-4.0)),_mm256_mul_pd(tmp_qloop_38,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_44); + const __m256d tmp_qloop_52 = _mm256_mul_pd(tmp_qloop_39,_mm256_set_pd(4.0,4.0,4.0,4.0)); + const __m256d tmp_qloop_53 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_38,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_52,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_37); + const __m256d jac_blending_0_0 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_0,tmp_qloop_14),_mm256_mul_pd(tmp_qloop_18,tmp_qloop_4)); + const __m256d jac_blending_0_1 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_0,tmp_qloop_19),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_0,tmp_qloop_15),tmp_qloop_16),tmp_qloop_3),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))); + const __m256d jac_blending_1_0 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_14,tmp_qloop_3),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_20,tmp_qloop_3),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))); + const __m256d jac_blending_1_1 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_19,tmp_qloop_3),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_15,tmp_qloop_16),tmp_qloop_2),_mm256_set_pd(1.0,1.0,1.0,1.0))); + const __m256d tmp_qloop_21 = _mm256_add_pd(_mm256_mul_pd(jac_blending_0_0,jac_blending_1_1),_mm256_mul_pd(_mm256_mul_pd(jac_blending_0_1,jac_blending_1_0),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))); + const __m256d tmp_qloop_22 = _mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),tmp_qloop_21); + const __m256d abs_det_jac_blending = tmp_qloop_21; + const __m256d tmp_qloop_54 = _mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(abs_det_jac_blending,_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(rho_dof_0,tmp_qloop_43),_mm256_mul_pd(rho_dof_1,tmp_qloop_49)),_mm256_mul_pd(rho_dof_2,tmp_qloop_50)),_mm256_mul_pd(rho_dof_3,tmp_qloop_38)),_mm256_mul_pd(rho_dof_4,tmp_qloop_51)),_mm256_mul_pd(rho_dof_5,tmp_qloop_53)))),_mm256_set_pd(_data_q_w[q],_data_q_w[q],_data_q_w[q],_data_q_w[q])),_mm256_set_pd(abs_det_jac_affine_BLUE,abs_det_jac_affine_BLUE,abs_det_jac_affine_BLUE,abs_det_jac_affine_BLUE)); + const __m256d tmp_qloop_55 = _mm256_mul_pd(tmp_qloop_54,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]))),_mm256_set_pd(1.0,1.0,1.0,1.0))); + const __m256d tmp_qloop_59 = _mm256_mul_pd(tmp_qloop_54,_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])); + const __m256d tmp_qloop_61 = _mm256_mul_pd(tmp_qloop_54,_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q])); + const __m256d tmp_qloop_65 = _mm256_mul_pd(_mm256_mul_pd(tmp_qloop_37,tmp_qloop_41),tmp_qloop_54); + const __m256d jac_blending_inv_0_0 = _mm256_mul_pd(jac_blending_1_1,tmp_qloop_22); + const __m256d jac_blending_inv_0_1 = _mm256_mul_pd(_mm256_mul_pd(jac_blending_0_1,tmp_qloop_22),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)); + const __m256d jac_blending_inv_1_0 = _mm256_mul_pd(_mm256_mul_pd(jac_blending_1_0,tmp_qloop_22),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)); + const __m256d tmp_qloop_48 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_46,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_0,_mm256_set_pd(jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE)),_mm256_mul_pd(jac_blending_inv_1_0,_mm256_set_pd(jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE)))),_mm256_mul_pd(tmp_qloop_47,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_0,_mm256_set_pd(jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE)),_mm256_mul_pd(jac_blending_inv_1_0,_mm256_set_pd(jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE))))); + const __m256d tmp_qloop_56 = _mm256_mul_pd(tmp_qloop_48,tmp_qloop_55); + const __m256d tmp_qloop_60 = _mm256_mul_pd(tmp_qloop_48,tmp_qloop_59); + const __m256d tmp_qloop_62 = _mm256_mul_pd(tmp_qloop_48,tmp_qloop_61); + const __m256d jac_blending_inv_1_1 = _mm256_mul_pd(jac_blending_0_0,tmp_qloop_22); + const __m256d tmp_qloop_57 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_46,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_1,_mm256_set_pd(jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE)),_mm256_mul_pd(jac_blending_inv_1_1,_mm256_set_pd(jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE)))),_mm256_mul_pd(tmp_qloop_47,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_1,_mm256_set_pd(jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE)),_mm256_mul_pd(jac_blending_inv_1_1,_mm256_set_pd(jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE))))); + const __m256d tmp_qloop_58 = _mm256_mul_pd(tmp_qloop_55,tmp_qloop_57); + const __m256d tmp_qloop_63 = _mm256_mul_pd(tmp_qloop_57,tmp_qloop_59); + const __m256d tmp_qloop_64 = _mm256_mul_pd(tmp_qloop_57,tmp_qloop_61); + const __m256d hessian_blending_0_0_0 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_25,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),tmp_qloop_14),tmp_qloop_28); + const __m256d hessian_blending_1_0_0 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_31,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_29,tmp_qloop_4)),_mm256_mul_pd(tmp_qloop_3,tmp_qloop_32)),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_26,_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_3,tmp_qloop_3),tmp_qloop_3)),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))); + const __m256d hessian_blending_0_0_1 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_34,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_15,tmp_qloop_30),_mm256_set_pd(-2.0,-2.0,-2.0,-2.0)),_mm256_set_pd(tmp_qloop_1,tmp_qloop_1,tmp_qloop_1,tmp_qloop_1)),_mm256_set_pd(tmp_qloop_11,tmp_qloop_11,tmp_qloop_11,tmp_qloop_11))); + const __m256d hessian_blending_1_0_1 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_20,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_28,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_mul_pd(tmp_qloop_35,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_6,_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_1,tmp_qloop_1,tmp_qloop_1,tmp_qloop_1)),_mm256_set_pd(tmp_qloop_10,tmp_qloop_10,tmp_qloop_10,tmp_qloop_10)),_mm256_set_pd(tmp_qloop_9,tmp_qloop_9,tmp_qloop_9,tmp_qloop_9))); + const __m256d hessian_blending_0_1_0 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_19,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_31,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_mul_pd(tmp_qloop_34,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_15,tmp_qloop_2),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_10,tmp_qloop_10,tmp_qloop_10,tmp_qloop_10)),_mm256_set_pd(tmp_qloop_8,tmp_qloop_8,tmp_qloop_8,tmp_qloop_8)),_mm256_set_pd(tmp_qloop_9,tmp_qloop_9,tmp_qloop_9,tmp_qloop_9))); + const __m256d hessian_blending_1_1_0 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_20,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_35,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_27),tmp_qloop_36); + const __m256d hessian_blending_0_1_1 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_0,tmp_qloop_32),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_0,tmp_qloop_0),tmp_qloop_0),tmp_qloop_26),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_25),tmp_qloop_36); + const __m256d hessian_blending_1_1_1 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_19,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_33,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_15,tmp_qloop_2),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_10,tmp_qloop_10,tmp_qloop_10,tmp_qloop_10)),_mm256_set_pd(tmp_qloop_7,tmp_qloop_7,tmp_qloop_7,tmp_qloop_7)),_mm256_set_pd(tmp_qloop_9,tmp_qloop_9,tmp_qloop_9,tmp_qloop_9))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_15,tmp_qloop_4),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_10,tmp_qloop_10,tmp_qloop_10,tmp_qloop_10)),_mm256_set_pd(tmp_qloop_8,tmp_qloop_8,tmp_qloop_8,tmp_qloop_8)),_mm256_set_pd(tmp_qloop_9,tmp_qloop_9,tmp_qloop_9,tmp_qloop_9))); + const __m256d q_tmp_0_0 = _mm256_mul_pd(tmp_qloop_43,tmp_qloop_56); + const __m256d q_tmp_0_1 = _mm256_mul_pd(tmp_qloop_49,tmp_qloop_56); + const __m256d q_tmp_0_2 = _mm256_mul_pd(tmp_qloop_50,tmp_qloop_56); + const __m256d q_tmp_0_3 = _mm256_mul_pd(tmp_qloop_38,tmp_qloop_56); + const __m256d q_tmp_0_4 = _mm256_mul_pd(tmp_qloop_51,tmp_qloop_56); + const __m256d q_tmp_0_5 = _mm256_mul_pd(tmp_qloop_53,tmp_qloop_56); + const __m256d q_tmp_0_6 = _mm256_mul_pd(tmp_qloop_43,tmp_qloop_58); + const __m256d q_tmp_0_7 = _mm256_mul_pd(tmp_qloop_49,tmp_qloop_58); + const __m256d q_tmp_0_8 = _mm256_mul_pd(tmp_qloop_50,tmp_qloop_58); + const __m256d q_tmp_0_9 = _mm256_mul_pd(tmp_qloop_38,tmp_qloop_58); + const __m256d q_tmp_0_10 = _mm256_mul_pd(tmp_qloop_51,tmp_qloop_58); + const __m256d q_tmp_0_11 = _mm256_mul_pd(tmp_qloop_53,tmp_qloop_58); + const __m256d q_tmp_1_0 = _mm256_mul_pd(tmp_qloop_43,tmp_qloop_60); + const __m256d q_tmp_1_1 = _mm256_mul_pd(tmp_qloop_49,tmp_qloop_60); + const __m256d q_tmp_1_2 = _mm256_mul_pd(tmp_qloop_50,tmp_qloop_60); + const __m256d q_tmp_1_3 = _mm256_mul_pd(tmp_qloop_52,tmp_qloop_62); + const __m256d q_tmp_1_4 = _mm256_mul_pd(tmp_qloop_51,tmp_qloop_60); + const __m256d q_tmp_1_5 = _mm256_mul_pd(tmp_qloop_53,tmp_qloop_60); + const __m256d q_tmp_1_6 = _mm256_mul_pd(tmp_qloop_43,tmp_qloop_63); + const __m256d q_tmp_1_7 = _mm256_mul_pd(tmp_qloop_49,tmp_qloop_63); + const __m256d q_tmp_1_8 = _mm256_mul_pd(tmp_qloop_50,tmp_qloop_63); + const __m256d q_tmp_1_9 = _mm256_mul_pd(tmp_qloop_52,tmp_qloop_64); + const __m256d q_tmp_1_10 = _mm256_mul_pd(tmp_qloop_51,tmp_qloop_63); + const __m256d q_tmp_1_11 = _mm256_mul_pd(tmp_qloop_53,tmp_qloop_63); + const __m256d q_tmp_2_0 = _mm256_mul_pd(tmp_qloop_43,tmp_qloop_62); + const __m256d q_tmp_2_1 = _mm256_mul_pd(tmp_qloop_49,tmp_qloop_62); + const __m256d q_tmp_2_2 = _mm256_mul_pd(tmp_qloop_50,tmp_qloop_62); + const __m256d q_tmp_2_3 = _mm256_mul_pd(tmp_qloop_48,tmp_qloop_65); + const __m256d q_tmp_2_4 = _mm256_mul_pd(tmp_qloop_51,tmp_qloop_62); + const __m256d q_tmp_2_5 = _mm256_mul_pd(tmp_qloop_53,tmp_qloop_62); + const __m256d q_tmp_2_6 = _mm256_mul_pd(tmp_qloop_43,tmp_qloop_64); + const __m256d q_tmp_2_7 = _mm256_mul_pd(tmp_qloop_49,tmp_qloop_64); + const __m256d q_tmp_2_8 = _mm256_mul_pd(tmp_qloop_50,tmp_qloop_64); + const __m256d q_tmp_2_9 = _mm256_mul_pd(tmp_qloop_57,tmp_qloop_65); + const __m256d q_tmp_2_10 = _mm256_mul_pd(tmp_qloop_51,tmp_qloop_64); + const __m256d q_tmp_2_11 = _mm256_mul_pd(tmp_qloop_53,tmp_qloop_64); + q_acc_0_0 = _mm256_add_pd(q_acc_0_0,q_tmp_0_0); + q_acc_0_1 = _mm256_add_pd(q_acc_0_1,q_tmp_0_1); + q_acc_0_2 = _mm256_add_pd(q_acc_0_2,q_tmp_0_2); + q_acc_0_3 = _mm256_add_pd(q_acc_0_3,q_tmp_0_3); + q_acc_0_4 = _mm256_add_pd(q_acc_0_4,q_tmp_0_4); + q_acc_0_5 = _mm256_add_pd(q_acc_0_5,q_tmp_0_5); + q_acc_0_6 = _mm256_add_pd(q_acc_0_6,q_tmp_0_6); + q_acc_0_7 = _mm256_add_pd(q_acc_0_7,q_tmp_0_7); + q_acc_0_8 = _mm256_add_pd(q_acc_0_8,q_tmp_0_8); + q_acc_0_9 = _mm256_add_pd(q_acc_0_9,q_tmp_0_9); + q_acc_0_10 = _mm256_add_pd(q_acc_0_10,q_tmp_0_10); + q_acc_0_11 = _mm256_add_pd(q_acc_0_11,q_tmp_0_11); + q_acc_1_0 = _mm256_add_pd(q_acc_1_0,q_tmp_1_0); + q_acc_1_1 = _mm256_add_pd(q_acc_1_1,q_tmp_1_1); + q_acc_1_2 = _mm256_add_pd(q_acc_1_2,q_tmp_1_2); + q_acc_1_3 = _mm256_add_pd(q_acc_1_3,q_tmp_1_3); + q_acc_1_4 = _mm256_add_pd(q_acc_1_4,q_tmp_1_4); + q_acc_1_5 = _mm256_add_pd(q_acc_1_5,q_tmp_1_5); + q_acc_1_6 = _mm256_add_pd(q_acc_1_6,q_tmp_1_6); + q_acc_1_7 = _mm256_add_pd(q_acc_1_7,q_tmp_1_7); + q_acc_1_8 = _mm256_add_pd(q_acc_1_8,q_tmp_1_8); + q_acc_1_9 = _mm256_add_pd(q_acc_1_9,q_tmp_1_9); + q_acc_1_10 = _mm256_add_pd(q_acc_1_10,q_tmp_1_10); + q_acc_1_11 = _mm256_add_pd(q_acc_1_11,q_tmp_1_11); + q_acc_2_0 = _mm256_add_pd(q_acc_2_0,q_tmp_2_0); + q_acc_2_1 = _mm256_add_pd(q_acc_2_1,q_tmp_2_1); + q_acc_2_2 = _mm256_add_pd(q_acc_2_2,q_tmp_2_2); + q_acc_2_3 = _mm256_add_pd(q_acc_2_3,q_tmp_2_3); + q_acc_2_4 = _mm256_add_pd(q_acc_2_4,q_tmp_2_4); + q_acc_2_5 = _mm256_add_pd(q_acc_2_5,q_tmp_2_5); + q_acc_2_6 = _mm256_add_pd(q_acc_2_6,q_tmp_2_6); + q_acc_2_7 = _mm256_add_pd(q_acc_2_7,q_tmp_2_7); + q_acc_2_8 = _mm256_add_pd(q_acc_2_8,q_tmp_2_8); + q_acc_2_9 = _mm256_add_pd(q_acc_2_9,q_tmp_2_9); + q_acc_2_10 = _mm256_add_pd(q_acc_2_10,q_tmp_2_10); + q_acc_2_11 = _mm256_add_pd(q_acc_2_11,q_tmp_2_11); + } + const __m256d elMatVec_0 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(q_acc_0_0,src_dof_0),_mm256_mul_pd(q_acc_0_1,src_dof_1)),_mm256_mul_pd(q_acc_0_10,src_dof_10)),_mm256_mul_pd(q_acc_0_11,src_dof_11)),_mm256_mul_pd(q_acc_0_2,src_dof_2)),_mm256_mul_pd(q_acc_0_3,src_dof_3)),_mm256_mul_pd(q_acc_0_4,src_dof_4)),_mm256_mul_pd(q_acc_0_5,src_dof_5)),_mm256_mul_pd(q_acc_0_6,src_dof_6)),_mm256_mul_pd(q_acc_0_7,src_dof_7)),_mm256_mul_pd(q_acc_0_8,src_dof_8)),_mm256_mul_pd(q_acc_0_9,src_dof_9)); + const __m256d elMatVec_1 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(q_acc_1_0,src_dof_0),_mm256_mul_pd(q_acc_1_1,src_dof_1)),_mm256_mul_pd(q_acc_1_10,src_dof_10)),_mm256_mul_pd(q_acc_1_11,src_dof_11)),_mm256_mul_pd(q_acc_1_2,src_dof_2)),_mm256_mul_pd(q_acc_1_3,src_dof_3)),_mm256_mul_pd(q_acc_1_4,src_dof_4)),_mm256_mul_pd(q_acc_1_5,src_dof_5)),_mm256_mul_pd(q_acc_1_6,src_dof_6)),_mm256_mul_pd(q_acc_1_7,src_dof_7)),_mm256_mul_pd(q_acc_1_8,src_dof_8)),_mm256_mul_pd(q_acc_1_9,src_dof_9)); + const __m256d elMatVec_2 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(q_acc_2_0,src_dof_0),_mm256_mul_pd(q_acc_2_1,src_dof_1)),_mm256_mul_pd(q_acc_2_10,src_dof_10)),_mm256_mul_pd(q_acc_2_11,src_dof_11)),_mm256_mul_pd(q_acc_2_2,src_dof_2)),_mm256_mul_pd(q_acc_2_3,src_dof_3)),_mm256_mul_pd(q_acc_2_4,src_dof_4)),_mm256_mul_pd(q_acc_2_5,src_dof_5)),_mm256_mul_pd(q_acc_2_6,src_dof_6)),_mm256_mul_pd(q_acc_2_7,src_dof_7)),_mm256_mul_pd(q_acc_2_8,src_dof_8)),_mm256_mul_pd(q_acc_2_9,src_dof_9)); + _mm256_storeu_pd(&_data_dst[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1],_mm256_add_pd(elMatVec_0,_mm256_loadu_pd(& _data_dst[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]))); + _mm256_storeu_pd(&_data_dst[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))],_mm256_add_pd(elMatVec_1,_mm256_loadu_pd(& _data_dst[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]))); + _mm256_storeu_pd(&_data_dst[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1],_mm256_add_pd(elMatVec_2,_mm256_loadu_pd(& _data_dst[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1]))); + } + for (int64_t ctr_0 = (int64_t)((-ctr_1 + micro_edges_per_macro_edge - 1) / (4)) * (4); ctr_0 < -ctr_1 + micro_edges_per_macro_edge - 1; ctr_0 += 1) + { + + const int64_t phantom_ctr_0 = ctr_0; + real_t _data_float_loop_ctr_array_dim_0[4]; + _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0; + _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1; + _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2; + _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3; + real_t _data_float_loop_ctr_array_dim_1[4]; + _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1; + + const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]; + const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]; + const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0; + const real_t src_dof_0 = _data_src_vertex_0[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]; + const real_t src_dof_1 = _data_src_vertex_0[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]; + const real_t src_dof_2 = _data_src_vertex_0[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1]; + const real_t src_dof_3 = _data_src_edge_0[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]; + const real_t src_dof_4 = _data_src_edge_0[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1]; + const real_t src_dof_5 = _data_src_edge_0[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]; + const real_t src_dof_6 = _data_src_vertex_1[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]; + const real_t src_dof_7 = _data_src_vertex_1[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]; + const real_t src_dof_8 = _data_src_vertex_1[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1]; + const real_t src_dof_9 = _data_src_edge_1[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]; + const real_t src_dof_10 = _data_src_edge_1[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1]; + const real_t src_dof_11 = _data_src_edge_1[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]; + const real_t rho_dof_0 = _data_rhoVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]; + const real_t rho_dof_1 = _data_rhoVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]; + const real_t rho_dof_2 = _data_rhoVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1]; + const real_t rho_dof_3 = _data_rhoEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]; + const real_t rho_dof_4 = _data_rhoEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1]; + const real_t rho_dof_5 = _data_rhoEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]; + real_t q_acc_0_0 = 0.0; + real_t q_acc_0_1 = 0.0; + real_t q_acc_0_2 = 0.0; + real_t q_acc_0_3 = 0.0; + real_t q_acc_0_4 = 0.0; + real_t q_acc_0_5 = 0.0; + real_t q_acc_0_6 = 0.0; + real_t q_acc_0_7 = 0.0; + real_t q_acc_0_8 = 0.0; + real_t q_acc_0_9 = 0.0; + real_t q_acc_0_10 = 0.0; + real_t q_acc_0_11 = 0.0; + real_t q_acc_1_0 = 0.0; + real_t q_acc_1_1 = 0.0; + real_t q_acc_1_2 = 0.0; + real_t q_acc_1_3 = 0.0; + real_t q_acc_1_4 = 0.0; + real_t q_acc_1_5 = 0.0; + real_t q_acc_1_6 = 0.0; + real_t q_acc_1_7 = 0.0; + real_t q_acc_1_8 = 0.0; + real_t q_acc_1_9 = 0.0; + real_t q_acc_1_10 = 0.0; + real_t q_acc_1_11 = 0.0; + real_t q_acc_2_0 = 0.0; + real_t q_acc_2_1 = 0.0; + real_t q_acc_2_2 = 0.0; + real_t q_acc_2_3 = 0.0; + real_t q_acc_2_4 = 0.0; + real_t q_acc_2_5 = 0.0; + real_t q_acc_2_6 = 0.0; + real_t q_acc_2_7 = 0.0; + real_t q_acc_2_8 = 0.0; + real_t q_acc_2_9 = 0.0; + real_t q_acc_2_10 = 0.0; + real_t q_acc_2_11 = 0.0; + for (int64_t q = 0; q < 4; q += 1) + { + const real_t tmp_qloop_0 = p_affine_0_0 + (-p_affine_0_0 + p_affine_1_0)*_data_q_p_0[q] + (-p_affine_0_0 + p_affine_2_0)*_data_q_p_1[q]; + const real_t tmp_qloop_2 = (tmp_qloop_0*tmp_qloop_0); + const real_t tmp_qloop_3 = p_affine_0_1 + (-p_affine_0_1 + p_affine_1_1)*_data_q_p_0[q] + (-p_affine_0_1 + p_affine_2_1)*_data_q_p_1[q]; + const real_t tmp_qloop_4 = (tmp_qloop_3*tmp_qloop_3); + const real_t tmp_qloop_5 = tmp_qloop_2 + tmp_qloop_4; + const real_t tmp_qloop_6 = pow(tmp_qloop_5, -0.50000000000000000); + const real_t tmp_qloop_13 = tmp_qloop_12*tmp_qloop_6; + const real_t tmp_qloop_14 = tmp_qloop_1*tmp_qloop_13; + const real_t tmp_qloop_15 = pow(tmp_qloop_5, -1.5000000000000000); + const real_t tmp_qloop_16 = radRayVertex + tmp_qloop_11*(tmp_qloop_1*(-rayVertex_0 + tmp_qloop_0) - tmp_qloop_8*(-rayVertex_1 + tmp_qloop_3)); + const real_t tmp_qloop_17 = tmp_qloop_15*tmp_qloop_16; + const real_t tmp_qloop_18 = tmp_qloop_17*1.0; + const real_t tmp_qloop_19 = tmp_qloop_13*tmp_qloop_8; + const real_t tmp_qloop_20 = tmp_qloop_0*tmp_qloop_18; + const real_t tmp_qloop_23 = tmp_qloop_12*tmp_qloop_15; + const real_t tmp_qloop_24 = tmp_qloop_1*tmp_qloop_23; + const real_t tmp_qloop_25 = tmp_qloop_2*tmp_qloop_24; + const real_t tmp_qloop_26 = tmp_qloop_16*pow(tmp_qloop_5, -2.5000000000000000)*3.0; + const real_t tmp_qloop_27 = tmp_qloop_0*tmp_qloop_26*tmp_qloop_4; + const real_t tmp_qloop_28 = tmp_qloop_24*tmp_qloop_4 - tmp_qloop_27; + const real_t tmp_qloop_29 = tmp_qloop_23*tmp_qloop_7; + const real_t tmp_qloop_30 = tmp_qloop_0*tmp_qloop_3; + const real_t tmp_qloop_31 = tmp_qloop_24*tmp_qloop_30; + const real_t tmp_qloop_32 = tmp_qloop_17*2.0; + const real_t tmp_qloop_33 = tmp_qloop_2*tmp_qloop_26*tmp_qloop_3; + const real_t tmp_qloop_34 = tmp_qloop_18*tmp_qloop_3 - tmp_qloop_33; + const real_t tmp_qloop_35 = tmp_qloop_29*tmp_qloop_30; + const real_t tmp_qloop_36 = tmp_qloop_23*tmp_qloop_30*tmp_qloop_8; + const real_t tmp_qloop_37 = 4.0*_data_q_p_0[q]; + const real_t tmp_qloop_38 = tmp_qloop_37*_data_q_p_1[q]; + const real_t tmp_qloop_39 = (_data_q_p_0[q]*_data_q_p_0[q]); + const real_t tmp_qloop_40 = tmp_qloop_39*2.0; + const real_t tmp_qloop_41 = (_data_q_p_1[q]*_data_q_p_1[q]); + const real_t tmp_qloop_42 = tmp_qloop_41*2.0; + const real_t tmp_qloop_43 = tmp_qloop_38 + tmp_qloop_40 + tmp_qloop_42 - 3.0*_data_q_p_0[q] - 3.0*_data_q_p_1[q] + 1.0; + const real_t tmp_qloop_44 = 4.0*_data_q_p_1[q]; + const real_t tmp_qloop_45 = rho_dof_0*(tmp_qloop_37 + tmp_qloop_44 - 3.0); + const real_t tmp_qloop_46 = rho_dof_1*(tmp_qloop_37 - 1.0) + rho_dof_3*tmp_qloop_44 - rho_dof_4*tmp_qloop_44 + rho_dof_5*(-tmp_qloop_44 - 8.0*_data_q_p_0[q] + 4.0) + tmp_qloop_45; + const real_t tmp_qloop_47 = rho_dof_2*(tmp_qloop_44 - 1.0) + rho_dof_3*tmp_qloop_37 + rho_dof_4*(-tmp_qloop_37 - 8.0*_data_q_p_1[q] + 4.0) - rho_dof_5*tmp_qloop_37 + tmp_qloop_45; + const real_t tmp_qloop_49 = tmp_qloop_40 - _data_q_p_0[q]; + const real_t tmp_qloop_50 = tmp_qloop_42 - _data_q_p_1[q]; + const real_t tmp_qloop_51 = -tmp_qloop_38 + tmp_qloop_41*-4.0 + tmp_qloop_44; + const real_t tmp_qloop_52 = tmp_qloop_39*4.0; + const real_t tmp_qloop_53 = tmp_qloop_37 - tmp_qloop_38 - tmp_qloop_52; + const real_t jac_blending_0_0 = tmp_qloop_0*tmp_qloop_14 + tmp_qloop_18*tmp_qloop_4; + const real_t jac_blending_0_1 = -tmp_qloop_0*tmp_qloop_15*tmp_qloop_16*tmp_qloop_3 - tmp_qloop_0*tmp_qloop_19; + const real_t jac_blending_1_0 = tmp_qloop_14*tmp_qloop_3 - tmp_qloop_20*tmp_qloop_3; + const real_t jac_blending_1_1 = tmp_qloop_15*tmp_qloop_16*tmp_qloop_2*1.0 - tmp_qloop_19*tmp_qloop_3; + const real_t tmp_qloop_21 = jac_blending_0_0*jac_blending_1_1 - jac_blending_0_1*jac_blending_1_0; + const real_t tmp_qloop_22 = 1.0 / (tmp_qloop_21); + const real_t abs_det_jac_blending = tmp_qloop_21; + const real_t tmp_qloop_54 = abs_det_jac_affine_BLUE*abs_det_jac_blending*1.0 / (rho_dof_0*tmp_qloop_43 + rho_dof_1*tmp_qloop_49 + rho_dof_2*tmp_qloop_50 + rho_dof_3*tmp_qloop_38 + rho_dof_4*tmp_qloop_51 + rho_dof_5*tmp_qloop_53)*_data_q_w[q]; + const real_t tmp_qloop_55 = tmp_qloop_54*(1.0 - _data_q_p_0[q] - _data_q_p_1[q]); + const real_t tmp_qloop_59 = tmp_qloop_54*_data_q_p_0[q]; + const real_t tmp_qloop_61 = tmp_qloop_54*_data_q_p_1[q]; + const real_t tmp_qloop_65 = tmp_qloop_37*tmp_qloop_41*tmp_qloop_54; + const real_t jac_blending_inv_0_0 = jac_blending_1_1*tmp_qloop_22; + const real_t jac_blending_inv_0_1 = -jac_blending_0_1*tmp_qloop_22; + const real_t jac_blending_inv_1_0 = -jac_blending_1_0*tmp_qloop_22; + const real_t tmp_qloop_48 = tmp_qloop_46*(jac_affine_inv_0_0_BLUE*jac_blending_inv_0_0 + jac_affine_inv_0_1_BLUE*jac_blending_inv_1_0) + tmp_qloop_47*(jac_affine_inv_1_0_BLUE*jac_blending_inv_0_0 + jac_affine_inv_1_1_BLUE*jac_blending_inv_1_0); + const real_t tmp_qloop_56 = tmp_qloop_48*tmp_qloop_55; + const real_t tmp_qloop_60 = tmp_qloop_48*tmp_qloop_59; + const real_t tmp_qloop_62 = tmp_qloop_48*tmp_qloop_61; + const real_t jac_blending_inv_1_1 = jac_blending_0_0*tmp_qloop_22; + const real_t tmp_qloop_57 = tmp_qloop_46*(jac_affine_inv_0_0_BLUE*jac_blending_inv_0_1 + jac_affine_inv_0_1_BLUE*jac_blending_inv_1_1) + tmp_qloop_47*(jac_affine_inv_1_0_BLUE*jac_blending_inv_0_1 + jac_affine_inv_1_1_BLUE*jac_blending_inv_1_1); + const real_t tmp_qloop_58 = tmp_qloop_55*tmp_qloop_57; + const real_t tmp_qloop_63 = tmp_qloop_57*tmp_qloop_59; + const real_t tmp_qloop_64 = tmp_qloop_57*tmp_qloop_61; + const real_t hessian_blending_0_0_0 = tmp_qloop_14 - tmp_qloop_25 + tmp_qloop_28; + const real_t hessian_blending_1_0_0 = -tmp_qloop_26*(tmp_qloop_3*tmp_qloop_3*tmp_qloop_3) + tmp_qloop_29*tmp_qloop_4 + tmp_qloop_3*tmp_qloop_32 - tmp_qloop_31; + const real_t hessian_blending_0_0_1 = tmp_qloop_1*tmp_qloop_11*tmp_qloop_15*tmp_qloop_30*-2.0 - tmp_qloop_34; + const real_t hessian_blending_1_0_1 = tmp_qloop_1*tmp_qloop_10*tmp_qloop_6*tmp_qloop_9*1.0 - tmp_qloop_20 - tmp_qloop_28 - tmp_qloop_35; + const real_t hessian_blending_0_1_0 = tmp_qloop_10*tmp_qloop_15*tmp_qloop_2*tmp_qloop_8*tmp_qloop_9*1.0 - tmp_qloop_19 - tmp_qloop_31 - tmp_qloop_34; + const real_t hessian_blending_1_1_0 = -tmp_qloop_20 + tmp_qloop_27 - tmp_qloop_35 + tmp_qloop_36; + const real_t hessian_blending_0_1_1 = tmp_qloop_0*tmp_qloop_32 - (tmp_qloop_0*tmp_qloop_0*tmp_qloop_0)*tmp_qloop_26 + tmp_qloop_25 + tmp_qloop_36; + const real_t hessian_blending_1_1_1 = tmp_qloop_10*tmp_qloop_15*tmp_qloop_2*tmp_qloop_7*tmp_qloop_9*1.0 + tmp_qloop_10*tmp_qloop_15*tmp_qloop_4*tmp_qloop_8*tmp_qloop_9*1.0 - tmp_qloop_19 - tmp_qloop_33; + const real_t q_tmp_0_0 = tmp_qloop_43*tmp_qloop_56; + const real_t q_tmp_0_1 = tmp_qloop_49*tmp_qloop_56; + const real_t q_tmp_0_2 = tmp_qloop_50*tmp_qloop_56; + const real_t q_tmp_0_3 = tmp_qloop_38*tmp_qloop_56; + const real_t q_tmp_0_4 = tmp_qloop_51*tmp_qloop_56; + const real_t q_tmp_0_5 = tmp_qloop_53*tmp_qloop_56; + const real_t q_tmp_0_6 = tmp_qloop_43*tmp_qloop_58; + const real_t q_tmp_0_7 = tmp_qloop_49*tmp_qloop_58; + const real_t q_tmp_0_8 = tmp_qloop_50*tmp_qloop_58; + const real_t q_tmp_0_9 = tmp_qloop_38*tmp_qloop_58; + const real_t q_tmp_0_10 = tmp_qloop_51*tmp_qloop_58; + const real_t q_tmp_0_11 = tmp_qloop_53*tmp_qloop_58; + const real_t q_tmp_1_0 = tmp_qloop_43*tmp_qloop_60; + const real_t q_tmp_1_1 = tmp_qloop_49*tmp_qloop_60; + const real_t q_tmp_1_2 = tmp_qloop_50*tmp_qloop_60; + const real_t q_tmp_1_3 = tmp_qloop_52*tmp_qloop_62; + const real_t q_tmp_1_4 = tmp_qloop_51*tmp_qloop_60; + const real_t q_tmp_1_5 = tmp_qloop_53*tmp_qloop_60; + const real_t q_tmp_1_6 = tmp_qloop_43*tmp_qloop_63; + const real_t q_tmp_1_7 = tmp_qloop_49*tmp_qloop_63; + const real_t q_tmp_1_8 = tmp_qloop_50*tmp_qloop_63; + const real_t q_tmp_1_9 = tmp_qloop_52*tmp_qloop_64; + const real_t q_tmp_1_10 = tmp_qloop_51*tmp_qloop_63; + const real_t q_tmp_1_11 = tmp_qloop_53*tmp_qloop_63; + const real_t q_tmp_2_0 = tmp_qloop_43*tmp_qloop_62; + const real_t q_tmp_2_1 = tmp_qloop_49*tmp_qloop_62; + const real_t q_tmp_2_2 = tmp_qloop_50*tmp_qloop_62; + const real_t q_tmp_2_3 = tmp_qloop_48*tmp_qloop_65; + const real_t q_tmp_2_4 = tmp_qloop_51*tmp_qloop_62; + const real_t q_tmp_2_5 = tmp_qloop_53*tmp_qloop_62; + const real_t q_tmp_2_6 = tmp_qloop_43*tmp_qloop_64; + const real_t q_tmp_2_7 = tmp_qloop_49*tmp_qloop_64; + const real_t q_tmp_2_8 = tmp_qloop_50*tmp_qloop_64; + const real_t q_tmp_2_9 = tmp_qloop_57*tmp_qloop_65; + const real_t q_tmp_2_10 = tmp_qloop_51*tmp_qloop_64; + const real_t q_tmp_2_11 = tmp_qloop_53*tmp_qloop_64; + q_acc_0_0 = q_acc_0_0 + q_tmp_0_0; + q_acc_0_1 = q_acc_0_1 + q_tmp_0_1; + q_acc_0_2 = q_acc_0_2 + q_tmp_0_2; + q_acc_0_3 = q_acc_0_3 + q_tmp_0_3; + q_acc_0_4 = q_acc_0_4 + q_tmp_0_4; + q_acc_0_5 = q_acc_0_5 + q_tmp_0_5; + q_acc_0_6 = q_acc_0_6 + q_tmp_0_6; + q_acc_0_7 = q_acc_0_7 + q_tmp_0_7; + q_acc_0_8 = q_acc_0_8 + q_tmp_0_8; + q_acc_0_9 = q_acc_0_9 + q_tmp_0_9; + q_acc_0_10 = q_acc_0_10 + q_tmp_0_10; + q_acc_0_11 = q_acc_0_11 + q_tmp_0_11; + q_acc_1_0 = q_acc_1_0 + q_tmp_1_0; + q_acc_1_1 = q_acc_1_1 + q_tmp_1_1; + q_acc_1_2 = q_acc_1_2 + q_tmp_1_2; + q_acc_1_3 = q_acc_1_3 + q_tmp_1_3; + q_acc_1_4 = q_acc_1_4 + q_tmp_1_4; + q_acc_1_5 = q_acc_1_5 + q_tmp_1_5; + q_acc_1_6 = q_acc_1_6 + q_tmp_1_6; + q_acc_1_7 = q_acc_1_7 + q_tmp_1_7; + q_acc_1_8 = q_acc_1_8 + q_tmp_1_8; + q_acc_1_9 = q_acc_1_9 + q_tmp_1_9; + q_acc_1_10 = q_acc_1_10 + q_tmp_1_10; + q_acc_1_11 = q_acc_1_11 + q_tmp_1_11; + q_acc_2_0 = q_acc_2_0 + q_tmp_2_0; + q_acc_2_1 = q_acc_2_1 + q_tmp_2_1; + q_acc_2_2 = q_acc_2_2 + q_tmp_2_2; + q_acc_2_3 = q_acc_2_3 + q_tmp_2_3; + q_acc_2_4 = q_acc_2_4 + q_tmp_2_4; + q_acc_2_5 = q_acc_2_5 + q_tmp_2_5; + q_acc_2_6 = q_acc_2_6 + q_tmp_2_6; + q_acc_2_7 = q_acc_2_7 + q_tmp_2_7; + q_acc_2_8 = q_acc_2_8 + q_tmp_2_8; + q_acc_2_9 = q_acc_2_9 + q_tmp_2_9; + q_acc_2_10 = q_acc_2_10 + q_tmp_2_10; + q_acc_2_11 = q_acc_2_11 + q_tmp_2_11; + } + const real_t elMatVec_0 = q_acc_0_0*src_dof_0 + q_acc_0_1*src_dof_1 + q_acc_0_10*src_dof_10 + q_acc_0_11*src_dof_11 + q_acc_0_2*src_dof_2 + q_acc_0_3*src_dof_3 + q_acc_0_4*src_dof_4 + q_acc_0_5*src_dof_5 + q_acc_0_6*src_dof_6 + q_acc_0_7*src_dof_7 + q_acc_0_8*src_dof_8 + q_acc_0_9*src_dof_9; + const real_t elMatVec_1 = q_acc_1_0*src_dof_0 + q_acc_1_1*src_dof_1 + q_acc_1_10*src_dof_10 + q_acc_1_11*src_dof_11 + q_acc_1_2*src_dof_2 + q_acc_1_3*src_dof_3 + q_acc_1_4*src_dof_4 + q_acc_1_5*src_dof_5 + q_acc_1_6*src_dof_6 + q_acc_1_7*src_dof_7 + q_acc_1_8*src_dof_8 + q_acc_1_9*src_dof_9; + const real_t elMatVec_2 = q_acc_2_0*src_dof_0 + q_acc_2_1*src_dof_1 + q_acc_2_10*src_dof_10 + q_acc_2_11*src_dof_11 + q_acc_2_2*src_dof_2 + q_acc_2_3*src_dof_3 + q_acc_2_4*src_dof_4 + q_acc_2_5*src_dof_5 + q_acc_2_6*src_dof_6 + q_acc_2_7*src_dof_7 + q_acc_2_8*src_dof_8 + q_acc_2_9*src_dof_9; + _data_dst[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1] = elMatVec_0 + _data_dst[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]; + _data_dst[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))] = elMatVec_1 + _data_dst[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]; + _data_dst[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1] = elMatVec_2 + _data_dst[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1]; + } + } + } + } +} +} // namespace operatorgeneration + +} // namespace hyteg diff --git a/operators/grad_rho_by_rho_dot_u/avx/P2VectorToP1ElementwiseGradRhoByRhoDotUIcosahedralShellMap_apply_P2VectorToP1ElementwiseGradRhoByRhoDotUIcosahedralShellMap_macro_3D.cpp b/operators/grad_rho_by_rho_dot_u/avx/P2VectorToP1ElementwiseGradRhoByRhoDotUIcosahedralShellMap_apply_P2VectorToP1ElementwiseGradRhoByRhoDotUIcosahedralShellMap_macro_3D.cpp new file mode 100644 index 0000000000000000000000000000000000000000..a5d573edee26692fc847a2a273ceca980c383ac6 --- /dev/null +++ b/operators/grad_rho_by_rho_dot_u/avx/P2VectorToP1ElementwiseGradRhoByRhoDotUIcosahedralShellMap_apply_P2VectorToP1ElementwiseGradRhoByRhoDotUIcosahedralShellMap_macro_3D.cpp @@ -0,0 +1,7929 @@ +/* +* Copyright (c) 2017-2024 Nils Kohl, Daniel Bauer, Fabian Böhm. +* +* This file is part of HyTeG +* (see https://i10git.cs.fau.de/hyteg/hyteg). +* +* This program is free software: you can redistribute it and/or modify +* it under the terms of the GNU General Public License as published by +* the Free Software Foundation, either version 3 of the License, or +* (at your option) any later version. +* +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU General Public License for more details. +* +* You should have received a copy of the GNU General Public License +* along with this program. If not, see <http://www.gnu.org/licenses/>. +*/ + +/* +* The entire file was generated with the HyTeG Operator Generator. +* +* Avoid modifying this file. If buggy, consider fixing the generator itself. +*/ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +#include "../P2VectorToP1ElementwiseGradRhoByRhoDotUIcosahedralShellMap.hpp" + +#define FUNC_PREFIX + +namespace hyteg { + +namespace operatorgeneration { + +void P2VectorToP1ElementwiseGradRhoByRhoDotUIcosahedralShellMap::apply_P2VectorToP1ElementwiseGradRhoByRhoDotUIcosahedralShellMap_macro_3D( real_t * RESTRICT _data_dst, real_t * RESTRICT _data_rhoEdge, real_t * RESTRICT _data_rhoVertex, real_t * RESTRICT _data_src_edge_0, real_t * RESTRICT _data_src_edge_1, real_t * RESTRICT _data_src_edge_2, real_t * RESTRICT _data_src_vertex_0, real_t * RESTRICT _data_src_vertex_1, real_t * RESTRICT _data_src_vertex_2, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const +{ + { + const real_t _data_q_w [] = {-0.1333333333333333, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983}; + + const real_t _data_q_p_0 [] = {0.25, 0.16666666666666666, 0.16666666666666666, 0.5, 0.16666666666666666}; + + const real_t _data_q_p_1 [] = {0.25, 0.16666666666666666, 0.5, 0.16666666666666666, 0.16666666666666666}; + + const real_t _data_q_p_2 [] = {0.25, 0.5, 0.16666666666666666, 0.16666666666666666, 0.16666666666666666}; + + const real_t tmp_coords_jac_0_WHITE_UP = 1.0 / (micro_edges_per_macro_edge_float)*1.0; + const real_t p_affine_const_0_0_WHITE_UP = macro_vertex_coord_id_0comp0; + const real_t p_affine_const_0_1_WHITE_UP = macro_vertex_coord_id_0comp1; + const real_t p_affine_const_0_2_WHITE_UP = macro_vertex_coord_id_0comp2; + const real_t p_affine_const_1_0_WHITE_UP = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_WHITE_UP*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0); + const real_t p_affine_const_1_1_WHITE_UP = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_WHITE_UP*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1); + const real_t p_affine_const_1_2_WHITE_UP = macro_vertex_coord_id_0comp2 + tmp_coords_jac_0_WHITE_UP*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2); + const real_t p_affine_const_2_0_WHITE_UP = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_WHITE_UP*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0); + const real_t p_affine_const_2_1_WHITE_UP = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_WHITE_UP*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1); + const real_t p_affine_const_2_2_WHITE_UP = macro_vertex_coord_id_0comp2 + tmp_coords_jac_0_WHITE_UP*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2); + const real_t p_affine_const_3_0_WHITE_UP = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_WHITE_UP*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0); + const real_t p_affine_const_3_1_WHITE_UP = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_WHITE_UP*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1); + const real_t p_affine_const_3_2_WHITE_UP = macro_vertex_coord_id_0comp2 + tmp_coords_jac_0_WHITE_UP*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2); + const real_t jac_affine_0_0_WHITE_UP = -p_affine_const_0_0_WHITE_UP + p_affine_const_1_0_WHITE_UP; + const real_t jac_affine_0_1_WHITE_UP = -p_affine_const_0_0_WHITE_UP + p_affine_const_2_0_WHITE_UP; + const real_t jac_affine_0_2_WHITE_UP = -p_affine_const_0_0_WHITE_UP + p_affine_const_3_0_WHITE_UP; + const real_t jac_affine_1_0_WHITE_UP = -p_affine_const_0_1_WHITE_UP + p_affine_const_1_1_WHITE_UP; + const real_t jac_affine_1_1_WHITE_UP = -p_affine_const_0_1_WHITE_UP + p_affine_const_2_1_WHITE_UP; + const real_t tmp_coords_jac_5_WHITE_UP = jac_affine_0_2_WHITE_UP*jac_affine_1_1_WHITE_UP; + const real_t jac_affine_1_2_WHITE_UP = -p_affine_const_0_1_WHITE_UP + p_affine_const_3_1_WHITE_UP; + const real_t tmp_coords_jac_3_WHITE_UP = jac_affine_0_1_WHITE_UP*jac_affine_1_2_WHITE_UP; + const real_t jac_affine_2_0_WHITE_UP = -p_affine_const_0_2_WHITE_UP + p_affine_const_1_2_WHITE_UP; + const real_t jac_affine_2_1_WHITE_UP = -p_affine_const_0_2_WHITE_UP + p_affine_const_2_2_WHITE_UP; + const real_t tmp_coords_jac_2_WHITE_UP = jac_affine_1_2_WHITE_UP*jac_affine_2_1_WHITE_UP; + const real_t jac_affine_2_2_WHITE_UP = -p_affine_const_0_2_WHITE_UP + p_affine_const_3_2_WHITE_UP; + const real_t tmp_coords_jac_1_WHITE_UP = jac_affine_1_1_WHITE_UP*jac_affine_2_2_WHITE_UP; + const real_t tmp_coords_jac_4_WHITE_UP = jac_affine_0_1_WHITE_UP*jac_affine_2_2_WHITE_UP; + const real_t tmp_coords_jac_6_WHITE_UP = jac_affine_0_0_WHITE_UP*tmp_coords_jac_1_WHITE_UP - jac_affine_0_0_WHITE_UP*tmp_coords_jac_2_WHITE_UP + jac_affine_0_2_WHITE_UP*jac_affine_1_0_WHITE_UP*jac_affine_2_1_WHITE_UP - jac_affine_1_0_WHITE_UP*tmp_coords_jac_4_WHITE_UP + jac_affine_2_0_WHITE_UP*tmp_coords_jac_3_WHITE_UP - jac_affine_2_0_WHITE_UP*tmp_coords_jac_5_WHITE_UP; + const real_t tmp_coords_jac_7_WHITE_UP = 1.0 / (tmp_coords_jac_6_WHITE_UP); + const real_t jac_affine_inv_0_0_WHITE_UP = tmp_coords_jac_7_WHITE_UP*(tmp_coords_jac_1_WHITE_UP - tmp_coords_jac_2_WHITE_UP); + const real_t jac_affine_inv_0_1_WHITE_UP = tmp_coords_jac_7_WHITE_UP*(jac_affine_0_2_WHITE_UP*jac_affine_2_1_WHITE_UP - tmp_coords_jac_4_WHITE_UP); + const real_t jac_affine_inv_0_2_WHITE_UP = tmp_coords_jac_7_WHITE_UP*(tmp_coords_jac_3_WHITE_UP - tmp_coords_jac_5_WHITE_UP); + const real_t jac_affine_inv_1_0_WHITE_UP = tmp_coords_jac_7_WHITE_UP*(-jac_affine_1_0_WHITE_UP*jac_affine_2_2_WHITE_UP + jac_affine_1_2_WHITE_UP*jac_affine_2_0_WHITE_UP); + const real_t jac_affine_inv_1_1_WHITE_UP = tmp_coords_jac_7_WHITE_UP*(jac_affine_0_0_WHITE_UP*jac_affine_2_2_WHITE_UP - jac_affine_0_2_WHITE_UP*jac_affine_2_0_WHITE_UP); + const real_t jac_affine_inv_1_2_WHITE_UP = tmp_coords_jac_7_WHITE_UP*(-jac_affine_0_0_WHITE_UP*jac_affine_1_2_WHITE_UP + jac_affine_0_2_WHITE_UP*jac_affine_1_0_WHITE_UP); + const real_t jac_affine_inv_2_0_WHITE_UP = tmp_coords_jac_7_WHITE_UP*(jac_affine_1_0_WHITE_UP*jac_affine_2_1_WHITE_UP - jac_affine_1_1_WHITE_UP*jac_affine_2_0_WHITE_UP); + const real_t jac_affine_inv_2_1_WHITE_UP = tmp_coords_jac_7_WHITE_UP*(-jac_affine_0_0_WHITE_UP*jac_affine_2_1_WHITE_UP + jac_affine_0_1_WHITE_UP*jac_affine_2_0_WHITE_UP); + const real_t jac_affine_inv_2_2_WHITE_UP = tmp_coords_jac_7_WHITE_UP*(jac_affine_0_0_WHITE_UP*jac_affine_1_1_WHITE_UP - jac_affine_0_1_WHITE_UP*jac_affine_1_0_WHITE_UP); + const real_t abs_det_jac_affine_WHITE_UP = abs(tmp_coords_jac_6_WHITE_UP); + const real_t tmp_qloop_0 = rayVertex_1 - refVertex_1; + const real_t tmp_qloop_1 = -rayVertex_0; + const real_t tmp_qloop_2 = -forVertex_0 - tmp_qloop_1; + const real_t tmp_qloop_3 = rayVertex_2 - thrVertex_2; + const real_t tmp_qloop_4 = tmp_qloop_2*tmp_qloop_3; + const real_t tmp_qloop_5 = rayVertex_2 - refVertex_2; + const real_t tmp_qloop_6 = rayVertex_1 - thrVertex_1; + const real_t tmp_qloop_7 = tmp_qloop_2*tmp_qloop_6; + const real_t tmp_qloop_8 = -rayVertex_1; + const real_t tmp_qloop_9 = -forVertex_1 - tmp_qloop_8; + const real_t tmp_qloop_10 = rayVertex_0 - thrVertex_0; + const real_t tmp_qloop_11 = rayVertex_0 - refVertex_0; + const real_t tmp_qloop_12 = -rayVertex_2; + const real_t tmp_qloop_13 = -forVertex_2 - tmp_qloop_12; + const real_t tmp_qloop_14 = tmp_qloop_3*tmp_qloop_9; + const real_t tmp_qloop_15 = tmp_qloop_10*tmp_qloop_13; + const real_t tmp_qloop_16 = -tmp_qloop_0*tmp_qloop_15 + tmp_qloop_0*tmp_qloop_4 + tmp_qloop_10*tmp_qloop_5*tmp_qloop_9 + tmp_qloop_11*tmp_qloop_13*tmp_qloop_6 - tmp_qloop_11*tmp_qloop_14 - tmp_qloop_5*tmp_qloop_7; + const real_t tmp_qloop_17 = 1.0 / (tmp_qloop_16); + const real_t tmp_qloop_20 = radRayVertex*tmp_qloop_16; + const real_t tmp_qloop_21 = radRayVertex - radRefVertex; + const real_t tmp_qloop_32 = -tmp_qloop_13*tmp_qloop_6 + tmp_qloop_14; + const real_t tmp_qloop_33 = -tmp_qloop_21*tmp_qloop_32; + const real_t tmp_qloop_39 = tmp_qloop_10*tmp_qloop_13 - tmp_qloop_4; + const real_t tmp_qloop_40 = tmp_qloop_21*tmp_qloop_39; + const real_t tmp_qloop_45 = -tmp_qloop_10*tmp_qloop_9 + tmp_qloop_7; + const real_t tmp_qloop_46 = -tmp_qloop_21*tmp_qloop_45; + const real_t tmp_qloop_53 = -tmp_qloop_21*tmp_qloop_39; + const real_t tmp_qloop_68 = tmp_qloop_21*tmp_qloop_32; + const real_t tmp_qloop_72 = tmp_qloop_21*tmp_qloop_45; + { + /* CellType.WHITE_UP */ + for (int64_t ctr_2 = 0; ctr_2 < micro_edges_per_macro_edge; ctr_2 += 1) + for (int64_t ctr_1 = 0; ctr_1 < -ctr_2 + micro_edges_per_macro_edge; ctr_1 += 1) + { + for (int64_t ctr_0 = 0; ctr_0 < (int64_t)((-ctr_1 - ctr_2 + micro_edges_per_macro_edge) / (4)) * (4); ctr_0 += 4) + { + + const int64_t phantom_ctr_0 = ctr_0; + real_t _data_float_loop_ctr_array_dim_0[4]; + _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0; + _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1; + _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2; + _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3; + real_t _data_float_loop_ctr_array_dim_1[4]; + _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1; + real_t _data_float_loop_ctr_array_dim_2[4]; + _data_float_loop_ctr_array_dim_2[0] = (real_t) ctr_2; + _data_float_loop_ctr_array_dim_2[1] = (real_t) ctr_2; + _data_float_loop_ctr_array_dim_2[2] = (real_t) ctr_2; + _data_float_loop_ctr_array_dim_2[3] = (real_t) ctr_2; + + const __m256d p_affine_0_0 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_3comp0,macro_vertex_coord_id_3comp0,macro_vertex_coord_id_3comp0,macro_vertex_coord_id_3comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)); + const __m256d p_affine_0_1 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_3comp1,macro_vertex_coord_id_3comp1,macro_vertex_coord_id_3comp1,macro_vertex_coord_id_3comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)); + const __m256d p_affine_0_2 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)),_mm256_set_pd(macro_vertex_coord_id_1comp2,macro_vertex_coord_id_1comp2,macro_vertex_coord_id_1comp2,macro_vertex_coord_id_1comp2)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)),_mm256_set_pd(macro_vertex_coord_id_2comp2,macro_vertex_coord_id_2comp2,macro_vertex_coord_id_2comp2,macro_vertex_coord_id_2comp2)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)),_mm256_set_pd(macro_vertex_coord_id_3comp2,macro_vertex_coord_id_3comp2,macro_vertex_coord_id_3comp2,macro_vertex_coord_id_3comp2)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)); + const __m256d p_affine_1_0 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_3comp0,macro_vertex_coord_id_3comp0,macro_vertex_coord_id_3comp0,macro_vertex_coord_id_3comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)); + const __m256d p_affine_1_1 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_3comp1,macro_vertex_coord_id_3comp1,macro_vertex_coord_id_3comp1,macro_vertex_coord_id_3comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)); + const __m256d p_affine_1_2 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)),_mm256_set_pd(macro_vertex_coord_id_1comp2,macro_vertex_coord_id_1comp2,macro_vertex_coord_id_1comp2,macro_vertex_coord_id_1comp2)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)),_mm256_set_pd(macro_vertex_coord_id_2comp2,macro_vertex_coord_id_2comp2,macro_vertex_coord_id_2comp2,macro_vertex_coord_id_2comp2)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)),_mm256_set_pd(macro_vertex_coord_id_3comp2,macro_vertex_coord_id_3comp2,macro_vertex_coord_id_3comp2,macro_vertex_coord_id_3comp2)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)); + const __m256d p_affine_2_0 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_3comp0,macro_vertex_coord_id_3comp0,macro_vertex_coord_id_3comp0,macro_vertex_coord_id_3comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)); + const __m256d p_affine_2_1 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_3comp1,macro_vertex_coord_id_3comp1,macro_vertex_coord_id_3comp1,macro_vertex_coord_id_3comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)); + const __m256d p_affine_2_2 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)),_mm256_set_pd(macro_vertex_coord_id_1comp2,macro_vertex_coord_id_1comp2,macro_vertex_coord_id_1comp2,macro_vertex_coord_id_1comp2)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)),_mm256_set_pd(macro_vertex_coord_id_2comp2,macro_vertex_coord_id_2comp2,macro_vertex_coord_id_2comp2,macro_vertex_coord_id_2comp2)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)),_mm256_set_pd(macro_vertex_coord_id_3comp2,macro_vertex_coord_id_3comp2,macro_vertex_coord_id_3comp2,macro_vertex_coord_id_3comp2)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)); + const __m256d p_affine_3_0 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_3comp0,macro_vertex_coord_id_3comp0,macro_vertex_coord_id_3comp0,macro_vertex_coord_id_3comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)); + const __m256d p_affine_3_1 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_3comp1,macro_vertex_coord_id_3comp1,macro_vertex_coord_id_3comp1,macro_vertex_coord_id_3comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)); + const __m256d p_affine_3_2 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)),_mm256_set_pd(macro_vertex_coord_id_1comp2,macro_vertex_coord_id_1comp2,macro_vertex_coord_id_1comp2,macro_vertex_coord_id_1comp2)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)),_mm256_set_pd(macro_vertex_coord_id_2comp2,macro_vertex_coord_id_2comp2,macro_vertex_coord_id_2comp2,macro_vertex_coord_id_2comp2)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)),_mm256_set_pd(macro_vertex_coord_id_3comp2,macro_vertex_coord_id_3comp2,macro_vertex_coord_id_3comp2,macro_vertex_coord_id_3comp2)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)); + const __m256d src_dof_0 = _mm256_loadu_pd(& _data_src_vertex_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))]); + const __m256d src_dof_1 = _mm256_loadu_pd(& _data_src_vertex_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1]); + const __m256d src_dof_2 = _mm256_loadu_pd(& _data_src_vertex_0[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))]); + const __m256d src_dof_3 = _mm256_loadu_pd(& _data_src_vertex_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))]); + const __m256d src_dof_4 = _mm256_loadu_pd(& _data_src_edge_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]); + const __m256d src_dof_5 = _mm256_loadu_pd(& _data_src_edge_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 5*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]); + const __m256d src_dof_6 = _mm256_loadu_pd(& _data_src_edge_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 4*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]); + const __m256d src_dof_7 = _mm256_loadu_pd(& _data_src_edge_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 3*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]); + const __m256d src_dof_8 = _mm256_loadu_pd(& _data_src_edge_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]); + const __m256d src_dof_9 = _mm256_loadu_pd(& _data_src_edge_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]); + const __m256d src_dof_10 = _mm256_loadu_pd(& _data_src_vertex_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))]); + const __m256d src_dof_11 = _mm256_loadu_pd(& _data_src_vertex_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1]); + const __m256d src_dof_12 = _mm256_loadu_pd(& _data_src_vertex_1[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))]); + const __m256d src_dof_13 = _mm256_loadu_pd(& _data_src_vertex_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))]); + const __m256d src_dof_14 = _mm256_loadu_pd(& _data_src_edge_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]); + const __m256d src_dof_15 = _mm256_loadu_pd(& _data_src_edge_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 5*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]); + const __m256d src_dof_16 = _mm256_loadu_pd(& _data_src_edge_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 4*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]); + const __m256d src_dof_17 = _mm256_loadu_pd(& _data_src_edge_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 3*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]); + const __m256d src_dof_18 = _mm256_loadu_pd(& _data_src_edge_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]); + const __m256d src_dof_19 = _mm256_loadu_pd(& _data_src_edge_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]); + const __m256d src_dof_20 = _mm256_loadu_pd(& _data_src_vertex_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))]); + const __m256d src_dof_21 = _mm256_loadu_pd(& _data_src_vertex_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1]); + const __m256d src_dof_22 = _mm256_loadu_pd(& _data_src_vertex_2[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))]); + const __m256d src_dof_23 = _mm256_loadu_pd(& _data_src_vertex_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))]); + const __m256d src_dof_24 = _mm256_loadu_pd(& _data_src_edge_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]); + const __m256d src_dof_25 = _mm256_loadu_pd(& _data_src_edge_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 5*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]); + const __m256d src_dof_26 = _mm256_loadu_pd(& _data_src_edge_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 4*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]); + const __m256d src_dof_27 = _mm256_loadu_pd(& _data_src_edge_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 3*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]); + const __m256d src_dof_28 = _mm256_loadu_pd(& _data_src_edge_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]); + const __m256d src_dof_29 = _mm256_loadu_pd(& _data_src_edge_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]); + const __m256d rho_dof_0 = _mm256_loadu_pd(& _data_rhoVertex[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))]); + const __m256d rho_dof_1 = _mm256_loadu_pd(& _data_rhoVertex[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1]); + const __m256d rho_dof_2 = _mm256_loadu_pd(& _data_rhoVertex[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))]); + const __m256d rho_dof_3 = _mm256_loadu_pd(& _data_rhoVertex[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))]); + const __m256d rho_dof_4 = _mm256_loadu_pd(& _data_rhoEdge[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]); + const __m256d rho_dof_5 = _mm256_loadu_pd(& _data_rhoEdge[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 5*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]); + const __m256d rho_dof_6 = _mm256_loadu_pd(& _data_rhoEdge[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 4*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]); + const __m256d rho_dof_7 = _mm256_loadu_pd(& _data_rhoEdge[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 3*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]); + const __m256d rho_dof_8 = _mm256_loadu_pd(& _data_rhoEdge[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]); + const __m256d rho_dof_9 = _mm256_loadu_pd(& _data_rhoEdge[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]); + __m256d q_acc_0_0 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_1 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_2 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_3 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_4 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_5 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_6 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_7 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_8 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_9 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_10 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_11 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_12 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_13 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_14 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_15 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_16 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_17 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_18 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_19 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_20 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_21 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_22 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_23 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_24 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_25 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_26 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_27 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_28 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_29 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_0 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_1 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_2 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_3 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_4 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_5 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_6 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_7 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_8 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_9 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_10 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_11 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_12 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_13 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_14 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_15 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_16 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_17 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_18 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_19 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_20 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_21 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_22 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_23 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_24 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_25 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_26 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_27 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_28 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_29 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_0 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_1 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_2 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_3 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_4 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_5 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_6 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_7 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_8 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_9 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_10 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_11 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_12 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_13 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_14 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_15 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_16 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_17 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_18 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_19 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_20 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_21 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_22 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_23 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_24 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_25 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_26 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_27 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_28 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_29 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_0 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_1 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_2 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_3 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_4 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_5 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_6 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_7 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_8 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_9 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_10 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_11 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_12 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_13 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_14 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_15 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_16 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_17 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_18 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_19 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_20 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_21 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_22 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_23 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_24 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_25 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_26 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_27 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_28 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_29 = _mm256_set_pd(0.0,0.0,0.0,0.0); + for (int64_t q = 0; q < 5; q += 1) + { + const __m256d tmp_qloop_18 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(p_affine_0_0,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),p_affine_1_0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(p_affine_0_0,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),p_affine_2_0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]))),_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(p_affine_0_0,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),p_affine_3_0),_mm256_set_pd(_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q]))),p_affine_0_0); + const __m256d tmp_qloop_19 = _mm256_mul_pd(tmp_qloop_18,tmp_qloop_18); + const __m256d tmp_qloop_22 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(p_affine_0_2,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),p_affine_1_2),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(p_affine_0_2,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),p_affine_2_2),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]))),_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(p_affine_0_2,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),p_affine_3_2),_mm256_set_pd(_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q]))),p_affine_0_2); + const __m256d tmp_qloop_23 = _mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(tmp_qloop_12,tmp_qloop_12,tmp_qloop_12,tmp_qloop_12)),_mm256_mul_pd(tmp_qloop_22,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))); + const __m256d tmp_qloop_24 = _mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(tmp_qloop_1,tmp_qloop_1,tmp_qloop_1,tmp_qloop_1)),_mm256_mul_pd(tmp_qloop_18,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))); + const __m256d tmp_qloop_25 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(p_affine_0_1,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),p_affine_1_1),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(p_affine_0_1,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),p_affine_2_1),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]))),_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(p_affine_0_1,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),p_affine_3_1),_mm256_set_pd(_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q]))),p_affine_0_1); + const __m256d tmp_qloop_26 = _mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(tmp_qloop_8,tmp_qloop_8,tmp_qloop_8,tmp_qloop_8)),_mm256_mul_pd(tmp_qloop_25,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))); + const __m256d tmp_qloop_27 = _mm256_mul_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_24,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_qloop_14,tmp_qloop_14,tmp_qloop_14,tmp_qloop_14)),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_26,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_qloop_15,tmp_qloop_15,tmp_qloop_15,tmp_qloop_15))),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_23,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_qloop_7,tmp_qloop_7,tmp_qloop_7,tmp_qloop_7))),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_23,_mm256_set_pd(tmp_qloop_10,tmp_qloop_10,tmp_qloop_10,tmp_qloop_10)),_mm256_set_pd(tmp_qloop_9,tmp_qloop_9,tmp_qloop_9,tmp_qloop_9))),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_24,_mm256_set_pd(tmp_qloop_13,tmp_qloop_13,tmp_qloop_13,tmp_qloop_13)),_mm256_set_pd(tmp_qloop_6,tmp_qloop_6,tmp_qloop_6,tmp_qloop_6))),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_26,_mm256_set_pd(tmp_qloop_2,tmp_qloop_2,tmp_qloop_2,tmp_qloop_2)),_mm256_set_pd(tmp_qloop_3,tmp_qloop_3,tmp_qloop_3,tmp_qloop_3))),_mm256_set_pd(tmp_qloop_21,tmp_qloop_21,tmp_qloop_21,tmp_qloop_21)); + const __m256d tmp_qloop_28 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_27,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_qloop_20,tmp_qloop_20,tmp_qloop_20,tmp_qloop_20)); + const __m256d tmp_qloop_29 = _mm256_mul_pd(tmp_qloop_25,tmp_qloop_25); + const __m256d tmp_qloop_30 = _mm256_mul_pd(tmp_qloop_22,tmp_qloop_22); + const __m256d tmp_qloop_31 = _mm256_add_pd(_mm256_add_pd(tmp_qloop_19,tmp_qloop_29),tmp_qloop_30); + const __m256d tmp_qloop_34 = _mm256_mul_pd(tmp_qloop_18,_mm256_set_pd(tmp_qloop_33,tmp_qloop_33,tmp_qloop_33,tmp_qloop_33)); + const __m256d tmp_qloop_35 = _mm256_add_pd(tmp_qloop_28,tmp_qloop_34); + const __m256d tmp_qloop_36 = _mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_31,tmp_qloop_35),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_19,tmp_qloop_28),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_set_pd(tmp_qloop_17,tmp_qloop_17,tmp_qloop_17,tmp_qloop_17)); + const __m256d tmp_qloop_37 = _mm256_div_pd(_mm256_sqrt_pd(tmp_qloop_31),_mm256_mul_pd(tmp_qloop_31,tmp_qloop_31)); + const __m256d tmp_qloop_38 = _mm256_mul_pd(tmp_qloop_37,_mm256_set_pd(1.0,1.0,1.0,1.0)); + const __m256d tmp_qloop_41 = _mm256_mul_pd(tmp_qloop_31,_mm256_set_pd(tmp_qloop_40,tmp_qloop_40,tmp_qloop_40,tmp_qloop_40)); + const __m256d tmp_qloop_42 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_41,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_25,tmp_qloop_28),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))); + const __m256d tmp_qloop_43 = _mm256_mul_pd(tmp_qloop_38,_mm256_set_pd(tmp_qloop_17,tmp_qloop_17,tmp_qloop_17,tmp_qloop_17)); + const __m256d tmp_qloop_44 = _mm256_mul_pd(tmp_qloop_42,tmp_qloop_43); + const __m256d tmp_qloop_47 = _mm256_mul_pd(tmp_qloop_28,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)); + const __m256d tmp_qloop_48 = _mm256_mul_pd(tmp_qloop_22,tmp_qloop_47); + const __m256d tmp_qloop_49 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_31,_mm256_set_pd(tmp_qloop_46,tmp_qloop_46,tmp_qloop_46,tmp_qloop_46)),tmp_qloop_48); + const __m256d tmp_qloop_50 = _mm256_mul_pd(tmp_qloop_43,tmp_qloop_49); + const __m256d tmp_qloop_51 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_31,_mm256_set_pd(tmp_qloop_33,tmp_qloop_33,tmp_qloop_33,tmp_qloop_33)),_mm256_mul_pd(tmp_qloop_18,tmp_qloop_47)); + const __m256d tmp_qloop_52 = _mm256_mul_pd(tmp_qloop_43,tmp_qloop_51); + const __m256d tmp_qloop_54 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_25,_mm256_set_pd(tmp_qloop_53,tmp_qloop_53,tmp_qloop_53,tmp_qloop_53)),tmp_qloop_28); + const __m256d tmp_qloop_55 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_29,tmp_qloop_47),_mm256_mul_pd(tmp_qloop_31,tmp_qloop_54)); + const __m256d tmp_qloop_56 = _mm256_mul_pd(tmp_qloop_22,_mm256_set_pd(tmp_qloop_46,tmp_qloop_46,tmp_qloop_46,tmp_qloop_46)); + const __m256d tmp_qloop_57 = _mm256_add_pd(tmp_qloop_28,tmp_qloop_56); + const __m256d tmp_qloop_58 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_30,tmp_qloop_47),_mm256_mul_pd(tmp_qloop_31,tmp_qloop_57)); + const __m256d tmp_qloop_66 = _mm256_mul_pd(_mm256_div_pd(_mm256_sqrt_pd(tmp_qloop_31),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_31,tmp_qloop_31),tmp_qloop_31)),_mm256_set_pd(3.0,3.0,3.0,3.0)); + const __m256d tmp_qloop_67 = _mm256_mul_pd(tmp_qloop_36,tmp_qloop_66); + const __m256d tmp_qloop_69 = _mm256_mul_pd(tmp_qloop_18,_mm256_set_pd(2.0,2.0,2.0,2.0)); + const __m256d tmp_qloop_70 = _mm256_mul_pd(tmp_qloop_41,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)); + const __m256d tmp_qloop_71 = _mm256_mul_pd(tmp_qloop_35,_mm256_set_pd(2.0,2.0,2.0,2.0)); + const __m256d tmp_qloop_73 = _mm256_mul_pd(_mm256_mul_pd(tmp_qloop_31,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_qloop_72,tmp_qloop_72,tmp_qloop_72,tmp_qloop_72)); + const __m256d tmp_qloop_74 = _mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(tmp_qloop_20,tmp_qloop_20,tmp_qloop_20,tmp_qloop_20)),tmp_qloop_27); + const __m256d tmp_qloop_75 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_34,_mm256_set_pd(2.0,2.0,2.0,2.0)),_mm256_mul_pd(tmp_qloop_18,_mm256_set_pd(tmp_qloop_68,tmp_qloop_68,tmp_qloop_68,tmp_qloop_68))),tmp_qloop_74); + const __m256d tmp_qloop_76 = _mm256_mul_pd(tmp_qloop_66,_mm256_set_pd(tmp_qloop_17,tmp_qloop_17,tmp_qloop_17,tmp_qloop_17)); + const __m256d tmp_qloop_77 = _mm256_mul_pd(tmp_qloop_51,tmp_qloop_76); + const __m256d tmp_qloop_78 = _mm256_mul_pd(tmp_qloop_25,tmp_qloop_77); + const __m256d tmp_qloop_79 = _mm256_mul_pd(tmp_qloop_25,_mm256_set_pd(2.0,2.0,2.0,2.0)); + const __m256d tmp_qloop_80 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_79,_mm256_set_pd(tmp_qloop_33,tmp_qloop_33,tmp_qloop_33,tmp_qloop_33)),_mm256_mul_pd(tmp_qloop_18,_mm256_set_pd(tmp_qloop_40,tmp_qloop_40,tmp_qloop_40,tmp_qloop_40))); + const __m256d tmp_qloop_81 = _mm256_mul_pd(tmp_qloop_25,tmp_qloop_43); + const __m256d tmp_qloop_82 = _mm256_mul_pd(tmp_qloop_22,_mm256_set_pd(2.0,2.0,2.0,2.0)); + const __m256d tmp_qloop_83 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_82,_mm256_set_pd(tmp_qloop_33,tmp_qloop_33,tmp_qloop_33,tmp_qloop_33)),_mm256_mul_pd(tmp_qloop_18,_mm256_set_pd(tmp_qloop_72,tmp_qloop_72,tmp_qloop_72,tmp_qloop_72))); + const __m256d tmp_qloop_84 = _mm256_mul_pd(_mm256_mul_pd(tmp_qloop_22,tmp_qloop_78),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)); + const __m256d tmp_qloop_85 = _mm256_mul_pd(tmp_qloop_18,tmp_qloop_22); + const __m256d tmp_qloop_86 = _mm256_mul_pd(tmp_qloop_22,tmp_qloop_43); + const __m256d tmp_qloop_87 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_69,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_qloop_40,tmp_qloop_40,tmp_qloop_40,tmp_qloop_40)),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_25,_mm256_set_pd(tmp_qloop_21,tmp_qloop_21,tmp_qloop_21,tmp_qloop_21)),_mm256_set_pd(tmp_qloop_32,tmp_qloop_32,tmp_qloop_32,tmp_qloop_32))); + const __m256d tmp_qloop_88 = _mm256_mul_pd(tmp_qloop_18,tmp_qloop_43); + const __m256d tmp_qloop_89 = _mm256_mul_pd(tmp_qloop_42,tmp_qloop_76); + const __m256d tmp_qloop_90 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_28,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_25,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_qloop_40,tmp_qloop_40,tmp_qloop_40,tmp_qloop_40))); + const __m256d tmp_qloop_91 = _mm256_mul_pd(tmp_qloop_25,tmp_qloop_89); + const __m256d tmp_qloop_92 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_25,_mm256_set_pd(tmp_qloop_72,tmp_qloop_72,tmp_qloop_72,tmp_qloop_72)),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_82,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_qloop_40,tmp_qloop_40,tmp_qloop_40,tmp_qloop_40))); + const __m256d tmp_qloop_93 = _mm256_mul_pd(tmp_qloop_85,tmp_qloop_89); + const __m256d tmp_qloop_94 = _mm256_mul_pd(_mm256_mul_pd(tmp_qloop_31,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_qloop_68,tmp_qloop_68,tmp_qloop_68,tmp_qloop_68)); + const __m256d tmp_qloop_95 = _mm256_mul_pd(tmp_qloop_55,tmp_qloop_76); + const __m256d tmp_qloop_96 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_69,_mm256_set_pd(tmp_qloop_46,tmp_qloop_46,tmp_qloop_46,tmp_qloop_46)),_mm256_mul_pd(tmp_qloop_22,_mm256_set_pd(tmp_qloop_68,tmp_qloop_68,tmp_qloop_68,tmp_qloop_68))); + const __m256d tmp_qloop_97 = _mm256_mul_pd(tmp_qloop_49,tmp_qloop_76); + const __m256d tmp_qloop_98 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_22,_mm256_set_pd(tmp_qloop_40,tmp_qloop_40,tmp_qloop_40,tmp_qloop_40)),_mm256_mul_pd(tmp_qloop_79,_mm256_set_pd(tmp_qloop_46,tmp_qloop_46,tmp_qloop_46,tmp_qloop_46))); + const __m256d tmp_qloop_99 = _mm256_mul_pd(tmp_qloop_25,tmp_qloop_97); + const __m256d tmp_qloop_100 = _mm256_mul_pd(tmp_qloop_18,tmp_qloop_99); + const __m256d tmp_qloop_101 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_56,_mm256_set_pd(2.0,2.0,2.0,2.0)),_mm256_mul_pd(tmp_qloop_22,_mm256_set_pd(tmp_qloop_72,tmp_qloop_72,tmp_qloop_72,tmp_qloop_72))),tmp_qloop_74); + const __m256d tmp_qloop_102 = _mm256_mul_pd(tmp_qloop_58,tmp_qloop_76); + const __m256d tmp_qloop_103 = _mm256_mul_pd(_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q]),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])); + const __m256d tmp_qloop_104 = _mm256_mul_pd(tmp_qloop_103,_mm256_set_pd(2.0,2.0,2.0,2.0)); + const __m256d tmp_qloop_105 = _mm256_mul_pd(_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q])); + const __m256d tmp_qloop_106 = _mm256_mul_pd(tmp_qloop_105,_mm256_set_pd(2.0,2.0,2.0,2.0)); + const __m256d tmp_qloop_107 = _mm256_mul_pd(_mm256_set_pd(_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q]),_mm256_set_pd(_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q])); + const __m256d tmp_qloop_108 = _mm256_mul_pd(tmp_qloop_107,_mm256_set_pd(2.0,2.0,2.0,2.0)); + const __m256d tmp_qloop_109 = _mm256_mul_pd(_mm256_set_pd(4.0,4.0,4.0,4.0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q])); + const __m256d tmp_qloop_110 = _mm256_mul_pd(tmp_qloop_109,_mm256_set_pd(_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q])); + const __m256d tmp_qloop_111 = _mm256_mul_pd(_mm256_set_pd(4.0,4.0,4.0,4.0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])); + const __m256d tmp_qloop_112 = _mm256_mul_pd(tmp_qloop_111,_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q])); + const __m256d tmp_qloop_113 = _mm256_mul_pd(tmp_qloop_111,_mm256_set_pd(_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q])); + const __m256d tmp_qloop_114 = _mm256_add_pd(tmp_qloop_112,tmp_qloop_113); + const __m256d tmp_qloop_115 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-3.0,-3.0,-3.0,-3.0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),_mm256_mul_pd(_mm256_set_pd(-3.0,-3.0,-3.0,-3.0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]))),_mm256_mul_pd(_mm256_set_pd(-3.0,-3.0,-3.0,-3.0),_mm256_set_pd(_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),tmp_qloop_104),tmp_qloop_106),tmp_qloop_108),tmp_qloop_110),tmp_qloop_114); + const __m256d tmp_qloop_116 = _mm256_mul_pd(_mm256_set_pd(4.0,4.0,4.0,4.0),_mm256_set_pd(_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q])); + const __m256d tmp_qloop_117 = _mm256_add_pd(tmp_qloop_109,tmp_qloop_116); + const __m256d tmp_qloop_118 = _mm256_mul_pd(_mm256_mul_pd(rho_dof_8,tmp_qloop_109),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)); + const __m256d tmp_qloop_119 = _mm256_mul_pd(rho_dof_0,_mm256_add_pd(_mm256_add_pd(_mm256_set_pd(-3.0,-3.0,-3.0,-3.0),tmp_qloop_111),tmp_qloop_117)); + const __m256d tmp_qloop_120 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(rho_dof_7,tmp_qloop_116),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),tmp_qloop_119); + const __m256d tmp_qloop_121 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(rho_dof_1,_mm256_add_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),tmp_qloop_111)),_mm256_mul_pd(rho_dof_9,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_117,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_set_pd(-8.0,-8.0,-8.0,-8.0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q]))),_mm256_set_pd(4.0,4.0,4.0,4.0)))),_mm256_mul_pd(rho_dof_5,tmp_qloop_116)),_mm256_mul_pd(rho_dof_6,tmp_qloop_109)),tmp_qloop_118),tmp_qloop_120); + const __m256d tmp_qloop_122 = _mm256_add_pd(_mm256_set_pd(-4.0,-4.0,-4.0,-4.0),tmp_qloop_111); + const __m256d tmp_qloop_123 = _mm256_mul_pd(_mm256_mul_pd(rho_dof_9,tmp_qloop_111),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)); + const __m256d tmp_qloop_124 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(rho_dof_2,_mm256_add_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),tmp_qloop_109)),_mm256_mul_pd(rho_dof_8,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_116,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_122,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_mul_pd(_mm256_set_pd(-8.0,-8.0,-8.0,-8.0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]))))),_mm256_mul_pd(rho_dof_4,tmp_qloop_116)),_mm256_mul_pd(rho_dof_6,tmp_qloop_111)),tmp_qloop_120),tmp_qloop_123); + const __m256d tmp_qloop_125 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(rho_dof_3,_mm256_add_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),tmp_qloop_116)),_mm256_mul_pd(rho_dof_7,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_109,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_122,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_mul_pd(_mm256_set_pd(-8.0,-8.0,-8.0,-8.0),_mm256_set_pd(_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q]))))),_mm256_mul_pd(rho_dof_4,tmp_qloop_109)),_mm256_mul_pd(rho_dof_5,tmp_qloop_111)),tmp_qloop_118),tmp_qloop_119),tmp_qloop_123); + const __m256d tmp_qloop_127 = _mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),tmp_qloop_104); + const __m256d tmp_qloop_128 = _mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q])),tmp_qloop_106); + const __m256d tmp_qloop_129 = _mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q])),tmp_qloop_108); + const __m256d tmp_qloop_130 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_110,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_113,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_mul_pd(tmp_qloop_107,_mm256_set_pd(-4.0,-4.0,-4.0,-4.0))),tmp_qloop_116); + const __m256d tmp_qloop_131 = _mm256_mul_pd(tmp_qloop_105,_mm256_set_pd(4.0,4.0,4.0,4.0)); + const __m256d tmp_qloop_132 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_110,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_112,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_mul_pd(tmp_qloop_131,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_109); + const __m256d tmp_qloop_133 = _mm256_mul_pd(tmp_qloop_103,_mm256_set_pd(4.0,4.0,4.0,4.0)); + const __m256d tmp_qloop_134 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_114,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_133,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_111); + const __m256d tmp_qloop_158 = _mm256_mul_pd(tmp_qloop_105,tmp_qloop_111); + const __m256d jac_blending_0_0 = _mm256_mul_pd(tmp_qloop_36,tmp_qloop_38); + const __m256d jac_blending_0_1 = _mm256_mul_pd(tmp_qloop_18,tmp_qloop_44); + const __m256d jac_blending_0_2 = _mm256_mul_pd(tmp_qloop_18,tmp_qloop_50); + const __m256d jac_blending_1_0 = _mm256_mul_pd(tmp_qloop_25,tmp_qloop_52); + const __m256d jac_blending_1_1 = _mm256_mul_pd(tmp_qloop_43,tmp_qloop_55); + const __m256d tmp_qloop_63 = _mm256_mul_pd(jac_blending_0_2,jac_blending_1_1); + const __m256d jac_blending_1_2 = _mm256_mul_pd(tmp_qloop_25,tmp_qloop_50); + const __m256d tmp_qloop_60 = _mm256_mul_pd(jac_blending_0_1,jac_blending_1_2); + const __m256d jac_blending_2_0 = _mm256_mul_pd(tmp_qloop_22,tmp_qloop_52); + const __m256d jac_blending_2_1 = _mm256_mul_pd(tmp_qloop_22,tmp_qloop_44); + const __m256d tmp_qloop_61 = _mm256_mul_pd(jac_blending_1_2,jac_blending_2_1); + const __m256d jac_blending_2_2 = _mm256_mul_pd(tmp_qloop_43,tmp_qloop_58); + const __m256d tmp_qloop_59 = _mm256_mul_pd(jac_blending_1_1,jac_blending_2_2); + const __m256d tmp_qloop_62 = _mm256_mul_pd(jac_blending_0_1,jac_blending_2_2); + const __m256d tmp_qloop_64 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(jac_blending_0_0,tmp_qloop_59),_mm256_mul_pd(jac_blending_2_0,tmp_qloop_60)),_mm256_mul_pd(_mm256_mul_pd(jac_blending_0_0,tmp_qloop_61),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_mul_pd(_mm256_mul_pd(jac_blending_1_0,tmp_qloop_62),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_mul_pd(_mm256_mul_pd(jac_blending_2_0,tmp_qloop_63),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_mul_pd(_mm256_mul_pd(jac_blending_0_2,jac_blending_1_0),jac_blending_2_1)); + const __m256d tmp_qloop_65 = _mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),tmp_qloop_64); + const __m256d abs_det_jac_blending = tmp_qloop_64; + const __m256d tmp_qloop_135 = _mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(abs_det_jac_blending,_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(rho_dof_0,tmp_qloop_115),_mm256_mul_pd(rho_dof_1,tmp_qloop_127)),_mm256_mul_pd(rho_dof_2,tmp_qloop_128)),_mm256_mul_pd(rho_dof_3,tmp_qloop_129)),_mm256_mul_pd(rho_dof_4,tmp_qloop_110)),_mm256_mul_pd(rho_dof_5,tmp_qloop_113)),_mm256_mul_pd(rho_dof_6,tmp_qloop_112)),_mm256_mul_pd(rho_dof_7,tmp_qloop_130)),_mm256_mul_pd(rho_dof_8,tmp_qloop_132)),_mm256_mul_pd(rho_dof_9,tmp_qloop_134)))),_mm256_set_pd(_data_q_w[q],_data_q_w[q],_data_q_w[q],_data_q_w[q])),_mm256_set_pd(abs_det_jac_affine_WHITE_UP,abs_det_jac_affine_WHITE_UP,abs_det_jac_affine_WHITE_UP,abs_det_jac_affine_WHITE_UP)); + const __m256d tmp_qloop_136 = _mm256_mul_pd(tmp_qloop_135,_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]))),_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q]))),_mm256_set_pd(1.0,1.0,1.0,1.0))); + const __m256d tmp_qloop_142 = _mm256_mul_pd(tmp_qloop_135,_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])); + const __m256d tmp_qloop_144 = _mm256_mul_pd(tmp_qloop_135,_mm256_set_pd(_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q])); + const __m256d tmp_qloop_147 = _mm256_mul_pd(tmp_qloop_135,_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q])); + const __m256d tmp_qloop_159 = _mm256_mul_pd(tmp_qloop_135,tmp_qloop_158); + const __m256d tmp_qloop_161 = _mm256_mul_pd(tmp_qloop_107,tmp_qloop_135); + const __m256d jac_blending_inv_0_0 = _mm256_mul_pd(tmp_qloop_65,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_61,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),tmp_qloop_59)); + const __m256d jac_blending_inv_0_1 = _mm256_mul_pd(tmp_qloop_65,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_62,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(jac_blending_0_2,jac_blending_2_1))); + const __m256d jac_blending_inv_0_2 = _mm256_mul_pd(tmp_qloop_65,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_63,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),tmp_qloop_60)); + const __m256d jac_blending_inv_1_0 = _mm256_mul_pd(tmp_qloop_65,_mm256_add_pd(_mm256_mul_pd(jac_blending_1_2,jac_blending_2_0),_mm256_mul_pd(_mm256_mul_pd(jac_blending_1_0,jac_blending_2_2),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)))); + const __m256d jac_blending_inv_1_1 = _mm256_mul_pd(tmp_qloop_65,_mm256_add_pd(_mm256_mul_pd(jac_blending_0_0,jac_blending_2_2),_mm256_mul_pd(_mm256_mul_pd(jac_blending_0_2,jac_blending_2_0),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)))); + const __m256d jac_blending_inv_1_2 = _mm256_mul_pd(tmp_qloop_65,_mm256_add_pd(_mm256_mul_pd(jac_blending_0_2,jac_blending_1_0),_mm256_mul_pd(_mm256_mul_pd(jac_blending_0_0,jac_blending_1_2),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)))); + const __m256d jac_blending_inv_2_0 = _mm256_mul_pd(tmp_qloop_65,_mm256_add_pd(_mm256_mul_pd(jac_blending_1_0,jac_blending_2_1),_mm256_mul_pd(_mm256_mul_pd(jac_blending_1_1,jac_blending_2_0),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)))); + const __m256d tmp_qloop_126 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_121,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_0,_mm256_set_pd(jac_affine_inv_0_0_WHITE_UP,jac_affine_inv_0_0_WHITE_UP,jac_affine_inv_0_0_WHITE_UP,jac_affine_inv_0_0_WHITE_UP)),_mm256_mul_pd(jac_blending_inv_1_0,_mm256_set_pd(jac_affine_inv_0_1_WHITE_UP,jac_affine_inv_0_1_WHITE_UP,jac_affine_inv_0_1_WHITE_UP,jac_affine_inv_0_1_WHITE_UP))),_mm256_mul_pd(jac_blending_inv_2_0,_mm256_set_pd(jac_affine_inv_0_2_WHITE_UP,jac_affine_inv_0_2_WHITE_UP,jac_affine_inv_0_2_WHITE_UP,jac_affine_inv_0_2_WHITE_UP)))),_mm256_mul_pd(tmp_qloop_124,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_0,_mm256_set_pd(jac_affine_inv_1_0_WHITE_UP,jac_affine_inv_1_0_WHITE_UP,jac_affine_inv_1_0_WHITE_UP,jac_affine_inv_1_0_WHITE_UP)),_mm256_mul_pd(jac_blending_inv_1_0,_mm256_set_pd(jac_affine_inv_1_1_WHITE_UP,jac_affine_inv_1_1_WHITE_UP,jac_affine_inv_1_1_WHITE_UP,jac_affine_inv_1_1_WHITE_UP))),_mm256_mul_pd(jac_blending_inv_2_0,_mm256_set_pd(jac_affine_inv_1_2_WHITE_UP,jac_affine_inv_1_2_WHITE_UP,jac_affine_inv_1_2_WHITE_UP,jac_affine_inv_1_2_WHITE_UP))))),_mm256_mul_pd(tmp_qloop_125,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_0,_mm256_set_pd(jac_affine_inv_2_0_WHITE_UP,jac_affine_inv_2_0_WHITE_UP,jac_affine_inv_2_0_WHITE_UP,jac_affine_inv_2_0_WHITE_UP)),_mm256_mul_pd(jac_blending_inv_1_0,_mm256_set_pd(jac_affine_inv_2_1_WHITE_UP,jac_affine_inv_2_1_WHITE_UP,jac_affine_inv_2_1_WHITE_UP,jac_affine_inv_2_1_WHITE_UP))),_mm256_mul_pd(jac_blending_inv_2_0,_mm256_set_pd(jac_affine_inv_2_2_WHITE_UP,jac_affine_inv_2_2_WHITE_UP,jac_affine_inv_2_2_WHITE_UP,jac_affine_inv_2_2_WHITE_UP))))); + const __m256d tmp_qloop_137 = _mm256_mul_pd(tmp_qloop_126,tmp_qloop_136); + const __m256d tmp_qloop_143 = _mm256_mul_pd(tmp_qloop_126,tmp_qloop_142); + const __m256d tmp_qloop_145 = _mm256_mul_pd(tmp_qloop_126,tmp_qloop_144); + const __m256d tmp_qloop_146 = _mm256_mul_pd(tmp_qloop_112,tmp_qloop_145); + const __m256d tmp_qloop_148 = _mm256_mul_pd(tmp_qloop_126,tmp_qloop_147); + const __m256d tmp_qloop_157 = _mm256_mul_pd(tmp_qloop_126,tmp_qloop_135); + const __m256d tmp_qloop_160 = _mm256_mul_pd(tmp_qloop_107,tmp_qloop_157); + const __m256d jac_blending_inv_2_1 = _mm256_mul_pd(tmp_qloop_65,_mm256_add_pd(_mm256_mul_pd(jac_blending_0_1,jac_blending_2_0),_mm256_mul_pd(_mm256_mul_pd(jac_blending_0_0,jac_blending_2_1),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)))); + const __m256d tmp_qloop_138 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_121,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_1,_mm256_set_pd(jac_affine_inv_0_0_WHITE_UP,jac_affine_inv_0_0_WHITE_UP,jac_affine_inv_0_0_WHITE_UP,jac_affine_inv_0_0_WHITE_UP)),_mm256_mul_pd(jac_blending_inv_1_1,_mm256_set_pd(jac_affine_inv_0_1_WHITE_UP,jac_affine_inv_0_1_WHITE_UP,jac_affine_inv_0_1_WHITE_UP,jac_affine_inv_0_1_WHITE_UP))),_mm256_mul_pd(jac_blending_inv_2_1,_mm256_set_pd(jac_affine_inv_0_2_WHITE_UP,jac_affine_inv_0_2_WHITE_UP,jac_affine_inv_0_2_WHITE_UP,jac_affine_inv_0_2_WHITE_UP)))),_mm256_mul_pd(tmp_qloop_124,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_1,_mm256_set_pd(jac_affine_inv_1_0_WHITE_UP,jac_affine_inv_1_0_WHITE_UP,jac_affine_inv_1_0_WHITE_UP,jac_affine_inv_1_0_WHITE_UP)),_mm256_mul_pd(jac_blending_inv_1_1,_mm256_set_pd(jac_affine_inv_1_1_WHITE_UP,jac_affine_inv_1_1_WHITE_UP,jac_affine_inv_1_1_WHITE_UP,jac_affine_inv_1_1_WHITE_UP))),_mm256_mul_pd(jac_blending_inv_2_1,_mm256_set_pd(jac_affine_inv_1_2_WHITE_UP,jac_affine_inv_1_2_WHITE_UP,jac_affine_inv_1_2_WHITE_UP,jac_affine_inv_1_2_WHITE_UP))))),_mm256_mul_pd(tmp_qloop_125,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_1,_mm256_set_pd(jac_affine_inv_2_0_WHITE_UP,jac_affine_inv_2_0_WHITE_UP,jac_affine_inv_2_0_WHITE_UP,jac_affine_inv_2_0_WHITE_UP)),_mm256_mul_pd(jac_blending_inv_1_1,_mm256_set_pd(jac_affine_inv_2_1_WHITE_UP,jac_affine_inv_2_1_WHITE_UP,jac_affine_inv_2_1_WHITE_UP,jac_affine_inv_2_1_WHITE_UP))),_mm256_mul_pd(jac_blending_inv_2_1,_mm256_set_pd(jac_affine_inv_2_2_WHITE_UP,jac_affine_inv_2_2_WHITE_UP,jac_affine_inv_2_2_WHITE_UP,jac_affine_inv_2_2_WHITE_UP))))); + const __m256d tmp_qloop_139 = _mm256_mul_pd(tmp_qloop_136,tmp_qloop_138); + const __m256d tmp_qloop_149 = _mm256_mul_pd(tmp_qloop_138,tmp_qloop_142); + const __m256d tmp_qloop_150 = _mm256_mul_pd(tmp_qloop_138,tmp_qloop_144); + const __m256d tmp_qloop_151 = _mm256_mul_pd(tmp_qloop_112,tmp_qloop_150); + const __m256d tmp_qloop_152 = _mm256_mul_pd(tmp_qloop_138,tmp_qloop_147); + const __m256d tmp_qloop_162 = _mm256_mul_pd(tmp_qloop_138,tmp_qloop_161); + const __m256d jac_blending_inv_2_2 = _mm256_mul_pd(tmp_qloop_65,_mm256_add_pd(_mm256_mul_pd(jac_blending_0_0,jac_blending_1_1),_mm256_mul_pd(_mm256_mul_pd(jac_blending_0_1,jac_blending_1_0),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)))); + const __m256d tmp_qloop_140 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_121,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_2,_mm256_set_pd(jac_affine_inv_0_0_WHITE_UP,jac_affine_inv_0_0_WHITE_UP,jac_affine_inv_0_0_WHITE_UP,jac_affine_inv_0_0_WHITE_UP)),_mm256_mul_pd(jac_blending_inv_1_2,_mm256_set_pd(jac_affine_inv_0_1_WHITE_UP,jac_affine_inv_0_1_WHITE_UP,jac_affine_inv_0_1_WHITE_UP,jac_affine_inv_0_1_WHITE_UP))),_mm256_mul_pd(jac_blending_inv_2_2,_mm256_set_pd(jac_affine_inv_0_2_WHITE_UP,jac_affine_inv_0_2_WHITE_UP,jac_affine_inv_0_2_WHITE_UP,jac_affine_inv_0_2_WHITE_UP)))),_mm256_mul_pd(tmp_qloop_124,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_2,_mm256_set_pd(jac_affine_inv_1_0_WHITE_UP,jac_affine_inv_1_0_WHITE_UP,jac_affine_inv_1_0_WHITE_UP,jac_affine_inv_1_0_WHITE_UP)),_mm256_mul_pd(jac_blending_inv_1_2,_mm256_set_pd(jac_affine_inv_1_1_WHITE_UP,jac_affine_inv_1_1_WHITE_UP,jac_affine_inv_1_1_WHITE_UP,jac_affine_inv_1_1_WHITE_UP))),_mm256_mul_pd(jac_blending_inv_2_2,_mm256_set_pd(jac_affine_inv_1_2_WHITE_UP,jac_affine_inv_1_2_WHITE_UP,jac_affine_inv_1_2_WHITE_UP,jac_affine_inv_1_2_WHITE_UP))))),_mm256_mul_pd(tmp_qloop_125,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_2,_mm256_set_pd(jac_affine_inv_2_0_WHITE_UP,jac_affine_inv_2_0_WHITE_UP,jac_affine_inv_2_0_WHITE_UP,jac_affine_inv_2_0_WHITE_UP)),_mm256_mul_pd(jac_blending_inv_1_2,_mm256_set_pd(jac_affine_inv_2_1_WHITE_UP,jac_affine_inv_2_1_WHITE_UP,jac_affine_inv_2_1_WHITE_UP,jac_affine_inv_2_1_WHITE_UP))),_mm256_mul_pd(jac_blending_inv_2_2,_mm256_set_pd(jac_affine_inv_2_2_WHITE_UP,jac_affine_inv_2_2_WHITE_UP,jac_affine_inv_2_2_WHITE_UP,jac_affine_inv_2_2_WHITE_UP))))); + const __m256d tmp_qloop_141 = _mm256_mul_pd(tmp_qloop_136,tmp_qloop_140); + const __m256d tmp_qloop_153 = _mm256_mul_pd(tmp_qloop_140,tmp_qloop_142); + const __m256d tmp_qloop_154 = _mm256_mul_pd(tmp_qloop_140,tmp_qloop_144); + const __m256d tmp_qloop_155 = _mm256_mul_pd(tmp_qloop_112,tmp_qloop_154); + const __m256d tmp_qloop_156 = _mm256_mul_pd(tmp_qloop_140,tmp_qloop_147); + const __m256d tmp_qloop_163 = _mm256_mul_pd(tmp_qloop_140,tmp_qloop_161); + const __m256d hessian_blending_0_0_0 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_18,tmp_qloop_67),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_37,_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_31,_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(tmp_qloop_68,tmp_qloop_68,tmp_qloop_68,tmp_qloop_68)),_mm256_set_pd(tmp_qloop_33,tmp_qloop_33,tmp_qloop_33,tmp_qloop_33))),_mm256_mul_pd(tmp_qloop_19,_mm256_set_pd(tmp_qloop_68,tmp_qloop_68,tmp_qloop_68,tmp_qloop_68))),_mm256_mul_pd(tmp_qloop_35,tmp_qloop_69)),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_28,tmp_qloop_69),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_17,tmp_qloop_17,tmp_qloop_17,tmp_qloop_17))); + const __m256d hessian_blending_1_0_0 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_25,tmp_qloop_67),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_37,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_19,_mm256_set_pd(tmp_qloop_40,tmp_qloop_40,tmp_qloop_40,tmp_qloop_40)),_mm256_mul_pd(tmp_qloop_25,tmp_qloop_71)),tmp_qloop_70)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_17,tmp_qloop_17,tmp_qloop_17,tmp_qloop_17))); + const __m256d hessian_blending_2_0_0 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_22,tmp_qloop_67),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_37,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_19,_mm256_set_pd(tmp_qloop_72,tmp_qloop_72,tmp_qloop_72,tmp_qloop_72)),_mm256_mul_pd(tmp_qloop_22,tmp_qloop_71)),tmp_qloop_73)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_17,tmp_qloop_17,tmp_qloop_17,tmp_qloop_17))); + const __m256d hessian_blending_0_0_1 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_18,tmp_qloop_78),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_25,tmp_qloop_37),tmp_qloop_75),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_17,tmp_qloop_17,tmp_qloop_17,tmp_qloop_17))); + const __m256d hessian_blending_1_0_1 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_80,tmp_qloop_81),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_29,tmp_qloop_77),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_52); + const __m256d hessian_blending_2_0_1 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_81,tmp_qloop_83),tmp_qloop_84); + const __m256d hessian_blending_0_0_2 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_77,tmp_qloop_85),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_22,tmp_qloop_37),tmp_qloop_75),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_17,tmp_qloop_17,tmp_qloop_17,tmp_qloop_17))); + const __m256d hessian_blending_1_0_2 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_80,tmp_qloop_86),tmp_qloop_84); + const __m256d hessian_blending_2_0_2 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_83,tmp_qloop_86),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_30,tmp_qloop_77),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_52); + const __m256d hessian_blending_0_1_0 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_87,tmp_qloop_88),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_19,tmp_qloop_89),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_44); + const __m256d hessian_blending_1_1_0 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_18,tmp_qloop_91),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_18,tmp_qloop_37),tmp_qloop_90),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_17,tmp_qloop_17,tmp_qloop_17,tmp_qloop_17))); + const __m256d hessian_blending_2_1_0 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_93,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_88,tmp_qloop_92)); + const __m256d hessian_blending_0_1_1 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_18,tmp_qloop_95),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_37,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_29,_mm256_set_pd(tmp_qloop_68,tmp_qloop_68,tmp_qloop_68,tmp_qloop_68)),_mm256_mul_pd(tmp_qloop_54,tmp_qloop_69)),tmp_qloop_94)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_17,tmp_qloop_17,tmp_qloop_17,tmp_qloop_17))); + const __m256d hessian_blending_1_1_1 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_25,tmp_qloop_95),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_37,_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_31,_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(tmp_qloop_40,tmp_qloop_40,tmp_qloop_40,tmp_qloop_40)),_mm256_set_pd(tmp_qloop_53,tmp_qloop_53,tmp_qloop_53,tmp_qloop_53))),_mm256_mul_pd(tmp_qloop_29,_mm256_set_pd(tmp_qloop_40,tmp_qloop_40,tmp_qloop_40,tmp_qloop_40))),_mm256_mul_pd(tmp_qloop_47,tmp_qloop_79)),_mm256_mul_pd(tmp_qloop_54,tmp_qloop_79))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_17,tmp_qloop_17,tmp_qloop_17,tmp_qloop_17))); + const __m256d hessian_blending_2_1_1 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_22,tmp_qloop_95),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_37,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_29,_mm256_set_pd(tmp_qloop_72,tmp_qloop_72,tmp_qloop_72,tmp_qloop_72)),_mm256_mul_pd(tmp_qloop_54,tmp_qloop_82)),tmp_qloop_73)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_17,tmp_qloop_17,tmp_qloop_17,tmp_qloop_17))); + const __m256d hessian_blending_0_1_2 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_93,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_22,tmp_qloop_37),tmp_qloop_87),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_17,tmp_qloop_17,tmp_qloop_17,tmp_qloop_17))); + const __m256d hessian_blending_1_1_2 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_22,tmp_qloop_91),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_22,tmp_qloop_37),tmp_qloop_90),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_17,tmp_qloop_17,tmp_qloop_17,tmp_qloop_17))); + const __m256d hessian_blending_2_1_2 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_86,tmp_qloop_92),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_30,tmp_qloop_89),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_44); + const __m256d hessian_blending_0_2_0 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_88,tmp_qloop_96),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_19,tmp_qloop_97),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_50); + const __m256d hessian_blending_1_2_0 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_100,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_18,tmp_qloop_37),tmp_qloop_98),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_17,tmp_qloop_17,tmp_qloop_17,tmp_qloop_17))); + const __m256d hessian_blending_2_2_0 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_85,tmp_qloop_97),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_101,tmp_qloop_18),tmp_qloop_37),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_17,tmp_qloop_17,tmp_qloop_17,tmp_qloop_17))); + const __m256d hessian_blending_0_2_1 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_100,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_25,tmp_qloop_37),tmp_qloop_96),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_17,tmp_qloop_17,tmp_qloop_17,tmp_qloop_17))); + const __m256d hessian_blending_1_2_1 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_81,tmp_qloop_98),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_29,tmp_qloop_97),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_50); + const __m256d hessian_blending_2_2_1 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_22,tmp_qloop_99),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_101,tmp_qloop_25),tmp_qloop_37),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_17,tmp_qloop_17,tmp_qloop_17,tmp_qloop_17))); + const __m256d hessian_blending_0_2_2 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_102,tmp_qloop_18),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_37,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_30,_mm256_set_pd(tmp_qloop_68,tmp_qloop_68,tmp_qloop_68,tmp_qloop_68)),_mm256_mul_pd(tmp_qloop_57,tmp_qloop_69)),tmp_qloop_94)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_17,tmp_qloop_17,tmp_qloop_17,tmp_qloop_17))); + const __m256d hessian_blending_1_2_2 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_102,tmp_qloop_25),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_37,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_30,_mm256_set_pd(tmp_qloop_40,tmp_qloop_40,tmp_qloop_40,tmp_qloop_40)),_mm256_mul_pd(tmp_qloop_57,tmp_qloop_79)),tmp_qloop_70)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_17,tmp_qloop_17,tmp_qloop_17,tmp_qloop_17))); + const __m256d hessian_blending_2_2_2 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_102,tmp_qloop_22),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_37,_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_31,_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(tmp_qloop_72,tmp_qloop_72,tmp_qloop_72,tmp_qloop_72)),_mm256_set_pd(tmp_qloop_46,tmp_qloop_46,tmp_qloop_46,tmp_qloop_46))),_mm256_mul_pd(tmp_qloop_48,_mm256_set_pd(2.0,2.0,2.0,2.0))),_mm256_mul_pd(tmp_qloop_30,_mm256_set_pd(tmp_qloop_72,tmp_qloop_72,tmp_qloop_72,tmp_qloop_72))),_mm256_mul_pd(tmp_qloop_57,tmp_qloop_82))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_17,tmp_qloop_17,tmp_qloop_17,tmp_qloop_17))); + const __m256d q_tmp_0_0 = _mm256_mul_pd(tmp_qloop_115,tmp_qloop_137); + const __m256d q_tmp_0_1 = _mm256_mul_pd(tmp_qloop_127,tmp_qloop_137); + const __m256d q_tmp_0_2 = _mm256_mul_pd(tmp_qloop_128,tmp_qloop_137); + const __m256d q_tmp_0_3 = _mm256_mul_pd(tmp_qloop_129,tmp_qloop_137); + const __m256d q_tmp_0_4 = _mm256_mul_pd(tmp_qloop_110,tmp_qloop_137); + const __m256d q_tmp_0_5 = _mm256_mul_pd(tmp_qloop_113,tmp_qloop_137); + const __m256d q_tmp_0_6 = _mm256_mul_pd(tmp_qloop_112,tmp_qloop_137); + const __m256d q_tmp_0_7 = _mm256_mul_pd(tmp_qloop_130,tmp_qloop_137); + const __m256d q_tmp_0_8 = _mm256_mul_pd(tmp_qloop_132,tmp_qloop_137); + const __m256d q_tmp_0_9 = _mm256_mul_pd(tmp_qloop_134,tmp_qloop_137); + const __m256d q_tmp_0_10 = _mm256_mul_pd(tmp_qloop_115,tmp_qloop_139); + const __m256d q_tmp_0_11 = _mm256_mul_pd(tmp_qloop_127,tmp_qloop_139); + const __m256d q_tmp_0_12 = _mm256_mul_pd(tmp_qloop_128,tmp_qloop_139); + const __m256d q_tmp_0_13 = _mm256_mul_pd(tmp_qloop_129,tmp_qloop_139); + const __m256d q_tmp_0_14 = _mm256_mul_pd(tmp_qloop_110,tmp_qloop_139); + const __m256d q_tmp_0_15 = _mm256_mul_pd(tmp_qloop_113,tmp_qloop_139); + const __m256d q_tmp_0_16 = _mm256_mul_pd(tmp_qloop_112,tmp_qloop_139); + const __m256d q_tmp_0_17 = _mm256_mul_pd(tmp_qloop_130,tmp_qloop_139); + const __m256d q_tmp_0_18 = _mm256_mul_pd(tmp_qloop_132,tmp_qloop_139); + const __m256d q_tmp_0_19 = _mm256_mul_pd(tmp_qloop_134,tmp_qloop_139); + const __m256d q_tmp_0_20 = _mm256_mul_pd(tmp_qloop_115,tmp_qloop_141); + const __m256d q_tmp_0_21 = _mm256_mul_pd(tmp_qloop_127,tmp_qloop_141); + const __m256d q_tmp_0_22 = _mm256_mul_pd(tmp_qloop_128,tmp_qloop_141); + const __m256d q_tmp_0_23 = _mm256_mul_pd(tmp_qloop_129,tmp_qloop_141); + const __m256d q_tmp_0_24 = _mm256_mul_pd(tmp_qloop_110,tmp_qloop_141); + const __m256d q_tmp_0_25 = _mm256_mul_pd(tmp_qloop_113,tmp_qloop_141); + const __m256d q_tmp_0_26 = _mm256_mul_pd(tmp_qloop_112,tmp_qloop_141); + const __m256d q_tmp_0_27 = _mm256_mul_pd(tmp_qloop_130,tmp_qloop_141); + const __m256d q_tmp_0_28 = _mm256_mul_pd(tmp_qloop_132,tmp_qloop_141); + const __m256d q_tmp_0_29 = _mm256_mul_pd(tmp_qloop_134,tmp_qloop_141); + const __m256d q_tmp_1_0 = _mm256_mul_pd(tmp_qloop_115,tmp_qloop_143); + const __m256d q_tmp_1_1 = _mm256_mul_pd(tmp_qloop_127,tmp_qloop_143); + const __m256d q_tmp_1_2 = _mm256_mul_pd(tmp_qloop_128,tmp_qloop_143); + const __m256d q_tmp_1_3 = _mm256_mul_pd(tmp_qloop_129,tmp_qloop_143); + const __m256d q_tmp_1_4 = tmp_qloop_146; + const __m256d q_tmp_1_5 = _mm256_mul_pd(tmp_qloop_133,tmp_qloop_145); + const __m256d q_tmp_1_6 = _mm256_mul_pd(tmp_qloop_133,tmp_qloop_148); + const __m256d q_tmp_1_7 = _mm256_mul_pd(tmp_qloop_130,tmp_qloop_143); + const __m256d q_tmp_1_8 = _mm256_mul_pd(tmp_qloop_132,tmp_qloop_143); + const __m256d q_tmp_1_9 = _mm256_mul_pd(tmp_qloop_134,tmp_qloop_143); + const __m256d q_tmp_1_10 = _mm256_mul_pd(tmp_qloop_115,tmp_qloop_149); + const __m256d q_tmp_1_11 = _mm256_mul_pd(tmp_qloop_127,tmp_qloop_149); + const __m256d q_tmp_1_12 = _mm256_mul_pd(tmp_qloop_128,tmp_qloop_149); + const __m256d q_tmp_1_13 = _mm256_mul_pd(tmp_qloop_129,tmp_qloop_149); + const __m256d q_tmp_1_14 = tmp_qloop_151; + const __m256d q_tmp_1_15 = _mm256_mul_pd(tmp_qloop_133,tmp_qloop_150); + const __m256d q_tmp_1_16 = _mm256_mul_pd(tmp_qloop_133,tmp_qloop_152); + const __m256d q_tmp_1_17 = _mm256_mul_pd(tmp_qloop_130,tmp_qloop_149); + const __m256d q_tmp_1_18 = _mm256_mul_pd(tmp_qloop_132,tmp_qloop_149); + const __m256d q_tmp_1_19 = _mm256_mul_pd(tmp_qloop_134,tmp_qloop_149); + const __m256d q_tmp_1_20 = _mm256_mul_pd(tmp_qloop_115,tmp_qloop_153); + const __m256d q_tmp_1_21 = _mm256_mul_pd(tmp_qloop_127,tmp_qloop_153); + const __m256d q_tmp_1_22 = _mm256_mul_pd(tmp_qloop_128,tmp_qloop_153); + const __m256d q_tmp_1_23 = _mm256_mul_pd(tmp_qloop_129,tmp_qloop_153); + const __m256d q_tmp_1_24 = tmp_qloop_155; + const __m256d q_tmp_1_25 = _mm256_mul_pd(tmp_qloop_133,tmp_qloop_154); + const __m256d q_tmp_1_26 = _mm256_mul_pd(tmp_qloop_133,tmp_qloop_156); + const __m256d q_tmp_1_27 = _mm256_mul_pd(tmp_qloop_130,tmp_qloop_153); + const __m256d q_tmp_1_28 = _mm256_mul_pd(tmp_qloop_132,tmp_qloop_153); + const __m256d q_tmp_1_29 = _mm256_mul_pd(tmp_qloop_134,tmp_qloop_153); + const __m256d q_tmp_2_0 = _mm256_mul_pd(tmp_qloop_115,tmp_qloop_148); + const __m256d q_tmp_2_1 = _mm256_mul_pd(tmp_qloop_127,tmp_qloop_148); + const __m256d q_tmp_2_2 = _mm256_mul_pd(tmp_qloop_128,tmp_qloop_148); + const __m256d q_tmp_2_3 = _mm256_mul_pd(tmp_qloop_129,tmp_qloop_148); + const __m256d q_tmp_2_4 = _mm256_mul_pd(tmp_qloop_131,tmp_qloop_145); + const __m256d q_tmp_2_5 = tmp_qloop_146; + const __m256d q_tmp_2_6 = _mm256_mul_pd(tmp_qloop_157,tmp_qloop_158); + const __m256d q_tmp_2_7 = _mm256_mul_pd(tmp_qloop_130,tmp_qloop_148); + const __m256d q_tmp_2_8 = _mm256_mul_pd(tmp_qloop_132,tmp_qloop_148); + const __m256d q_tmp_2_9 = _mm256_mul_pd(tmp_qloop_134,tmp_qloop_148); + const __m256d q_tmp_2_10 = _mm256_mul_pd(tmp_qloop_115,tmp_qloop_152); + const __m256d q_tmp_2_11 = _mm256_mul_pd(tmp_qloop_127,tmp_qloop_152); + const __m256d q_tmp_2_12 = _mm256_mul_pd(tmp_qloop_128,tmp_qloop_152); + const __m256d q_tmp_2_13 = _mm256_mul_pd(tmp_qloop_129,tmp_qloop_152); + const __m256d q_tmp_2_14 = _mm256_mul_pd(tmp_qloop_131,tmp_qloop_150); + const __m256d q_tmp_2_15 = tmp_qloop_151; + const __m256d q_tmp_2_16 = _mm256_mul_pd(tmp_qloop_138,tmp_qloop_159); + const __m256d q_tmp_2_17 = _mm256_mul_pd(tmp_qloop_130,tmp_qloop_152); + const __m256d q_tmp_2_18 = _mm256_mul_pd(tmp_qloop_132,tmp_qloop_152); + const __m256d q_tmp_2_19 = _mm256_mul_pd(tmp_qloop_134,tmp_qloop_152); + const __m256d q_tmp_2_20 = _mm256_mul_pd(tmp_qloop_115,tmp_qloop_156); + const __m256d q_tmp_2_21 = _mm256_mul_pd(tmp_qloop_127,tmp_qloop_156); + const __m256d q_tmp_2_22 = _mm256_mul_pd(tmp_qloop_128,tmp_qloop_156); + const __m256d q_tmp_2_23 = _mm256_mul_pd(tmp_qloop_129,tmp_qloop_156); + const __m256d q_tmp_2_24 = _mm256_mul_pd(tmp_qloop_131,tmp_qloop_154); + const __m256d q_tmp_2_25 = tmp_qloop_155; + const __m256d q_tmp_2_26 = _mm256_mul_pd(tmp_qloop_140,tmp_qloop_159); + const __m256d q_tmp_2_27 = _mm256_mul_pd(tmp_qloop_130,tmp_qloop_156); + const __m256d q_tmp_2_28 = _mm256_mul_pd(tmp_qloop_132,tmp_qloop_156); + const __m256d q_tmp_2_29 = _mm256_mul_pd(tmp_qloop_134,tmp_qloop_156); + const __m256d q_tmp_3_0 = _mm256_mul_pd(tmp_qloop_115,tmp_qloop_145); + const __m256d q_tmp_3_1 = _mm256_mul_pd(tmp_qloop_127,tmp_qloop_145); + const __m256d q_tmp_3_2 = _mm256_mul_pd(tmp_qloop_128,tmp_qloop_145); + const __m256d q_tmp_3_3 = _mm256_mul_pd(tmp_qloop_129,tmp_qloop_145); + const __m256d q_tmp_3_4 = _mm256_mul_pd(tmp_qloop_109,tmp_qloop_160); + const __m256d q_tmp_3_5 = _mm256_mul_pd(tmp_qloop_111,tmp_qloop_160); + const __m256d q_tmp_3_6 = tmp_qloop_146; + const __m256d q_tmp_3_7 = _mm256_mul_pd(tmp_qloop_130,tmp_qloop_145); + const __m256d q_tmp_3_8 = _mm256_mul_pd(tmp_qloop_132,tmp_qloop_145); + const __m256d q_tmp_3_9 = _mm256_mul_pd(tmp_qloop_134,tmp_qloop_145); + const __m256d q_tmp_3_10 = _mm256_mul_pd(tmp_qloop_115,tmp_qloop_150); + const __m256d q_tmp_3_11 = _mm256_mul_pd(tmp_qloop_127,tmp_qloop_150); + const __m256d q_tmp_3_12 = _mm256_mul_pd(tmp_qloop_128,tmp_qloop_150); + const __m256d q_tmp_3_13 = _mm256_mul_pd(tmp_qloop_129,tmp_qloop_150); + const __m256d q_tmp_3_14 = _mm256_mul_pd(tmp_qloop_109,tmp_qloop_162); + const __m256d q_tmp_3_15 = _mm256_mul_pd(tmp_qloop_111,tmp_qloop_162); + const __m256d q_tmp_3_16 = tmp_qloop_151; + const __m256d q_tmp_3_17 = _mm256_mul_pd(tmp_qloop_130,tmp_qloop_150); + const __m256d q_tmp_3_18 = _mm256_mul_pd(tmp_qloop_132,tmp_qloop_150); + const __m256d q_tmp_3_19 = _mm256_mul_pd(tmp_qloop_134,tmp_qloop_150); + const __m256d q_tmp_3_20 = _mm256_mul_pd(tmp_qloop_115,tmp_qloop_154); + const __m256d q_tmp_3_21 = _mm256_mul_pd(tmp_qloop_127,tmp_qloop_154); + const __m256d q_tmp_3_22 = _mm256_mul_pd(tmp_qloop_128,tmp_qloop_154); + const __m256d q_tmp_3_23 = _mm256_mul_pd(tmp_qloop_129,tmp_qloop_154); + const __m256d q_tmp_3_24 = _mm256_mul_pd(tmp_qloop_109,tmp_qloop_163); + const __m256d q_tmp_3_25 = _mm256_mul_pd(tmp_qloop_111,tmp_qloop_163); + const __m256d q_tmp_3_26 = tmp_qloop_155; + const __m256d q_tmp_3_27 = _mm256_mul_pd(tmp_qloop_130,tmp_qloop_154); + const __m256d q_tmp_3_28 = _mm256_mul_pd(tmp_qloop_132,tmp_qloop_154); + const __m256d q_tmp_3_29 = _mm256_mul_pd(tmp_qloop_134,tmp_qloop_154); + q_acc_0_0 = _mm256_add_pd(q_acc_0_0,q_tmp_0_0); + q_acc_0_1 = _mm256_add_pd(q_acc_0_1,q_tmp_0_1); + q_acc_0_2 = _mm256_add_pd(q_acc_0_2,q_tmp_0_2); + q_acc_0_3 = _mm256_add_pd(q_acc_0_3,q_tmp_0_3); + q_acc_0_4 = _mm256_add_pd(q_acc_0_4,q_tmp_0_4); + q_acc_0_5 = _mm256_add_pd(q_acc_0_5,q_tmp_0_5); + q_acc_0_6 = _mm256_add_pd(q_acc_0_6,q_tmp_0_6); + q_acc_0_7 = _mm256_add_pd(q_acc_0_7,q_tmp_0_7); + q_acc_0_8 = _mm256_add_pd(q_acc_0_8,q_tmp_0_8); + q_acc_0_9 = _mm256_add_pd(q_acc_0_9,q_tmp_0_9); + q_acc_0_10 = _mm256_add_pd(q_acc_0_10,q_tmp_0_10); + q_acc_0_11 = _mm256_add_pd(q_acc_0_11,q_tmp_0_11); + q_acc_0_12 = _mm256_add_pd(q_acc_0_12,q_tmp_0_12); + q_acc_0_13 = _mm256_add_pd(q_acc_0_13,q_tmp_0_13); + q_acc_0_14 = _mm256_add_pd(q_acc_0_14,q_tmp_0_14); + q_acc_0_15 = _mm256_add_pd(q_acc_0_15,q_tmp_0_15); + q_acc_0_16 = _mm256_add_pd(q_acc_0_16,q_tmp_0_16); + q_acc_0_17 = _mm256_add_pd(q_acc_0_17,q_tmp_0_17); + q_acc_0_18 = _mm256_add_pd(q_acc_0_18,q_tmp_0_18); + q_acc_0_19 = _mm256_add_pd(q_acc_0_19,q_tmp_0_19); + q_acc_0_20 = _mm256_add_pd(q_acc_0_20,q_tmp_0_20); + q_acc_0_21 = _mm256_add_pd(q_acc_0_21,q_tmp_0_21); + q_acc_0_22 = _mm256_add_pd(q_acc_0_22,q_tmp_0_22); + q_acc_0_23 = _mm256_add_pd(q_acc_0_23,q_tmp_0_23); + q_acc_0_24 = _mm256_add_pd(q_acc_0_24,q_tmp_0_24); + q_acc_0_25 = _mm256_add_pd(q_acc_0_25,q_tmp_0_25); + q_acc_0_26 = _mm256_add_pd(q_acc_0_26,q_tmp_0_26); + q_acc_0_27 = _mm256_add_pd(q_acc_0_27,q_tmp_0_27); + q_acc_0_28 = _mm256_add_pd(q_acc_0_28,q_tmp_0_28); + q_acc_0_29 = _mm256_add_pd(q_acc_0_29,q_tmp_0_29); + q_acc_1_0 = _mm256_add_pd(q_acc_1_0,q_tmp_1_0); + q_acc_1_1 = _mm256_add_pd(q_acc_1_1,q_tmp_1_1); + q_acc_1_2 = _mm256_add_pd(q_acc_1_2,q_tmp_1_2); + q_acc_1_3 = _mm256_add_pd(q_acc_1_3,q_tmp_1_3); + q_acc_1_4 = _mm256_add_pd(q_acc_1_4,q_tmp_1_4); + q_acc_1_5 = _mm256_add_pd(q_acc_1_5,q_tmp_1_5); + q_acc_1_6 = _mm256_add_pd(q_acc_1_6,q_tmp_1_6); + q_acc_1_7 = _mm256_add_pd(q_acc_1_7,q_tmp_1_7); + q_acc_1_8 = _mm256_add_pd(q_acc_1_8,q_tmp_1_8); + q_acc_1_9 = _mm256_add_pd(q_acc_1_9,q_tmp_1_9); + q_acc_1_10 = _mm256_add_pd(q_acc_1_10,q_tmp_1_10); + q_acc_1_11 = _mm256_add_pd(q_acc_1_11,q_tmp_1_11); + q_acc_1_12 = _mm256_add_pd(q_acc_1_12,q_tmp_1_12); + q_acc_1_13 = _mm256_add_pd(q_acc_1_13,q_tmp_1_13); + q_acc_1_14 = _mm256_add_pd(q_acc_1_14,q_tmp_1_14); + q_acc_1_15 = _mm256_add_pd(q_acc_1_15,q_tmp_1_15); + q_acc_1_16 = _mm256_add_pd(q_acc_1_16,q_tmp_1_16); + q_acc_1_17 = _mm256_add_pd(q_acc_1_17,q_tmp_1_17); + q_acc_1_18 = _mm256_add_pd(q_acc_1_18,q_tmp_1_18); + q_acc_1_19 = _mm256_add_pd(q_acc_1_19,q_tmp_1_19); + q_acc_1_20 = _mm256_add_pd(q_acc_1_20,q_tmp_1_20); + q_acc_1_21 = _mm256_add_pd(q_acc_1_21,q_tmp_1_21); + q_acc_1_22 = _mm256_add_pd(q_acc_1_22,q_tmp_1_22); + q_acc_1_23 = _mm256_add_pd(q_acc_1_23,q_tmp_1_23); + q_acc_1_24 = _mm256_add_pd(q_acc_1_24,q_tmp_1_24); + q_acc_1_25 = _mm256_add_pd(q_acc_1_25,q_tmp_1_25); + q_acc_1_26 = _mm256_add_pd(q_acc_1_26,q_tmp_1_26); + q_acc_1_27 = _mm256_add_pd(q_acc_1_27,q_tmp_1_27); + q_acc_1_28 = _mm256_add_pd(q_acc_1_28,q_tmp_1_28); + q_acc_1_29 = _mm256_add_pd(q_acc_1_29,q_tmp_1_29); + q_acc_2_0 = _mm256_add_pd(q_acc_2_0,q_tmp_2_0); + q_acc_2_1 = _mm256_add_pd(q_acc_2_1,q_tmp_2_1); + q_acc_2_2 = _mm256_add_pd(q_acc_2_2,q_tmp_2_2); + q_acc_2_3 = _mm256_add_pd(q_acc_2_3,q_tmp_2_3); + q_acc_2_4 = _mm256_add_pd(q_acc_2_4,q_tmp_2_4); + q_acc_2_5 = _mm256_add_pd(q_acc_2_5,q_tmp_2_5); + q_acc_2_6 = _mm256_add_pd(q_acc_2_6,q_tmp_2_6); + q_acc_2_7 = _mm256_add_pd(q_acc_2_7,q_tmp_2_7); + q_acc_2_8 = _mm256_add_pd(q_acc_2_8,q_tmp_2_8); + q_acc_2_9 = _mm256_add_pd(q_acc_2_9,q_tmp_2_9); + q_acc_2_10 = _mm256_add_pd(q_acc_2_10,q_tmp_2_10); + q_acc_2_11 = _mm256_add_pd(q_acc_2_11,q_tmp_2_11); + q_acc_2_12 = _mm256_add_pd(q_acc_2_12,q_tmp_2_12); + q_acc_2_13 = _mm256_add_pd(q_acc_2_13,q_tmp_2_13); + q_acc_2_14 = _mm256_add_pd(q_acc_2_14,q_tmp_2_14); + q_acc_2_15 = _mm256_add_pd(q_acc_2_15,q_tmp_2_15); + q_acc_2_16 = _mm256_add_pd(q_acc_2_16,q_tmp_2_16); + q_acc_2_17 = _mm256_add_pd(q_acc_2_17,q_tmp_2_17); + q_acc_2_18 = _mm256_add_pd(q_acc_2_18,q_tmp_2_18); + q_acc_2_19 = _mm256_add_pd(q_acc_2_19,q_tmp_2_19); + q_acc_2_20 = _mm256_add_pd(q_acc_2_20,q_tmp_2_20); + q_acc_2_21 = _mm256_add_pd(q_acc_2_21,q_tmp_2_21); + q_acc_2_22 = _mm256_add_pd(q_acc_2_22,q_tmp_2_22); + q_acc_2_23 = _mm256_add_pd(q_acc_2_23,q_tmp_2_23); + q_acc_2_24 = _mm256_add_pd(q_acc_2_24,q_tmp_2_24); + q_acc_2_25 = _mm256_add_pd(q_acc_2_25,q_tmp_2_25); + q_acc_2_26 = _mm256_add_pd(q_acc_2_26,q_tmp_2_26); + q_acc_2_27 = _mm256_add_pd(q_acc_2_27,q_tmp_2_27); + q_acc_2_28 = _mm256_add_pd(q_acc_2_28,q_tmp_2_28); + q_acc_2_29 = _mm256_add_pd(q_acc_2_29,q_tmp_2_29); + q_acc_3_0 = _mm256_add_pd(q_acc_3_0,q_tmp_3_0); + q_acc_3_1 = _mm256_add_pd(q_acc_3_1,q_tmp_3_1); + q_acc_3_2 = _mm256_add_pd(q_acc_3_2,q_tmp_3_2); + q_acc_3_3 = _mm256_add_pd(q_acc_3_3,q_tmp_3_3); + q_acc_3_4 = _mm256_add_pd(q_acc_3_4,q_tmp_3_4); + q_acc_3_5 = _mm256_add_pd(q_acc_3_5,q_tmp_3_5); + q_acc_3_6 = _mm256_add_pd(q_acc_3_6,q_tmp_3_6); + q_acc_3_7 = _mm256_add_pd(q_acc_3_7,q_tmp_3_7); + q_acc_3_8 = _mm256_add_pd(q_acc_3_8,q_tmp_3_8); + q_acc_3_9 = _mm256_add_pd(q_acc_3_9,q_tmp_3_9); + q_acc_3_10 = _mm256_add_pd(q_acc_3_10,q_tmp_3_10); + q_acc_3_11 = _mm256_add_pd(q_acc_3_11,q_tmp_3_11); + q_acc_3_12 = _mm256_add_pd(q_acc_3_12,q_tmp_3_12); + q_acc_3_13 = _mm256_add_pd(q_acc_3_13,q_tmp_3_13); + q_acc_3_14 = _mm256_add_pd(q_acc_3_14,q_tmp_3_14); + q_acc_3_15 = _mm256_add_pd(q_acc_3_15,q_tmp_3_15); + q_acc_3_16 = _mm256_add_pd(q_acc_3_16,q_tmp_3_16); + q_acc_3_17 = _mm256_add_pd(q_acc_3_17,q_tmp_3_17); + q_acc_3_18 = _mm256_add_pd(q_acc_3_18,q_tmp_3_18); + q_acc_3_19 = _mm256_add_pd(q_acc_3_19,q_tmp_3_19); + q_acc_3_20 = _mm256_add_pd(q_acc_3_20,q_tmp_3_20); + q_acc_3_21 = _mm256_add_pd(q_acc_3_21,q_tmp_3_21); + q_acc_3_22 = _mm256_add_pd(q_acc_3_22,q_tmp_3_22); + q_acc_3_23 = _mm256_add_pd(q_acc_3_23,q_tmp_3_23); + q_acc_3_24 = _mm256_add_pd(q_acc_3_24,q_tmp_3_24); + q_acc_3_25 = _mm256_add_pd(q_acc_3_25,q_tmp_3_25); + q_acc_3_26 = _mm256_add_pd(q_acc_3_26,q_tmp_3_26); + q_acc_3_27 = _mm256_add_pd(q_acc_3_27,q_tmp_3_27); + q_acc_3_28 = _mm256_add_pd(q_acc_3_28,q_tmp_3_28); + q_acc_3_29 = _mm256_add_pd(q_acc_3_29,q_tmp_3_29); + } + const __m256d elMatVec_0 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(q_acc_0_0,src_dof_0),_mm256_mul_pd(q_acc_0_1,src_dof_1)),_mm256_mul_pd(q_acc_0_10,src_dof_10)),_mm256_mul_pd(q_acc_0_11,src_dof_11)),_mm256_mul_pd(q_acc_0_12,src_dof_12)),_mm256_mul_pd(q_acc_0_13,src_dof_13)),_mm256_mul_pd(q_acc_0_14,src_dof_14)),_mm256_mul_pd(q_acc_0_15,src_dof_15)),_mm256_mul_pd(q_acc_0_16,src_dof_16)),_mm256_mul_pd(q_acc_0_17,src_dof_17)),_mm256_mul_pd(q_acc_0_18,src_dof_18)),_mm256_mul_pd(q_acc_0_19,src_dof_19)),_mm256_mul_pd(q_acc_0_2,src_dof_2)),_mm256_mul_pd(q_acc_0_20,src_dof_20)),_mm256_mul_pd(q_acc_0_21,src_dof_21)),_mm256_mul_pd(q_acc_0_22,src_dof_22)),_mm256_mul_pd(q_acc_0_23,src_dof_23)),_mm256_mul_pd(q_acc_0_24,src_dof_24)),_mm256_mul_pd(q_acc_0_25,src_dof_25)),_mm256_mul_pd(q_acc_0_26,src_dof_26)),_mm256_mul_pd(q_acc_0_27,src_dof_27)),_mm256_mul_pd(q_acc_0_28,src_dof_28)),_mm256_mul_pd(q_acc_0_29,src_dof_29)),_mm256_mul_pd(q_acc_0_3,src_dof_3)),_mm256_mul_pd(q_acc_0_4,src_dof_4)),_mm256_mul_pd(q_acc_0_5,src_dof_5)),_mm256_mul_pd(q_acc_0_6,src_dof_6)),_mm256_mul_pd(q_acc_0_7,src_dof_7)),_mm256_mul_pd(q_acc_0_8,src_dof_8)),_mm256_mul_pd(q_acc_0_9,src_dof_9)); + const __m256d elMatVec_1 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(q_acc_1_0,src_dof_0),_mm256_mul_pd(q_acc_1_1,src_dof_1)),_mm256_mul_pd(q_acc_1_10,src_dof_10)),_mm256_mul_pd(q_acc_1_11,src_dof_11)),_mm256_mul_pd(q_acc_1_12,src_dof_12)),_mm256_mul_pd(q_acc_1_13,src_dof_13)),_mm256_mul_pd(q_acc_1_14,src_dof_14)),_mm256_mul_pd(q_acc_1_15,src_dof_15)),_mm256_mul_pd(q_acc_1_16,src_dof_16)),_mm256_mul_pd(q_acc_1_17,src_dof_17)),_mm256_mul_pd(q_acc_1_18,src_dof_18)),_mm256_mul_pd(q_acc_1_19,src_dof_19)),_mm256_mul_pd(q_acc_1_2,src_dof_2)),_mm256_mul_pd(q_acc_1_20,src_dof_20)),_mm256_mul_pd(q_acc_1_21,src_dof_21)),_mm256_mul_pd(q_acc_1_22,src_dof_22)),_mm256_mul_pd(q_acc_1_23,src_dof_23)),_mm256_mul_pd(q_acc_1_24,src_dof_24)),_mm256_mul_pd(q_acc_1_25,src_dof_25)),_mm256_mul_pd(q_acc_1_26,src_dof_26)),_mm256_mul_pd(q_acc_1_27,src_dof_27)),_mm256_mul_pd(q_acc_1_28,src_dof_28)),_mm256_mul_pd(q_acc_1_29,src_dof_29)),_mm256_mul_pd(q_acc_1_3,src_dof_3)),_mm256_mul_pd(q_acc_1_4,src_dof_4)),_mm256_mul_pd(q_acc_1_5,src_dof_5)),_mm256_mul_pd(q_acc_1_6,src_dof_6)),_mm256_mul_pd(q_acc_1_7,src_dof_7)),_mm256_mul_pd(q_acc_1_8,src_dof_8)),_mm256_mul_pd(q_acc_1_9,src_dof_9)); + const __m256d elMatVec_2 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(q_acc_2_0,src_dof_0),_mm256_mul_pd(q_acc_2_1,src_dof_1)),_mm256_mul_pd(q_acc_2_10,src_dof_10)),_mm256_mul_pd(q_acc_2_11,src_dof_11)),_mm256_mul_pd(q_acc_2_12,src_dof_12)),_mm256_mul_pd(q_acc_2_13,src_dof_13)),_mm256_mul_pd(q_acc_2_14,src_dof_14)),_mm256_mul_pd(q_acc_2_15,src_dof_15)),_mm256_mul_pd(q_acc_2_16,src_dof_16)),_mm256_mul_pd(q_acc_2_17,src_dof_17)),_mm256_mul_pd(q_acc_2_18,src_dof_18)),_mm256_mul_pd(q_acc_2_19,src_dof_19)),_mm256_mul_pd(q_acc_2_2,src_dof_2)),_mm256_mul_pd(q_acc_2_20,src_dof_20)),_mm256_mul_pd(q_acc_2_21,src_dof_21)),_mm256_mul_pd(q_acc_2_22,src_dof_22)),_mm256_mul_pd(q_acc_2_23,src_dof_23)),_mm256_mul_pd(q_acc_2_24,src_dof_24)),_mm256_mul_pd(q_acc_2_25,src_dof_25)),_mm256_mul_pd(q_acc_2_26,src_dof_26)),_mm256_mul_pd(q_acc_2_27,src_dof_27)),_mm256_mul_pd(q_acc_2_28,src_dof_28)),_mm256_mul_pd(q_acc_2_29,src_dof_29)),_mm256_mul_pd(q_acc_2_3,src_dof_3)),_mm256_mul_pd(q_acc_2_4,src_dof_4)),_mm256_mul_pd(q_acc_2_5,src_dof_5)),_mm256_mul_pd(q_acc_2_6,src_dof_6)),_mm256_mul_pd(q_acc_2_7,src_dof_7)),_mm256_mul_pd(q_acc_2_8,src_dof_8)),_mm256_mul_pd(q_acc_2_9,src_dof_9)); + const __m256d elMatVec_3 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(q_acc_3_0,src_dof_0),_mm256_mul_pd(q_acc_3_1,src_dof_1)),_mm256_mul_pd(q_acc_3_10,src_dof_10)),_mm256_mul_pd(q_acc_3_11,src_dof_11)),_mm256_mul_pd(q_acc_3_12,src_dof_12)),_mm256_mul_pd(q_acc_3_13,src_dof_13)),_mm256_mul_pd(q_acc_3_14,src_dof_14)),_mm256_mul_pd(q_acc_3_15,src_dof_15)),_mm256_mul_pd(q_acc_3_16,src_dof_16)),_mm256_mul_pd(q_acc_3_17,src_dof_17)),_mm256_mul_pd(q_acc_3_18,src_dof_18)),_mm256_mul_pd(q_acc_3_19,src_dof_19)),_mm256_mul_pd(q_acc_3_2,src_dof_2)),_mm256_mul_pd(q_acc_3_20,src_dof_20)),_mm256_mul_pd(q_acc_3_21,src_dof_21)),_mm256_mul_pd(q_acc_3_22,src_dof_22)),_mm256_mul_pd(q_acc_3_23,src_dof_23)),_mm256_mul_pd(q_acc_3_24,src_dof_24)),_mm256_mul_pd(q_acc_3_25,src_dof_25)),_mm256_mul_pd(q_acc_3_26,src_dof_26)),_mm256_mul_pd(q_acc_3_27,src_dof_27)),_mm256_mul_pd(q_acc_3_28,src_dof_28)),_mm256_mul_pd(q_acc_3_29,src_dof_29)),_mm256_mul_pd(q_acc_3_3,src_dof_3)),_mm256_mul_pd(q_acc_3_4,src_dof_4)),_mm256_mul_pd(q_acc_3_5,src_dof_5)),_mm256_mul_pd(q_acc_3_6,src_dof_6)),_mm256_mul_pd(q_acc_3_7,src_dof_7)),_mm256_mul_pd(q_acc_3_8,src_dof_8)),_mm256_mul_pd(q_acc_3_9,src_dof_9)); + _mm256_storeu_pd(&_data_dst[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))],_mm256_add_pd(elMatVec_0,_mm256_loadu_pd(& _data_dst[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))]))); + _mm256_storeu_pd(&_data_dst[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1],_mm256_add_pd(elMatVec_1,_mm256_loadu_pd(& _data_dst[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1]))); + _mm256_storeu_pd(&_data_dst[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))],_mm256_add_pd(elMatVec_2,_mm256_loadu_pd(& _data_dst[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))]))); + _mm256_storeu_pd(&_data_dst[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))],_mm256_add_pd(elMatVec_3,_mm256_loadu_pd(& _data_dst[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))]))); + } + for (int64_t ctr_0 = (int64_t)((-ctr_1 - ctr_2 + micro_edges_per_macro_edge) / (4)) * (4); ctr_0 < -ctr_1 - ctr_2 + micro_edges_per_macro_edge; ctr_0 += 1) + { + + const int64_t phantom_ctr_0 = ctr_0; + real_t _data_float_loop_ctr_array_dim_0[4]; + _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0; + _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1; + _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2; + _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3; + real_t _data_float_loop_ctr_array_dim_1[4]; + _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1; + real_t _data_float_loop_ctr_array_dim_2[4]; + _data_float_loop_ctr_array_dim_2[0] = (real_t) ctr_2; + _data_float_loop_ctr_array_dim_2[1] = (real_t) ctr_2; + _data_float_loop_ctr_array_dim_2[2] = (real_t) ctr_2; + _data_float_loop_ctr_array_dim_2[3] = (real_t) ctr_2; + + const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]; + const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]; + const real_t p_affine_0_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]; + const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]; + const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]; + const real_t p_affine_1_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]; + const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]; + const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]; + const real_t p_affine_2_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]; + const real_t p_affine_3_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_3_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_3_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0; + const real_t src_dof_0 = _data_src_vertex_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))]; + const real_t src_dof_1 = _data_src_vertex_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1]; + const real_t src_dof_2 = _data_src_vertex_0[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))]; + const real_t src_dof_3 = _data_src_vertex_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))]; + const real_t src_dof_4 = _data_src_edge_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]; + const real_t src_dof_5 = _data_src_edge_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 5*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]; + const real_t src_dof_6 = _data_src_edge_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 4*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]; + const real_t src_dof_7 = _data_src_edge_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 3*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]; + const real_t src_dof_8 = _data_src_edge_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]; + const real_t src_dof_9 = _data_src_edge_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]; + const real_t src_dof_10 = _data_src_vertex_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))]; + const real_t src_dof_11 = _data_src_vertex_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1]; + const real_t src_dof_12 = _data_src_vertex_1[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))]; + const real_t src_dof_13 = _data_src_vertex_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))]; + const real_t src_dof_14 = _data_src_edge_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]; + const real_t src_dof_15 = _data_src_edge_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 5*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]; + const real_t src_dof_16 = _data_src_edge_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 4*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]; + const real_t src_dof_17 = _data_src_edge_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 3*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]; + const real_t src_dof_18 = _data_src_edge_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]; + const real_t src_dof_19 = _data_src_edge_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]; + const real_t src_dof_20 = _data_src_vertex_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))]; + const real_t src_dof_21 = _data_src_vertex_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1]; + const real_t src_dof_22 = _data_src_vertex_2[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))]; + const real_t src_dof_23 = _data_src_vertex_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))]; + const real_t src_dof_24 = _data_src_edge_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]; + const real_t src_dof_25 = _data_src_edge_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 5*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]; + const real_t src_dof_26 = _data_src_edge_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 4*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]; + const real_t src_dof_27 = _data_src_edge_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 3*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]; + const real_t src_dof_28 = _data_src_edge_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]; + const real_t src_dof_29 = _data_src_edge_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]; + const real_t rho_dof_0 = _data_rhoVertex[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))]; + const real_t rho_dof_1 = _data_rhoVertex[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1]; + const real_t rho_dof_2 = _data_rhoVertex[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))]; + const real_t rho_dof_3 = _data_rhoVertex[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))]; + const real_t rho_dof_4 = _data_rhoEdge[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]; + const real_t rho_dof_5 = _data_rhoEdge[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 5*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]; + const real_t rho_dof_6 = _data_rhoEdge[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 4*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]; + const real_t rho_dof_7 = _data_rhoEdge[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 3*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]; + const real_t rho_dof_8 = _data_rhoEdge[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]; + const real_t rho_dof_9 = _data_rhoEdge[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]; + real_t q_acc_0_0 = 0.0; + real_t q_acc_0_1 = 0.0; + real_t q_acc_0_2 = 0.0; + real_t q_acc_0_3 = 0.0; + real_t q_acc_0_4 = 0.0; + real_t q_acc_0_5 = 0.0; + real_t q_acc_0_6 = 0.0; + real_t q_acc_0_7 = 0.0; + real_t q_acc_0_8 = 0.0; + real_t q_acc_0_9 = 0.0; + real_t q_acc_0_10 = 0.0; + real_t q_acc_0_11 = 0.0; + real_t q_acc_0_12 = 0.0; + real_t q_acc_0_13 = 0.0; + real_t q_acc_0_14 = 0.0; + real_t q_acc_0_15 = 0.0; + real_t q_acc_0_16 = 0.0; + real_t q_acc_0_17 = 0.0; + real_t q_acc_0_18 = 0.0; + real_t q_acc_0_19 = 0.0; + real_t q_acc_0_20 = 0.0; + real_t q_acc_0_21 = 0.0; + real_t q_acc_0_22 = 0.0; + real_t q_acc_0_23 = 0.0; + real_t q_acc_0_24 = 0.0; + real_t q_acc_0_25 = 0.0; + real_t q_acc_0_26 = 0.0; + real_t q_acc_0_27 = 0.0; + real_t q_acc_0_28 = 0.0; + real_t q_acc_0_29 = 0.0; + real_t q_acc_1_0 = 0.0; + real_t q_acc_1_1 = 0.0; + real_t q_acc_1_2 = 0.0; + real_t q_acc_1_3 = 0.0; + real_t q_acc_1_4 = 0.0; + real_t q_acc_1_5 = 0.0; + real_t q_acc_1_6 = 0.0; + real_t q_acc_1_7 = 0.0; + real_t q_acc_1_8 = 0.0; + real_t q_acc_1_9 = 0.0; + real_t q_acc_1_10 = 0.0; + real_t q_acc_1_11 = 0.0; + real_t q_acc_1_12 = 0.0; + real_t q_acc_1_13 = 0.0; + real_t q_acc_1_14 = 0.0; + real_t q_acc_1_15 = 0.0; + real_t q_acc_1_16 = 0.0; + real_t q_acc_1_17 = 0.0; + real_t q_acc_1_18 = 0.0; + real_t q_acc_1_19 = 0.0; + real_t q_acc_1_20 = 0.0; + real_t q_acc_1_21 = 0.0; + real_t q_acc_1_22 = 0.0; + real_t q_acc_1_23 = 0.0; + real_t q_acc_1_24 = 0.0; + real_t q_acc_1_25 = 0.0; + real_t q_acc_1_26 = 0.0; + real_t q_acc_1_27 = 0.0; + real_t q_acc_1_28 = 0.0; + real_t q_acc_1_29 = 0.0; + real_t q_acc_2_0 = 0.0; + real_t q_acc_2_1 = 0.0; + real_t q_acc_2_2 = 0.0; + real_t q_acc_2_3 = 0.0; + real_t q_acc_2_4 = 0.0; + real_t q_acc_2_5 = 0.0; + real_t q_acc_2_6 = 0.0; + real_t q_acc_2_7 = 0.0; + real_t q_acc_2_8 = 0.0; + real_t q_acc_2_9 = 0.0; + real_t q_acc_2_10 = 0.0; + real_t q_acc_2_11 = 0.0; + real_t q_acc_2_12 = 0.0; + real_t q_acc_2_13 = 0.0; + real_t q_acc_2_14 = 0.0; + real_t q_acc_2_15 = 0.0; + real_t q_acc_2_16 = 0.0; + real_t q_acc_2_17 = 0.0; + real_t q_acc_2_18 = 0.0; + real_t q_acc_2_19 = 0.0; + real_t q_acc_2_20 = 0.0; + real_t q_acc_2_21 = 0.0; + real_t q_acc_2_22 = 0.0; + real_t q_acc_2_23 = 0.0; + real_t q_acc_2_24 = 0.0; + real_t q_acc_2_25 = 0.0; + real_t q_acc_2_26 = 0.0; + real_t q_acc_2_27 = 0.0; + real_t q_acc_2_28 = 0.0; + real_t q_acc_2_29 = 0.0; + real_t q_acc_3_0 = 0.0; + real_t q_acc_3_1 = 0.0; + real_t q_acc_3_2 = 0.0; + real_t q_acc_3_3 = 0.0; + real_t q_acc_3_4 = 0.0; + real_t q_acc_3_5 = 0.0; + real_t q_acc_3_6 = 0.0; + real_t q_acc_3_7 = 0.0; + real_t q_acc_3_8 = 0.0; + real_t q_acc_3_9 = 0.0; + real_t q_acc_3_10 = 0.0; + real_t q_acc_3_11 = 0.0; + real_t q_acc_3_12 = 0.0; + real_t q_acc_3_13 = 0.0; + real_t q_acc_3_14 = 0.0; + real_t q_acc_3_15 = 0.0; + real_t q_acc_3_16 = 0.0; + real_t q_acc_3_17 = 0.0; + real_t q_acc_3_18 = 0.0; + real_t q_acc_3_19 = 0.0; + real_t q_acc_3_20 = 0.0; + real_t q_acc_3_21 = 0.0; + real_t q_acc_3_22 = 0.0; + real_t q_acc_3_23 = 0.0; + real_t q_acc_3_24 = 0.0; + real_t q_acc_3_25 = 0.0; + real_t q_acc_3_26 = 0.0; + real_t q_acc_3_27 = 0.0; + real_t q_acc_3_28 = 0.0; + real_t q_acc_3_29 = 0.0; + for (int64_t q = 0; q < 5; q += 1) + { + const real_t tmp_qloop_18 = p_affine_0_0 + (-p_affine_0_0 + p_affine_1_0)*_data_q_p_0[q] + (-p_affine_0_0 + p_affine_2_0)*_data_q_p_1[q] + (-p_affine_0_0 + p_affine_3_0)*_data_q_p_2[q]; + const real_t tmp_qloop_19 = (tmp_qloop_18*tmp_qloop_18); + const real_t tmp_qloop_22 = p_affine_0_2 + (-p_affine_0_2 + p_affine_1_2)*_data_q_p_0[q] + (-p_affine_0_2 + p_affine_2_2)*_data_q_p_1[q] + (-p_affine_0_2 + p_affine_3_2)*_data_q_p_2[q]; + const real_t tmp_qloop_23 = -tmp_qloop_12 - tmp_qloop_22; + const real_t tmp_qloop_24 = -tmp_qloop_1 - tmp_qloop_18; + const real_t tmp_qloop_25 = p_affine_0_1 + (-p_affine_0_1 + p_affine_1_1)*_data_q_p_0[q] + (-p_affine_0_1 + p_affine_2_1)*_data_q_p_1[q] + (-p_affine_0_1 + p_affine_3_1)*_data_q_p_2[q]; + const real_t tmp_qloop_26 = -tmp_qloop_25 - tmp_qloop_8; + const real_t tmp_qloop_27 = tmp_qloop_21*(tmp_qloop_10*tmp_qloop_23*tmp_qloop_9 + tmp_qloop_13*tmp_qloop_24*tmp_qloop_6 - tmp_qloop_14*tmp_qloop_24 - tmp_qloop_15*tmp_qloop_26 + tmp_qloop_2*tmp_qloop_26*tmp_qloop_3 - tmp_qloop_23*tmp_qloop_7); + const real_t tmp_qloop_28 = tmp_qloop_20 - tmp_qloop_27; + const real_t tmp_qloop_29 = (tmp_qloop_25*tmp_qloop_25); + const real_t tmp_qloop_30 = (tmp_qloop_22*tmp_qloop_22); + const real_t tmp_qloop_31 = tmp_qloop_19 + tmp_qloop_29 + tmp_qloop_30; + const real_t tmp_qloop_34 = tmp_qloop_18*tmp_qloop_33; + const real_t tmp_qloop_35 = tmp_qloop_28 + tmp_qloop_34; + const real_t tmp_qloop_36 = tmp_qloop_17*(-tmp_qloop_19*tmp_qloop_28 + tmp_qloop_31*tmp_qloop_35); + const real_t tmp_qloop_37 = pow(tmp_qloop_31, -1.5000000000000000); + const real_t tmp_qloop_38 = tmp_qloop_37*1.0; + const real_t tmp_qloop_41 = tmp_qloop_31*tmp_qloop_40; + const real_t tmp_qloop_42 = -tmp_qloop_25*tmp_qloop_28 - tmp_qloop_41; + const real_t tmp_qloop_43 = tmp_qloop_17*tmp_qloop_38; + const real_t tmp_qloop_44 = tmp_qloop_42*tmp_qloop_43; + const real_t tmp_qloop_47 = -tmp_qloop_28; + const real_t tmp_qloop_48 = tmp_qloop_22*tmp_qloop_47; + const real_t tmp_qloop_49 = tmp_qloop_31*tmp_qloop_46 + tmp_qloop_48; + const real_t tmp_qloop_50 = tmp_qloop_43*tmp_qloop_49; + const real_t tmp_qloop_51 = tmp_qloop_18*tmp_qloop_47 + tmp_qloop_31*tmp_qloop_33; + const real_t tmp_qloop_52 = tmp_qloop_43*tmp_qloop_51; + const real_t tmp_qloop_54 = tmp_qloop_25*tmp_qloop_53 + tmp_qloop_28; + const real_t tmp_qloop_55 = tmp_qloop_29*tmp_qloop_47 + tmp_qloop_31*tmp_qloop_54; + const real_t tmp_qloop_56 = tmp_qloop_22*tmp_qloop_46; + const real_t tmp_qloop_57 = tmp_qloop_28 + tmp_qloop_56; + const real_t tmp_qloop_58 = tmp_qloop_30*tmp_qloop_47 + tmp_qloop_31*tmp_qloop_57; + const real_t tmp_qloop_66 = pow(tmp_qloop_31, -2.5000000000000000)*3.0; + const real_t tmp_qloop_67 = tmp_qloop_36*tmp_qloop_66; + const real_t tmp_qloop_69 = tmp_qloop_18*2.0; + const real_t tmp_qloop_70 = -tmp_qloop_41; + const real_t tmp_qloop_71 = tmp_qloop_35*2.0; + const real_t tmp_qloop_73 = -tmp_qloop_31*tmp_qloop_72; + const real_t tmp_qloop_74 = -tmp_qloop_20 + tmp_qloop_27; + const real_t tmp_qloop_75 = tmp_qloop_18*tmp_qloop_68 + tmp_qloop_34*2.0 + tmp_qloop_74; + const real_t tmp_qloop_76 = tmp_qloop_17*tmp_qloop_66; + const real_t tmp_qloop_77 = tmp_qloop_51*tmp_qloop_76; + const real_t tmp_qloop_78 = tmp_qloop_25*tmp_qloop_77; + const real_t tmp_qloop_79 = tmp_qloop_25*2.0; + const real_t tmp_qloop_80 = tmp_qloop_18*tmp_qloop_40 + tmp_qloop_33*tmp_qloop_79; + const real_t tmp_qloop_81 = tmp_qloop_25*tmp_qloop_43; + const real_t tmp_qloop_82 = tmp_qloop_22*2.0; + const real_t tmp_qloop_83 = tmp_qloop_18*tmp_qloop_72 + tmp_qloop_33*tmp_qloop_82; + const real_t tmp_qloop_84 = -tmp_qloop_22*tmp_qloop_78; + const real_t tmp_qloop_85 = tmp_qloop_18*tmp_qloop_22; + const real_t tmp_qloop_86 = tmp_qloop_22*tmp_qloop_43; + const real_t tmp_qloop_87 = tmp_qloop_21*tmp_qloop_25*tmp_qloop_32 - tmp_qloop_40*tmp_qloop_69; + const real_t tmp_qloop_88 = tmp_qloop_18*tmp_qloop_43; + const real_t tmp_qloop_89 = tmp_qloop_42*tmp_qloop_76; + const real_t tmp_qloop_90 = -tmp_qloop_25*tmp_qloop_40 - tmp_qloop_28; + const real_t tmp_qloop_91 = tmp_qloop_25*tmp_qloop_89; + const real_t tmp_qloop_92 = tmp_qloop_25*tmp_qloop_72 - tmp_qloop_40*tmp_qloop_82; + const real_t tmp_qloop_93 = tmp_qloop_85*tmp_qloop_89; + const real_t tmp_qloop_94 = -tmp_qloop_31*tmp_qloop_68; + const real_t tmp_qloop_95 = tmp_qloop_55*tmp_qloop_76; + const real_t tmp_qloop_96 = tmp_qloop_22*tmp_qloop_68 + tmp_qloop_46*tmp_qloop_69; + const real_t tmp_qloop_97 = tmp_qloop_49*tmp_qloop_76; + const real_t tmp_qloop_98 = tmp_qloop_22*tmp_qloop_40 + tmp_qloop_46*tmp_qloop_79; + const real_t tmp_qloop_99 = tmp_qloop_25*tmp_qloop_97; + const real_t tmp_qloop_100 = tmp_qloop_18*tmp_qloop_99; + const real_t tmp_qloop_101 = tmp_qloop_22*tmp_qloop_72 + tmp_qloop_56*2.0 + tmp_qloop_74; + const real_t tmp_qloop_102 = tmp_qloop_58*tmp_qloop_76; + const real_t tmp_qloop_103 = (_data_q_p_0[q]*_data_q_p_0[q]); + const real_t tmp_qloop_104 = tmp_qloop_103*2.0; + const real_t tmp_qloop_105 = (_data_q_p_1[q]*_data_q_p_1[q]); + const real_t tmp_qloop_106 = tmp_qloop_105*2.0; + const real_t tmp_qloop_107 = (_data_q_p_2[q]*_data_q_p_2[q]); + const real_t tmp_qloop_108 = tmp_qloop_107*2.0; + const real_t tmp_qloop_109 = 4.0*_data_q_p_1[q]; + const real_t tmp_qloop_110 = tmp_qloop_109*_data_q_p_2[q]; + const real_t tmp_qloop_111 = 4.0*_data_q_p_0[q]; + const real_t tmp_qloop_112 = tmp_qloop_111*_data_q_p_1[q]; + const real_t tmp_qloop_113 = tmp_qloop_111*_data_q_p_2[q]; + const real_t tmp_qloop_114 = tmp_qloop_112 + tmp_qloop_113; + const real_t tmp_qloop_115 = tmp_qloop_104 + tmp_qloop_106 + tmp_qloop_108 + tmp_qloop_110 + tmp_qloop_114 - 3.0*_data_q_p_0[q] - 3.0*_data_q_p_1[q] - 3.0*_data_q_p_2[q] + 1.0; + const real_t tmp_qloop_116 = 4.0*_data_q_p_2[q]; + const real_t tmp_qloop_117 = tmp_qloop_109 + tmp_qloop_116; + const real_t tmp_qloop_118 = -rho_dof_8*tmp_qloop_109; + const real_t tmp_qloop_119 = rho_dof_0*(tmp_qloop_111 + tmp_qloop_117 - 3.0); + const real_t tmp_qloop_120 = -rho_dof_7*tmp_qloop_116 + tmp_qloop_119; + const real_t tmp_qloop_121 = rho_dof_1*(tmp_qloop_111 - 1.0) + rho_dof_5*tmp_qloop_116 + rho_dof_6*tmp_qloop_109 + rho_dof_9*(-tmp_qloop_117 - 8.0*_data_q_p_0[q] + 4.0) + tmp_qloop_118 + tmp_qloop_120; + const real_t tmp_qloop_122 = tmp_qloop_111 - 4.0; + const real_t tmp_qloop_123 = -rho_dof_9*tmp_qloop_111; + const real_t tmp_qloop_124 = rho_dof_2*(tmp_qloop_109 - 1.0) + rho_dof_4*tmp_qloop_116 + rho_dof_6*tmp_qloop_111 + rho_dof_8*(-tmp_qloop_116 - tmp_qloop_122 - 8.0*_data_q_p_1[q]) + tmp_qloop_120 + tmp_qloop_123; + const real_t tmp_qloop_125 = rho_dof_3*(tmp_qloop_116 - 1.0) + rho_dof_4*tmp_qloop_109 + rho_dof_5*tmp_qloop_111 + rho_dof_7*(-tmp_qloop_109 - tmp_qloop_122 - 8.0*_data_q_p_2[q]) + tmp_qloop_118 + tmp_qloop_119 + tmp_qloop_123; + const real_t tmp_qloop_127 = tmp_qloop_104 - _data_q_p_0[q]; + const real_t tmp_qloop_128 = tmp_qloop_106 - _data_q_p_1[q]; + const real_t tmp_qloop_129 = tmp_qloop_108 - _data_q_p_2[q]; + const real_t tmp_qloop_130 = tmp_qloop_107*-4.0 - tmp_qloop_110 - tmp_qloop_113 + tmp_qloop_116; + const real_t tmp_qloop_131 = tmp_qloop_105*4.0; + const real_t tmp_qloop_132 = tmp_qloop_109 - tmp_qloop_110 - tmp_qloop_112 - tmp_qloop_131; + const real_t tmp_qloop_133 = tmp_qloop_103*4.0; + const real_t tmp_qloop_134 = tmp_qloop_111 - tmp_qloop_114 - tmp_qloop_133; + const real_t tmp_qloop_158 = tmp_qloop_105*tmp_qloop_111; + const real_t jac_blending_0_0 = tmp_qloop_36*tmp_qloop_38; + const real_t jac_blending_0_1 = tmp_qloop_18*tmp_qloop_44; + const real_t jac_blending_0_2 = tmp_qloop_18*tmp_qloop_50; + const real_t jac_blending_1_0 = tmp_qloop_25*tmp_qloop_52; + const real_t jac_blending_1_1 = tmp_qloop_43*tmp_qloop_55; + const real_t tmp_qloop_63 = jac_blending_0_2*jac_blending_1_1; + const real_t jac_blending_1_2 = tmp_qloop_25*tmp_qloop_50; + const real_t tmp_qloop_60 = jac_blending_0_1*jac_blending_1_2; + const real_t jac_blending_2_0 = tmp_qloop_22*tmp_qloop_52; + const real_t jac_blending_2_1 = tmp_qloop_22*tmp_qloop_44; + const real_t tmp_qloop_61 = jac_blending_1_2*jac_blending_2_1; + const real_t jac_blending_2_2 = tmp_qloop_43*tmp_qloop_58; + const real_t tmp_qloop_59 = jac_blending_1_1*jac_blending_2_2; + const real_t tmp_qloop_62 = jac_blending_0_1*jac_blending_2_2; + const real_t tmp_qloop_64 = jac_blending_0_0*tmp_qloop_59 - jac_blending_0_0*tmp_qloop_61 + jac_blending_0_2*jac_blending_1_0*jac_blending_2_1 - jac_blending_1_0*tmp_qloop_62 + jac_blending_2_0*tmp_qloop_60 - jac_blending_2_0*tmp_qloop_63; + const real_t tmp_qloop_65 = 1.0 / (tmp_qloop_64); + const real_t abs_det_jac_blending = tmp_qloop_64; + const real_t tmp_qloop_135 = abs_det_jac_affine_WHITE_UP*abs_det_jac_blending*1.0 / (rho_dof_0*tmp_qloop_115 + rho_dof_1*tmp_qloop_127 + rho_dof_2*tmp_qloop_128 + rho_dof_3*tmp_qloop_129 + rho_dof_4*tmp_qloop_110 + rho_dof_5*tmp_qloop_113 + rho_dof_6*tmp_qloop_112 + rho_dof_7*tmp_qloop_130 + rho_dof_8*tmp_qloop_132 + rho_dof_9*tmp_qloop_134)*_data_q_w[q]; + const real_t tmp_qloop_136 = tmp_qloop_135*(1.0 - _data_q_p_0[q] - _data_q_p_1[q] - _data_q_p_2[q]); + const real_t tmp_qloop_142 = tmp_qloop_135*_data_q_p_0[q]; + const real_t tmp_qloop_144 = tmp_qloop_135*_data_q_p_2[q]; + const real_t tmp_qloop_147 = tmp_qloop_135*_data_q_p_1[q]; + const real_t tmp_qloop_159 = tmp_qloop_135*tmp_qloop_158; + const real_t tmp_qloop_161 = tmp_qloop_107*tmp_qloop_135; + const real_t jac_blending_inv_0_0 = tmp_qloop_65*(tmp_qloop_59 - tmp_qloop_61); + const real_t jac_blending_inv_0_1 = tmp_qloop_65*(jac_blending_0_2*jac_blending_2_1 - tmp_qloop_62); + const real_t jac_blending_inv_0_2 = tmp_qloop_65*(tmp_qloop_60 - tmp_qloop_63); + const real_t jac_blending_inv_1_0 = tmp_qloop_65*(-jac_blending_1_0*jac_blending_2_2 + jac_blending_1_2*jac_blending_2_0); + const real_t jac_blending_inv_1_1 = tmp_qloop_65*(jac_blending_0_0*jac_blending_2_2 - jac_blending_0_2*jac_blending_2_0); + const real_t jac_blending_inv_1_2 = tmp_qloop_65*(-jac_blending_0_0*jac_blending_1_2 + jac_blending_0_2*jac_blending_1_0); + const real_t jac_blending_inv_2_0 = tmp_qloop_65*(jac_blending_1_0*jac_blending_2_1 - jac_blending_1_1*jac_blending_2_0); + const real_t tmp_qloop_126 = tmp_qloop_121*(jac_affine_inv_0_0_WHITE_UP*jac_blending_inv_0_0 + jac_affine_inv_0_1_WHITE_UP*jac_blending_inv_1_0 + jac_affine_inv_0_2_WHITE_UP*jac_blending_inv_2_0) + tmp_qloop_124*(jac_affine_inv_1_0_WHITE_UP*jac_blending_inv_0_0 + jac_affine_inv_1_1_WHITE_UP*jac_blending_inv_1_0 + jac_affine_inv_1_2_WHITE_UP*jac_blending_inv_2_0) + tmp_qloop_125*(jac_affine_inv_2_0_WHITE_UP*jac_blending_inv_0_0 + jac_affine_inv_2_1_WHITE_UP*jac_blending_inv_1_0 + jac_affine_inv_2_2_WHITE_UP*jac_blending_inv_2_0); + const real_t tmp_qloop_137 = tmp_qloop_126*tmp_qloop_136; + const real_t tmp_qloop_143 = tmp_qloop_126*tmp_qloop_142; + const real_t tmp_qloop_145 = tmp_qloop_126*tmp_qloop_144; + const real_t tmp_qloop_146 = tmp_qloop_112*tmp_qloop_145; + const real_t tmp_qloop_148 = tmp_qloop_126*tmp_qloop_147; + const real_t tmp_qloop_157 = tmp_qloop_126*tmp_qloop_135; + const real_t tmp_qloop_160 = tmp_qloop_107*tmp_qloop_157; + const real_t jac_blending_inv_2_1 = tmp_qloop_65*(-jac_blending_0_0*jac_blending_2_1 + jac_blending_0_1*jac_blending_2_0); + const real_t tmp_qloop_138 = tmp_qloop_121*(jac_affine_inv_0_0_WHITE_UP*jac_blending_inv_0_1 + jac_affine_inv_0_1_WHITE_UP*jac_blending_inv_1_1 + jac_affine_inv_0_2_WHITE_UP*jac_blending_inv_2_1) + tmp_qloop_124*(jac_affine_inv_1_0_WHITE_UP*jac_blending_inv_0_1 + jac_affine_inv_1_1_WHITE_UP*jac_blending_inv_1_1 + jac_affine_inv_1_2_WHITE_UP*jac_blending_inv_2_1) + tmp_qloop_125*(jac_affine_inv_2_0_WHITE_UP*jac_blending_inv_0_1 + jac_affine_inv_2_1_WHITE_UP*jac_blending_inv_1_1 + jac_affine_inv_2_2_WHITE_UP*jac_blending_inv_2_1); + const real_t tmp_qloop_139 = tmp_qloop_136*tmp_qloop_138; + const real_t tmp_qloop_149 = tmp_qloop_138*tmp_qloop_142; + const real_t tmp_qloop_150 = tmp_qloop_138*tmp_qloop_144; + const real_t tmp_qloop_151 = tmp_qloop_112*tmp_qloop_150; + const real_t tmp_qloop_152 = tmp_qloop_138*tmp_qloop_147; + const real_t tmp_qloop_162 = tmp_qloop_138*tmp_qloop_161; + const real_t jac_blending_inv_2_2 = tmp_qloop_65*(jac_blending_0_0*jac_blending_1_1 - jac_blending_0_1*jac_blending_1_0); + const real_t tmp_qloop_140 = tmp_qloop_121*(jac_affine_inv_0_0_WHITE_UP*jac_blending_inv_0_2 + jac_affine_inv_0_1_WHITE_UP*jac_blending_inv_1_2 + jac_affine_inv_0_2_WHITE_UP*jac_blending_inv_2_2) + tmp_qloop_124*(jac_affine_inv_1_0_WHITE_UP*jac_blending_inv_0_2 + jac_affine_inv_1_1_WHITE_UP*jac_blending_inv_1_2 + jac_affine_inv_1_2_WHITE_UP*jac_blending_inv_2_2) + tmp_qloop_125*(jac_affine_inv_2_0_WHITE_UP*jac_blending_inv_0_2 + jac_affine_inv_2_1_WHITE_UP*jac_blending_inv_1_2 + jac_affine_inv_2_2_WHITE_UP*jac_blending_inv_2_2); + const real_t tmp_qloop_141 = tmp_qloop_136*tmp_qloop_140; + const real_t tmp_qloop_153 = tmp_qloop_140*tmp_qloop_142; + const real_t tmp_qloop_154 = tmp_qloop_140*tmp_qloop_144; + const real_t tmp_qloop_155 = tmp_qloop_112*tmp_qloop_154; + const real_t tmp_qloop_156 = tmp_qloop_140*tmp_qloop_147; + const real_t tmp_qloop_163 = tmp_qloop_140*tmp_qloop_161; + const real_t hessian_blending_0_0_0 = tmp_qloop_17*tmp_qloop_37*(tmp_qloop_19*tmp_qloop_68 - tmp_qloop_28*tmp_qloop_69 + tmp_qloop_31*(tmp_qloop_33 - tmp_qloop_68) + tmp_qloop_35*tmp_qloop_69)*1.0 - tmp_qloop_18*tmp_qloop_67; + const real_t hessian_blending_1_0_0 = tmp_qloop_17*tmp_qloop_37*(tmp_qloop_19*tmp_qloop_40 + tmp_qloop_25*tmp_qloop_71 + tmp_qloop_70)*1.0 - tmp_qloop_25*tmp_qloop_67; + const real_t hessian_blending_2_0_0 = tmp_qloop_17*tmp_qloop_37*(tmp_qloop_19*tmp_qloop_72 + tmp_qloop_22*tmp_qloop_71 + tmp_qloop_73)*1.0 - tmp_qloop_22*tmp_qloop_67; + const real_t hessian_blending_0_0_1 = tmp_qloop_17*tmp_qloop_25*tmp_qloop_37*tmp_qloop_75*1.0 - tmp_qloop_18*tmp_qloop_78; + const real_t hessian_blending_1_0_1 = -tmp_qloop_29*tmp_qloop_77 + tmp_qloop_52 + tmp_qloop_80*tmp_qloop_81; + const real_t hessian_blending_2_0_1 = tmp_qloop_81*tmp_qloop_83 + tmp_qloop_84; + const real_t hessian_blending_0_0_2 = tmp_qloop_17*tmp_qloop_22*tmp_qloop_37*tmp_qloop_75*1.0 - tmp_qloop_77*tmp_qloop_85; + const real_t hessian_blending_1_0_2 = tmp_qloop_80*tmp_qloop_86 + tmp_qloop_84; + const real_t hessian_blending_2_0_2 = -tmp_qloop_30*tmp_qloop_77 + tmp_qloop_52 + tmp_qloop_83*tmp_qloop_86; + const real_t hessian_blending_0_1_0 = -tmp_qloop_19*tmp_qloop_89 + tmp_qloop_44 + tmp_qloop_87*tmp_qloop_88; + const real_t hessian_blending_1_1_0 = tmp_qloop_17*tmp_qloop_18*tmp_qloop_37*tmp_qloop_90*1.0 - tmp_qloop_18*tmp_qloop_91; + const real_t hessian_blending_2_1_0 = tmp_qloop_88*tmp_qloop_92 - tmp_qloop_93; + const real_t hessian_blending_0_1_1 = tmp_qloop_17*tmp_qloop_37*(tmp_qloop_29*tmp_qloop_68 + tmp_qloop_54*tmp_qloop_69 + tmp_qloop_94)*1.0 - tmp_qloop_18*tmp_qloop_95; + const real_t hessian_blending_1_1_1 = tmp_qloop_17*tmp_qloop_37*(tmp_qloop_29*tmp_qloop_40 + tmp_qloop_31*(-tmp_qloop_40 + tmp_qloop_53) + tmp_qloop_47*tmp_qloop_79 + tmp_qloop_54*tmp_qloop_79)*1.0 - tmp_qloop_25*tmp_qloop_95; + const real_t hessian_blending_2_1_1 = tmp_qloop_17*tmp_qloop_37*(tmp_qloop_29*tmp_qloop_72 + tmp_qloop_54*tmp_qloop_82 + tmp_qloop_73)*1.0 - tmp_qloop_22*tmp_qloop_95; + const real_t hessian_blending_0_1_2 = tmp_qloop_17*tmp_qloop_22*tmp_qloop_37*tmp_qloop_87*1.0 - tmp_qloop_93; + const real_t hessian_blending_1_1_2 = tmp_qloop_17*tmp_qloop_22*tmp_qloop_37*tmp_qloop_90*1.0 - tmp_qloop_22*tmp_qloop_91; + const real_t hessian_blending_2_1_2 = -tmp_qloop_30*tmp_qloop_89 + tmp_qloop_44 + tmp_qloop_86*tmp_qloop_92; + const real_t hessian_blending_0_2_0 = -tmp_qloop_19*tmp_qloop_97 + tmp_qloop_50 + tmp_qloop_88*tmp_qloop_96; + const real_t hessian_blending_1_2_0 = -tmp_qloop_100 + tmp_qloop_17*tmp_qloop_18*tmp_qloop_37*tmp_qloop_98*1.0; + const real_t hessian_blending_2_2_0 = tmp_qloop_101*tmp_qloop_17*tmp_qloop_18*tmp_qloop_37*1.0 - tmp_qloop_85*tmp_qloop_97; + const real_t hessian_blending_0_2_1 = -tmp_qloop_100 + tmp_qloop_17*tmp_qloop_25*tmp_qloop_37*tmp_qloop_96*1.0; + const real_t hessian_blending_1_2_1 = -tmp_qloop_29*tmp_qloop_97 + tmp_qloop_50 + tmp_qloop_81*tmp_qloop_98; + const real_t hessian_blending_2_2_1 = tmp_qloop_101*tmp_qloop_17*tmp_qloop_25*tmp_qloop_37*1.0 - tmp_qloop_22*tmp_qloop_99; + const real_t hessian_blending_0_2_2 = -tmp_qloop_102*tmp_qloop_18 + tmp_qloop_17*tmp_qloop_37*(tmp_qloop_30*tmp_qloop_68 + tmp_qloop_57*tmp_qloop_69 + tmp_qloop_94)*1.0; + const real_t hessian_blending_1_2_2 = -tmp_qloop_102*tmp_qloop_25 + tmp_qloop_17*tmp_qloop_37*(tmp_qloop_30*tmp_qloop_40 + tmp_qloop_57*tmp_qloop_79 + tmp_qloop_70)*1.0; + const real_t hessian_blending_2_2_2 = -tmp_qloop_102*tmp_qloop_22 + tmp_qloop_17*tmp_qloop_37*(tmp_qloop_30*tmp_qloop_72 + tmp_qloop_31*(tmp_qloop_46 - tmp_qloop_72) + tmp_qloop_48*2.0 + tmp_qloop_57*tmp_qloop_82)*1.0; + const real_t q_tmp_0_0 = tmp_qloop_115*tmp_qloop_137; + const real_t q_tmp_0_1 = tmp_qloop_127*tmp_qloop_137; + const real_t q_tmp_0_2 = tmp_qloop_128*tmp_qloop_137; + const real_t q_tmp_0_3 = tmp_qloop_129*tmp_qloop_137; + const real_t q_tmp_0_4 = tmp_qloop_110*tmp_qloop_137; + const real_t q_tmp_0_5 = tmp_qloop_113*tmp_qloop_137; + const real_t q_tmp_0_6 = tmp_qloop_112*tmp_qloop_137; + const real_t q_tmp_0_7 = tmp_qloop_130*tmp_qloop_137; + const real_t q_tmp_0_8 = tmp_qloop_132*tmp_qloop_137; + const real_t q_tmp_0_9 = tmp_qloop_134*tmp_qloop_137; + const real_t q_tmp_0_10 = tmp_qloop_115*tmp_qloop_139; + const real_t q_tmp_0_11 = tmp_qloop_127*tmp_qloop_139; + const real_t q_tmp_0_12 = tmp_qloop_128*tmp_qloop_139; + const real_t q_tmp_0_13 = tmp_qloop_129*tmp_qloop_139; + const real_t q_tmp_0_14 = tmp_qloop_110*tmp_qloop_139; + const real_t q_tmp_0_15 = tmp_qloop_113*tmp_qloop_139; + const real_t q_tmp_0_16 = tmp_qloop_112*tmp_qloop_139; + const real_t q_tmp_0_17 = tmp_qloop_130*tmp_qloop_139; + const real_t q_tmp_0_18 = tmp_qloop_132*tmp_qloop_139; + const real_t q_tmp_0_19 = tmp_qloop_134*tmp_qloop_139; + const real_t q_tmp_0_20 = tmp_qloop_115*tmp_qloop_141; + const real_t q_tmp_0_21 = tmp_qloop_127*tmp_qloop_141; + const real_t q_tmp_0_22 = tmp_qloop_128*tmp_qloop_141; + const real_t q_tmp_0_23 = tmp_qloop_129*tmp_qloop_141; + const real_t q_tmp_0_24 = tmp_qloop_110*tmp_qloop_141; + const real_t q_tmp_0_25 = tmp_qloop_113*tmp_qloop_141; + const real_t q_tmp_0_26 = tmp_qloop_112*tmp_qloop_141; + const real_t q_tmp_0_27 = tmp_qloop_130*tmp_qloop_141; + const real_t q_tmp_0_28 = tmp_qloop_132*tmp_qloop_141; + const real_t q_tmp_0_29 = tmp_qloop_134*tmp_qloop_141; + const real_t q_tmp_1_0 = tmp_qloop_115*tmp_qloop_143; + const real_t q_tmp_1_1 = tmp_qloop_127*tmp_qloop_143; + const real_t q_tmp_1_2 = tmp_qloop_128*tmp_qloop_143; + const real_t q_tmp_1_3 = tmp_qloop_129*tmp_qloop_143; + const real_t q_tmp_1_4 = tmp_qloop_146; + const real_t q_tmp_1_5 = tmp_qloop_133*tmp_qloop_145; + const real_t q_tmp_1_6 = tmp_qloop_133*tmp_qloop_148; + const real_t q_tmp_1_7 = tmp_qloop_130*tmp_qloop_143; + const real_t q_tmp_1_8 = tmp_qloop_132*tmp_qloop_143; + const real_t q_tmp_1_9 = tmp_qloop_134*tmp_qloop_143; + const real_t q_tmp_1_10 = tmp_qloop_115*tmp_qloop_149; + const real_t q_tmp_1_11 = tmp_qloop_127*tmp_qloop_149; + const real_t q_tmp_1_12 = tmp_qloop_128*tmp_qloop_149; + const real_t q_tmp_1_13 = tmp_qloop_129*tmp_qloop_149; + const real_t q_tmp_1_14 = tmp_qloop_151; + const real_t q_tmp_1_15 = tmp_qloop_133*tmp_qloop_150; + const real_t q_tmp_1_16 = tmp_qloop_133*tmp_qloop_152; + const real_t q_tmp_1_17 = tmp_qloop_130*tmp_qloop_149; + const real_t q_tmp_1_18 = tmp_qloop_132*tmp_qloop_149; + const real_t q_tmp_1_19 = tmp_qloop_134*tmp_qloop_149; + const real_t q_tmp_1_20 = tmp_qloop_115*tmp_qloop_153; + const real_t q_tmp_1_21 = tmp_qloop_127*tmp_qloop_153; + const real_t q_tmp_1_22 = tmp_qloop_128*tmp_qloop_153; + const real_t q_tmp_1_23 = tmp_qloop_129*tmp_qloop_153; + const real_t q_tmp_1_24 = tmp_qloop_155; + const real_t q_tmp_1_25 = tmp_qloop_133*tmp_qloop_154; + const real_t q_tmp_1_26 = tmp_qloop_133*tmp_qloop_156; + const real_t q_tmp_1_27 = tmp_qloop_130*tmp_qloop_153; + const real_t q_tmp_1_28 = tmp_qloop_132*tmp_qloop_153; + const real_t q_tmp_1_29 = tmp_qloop_134*tmp_qloop_153; + const real_t q_tmp_2_0 = tmp_qloop_115*tmp_qloop_148; + const real_t q_tmp_2_1 = tmp_qloop_127*tmp_qloop_148; + const real_t q_tmp_2_2 = tmp_qloop_128*tmp_qloop_148; + const real_t q_tmp_2_3 = tmp_qloop_129*tmp_qloop_148; + const real_t q_tmp_2_4 = tmp_qloop_131*tmp_qloop_145; + const real_t q_tmp_2_5 = tmp_qloop_146; + const real_t q_tmp_2_6 = tmp_qloop_157*tmp_qloop_158; + const real_t q_tmp_2_7 = tmp_qloop_130*tmp_qloop_148; + const real_t q_tmp_2_8 = tmp_qloop_132*tmp_qloop_148; + const real_t q_tmp_2_9 = tmp_qloop_134*tmp_qloop_148; + const real_t q_tmp_2_10 = tmp_qloop_115*tmp_qloop_152; + const real_t q_tmp_2_11 = tmp_qloop_127*tmp_qloop_152; + const real_t q_tmp_2_12 = tmp_qloop_128*tmp_qloop_152; + const real_t q_tmp_2_13 = tmp_qloop_129*tmp_qloop_152; + const real_t q_tmp_2_14 = tmp_qloop_131*tmp_qloop_150; + const real_t q_tmp_2_15 = tmp_qloop_151; + const real_t q_tmp_2_16 = tmp_qloop_138*tmp_qloop_159; + const real_t q_tmp_2_17 = tmp_qloop_130*tmp_qloop_152; + const real_t q_tmp_2_18 = tmp_qloop_132*tmp_qloop_152; + const real_t q_tmp_2_19 = tmp_qloop_134*tmp_qloop_152; + const real_t q_tmp_2_20 = tmp_qloop_115*tmp_qloop_156; + const real_t q_tmp_2_21 = tmp_qloop_127*tmp_qloop_156; + const real_t q_tmp_2_22 = tmp_qloop_128*tmp_qloop_156; + const real_t q_tmp_2_23 = tmp_qloop_129*tmp_qloop_156; + const real_t q_tmp_2_24 = tmp_qloop_131*tmp_qloop_154; + const real_t q_tmp_2_25 = tmp_qloop_155; + const real_t q_tmp_2_26 = tmp_qloop_140*tmp_qloop_159; + const real_t q_tmp_2_27 = tmp_qloop_130*tmp_qloop_156; + const real_t q_tmp_2_28 = tmp_qloop_132*tmp_qloop_156; + const real_t q_tmp_2_29 = tmp_qloop_134*tmp_qloop_156; + const real_t q_tmp_3_0 = tmp_qloop_115*tmp_qloop_145; + const real_t q_tmp_3_1 = tmp_qloop_127*tmp_qloop_145; + const real_t q_tmp_3_2 = tmp_qloop_128*tmp_qloop_145; + const real_t q_tmp_3_3 = tmp_qloop_129*tmp_qloop_145; + const real_t q_tmp_3_4 = tmp_qloop_109*tmp_qloop_160; + const real_t q_tmp_3_5 = tmp_qloop_111*tmp_qloop_160; + const real_t q_tmp_3_6 = tmp_qloop_146; + const real_t q_tmp_3_7 = tmp_qloop_130*tmp_qloop_145; + const real_t q_tmp_3_8 = tmp_qloop_132*tmp_qloop_145; + const real_t q_tmp_3_9 = tmp_qloop_134*tmp_qloop_145; + const real_t q_tmp_3_10 = tmp_qloop_115*tmp_qloop_150; + const real_t q_tmp_3_11 = tmp_qloop_127*tmp_qloop_150; + const real_t q_tmp_3_12 = tmp_qloop_128*tmp_qloop_150; + const real_t q_tmp_3_13 = tmp_qloop_129*tmp_qloop_150; + const real_t q_tmp_3_14 = tmp_qloop_109*tmp_qloop_162; + const real_t q_tmp_3_15 = tmp_qloop_111*tmp_qloop_162; + const real_t q_tmp_3_16 = tmp_qloop_151; + const real_t q_tmp_3_17 = tmp_qloop_130*tmp_qloop_150; + const real_t q_tmp_3_18 = tmp_qloop_132*tmp_qloop_150; + const real_t q_tmp_3_19 = tmp_qloop_134*tmp_qloop_150; + const real_t q_tmp_3_20 = tmp_qloop_115*tmp_qloop_154; + const real_t q_tmp_3_21 = tmp_qloop_127*tmp_qloop_154; + const real_t q_tmp_3_22 = tmp_qloop_128*tmp_qloop_154; + const real_t q_tmp_3_23 = tmp_qloop_129*tmp_qloop_154; + const real_t q_tmp_3_24 = tmp_qloop_109*tmp_qloop_163; + const real_t q_tmp_3_25 = tmp_qloop_111*tmp_qloop_163; + const real_t q_tmp_3_26 = tmp_qloop_155; + const real_t q_tmp_3_27 = tmp_qloop_130*tmp_qloop_154; + const real_t q_tmp_3_28 = tmp_qloop_132*tmp_qloop_154; + const real_t q_tmp_3_29 = tmp_qloop_134*tmp_qloop_154; + q_acc_0_0 = q_acc_0_0 + q_tmp_0_0; + q_acc_0_1 = q_acc_0_1 + q_tmp_0_1; + q_acc_0_2 = q_acc_0_2 + q_tmp_0_2; + q_acc_0_3 = q_acc_0_3 + q_tmp_0_3; + q_acc_0_4 = q_acc_0_4 + q_tmp_0_4; + q_acc_0_5 = q_acc_0_5 + q_tmp_0_5; + q_acc_0_6 = q_acc_0_6 + q_tmp_0_6; + q_acc_0_7 = q_acc_0_7 + q_tmp_0_7; + q_acc_0_8 = q_acc_0_8 + q_tmp_0_8; + q_acc_0_9 = q_acc_0_9 + q_tmp_0_9; + q_acc_0_10 = q_acc_0_10 + q_tmp_0_10; + q_acc_0_11 = q_acc_0_11 + q_tmp_0_11; + q_acc_0_12 = q_acc_0_12 + q_tmp_0_12; + q_acc_0_13 = q_acc_0_13 + q_tmp_0_13; + q_acc_0_14 = q_acc_0_14 + q_tmp_0_14; + q_acc_0_15 = q_acc_0_15 + q_tmp_0_15; + q_acc_0_16 = q_acc_0_16 + q_tmp_0_16; + q_acc_0_17 = q_acc_0_17 + q_tmp_0_17; + q_acc_0_18 = q_acc_0_18 + q_tmp_0_18; + q_acc_0_19 = q_acc_0_19 + q_tmp_0_19; + q_acc_0_20 = q_acc_0_20 + q_tmp_0_20; + q_acc_0_21 = q_acc_0_21 + q_tmp_0_21; + q_acc_0_22 = q_acc_0_22 + q_tmp_0_22; + q_acc_0_23 = q_acc_0_23 + q_tmp_0_23; + q_acc_0_24 = q_acc_0_24 + q_tmp_0_24; + q_acc_0_25 = q_acc_0_25 + q_tmp_0_25; + q_acc_0_26 = q_acc_0_26 + q_tmp_0_26; + q_acc_0_27 = q_acc_0_27 + q_tmp_0_27; + q_acc_0_28 = q_acc_0_28 + q_tmp_0_28; + q_acc_0_29 = q_acc_0_29 + q_tmp_0_29; + q_acc_1_0 = q_acc_1_0 + q_tmp_1_0; + q_acc_1_1 = q_acc_1_1 + q_tmp_1_1; + q_acc_1_2 = q_acc_1_2 + q_tmp_1_2; + q_acc_1_3 = q_acc_1_3 + q_tmp_1_3; + q_acc_1_4 = q_acc_1_4 + q_tmp_1_4; + q_acc_1_5 = q_acc_1_5 + q_tmp_1_5; + q_acc_1_6 = q_acc_1_6 + q_tmp_1_6; + q_acc_1_7 = q_acc_1_7 + q_tmp_1_7; + q_acc_1_8 = q_acc_1_8 + q_tmp_1_8; + q_acc_1_9 = q_acc_1_9 + q_tmp_1_9; + q_acc_1_10 = q_acc_1_10 + q_tmp_1_10; + q_acc_1_11 = q_acc_1_11 + q_tmp_1_11; + q_acc_1_12 = q_acc_1_12 + q_tmp_1_12; + q_acc_1_13 = q_acc_1_13 + q_tmp_1_13; + q_acc_1_14 = q_acc_1_14 + q_tmp_1_14; + q_acc_1_15 = q_acc_1_15 + q_tmp_1_15; + q_acc_1_16 = q_acc_1_16 + q_tmp_1_16; + q_acc_1_17 = q_acc_1_17 + q_tmp_1_17; + q_acc_1_18 = q_acc_1_18 + q_tmp_1_18; + q_acc_1_19 = q_acc_1_19 + q_tmp_1_19; + q_acc_1_20 = q_acc_1_20 + q_tmp_1_20; + q_acc_1_21 = q_acc_1_21 + q_tmp_1_21; + q_acc_1_22 = q_acc_1_22 + q_tmp_1_22; + q_acc_1_23 = q_acc_1_23 + q_tmp_1_23; + q_acc_1_24 = q_acc_1_24 + q_tmp_1_24; + q_acc_1_25 = q_acc_1_25 + q_tmp_1_25; + q_acc_1_26 = q_acc_1_26 + q_tmp_1_26; + q_acc_1_27 = q_acc_1_27 + q_tmp_1_27; + q_acc_1_28 = q_acc_1_28 + q_tmp_1_28; + q_acc_1_29 = q_acc_1_29 + q_tmp_1_29; + q_acc_2_0 = q_acc_2_0 + q_tmp_2_0; + q_acc_2_1 = q_acc_2_1 + q_tmp_2_1; + q_acc_2_2 = q_acc_2_2 + q_tmp_2_2; + q_acc_2_3 = q_acc_2_3 + q_tmp_2_3; + q_acc_2_4 = q_acc_2_4 + q_tmp_2_4; + q_acc_2_5 = q_acc_2_5 + q_tmp_2_5; + q_acc_2_6 = q_acc_2_6 + q_tmp_2_6; + q_acc_2_7 = q_acc_2_7 + q_tmp_2_7; + q_acc_2_8 = q_acc_2_8 + q_tmp_2_8; + q_acc_2_9 = q_acc_2_9 + q_tmp_2_9; + q_acc_2_10 = q_acc_2_10 + q_tmp_2_10; + q_acc_2_11 = q_acc_2_11 + q_tmp_2_11; + q_acc_2_12 = q_acc_2_12 + q_tmp_2_12; + q_acc_2_13 = q_acc_2_13 + q_tmp_2_13; + q_acc_2_14 = q_acc_2_14 + q_tmp_2_14; + q_acc_2_15 = q_acc_2_15 + q_tmp_2_15; + q_acc_2_16 = q_acc_2_16 + q_tmp_2_16; + q_acc_2_17 = q_acc_2_17 + q_tmp_2_17; + q_acc_2_18 = q_acc_2_18 + q_tmp_2_18; + q_acc_2_19 = q_acc_2_19 + q_tmp_2_19; + q_acc_2_20 = q_acc_2_20 + q_tmp_2_20; + q_acc_2_21 = q_acc_2_21 + q_tmp_2_21; + q_acc_2_22 = q_acc_2_22 + q_tmp_2_22; + q_acc_2_23 = q_acc_2_23 + q_tmp_2_23; + q_acc_2_24 = q_acc_2_24 + q_tmp_2_24; + q_acc_2_25 = q_acc_2_25 + q_tmp_2_25; + q_acc_2_26 = q_acc_2_26 + q_tmp_2_26; + q_acc_2_27 = q_acc_2_27 + q_tmp_2_27; + q_acc_2_28 = q_acc_2_28 + q_tmp_2_28; + q_acc_2_29 = q_acc_2_29 + q_tmp_2_29; + q_acc_3_0 = q_acc_3_0 + q_tmp_3_0; + q_acc_3_1 = q_acc_3_1 + q_tmp_3_1; + q_acc_3_2 = q_acc_3_2 + q_tmp_3_2; + q_acc_3_3 = q_acc_3_3 + q_tmp_3_3; + q_acc_3_4 = q_acc_3_4 + q_tmp_3_4; + q_acc_3_5 = q_acc_3_5 + q_tmp_3_5; + q_acc_3_6 = q_acc_3_6 + q_tmp_3_6; + q_acc_3_7 = q_acc_3_7 + q_tmp_3_7; + q_acc_3_8 = q_acc_3_8 + q_tmp_3_8; + q_acc_3_9 = q_acc_3_9 + q_tmp_3_9; + q_acc_3_10 = q_acc_3_10 + q_tmp_3_10; + q_acc_3_11 = q_acc_3_11 + q_tmp_3_11; + q_acc_3_12 = q_acc_3_12 + q_tmp_3_12; + q_acc_3_13 = q_acc_3_13 + q_tmp_3_13; + q_acc_3_14 = q_acc_3_14 + q_tmp_3_14; + q_acc_3_15 = q_acc_3_15 + q_tmp_3_15; + q_acc_3_16 = q_acc_3_16 + q_tmp_3_16; + q_acc_3_17 = q_acc_3_17 + q_tmp_3_17; + q_acc_3_18 = q_acc_3_18 + q_tmp_3_18; + q_acc_3_19 = q_acc_3_19 + q_tmp_3_19; + q_acc_3_20 = q_acc_3_20 + q_tmp_3_20; + q_acc_3_21 = q_acc_3_21 + q_tmp_3_21; + q_acc_3_22 = q_acc_3_22 + q_tmp_3_22; + q_acc_3_23 = q_acc_3_23 + q_tmp_3_23; + q_acc_3_24 = q_acc_3_24 + q_tmp_3_24; + q_acc_3_25 = q_acc_3_25 + q_tmp_3_25; + q_acc_3_26 = q_acc_3_26 + q_tmp_3_26; + q_acc_3_27 = q_acc_3_27 + q_tmp_3_27; + q_acc_3_28 = q_acc_3_28 + q_tmp_3_28; + q_acc_3_29 = q_acc_3_29 + q_tmp_3_29; + } + const real_t elMatVec_0 = q_acc_0_0*src_dof_0 + q_acc_0_1*src_dof_1 + q_acc_0_10*src_dof_10 + q_acc_0_11*src_dof_11 + q_acc_0_12*src_dof_12 + q_acc_0_13*src_dof_13 + q_acc_0_14*src_dof_14 + q_acc_0_15*src_dof_15 + q_acc_0_16*src_dof_16 + q_acc_0_17*src_dof_17 + q_acc_0_18*src_dof_18 + q_acc_0_19*src_dof_19 + q_acc_0_2*src_dof_2 + q_acc_0_20*src_dof_20 + q_acc_0_21*src_dof_21 + q_acc_0_22*src_dof_22 + q_acc_0_23*src_dof_23 + q_acc_0_24*src_dof_24 + q_acc_0_25*src_dof_25 + q_acc_0_26*src_dof_26 + q_acc_0_27*src_dof_27 + q_acc_0_28*src_dof_28 + q_acc_0_29*src_dof_29 + q_acc_0_3*src_dof_3 + q_acc_0_4*src_dof_4 + q_acc_0_5*src_dof_5 + q_acc_0_6*src_dof_6 + q_acc_0_7*src_dof_7 + q_acc_0_8*src_dof_8 + q_acc_0_9*src_dof_9; + const real_t elMatVec_1 = q_acc_1_0*src_dof_0 + q_acc_1_1*src_dof_1 + q_acc_1_10*src_dof_10 + q_acc_1_11*src_dof_11 + q_acc_1_12*src_dof_12 + q_acc_1_13*src_dof_13 + q_acc_1_14*src_dof_14 + q_acc_1_15*src_dof_15 + q_acc_1_16*src_dof_16 + q_acc_1_17*src_dof_17 + q_acc_1_18*src_dof_18 + q_acc_1_19*src_dof_19 + q_acc_1_2*src_dof_2 + q_acc_1_20*src_dof_20 + q_acc_1_21*src_dof_21 + q_acc_1_22*src_dof_22 + q_acc_1_23*src_dof_23 + q_acc_1_24*src_dof_24 + q_acc_1_25*src_dof_25 + q_acc_1_26*src_dof_26 + q_acc_1_27*src_dof_27 + q_acc_1_28*src_dof_28 + q_acc_1_29*src_dof_29 + q_acc_1_3*src_dof_3 + q_acc_1_4*src_dof_4 + q_acc_1_5*src_dof_5 + q_acc_1_6*src_dof_6 + q_acc_1_7*src_dof_7 + q_acc_1_8*src_dof_8 + q_acc_1_9*src_dof_9; + const real_t elMatVec_2 = q_acc_2_0*src_dof_0 + q_acc_2_1*src_dof_1 + q_acc_2_10*src_dof_10 + q_acc_2_11*src_dof_11 + q_acc_2_12*src_dof_12 + q_acc_2_13*src_dof_13 + q_acc_2_14*src_dof_14 + q_acc_2_15*src_dof_15 + q_acc_2_16*src_dof_16 + q_acc_2_17*src_dof_17 + q_acc_2_18*src_dof_18 + q_acc_2_19*src_dof_19 + q_acc_2_2*src_dof_2 + q_acc_2_20*src_dof_20 + q_acc_2_21*src_dof_21 + q_acc_2_22*src_dof_22 + q_acc_2_23*src_dof_23 + q_acc_2_24*src_dof_24 + q_acc_2_25*src_dof_25 + q_acc_2_26*src_dof_26 + q_acc_2_27*src_dof_27 + q_acc_2_28*src_dof_28 + q_acc_2_29*src_dof_29 + q_acc_2_3*src_dof_3 + q_acc_2_4*src_dof_4 + q_acc_2_5*src_dof_5 + q_acc_2_6*src_dof_6 + q_acc_2_7*src_dof_7 + q_acc_2_8*src_dof_8 + q_acc_2_9*src_dof_9; + const real_t elMatVec_3 = q_acc_3_0*src_dof_0 + q_acc_3_1*src_dof_1 + q_acc_3_10*src_dof_10 + q_acc_3_11*src_dof_11 + q_acc_3_12*src_dof_12 + q_acc_3_13*src_dof_13 + q_acc_3_14*src_dof_14 + q_acc_3_15*src_dof_15 + q_acc_3_16*src_dof_16 + q_acc_3_17*src_dof_17 + q_acc_3_18*src_dof_18 + q_acc_3_19*src_dof_19 + q_acc_3_2*src_dof_2 + q_acc_3_20*src_dof_20 + q_acc_3_21*src_dof_21 + q_acc_3_22*src_dof_22 + q_acc_3_23*src_dof_23 + q_acc_3_24*src_dof_24 + q_acc_3_25*src_dof_25 + q_acc_3_26*src_dof_26 + q_acc_3_27*src_dof_27 + q_acc_3_28*src_dof_28 + q_acc_3_29*src_dof_29 + q_acc_3_3*src_dof_3 + q_acc_3_4*src_dof_4 + q_acc_3_5*src_dof_5 + q_acc_3_6*src_dof_6 + q_acc_3_7*src_dof_7 + q_acc_3_8*src_dof_8 + q_acc_3_9*src_dof_9; + _data_dst[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))] = elMatVec_0 + _data_dst[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))]; + _data_dst[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1] = elMatVec_1 + _data_dst[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1]; + _data_dst[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))] = elMatVec_2 + _data_dst[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))]; + _data_dst[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))] = elMatVec_3 + _data_dst[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))]; + } + } + } + const real_t tmp_coords_jac_0_WHITE_DOWN = 1.0 / (micro_edges_per_macro_edge_float)*1.0; + const real_t tmp_coords_jac_1_WHITE_DOWN = tmp_coords_jac_0_WHITE_DOWN*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0); + const real_t tmp_coords_jac_2_WHITE_DOWN = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_WHITE_DOWN*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0); + const real_t tmp_coords_jac_3_WHITE_DOWN = tmp_coords_jac_1_WHITE_DOWN + tmp_coords_jac_2_WHITE_DOWN; + const real_t tmp_coords_jac_4_WHITE_DOWN = tmp_coords_jac_0_WHITE_DOWN*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1); + const real_t tmp_coords_jac_5_WHITE_DOWN = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_WHITE_DOWN*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1); + const real_t tmp_coords_jac_6_WHITE_DOWN = tmp_coords_jac_4_WHITE_DOWN + tmp_coords_jac_5_WHITE_DOWN; + const real_t tmp_coords_jac_7_WHITE_DOWN = tmp_coords_jac_0_WHITE_DOWN*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2); + const real_t tmp_coords_jac_8_WHITE_DOWN = macro_vertex_coord_id_0comp2 + tmp_coords_jac_0_WHITE_DOWN*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2); + const real_t tmp_coords_jac_9_WHITE_DOWN = tmp_coords_jac_7_WHITE_DOWN + tmp_coords_jac_8_WHITE_DOWN; + const real_t tmp_coords_jac_10_WHITE_DOWN = tmp_coords_jac_0_WHITE_DOWN*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0); + const real_t tmp_coords_jac_11_WHITE_DOWN = tmp_coords_jac_0_WHITE_DOWN*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1); + const real_t tmp_coords_jac_12_WHITE_DOWN = tmp_coords_jac_0_WHITE_DOWN*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2); + const real_t p_affine_const_0_0_WHITE_DOWN = tmp_coords_jac_3_WHITE_DOWN; + const real_t p_affine_const_0_1_WHITE_DOWN = tmp_coords_jac_6_WHITE_DOWN; + const real_t p_affine_const_0_2_WHITE_DOWN = tmp_coords_jac_9_WHITE_DOWN; + const real_t p_affine_const_1_0_WHITE_DOWN = tmp_coords_jac_10_WHITE_DOWN + tmp_coords_jac_2_WHITE_DOWN; + const real_t p_affine_const_1_1_WHITE_DOWN = tmp_coords_jac_11_WHITE_DOWN + tmp_coords_jac_5_WHITE_DOWN; + const real_t p_affine_const_1_2_WHITE_DOWN = tmp_coords_jac_12_WHITE_DOWN + tmp_coords_jac_8_WHITE_DOWN; + const real_t p_affine_const_2_0_WHITE_DOWN = macro_vertex_coord_id_0comp0 + tmp_coords_jac_10_WHITE_DOWN + tmp_coords_jac_1_WHITE_DOWN; + const real_t p_affine_const_2_1_WHITE_DOWN = macro_vertex_coord_id_0comp1 + tmp_coords_jac_11_WHITE_DOWN + tmp_coords_jac_4_WHITE_DOWN; + const real_t p_affine_const_2_2_WHITE_DOWN = macro_vertex_coord_id_0comp2 + tmp_coords_jac_12_WHITE_DOWN + tmp_coords_jac_7_WHITE_DOWN; + const real_t p_affine_const_3_0_WHITE_DOWN = tmp_coords_jac_10_WHITE_DOWN + tmp_coords_jac_3_WHITE_DOWN; + const real_t p_affine_const_3_1_WHITE_DOWN = tmp_coords_jac_11_WHITE_DOWN + tmp_coords_jac_6_WHITE_DOWN; + const real_t p_affine_const_3_2_WHITE_DOWN = tmp_coords_jac_12_WHITE_DOWN + tmp_coords_jac_9_WHITE_DOWN; + const real_t jac_affine_0_0_WHITE_DOWN = -p_affine_const_0_0_WHITE_DOWN + p_affine_const_1_0_WHITE_DOWN; + const real_t jac_affine_0_1_WHITE_DOWN = -p_affine_const_0_0_WHITE_DOWN + p_affine_const_2_0_WHITE_DOWN; + const real_t jac_affine_0_2_WHITE_DOWN = -p_affine_const_0_0_WHITE_DOWN + p_affine_const_3_0_WHITE_DOWN; + const real_t jac_affine_1_0_WHITE_DOWN = -p_affine_const_0_1_WHITE_DOWN + p_affine_const_1_1_WHITE_DOWN; + const real_t jac_affine_1_1_WHITE_DOWN = -p_affine_const_0_1_WHITE_DOWN + p_affine_const_2_1_WHITE_DOWN; + const real_t tmp_coords_jac_17_WHITE_DOWN = jac_affine_0_2_WHITE_DOWN*jac_affine_1_1_WHITE_DOWN; + const real_t jac_affine_1_2_WHITE_DOWN = -p_affine_const_0_1_WHITE_DOWN + p_affine_const_3_1_WHITE_DOWN; + const real_t tmp_coords_jac_15_WHITE_DOWN = jac_affine_0_1_WHITE_DOWN*jac_affine_1_2_WHITE_DOWN; + const real_t jac_affine_2_0_WHITE_DOWN = -p_affine_const_0_2_WHITE_DOWN + p_affine_const_1_2_WHITE_DOWN; + const real_t jac_affine_2_1_WHITE_DOWN = -p_affine_const_0_2_WHITE_DOWN + p_affine_const_2_2_WHITE_DOWN; + const real_t tmp_coords_jac_14_WHITE_DOWN = jac_affine_1_2_WHITE_DOWN*jac_affine_2_1_WHITE_DOWN; + const real_t jac_affine_2_2_WHITE_DOWN = -p_affine_const_0_2_WHITE_DOWN + p_affine_const_3_2_WHITE_DOWN; + const real_t tmp_coords_jac_13_WHITE_DOWN = jac_affine_1_1_WHITE_DOWN*jac_affine_2_2_WHITE_DOWN; + const real_t tmp_coords_jac_16_WHITE_DOWN = jac_affine_0_1_WHITE_DOWN*jac_affine_2_2_WHITE_DOWN; + const real_t tmp_coords_jac_18_WHITE_DOWN = jac_affine_0_0_WHITE_DOWN*tmp_coords_jac_13_WHITE_DOWN - jac_affine_0_0_WHITE_DOWN*tmp_coords_jac_14_WHITE_DOWN + jac_affine_0_2_WHITE_DOWN*jac_affine_1_0_WHITE_DOWN*jac_affine_2_1_WHITE_DOWN - jac_affine_1_0_WHITE_DOWN*tmp_coords_jac_16_WHITE_DOWN + jac_affine_2_0_WHITE_DOWN*tmp_coords_jac_15_WHITE_DOWN - jac_affine_2_0_WHITE_DOWN*tmp_coords_jac_17_WHITE_DOWN; + const real_t tmp_coords_jac_19_WHITE_DOWN = 1.0 / (tmp_coords_jac_18_WHITE_DOWN); + const real_t jac_affine_inv_0_0_WHITE_DOWN = tmp_coords_jac_19_WHITE_DOWN*(tmp_coords_jac_13_WHITE_DOWN - tmp_coords_jac_14_WHITE_DOWN); + const real_t jac_affine_inv_0_1_WHITE_DOWN = tmp_coords_jac_19_WHITE_DOWN*(jac_affine_0_2_WHITE_DOWN*jac_affine_2_1_WHITE_DOWN - tmp_coords_jac_16_WHITE_DOWN); + const real_t jac_affine_inv_0_2_WHITE_DOWN = tmp_coords_jac_19_WHITE_DOWN*(tmp_coords_jac_15_WHITE_DOWN - tmp_coords_jac_17_WHITE_DOWN); + const real_t jac_affine_inv_1_0_WHITE_DOWN = tmp_coords_jac_19_WHITE_DOWN*(-jac_affine_1_0_WHITE_DOWN*jac_affine_2_2_WHITE_DOWN + jac_affine_1_2_WHITE_DOWN*jac_affine_2_0_WHITE_DOWN); + const real_t jac_affine_inv_1_1_WHITE_DOWN = tmp_coords_jac_19_WHITE_DOWN*(jac_affine_0_0_WHITE_DOWN*jac_affine_2_2_WHITE_DOWN - jac_affine_0_2_WHITE_DOWN*jac_affine_2_0_WHITE_DOWN); + const real_t jac_affine_inv_1_2_WHITE_DOWN = tmp_coords_jac_19_WHITE_DOWN*(-jac_affine_0_0_WHITE_DOWN*jac_affine_1_2_WHITE_DOWN + jac_affine_0_2_WHITE_DOWN*jac_affine_1_0_WHITE_DOWN); + const real_t jac_affine_inv_2_0_WHITE_DOWN = tmp_coords_jac_19_WHITE_DOWN*(jac_affine_1_0_WHITE_DOWN*jac_affine_2_1_WHITE_DOWN - jac_affine_1_1_WHITE_DOWN*jac_affine_2_0_WHITE_DOWN); + const real_t jac_affine_inv_2_1_WHITE_DOWN = tmp_coords_jac_19_WHITE_DOWN*(-jac_affine_0_0_WHITE_DOWN*jac_affine_2_1_WHITE_DOWN + jac_affine_0_1_WHITE_DOWN*jac_affine_2_0_WHITE_DOWN); + const real_t jac_affine_inv_2_2_WHITE_DOWN = tmp_coords_jac_19_WHITE_DOWN*(jac_affine_0_0_WHITE_DOWN*jac_affine_1_1_WHITE_DOWN - jac_affine_0_1_WHITE_DOWN*jac_affine_1_0_WHITE_DOWN); + const real_t abs_det_jac_affine_WHITE_DOWN = abs(tmp_coords_jac_18_WHITE_DOWN); + { + /* CellType.WHITE_DOWN */ + for (int64_t ctr_2 = 0; ctr_2 < micro_edges_per_macro_edge; ctr_2 += 1) + for (int64_t ctr_1 = 0; ctr_1 < -ctr_2 + micro_edges_per_macro_edge; ctr_1 += 1) + { + for (int64_t ctr_0 = 0; ctr_0 < (int64_t)((-ctr_1 - ctr_2 + micro_edges_per_macro_edge - 2) / (4)) * (4); ctr_0 += 4) + { + + const int64_t phantom_ctr_0 = ctr_0; + real_t _data_float_loop_ctr_array_dim_0[4]; + _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0; + _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1; + _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2; + _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3; + real_t _data_float_loop_ctr_array_dim_1[4]; + _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1; + real_t _data_float_loop_ctr_array_dim_2[4]; + _data_float_loop_ctr_array_dim_2[0] = (real_t) ctr_2; + _data_float_loop_ctr_array_dim_2[1] = (real_t) ctr_2; + _data_float_loop_ctr_array_dim_2[2] = (real_t) ctr_2; + _data_float_loop_ctr_array_dim_2[3] = (real_t) ctr_2; + + const __m256d p_affine_0_0 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_3comp0,macro_vertex_coord_id_3comp0,macro_vertex_coord_id_3comp0,macro_vertex_coord_id_3comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)); + const __m256d p_affine_0_1 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_3comp1,macro_vertex_coord_id_3comp1,macro_vertex_coord_id_3comp1,macro_vertex_coord_id_3comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)); + const __m256d p_affine_0_2 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)),_mm256_set_pd(macro_vertex_coord_id_1comp2,macro_vertex_coord_id_1comp2,macro_vertex_coord_id_1comp2,macro_vertex_coord_id_1comp2)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)),_mm256_set_pd(macro_vertex_coord_id_2comp2,macro_vertex_coord_id_2comp2,macro_vertex_coord_id_2comp2,macro_vertex_coord_id_2comp2)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)),_mm256_set_pd(macro_vertex_coord_id_3comp2,macro_vertex_coord_id_3comp2,macro_vertex_coord_id_3comp2,macro_vertex_coord_id_3comp2)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)); + const __m256d p_affine_1_0 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_3comp0,macro_vertex_coord_id_3comp0,macro_vertex_coord_id_3comp0,macro_vertex_coord_id_3comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)); + const __m256d p_affine_1_1 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_3comp1,macro_vertex_coord_id_3comp1,macro_vertex_coord_id_3comp1,macro_vertex_coord_id_3comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)); + const __m256d p_affine_1_2 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)),_mm256_set_pd(macro_vertex_coord_id_1comp2,macro_vertex_coord_id_1comp2,macro_vertex_coord_id_1comp2,macro_vertex_coord_id_1comp2)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)),_mm256_set_pd(macro_vertex_coord_id_2comp2,macro_vertex_coord_id_2comp2,macro_vertex_coord_id_2comp2,macro_vertex_coord_id_2comp2)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)),_mm256_set_pd(macro_vertex_coord_id_3comp2,macro_vertex_coord_id_3comp2,macro_vertex_coord_id_3comp2,macro_vertex_coord_id_3comp2)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)); + const __m256d p_affine_2_0 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_3comp0,macro_vertex_coord_id_3comp0,macro_vertex_coord_id_3comp0,macro_vertex_coord_id_3comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)); + const __m256d p_affine_2_1 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_3comp1,macro_vertex_coord_id_3comp1,macro_vertex_coord_id_3comp1,macro_vertex_coord_id_3comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)); + const __m256d p_affine_2_2 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)),_mm256_set_pd(macro_vertex_coord_id_1comp2,macro_vertex_coord_id_1comp2,macro_vertex_coord_id_1comp2,macro_vertex_coord_id_1comp2)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)),_mm256_set_pd(macro_vertex_coord_id_2comp2,macro_vertex_coord_id_2comp2,macro_vertex_coord_id_2comp2,macro_vertex_coord_id_2comp2)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)),_mm256_set_pd(macro_vertex_coord_id_3comp2,macro_vertex_coord_id_3comp2,macro_vertex_coord_id_3comp2,macro_vertex_coord_id_3comp2)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)); + const __m256d p_affine_3_0 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_3comp0,macro_vertex_coord_id_3comp0,macro_vertex_coord_id_3comp0,macro_vertex_coord_id_3comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)); + const __m256d p_affine_3_1 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_3comp1,macro_vertex_coord_id_3comp1,macro_vertex_coord_id_3comp1,macro_vertex_coord_id_3comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)); + const __m256d p_affine_3_2 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)),_mm256_set_pd(macro_vertex_coord_id_1comp2,macro_vertex_coord_id_1comp2,macro_vertex_coord_id_1comp2,macro_vertex_coord_id_1comp2)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)),_mm256_set_pd(macro_vertex_coord_id_2comp2,macro_vertex_coord_id_2comp2,macro_vertex_coord_id_2comp2,macro_vertex_coord_id_2comp2)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)),_mm256_set_pd(macro_vertex_coord_id_3comp2,macro_vertex_coord_id_3comp2,macro_vertex_coord_id_3comp2,macro_vertex_coord_id_3comp2)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)); + const __m256d src_dof_0 = _mm256_loadu_pd(& _data_src_vertex_0[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1]); + const __m256d src_dof_1 = _mm256_loadu_pd(& _data_src_vertex_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1]); + const __m256d src_dof_2 = _mm256_loadu_pd(& _data_src_vertex_0[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))]); + const __m256d src_dof_3 = _mm256_loadu_pd(& _data_src_vertex_0[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1]); + const __m256d src_dof_4 = _mm256_loadu_pd(& _data_src_edge_0[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))]); + const __m256d src_dof_5 = _mm256_loadu_pd(& _data_src_edge_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6)) + 1]); + const __m256d src_dof_6 = _mm256_loadu_pd(& _data_src_edge_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + 4*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))]); + const __m256d src_dof_7 = _mm256_loadu_pd(& _data_src_edge_0[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 3*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + 1]); + const __m256d src_dof_8 = _mm256_loadu_pd(& _data_src_edge_0[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 5*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]); + const __m256d src_dof_9 = _mm256_loadu_pd(& _data_src_edge_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + 1]); + const __m256d src_dof_10 = _mm256_loadu_pd(& _data_src_vertex_1[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1]); + const __m256d src_dof_11 = _mm256_loadu_pd(& _data_src_vertex_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1]); + const __m256d src_dof_12 = _mm256_loadu_pd(& _data_src_vertex_1[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))]); + const __m256d src_dof_13 = _mm256_loadu_pd(& _data_src_vertex_1[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1]); + const __m256d src_dof_14 = _mm256_loadu_pd(& _data_src_edge_1[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))]); + const __m256d src_dof_15 = _mm256_loadu_pd(& _data_src_edge_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6)) + 1]); + const __m256d src_dof_16 = _mm256_loadu_pd(& _data_src_edge_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + 4*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))]); + const __m256d src_dof_17 = _mm256_loadu_pd(& _data_src_edge_1[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 3*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + 1]); + const __m256d src_dof_18 = _mm256_loadu_pd(& _data_src_edge_1[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 5*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]); + const __m256d src_dof_19 = _mm256_loadu_pd(& _data_src_edge_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + 1]); + const __m256d src_dof_20 = _mm256_loadu_pd(& _data_src_vertex_2[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1]); + const __m256d src_dof_21 = _mm256_loadu_pd(& _data_src_vertex_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1]); + const __m256d src_dof_22 = _mm256_loadu_pd(& _data_src_vertex_2[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))]); + const __m256d src_dof_23 = _mm256_loadu_pd(& _data_src_vertex_2[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1]); + const __m256d src_dof_24 = _mm256_loadu_pd(& _data_src_edge_2[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))]); + const __m256d src_dof_25 = _mm256_loadu_pd(& _data_src_edge_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6)) + 1]); + const __m256d src_dof_26 = _mm256_loadu_pd(& _data_src_edge_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + 4*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))]); + const __m256d src_dof_27 = _mm256_loadu_pd(& _data_src_edge_2[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 3*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + 1]); + const __m256d src_dof_28 = _mm256_loadu_pd(& _data_src_edge_2[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 5*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]); + const __m256d src_dof_29 = _mm256_loadu_pd(& _data_src_edge_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + 1]); + const __m256d rho_dof_0 = _mm256_loadu_pd(& _data_rhoVertex[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1]); + const __m256d rho_dof_1 = _mm256_loadu_pd(& _data_rhoVertex[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1]); + const __m256d rho_dof_2 = _mm256_loadu_pd(& _data_rhoVertex[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))]); + const __m256d rho_dof_3 = _mm256_loadu_pd(& _data_rhoVertex[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1]); + const __m256d rho_dof_4 = _mm256_loadu_pd(& _data_rhoEdge[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))]); + const __m256d rho_dof_5 = _mm256_loadu_pd(& _data_rhoEdge[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6)) + 1]); + const __m256d rho_dof_6 = _mm256_loadu_pd(& _data_rhoEdge[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + 4*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))]); + const __m256d rho_dof_7 = _mm256_loadu_pd(& _data_rhoEdge[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 3*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + 1]); + const __m256d rho_dof_8 = _mm256_loadu_pd(& _data_rhoEdge[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 5*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]); + const __m256d rho_dof_9 = _mm256_loadu_pd(& _data_rhoEdge[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + 1]); + __m256d q_acc_0_0 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_1 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_2 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_3 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_4 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_5 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_6 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_7 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_8 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_9 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_10 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_11 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_12 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_13 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_14 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_15 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_16 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_17 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_18 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_19 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_20 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_21 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_22 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_23 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_24 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_25 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_26 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_27 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_28 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_29 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_0 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_1 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_2 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_3 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_4 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_5 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_6 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_7 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_8 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_9 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_10 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_11 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_12 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_13 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_14 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_15 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_16 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_17 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_18 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_19 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_20 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_21 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_22 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_23 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_24 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_25 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_26 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_27 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_28 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_29 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_0 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_1 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_2 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_3 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_4 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_5 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_6 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_7 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_8 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_9 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_10 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_11 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_12 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_13 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_14 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_15 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_16 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_17 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_18 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_19 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_20 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_21 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_22 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_23 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_24 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_25 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_26 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_27 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_28 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_29 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_0 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_1 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_2 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_3 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_4 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_5 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_6 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_7 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_8 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_9 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_10 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_11 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_12 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_13 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_14 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_15 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_16 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_17 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_18 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_19 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_20 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_21 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_22 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_23 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_24 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_25 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_26 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_27 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_28 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_29 = _mm256_set_pd(0.0,0.0,0.0,0.0); + for (int64_t q = 0; q < 5; q += 1) + { + const __m256d tmp_qloop_18 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(p_affine_0_0,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),p_affine_1_0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(p_affine_0_0,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),p_affine_2_0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]))),_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(p_affine_0_0,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),p_affine_3_0),_mm256_set_pd(_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q]))),p_affine_0_0); + const __m256d tmp_qloop_19 = _mm256_mul_pd(tmp_qloop_18,tmp_qloop_18); + const __m256d tmp_qloop_22 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(p_affine_0_2,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),p_affine_1_2),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(p_affine_0_2,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),p_affine_2_2),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]))),_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(p_affine_0_2,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),p_affine_3_2),_mm256_set_pd(_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q]))),p_affine_0_2); + const __m256d tmp_qloop_23 = _mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(tmp_qloop_12,tmp_qloop_12,tmp_qloop_12,tmp_qloop_12)),_mm256_mul_pd(tmp_qloop_22,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))); + const __m256d tmp_qloop_24 = _mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(tmp_qloop_1,tmp_qloop_1,tmp_qloop_1,tmp_qloop_1)),_mm256_mul_pd(tmp_qloop_18,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))); + const __m256d tmp_qloop_25 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(p_affine_0_1,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),p_affine_1_1),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(p_affine_0_1,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),p_affine_2_1),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]))),_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(p_affine_0_1,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),p_affine_3_1),_mm256_set_pd(_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q]))),p_affine_0_1); + const __m256d tmp_qloop_26 = _mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(tmp_qloop_8,tmp_qloop_8,tmp_qloop_8,tmp_qloop_8)),_mm256_mul_pd(tmp_qloop_25,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))); + const __m256d tmp_qloop_27 = _mm256_mul_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_24,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_qloop_14,tmp_qloop_14,tmp_qloop_14,tmp_qloop_14)),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_26,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_qloop_15,tmp_qloop_15,tmp_qloop_15,tmp_qloop_15))),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_23,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_qloop_7,tmp_qloop_7,tmp_qloop_7,tmp_qloop_7))),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_23,_mm256_set_pd(tmp_qloop_10,tmp_qloop_10,tmp_qloop_10,tmp_qloop_10)),_mm256_set_pd(tmp_qloop_9,tmp_qloop_9,tmp_qloop_9,tmp_qloop_9))),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_24,_mm256_set_pd(tmp_qloop_13,tmp_qloop_13,tmp_qloop_13,tmp_qloop_13)),_mm256_set_pd(tmp_qloop_6,tmp_qloop_6,tmp_qloop_6,tmp_qloop_6))),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_26,_mm256_set_pd(tmp_qloop_2,tmp_qloop_2,tmp_qloop_2,tmp_qloop_2)),_mm256_set_pd(tmp_qloop_3,tmp_qloop_3,tmp_qloop_3,tmp_qloop_3))),_mm256_set_pd(tmp_qloop_21,tmp_qloop_21,tmp_qloop_21,tmp_qloop_21)); + const __m256d tmp_qloop_28 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_27,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_qloop_20,tmp_qloop_20,tmp_qloop_20,tmp_qloop_20)); + const __m256d tmp_qloop_29 = _mm256_mul_pd(tmp_qloop_25,tmp_qloop_25); + const __m256d tmp_qloop_30 = _mm256_mul_pd(tmp_qloop_22,tmp_qloop_22); + const __m256d tmp_qloop_31 = _mm256_add_pd(_mm256_add_pd(tmp_qloop_19,tmp_qloop_29),tmp_qloop_30); + const __m256d tmp_qloop_34 = _mm256_mul_pd(tmp_qloop_18,_mm256_set_pd(tmp_qloop_33,tmp_qloop_33,tmp_qloop_33,tmp_qloop_33)); + const __m256d tmp_qloop_35 = _mm256_add_pd(tmp_qloop_28,tmp_qloop_34); + const __m256d tmp_qloop_36 = _mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_31,tmp_qloop_35),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_19,tmp_qloop_28),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_set_pd(tmp_qloop_17,tmp_qloop_17,tmp_qloop_17,tmp_qloop_17)); + const __m256d tmp_qloop_37 = _mm256_div_pd(_mm256_sqrt_pd(tmp_qloop_31),_mm256_mul_pd(tmp_qloop_31,tmp_qloop_31)); + const __m256d tmp_qloop_38 = _mm256_mul_pd(tmp_qloop_37,_mm256_set_pd(1.0,1.0,1.0,1.0)); + const __m256d tmp_qloop_41 = _mm256_mul_pd(tmp_qloop_31,_mm256_set_pd(tmp_qloop_40,tmp_qloop_40,tmp_qloop_40,tmp_qloop_40)); + const __m256d tmp_qloop_42 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_41,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_25,tmp_qloop_28),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))); + const __m256d tmp_qloop_43 = _mm256_mul_pd(tmp_qloop_38,_mm256_set_pd(tmp_qloop_17,tmp_qloop_17,tmp_qloop_17,tmp_qloop_17)); + const __m256d tmp_qloop_44 = _mm256_mul_pd(tmp_qloop_42,tmp_qloop_43); + const __m256d tmp_qloop_47 = _mm256_mul_pd(tmp_qloop_28,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)); + const __m256d tmp_qloop_48 = _mm256_mul_pd(tmp_qloop_22,tmp_qloop_47); + const __m256d tmp_qloop_49 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_31,_mm256_set_pd(tmp_qloop_46,tmp_qloop_46,tmp_qloop_46,tmp_qloop_46)),tmp_qloop_48); + const __m256d tmp_qloop_50 = _mm256_mul_pd(tmp_qloop_43,tmp_qloop_49); + const __m256d tmp_qloop_51 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_31,_mm256_set_pd(tmp_qloop_33,tmp_qloop_33,tmp_qloop_33,tmp_qloop_33)),_mm256_mul_pd(tmp_qloop_18,tmp_qloop_47)); + const __m256d tmp_qloop_52 = _mm256_mul_pd(tmp_qloop_43,tmp_qloop_51); + const __m256d tmp_qloop_54 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_25,_mm256_set_pd(tmp_qloop_53,tmp_qloop_53,tmp_qloop_53,tmp_qloop_53)),tmp_qloop_28); + const __m256d tmp_qloop_55 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_29,tmp_qloop_47),_mm256_mul_pd(tmp_qloop_31,tmp_qloop_54)); + const __m256d tmp_qloop_56 = _mm256_mul_pd(tmp_qloop_22,_mm256_set_pd(tmp_qloop_46,tmp_qloop_46,tmp_qloop_46,tmp_qloop_46)); + const __m256d tmp_qloop_57 = _mm256_add_pd(tmp_qloop_28,tmp_qloop_56); + const __m256d tmp_qloop_58 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_30,tmp_qloop_47),_mm256_mul_pd(tmp_qloop_31,tmp_qloop_57)); + const __m256d tmp_qloop_66 = _mm256_mul_pd(_mm256_div_pd(_mm256_sqrt_pd(tmp_qloop_31),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_31,tmp_qloop_31),tmp_qloop_31)),_mm256_set_pd(3.0,3.0,3.0,3.0)); + const __m256d tmp_qloop_67 = _mm256_mul_pd(tmp_qloop_36,tmp_qloop_66); + const __m256d tmp_qloop_69 = _mm256_mul_pd(tmp_qloop_18,_mm256_set_pd(2.0,2.0,2.0,2.0)); + const __m256d tmp_qloop_70 = _mm256_mul_pd(tmp_qloop_41,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)); + const __m256d tmp_qloop_71 = _mm256_mul_pd(tmp_qloop_35,_mm256_set_pd(2.0,2.0,2.0,2.0)); + const __m256d tmp_qloop_73 = _mm256_mul_pd(_mm256_mul_pd(tmp_qloop_31,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_qloop_72,tmp_qloop_72,tmp_qloop_72,tmp_qloop_72)); + const __m256d tmp_qloop_74 = _mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(tmp_qloop_20,tmp_qloop_20,tmp_qloop_20,tmp_qloop_20)),tmp_qloop_27); + const __m256d tmp_qloop_75 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_34,_mm256_set_pd(2.0,2.0,2.0,2.0)),_mm256_mul_pd(tmp_qloop_18,_mm256_set_pd(tmp_qloop_68,tmp_qloop_68,tmp_qloop_68,tmp_qloop_68))),tmp_qloop_74); + const __m256d tmp_qloop_76 = _mm256_mul_pd(tmp_qloop_66,_mm256_set_pd(tmp_qloop_17,tmp_qloop_17,tmp_qloop_17,tmp_qloop_17)); + const __m256d tmp_qloop_77 = _mm256_mul_pd(tmp_qloop_51,tmp_qloop_76); + const __m256d tmp_qloop_78 = _mm256_mul_pd(tmp_qloop_25,tmp_qloop_77); + const __m256d tmp_qloop_79 = _mm256_mul_pd(tmp_qloop_25,_mm256_set_pd(2.0,2.0,2.0,2.0)); + const __m256d tmp_qloop_80 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_79,_mm256_set_pd(tmp_qloop_33,tmp_qloop_33,tmp_qloop_33,tmp_qloop_33)),_mm256_mul_pd(tmp_qloop_18,_mm256_set_pd(tmp_qloop_40,tmp_qloop_40,tmp_qloop_40,tmp_qloop_40))); + const __m256d tmp_qloop_81 = _mm256_mul_pd(tmp_qloop_25,tmp_qloop_43); + const __m256d tmp_qloop_82 = _mm256_mul_pd(tmp_qloop_22,_mm256_set_pd(2.0,2.0,2.0,2.0)); + const __m256d tmp_qloop_83 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_82,_mm256_set_pd(tmp_qloop_33,tmp_qloop_33,tmp_qloop_33,tmp_qloop_33)),_mm256_mul_pd(tmp_qloop_18,_mm256_set_pd(tmp_qloop_72,tmp_qloop_72,tmp_qloop_72,tmp_qloop_72))); + const __m256d tmp_qloop_84 = _mm256_mul_pd(_mm256_mul_pd(tmp_qloop_22,tmp_qloop_78),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)); + const __m256d tmp_qloop_85 = _mm256_mul_pd(tmp_qloop_18,tmp_qloop_22); + const __m256d tmp_qloop_86 = _mm256_mul_pd(tmp_qloop_22,tmp_qloop_43); + const __m256d tmp_qloop_87 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_69,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_qloop_40,tmp_qloop_40,tmp_qloop_40,tmp_qloop_40)),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_25,_mm256_set_pd(tmp_qloop_21,tmp_qloop_21,tmp_qloop_21,tmp_qloop_21)),_mm256_set_pd(tmp_qloop_32,tmp_qloop_32,tmp_qloop_32,tmp_qloop_32))); + const __m256d tmp_qloop_88 = _mm256_mul_pd(tmp_qloop_18,tmp_qloop_43); + const __m256d tmp_qloop_89 = _mm256_mul_pd(tmp_qloop_42,tmp_qloop_76); + const __m256d tmp_qloop_90 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_28,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_25,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_qloop_40,tmp_qloop_40,tmp_qloop_40,tmp_qloop_40))); + const __m256d tmp_qloop_91 = _mm256_mul_pd(tmp_qloop_25,tmp_qloop_89); + const __m256d tmp_qloop_92 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_25,_mm256_set_pd(tmp_qloop_72,tmp_qloop_72,tmp_qloop_72,tmp_qloop_72)),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_82,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_qloop_40,tmp_qloop_40,tmp_qloop_40,tmp_qloop_40))); + const __m256d tmp_qloop_93 = _mm256_mul_pd(tmp_qloop_85,tmp_qloop_89); + const __m256d tmp_qloop_94 = _mm256_mul_pd(_mm256_mul_pd(tmp_qloop_31,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_qloop_68,tmp_qloop_68,tmp_qloop_68,tmp_qloop_68)); + const __m256d tmp_qloop_95 = _mm256_mul_pd(tmp_qloop_55,tmp_qloop_76); + const __m256d tmp_qloop_96 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_69,_mm256_set_pd(tmp_qloop_46,tmp_qloop_46,tmp_qloop_46,tmp_qloop_46)),_mm256_mul_pd(tmp_qloop_22,_mm256_set_pd(tmp_qloop_68,tmp_qloop_68,tmp_qloop_68,tmp_qloop_68))); + const __m256d tmp_qloop_97 = _mm256_mul_pd(tmp_qloop_49,tmp_qloop_76); + const __m256d tmp_qloop_98 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_22,_mm256_set_pd(tmp_qloop_40,tmp_qloop_40,tmp_qloop_40,tmp_qloop_40)),_mm256_mul_pd(tmp_qloop_79,_mm256_set_pd(tmp_qloop_46,tmp_qloop_46,tmp_qloop_46,tmp_qloop_46))); + const __m256d tmp_qloop_99 = _mm256_mul_pd(tmp_qloop_25,tmp_qloop_97); + const __m256d tmp_qloop_100 = _mm256_mul_pd(tmp_qloop_18,tmp_qloop_99); + const __m256d tmp_qloop_101 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_56,_mm256_set_pd(2.0,2.0,2.0,2.0)),_mm256_mul_pd(tmp_qloop_22,_mm256_set_pd(tmp_qloop_72,tmp_qloop_72,tmp_qloop_72,tmp_qloop_72))),tmp_qloop_74); + const __m256d tmp_qloop_102 = _mm256_mul_pd(tmp_qloop_58,tmp_qloop_76); + const __m256d tmp_qloop_103 = _mm256_mul_pd(_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q]),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])); + const __m256d tmp_qloop_104 = _mm256_mul_pd(tmp_qloop_103,_mm256_set_pd(2.0,2.0,2.0,2.0)); + const __m256d tmp_qloop_105 = _mm256_mul_pd(_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q])); + const __m256d tmp_qloop_106 = _mm256_mul_pd(tmp_qloop_105,_mm256_set_pd(2.0,2.0,2.0,2.0)); + const __m256d tmp_qloop_107 = _mm256_mul_pd(_mm256_set_pd(_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q]),_mm256_set_pd(_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q])); + const __m256d tmp_qloop_108 = _mm256_mul_pd(tmp_qloop_107,_mm256_set_pd(2.0,2.0,2.0,2.0)); + const __m256d tmp_qloop_109 = _mm256_mul_pd(_mm256_set_pd(4.0,4.0,4.0,4.0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q])); + const __m256d tmp_qloop_110 = _mm256_mul_pd(tmp_qloop_109,_mm256_set_pd(_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q])); + const __m256d tmp_qloop_111 = _mm256_mul_pd(_mm256_set_pd(4.0,4.0,4.0,4.0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])); + const __m256d tmp_qloop_112 = _mm256_mul_pd(tmp_qloop_111,_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q])); + const __m256d tmp_qloop_113 = _mm256_mul_pd(tmp_qloop_111,_mm256_set_pd(_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q])); + const __m256d tmp_qloop_114 = _mm256_add_pd(tmp_qloop_112,tmp_qloop_113); + const __m256d tmp_qloop_115 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-3.0,-3.0,-3.0,-3.0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),_mm256_mul_pd(_mm256_set_pd(-3.0,-3.0,-3.0,-3.0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]))),_mm256_mul_pd(_mm256_set_pd(-3.0,-3.0,-3.0,-3.0),_mm256_set_pd(_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),tmp_qloop_104),tmp_qloop_106),tmp_qloop_108),tmp_qloop_110),tmp_qloop_114); + const __m256d tmp_qloop_116 = _mm256_mul_pd(_mm256_set_pd(4.0,4.0,4.0,4.0),_mm256_set_pd(_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q])); + const __m256d tmp_qloop_117 = _mm256_add_pd(tmp_qloop_109,tmp_qloop_116); + const __m256d tmp_qloop_118 = _mm256_mul_pd(_mm256_mul_pd(rho_dof_8,tmp_qloop_109),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)); + const __m256d tmp_qloop_119 = _mm256_mul_pd(rho_dof_0,_mm256_add_pd(_mm256_add_pd(_mm256_set_pd(-3.0,-3.0,-3.0,-3.0),tmp_qloop_111),tmp_qloop_117)); + const __m256d tmp_qloop_120 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(rho_dof_7,tmp_qloop_116),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),tmp_qloop_119); + const __m256d tmp_qloop_121 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(rho_dof_1,_mm256_add_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),tmp_qloop_111)),_mm256_mul_pd(rho_dof_9,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_117,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_set_pd(-8.0,-8.0,-8.0,-8.0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q]))),_mm256_set_pd(4.0,4.0,4.0,4.0)))),_mm256_mul_pd(rho_dof_5,tmp_qloop_116)),_mm256_mul_pd(rho_dof_6,tmp_qloop_109)),tmp_qloop_118),tmp_qloop_120); + const __m256d tmp_qloop_122 = _mm256_add_pd(_mm256_set_pd(-4.0,-4.0,-4.0,-4.0),tmp_qloop_111); + const __m256d tmp_qloop_123 = _mm256_mul_pd(_mm256_mul_pd(rho_dof_9,tmp_qloop_111),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)); + const __m256d tmp_qloop_124 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(rho_dof_2,_mm256_add_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),tmp_qloop_109)),_mm256_mul_pd(rho_dof_8,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_116,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_122,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_mul_pd(_mm256_set_pd(-8.0,-8.0,-8.0,-8.0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]))))),_mm256_mul_pd(rho_dof_4,tmp_qloop_116)),_mm256_mul_pd(rho_dof_6,tmp_qloop_111)),tmp_qloop_120),tmp_qloop_123); + const __m256d tmp_qloop_125 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(rho_dof_3,_mm256_add_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),tmp_qloop_116)),_mm256_mul_pd(rho_dof_7,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_109,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_122,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_mul_pd(_mm256_set_pd(-8.0,-8.0,-8.0,-8.0),_mm256_set_pd(_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q]))))),_mm256_mul_pd(rho_dof_4,tmp_qloop_109)),_mm256_mul_pd(rho_dof_5,tmp_qloop_111)),tmp_qloop_118),tmp_qloop_119),tmp_qloop_123); + const __m256d tmp_qloop_127 = _mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),tmp_qloop_104); + const __m256d tmp_qloop_128 = _mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q])),tmp_qloop_106); + const __m256d tmp_qloop_129 = _mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q])),tmp_qloop_108); + const __m256d tmp_qloop_130 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_110,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_113,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_mul_pd(tmp_qloop_107,_mm256_set_pd(-4.0,-4.0,-4.0,-4.0))),tmp_qloop_116); + const __m256d tmp_qloop_131 = _mm256_mul_pd(tmp_qloop_105,_mm256_set_pd(4.0,4.0,4.0,4.0)); + const __m256d tmp_qloop_132 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_110,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_112,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_mul_pd(tmp_qloop_131,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_109); + const __m256d tmp_qloop_133 = _mm256_mul_pd(tmp_qloop_103,_mm256_set_pd(4.0,4.0,4.0,4.0)); + const __m256d tmp_qloop_134 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_114,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_133,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_111); + const __m256d tmp_qloop_158 = _mm256_mul_pd(tmp_qloop_105,tmp_qloop_111); + const __m256d jac_blending_0_0 = _mm256_mul_pd(tmp_qloop_36,tmp_qloop_38); + const __m256d jac_blending_0_1 = _mm256_mul_pd(tmp_qloop_18,tmp_qloop_44); + const __m256d jac_blending_0_2 = _mm256_mul_pd(tmp_qloop_18,tmp_qloop_50); + const __m256d jac_blending_1_0 = _mm256_mul_pd(tmp_qloop_25,tmp_qloop_52); + const __m256d jac_blending_1_1 = _mm256_mul_pd(tmp_qloop_43,tmp_qloop_55); + const __m256d tmp_qloop_63 = _mm256_mul_pd(jac_blending_0_2,jac_blending_1_1); + const __m256d jac_blending_1_2 = _mm256_mul_pd(tmp_qloop_25,tmp_qloop_50); + const __m256d tmp_qloop_60 = _mm256_mul_pd(jac_blending_0_1,jac_blending_1_2); + const __m256d jac_blending_2_0 = _mm256_mul_pd(tmp_qloop_22,tmp_qloop_52); + const __m256d jac_blending_2_1 = _mm256_mul_pd(tmp_qloop_22,tmp_qloop_44); + const __m256d tmp_qloop_61 = _mm256_mul_pd(jac_blending_1_2,jac_blending_2_1); + const __m256d jac_blending_2_2 = _mm256_mul_pd(tmp_qloop_43,tmp_qloop_58); + const __m256d tmp_qloop_59 = _mm256_mul_pd(jac_blending_1_1,jac_blending_2_2); + const __m256d tmp_qloop_62 = _mm256_mul_pd(jac_blending_0_1,jac_blending_2_2); + const __m256d tmp_qloop_64 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(jac_blending_0_0,tmp_qloop_59),_mm256_mul_pd(jac_blending_2_0,tmp_qloop_60)),_mm256_mul_pd(_mm256_mul_pd(jac_blending_0_0,tmp_qloop_61),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_mul_pd(_mm256_mul_pd(jac_blending_1_0,tmp_qloop_62),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_mul_pd(_mm256_mul_pd(jac_blending_2_0,tmp_qloop_63),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_mul_pd(_mm256_mul_pd(jac_blending_0_2,jac_blending_1_0),jac_blending_2_1)); + const __m256d tmp_qloop_65 = _mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),tmp_qloop_64); + const __m256d abs_det_jac_blending = tmp_qloop_64; + const __m256d tmp_qloop_135 = _mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(abs_det_jac_blending,_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(rho_dof_0,tmp_qloop_115),_mm256_mul_pd(rho_dof_1,tmp_qloop_127)),_mm256_mul_pd(rho_dof_2,tmp_qloop_128)),_mm256_mul_pd(rho_dof_3,tmp_qloop_129)),_mm256_mul_pd(rho_dof_4,tmp_qloop_110)),_mm256_mul_pd(rho_dof_5,tmp_qloop_113)),_mm256_mul_pd(rho_dof_6,tmp_qloop_112)),_mm256_mul_pd(rho_dof_7,tmp_qloop_130)),_mm256_mul_pd(rho_dof_8,tmp_qloop_132)),_mm256_mul_pd(rho_dof_9,tmp_qloop_134)))),_mm256_set_pd(_data_q_w[q],_data_q_w[q],_data_q_w[q],_data_q_w[q])),_mm256_set_pd(abs_det_jac_affine_WHITE_DOWN,abs_det_jac_affine_WHITE_DOWN,abs_det_jac_affine_WHITE_DOWN,abs_det_jac_affine_WHITE_DOWN)); + const __m256d tmp_qloop_136 = _mm256_mul_pd(tmp_qloop_135,_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]))),_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q]))),_mm256_set_pd(1.0,1.0,1.0,1.0))); + const __m256d tmp_qloop_142 = _mm256_mul_pd(tmp_qloop_135,_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])); + const __m256d tmp_qloop_144 = _mm256_mul_pd(tmp_qloop_135,_mm256_set_pd(_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q])); + const __m256d tmp_qloop_147 = _mm256_mul_pd(tmp_qloop_135,_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q])); + const __m256d tmp_qloop_159 = _mm256_mul_pd(tmp_qloop_135,tmp_qloop_158); + const __m256d tmp_qloop_161 = _mm256_mul_pd(tmp_qloop_107,tmp_qloop_135); + const __m256d jac_blending_inv_0_0 = _mm256_mul_pd(tmp_qloop_65,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_61,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),tmp_qloop_59)); + const __m256d jac_blending_inv_0_1 = _mm256_mul_pd(tmp_qloop_65,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_62,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(jac_blending_0_2,jac_blending_2_1))); + const __m256d jac_blending_inv_0_2 = _mm256_mul_pd(tmp_qloop_65,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_63,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),tmp_qloop_60)); + const __m256d jac_blending_inv_1_0 = _mm256_mul_pd(tmp_qloop_65,_mm256_add_pd(_mm256_mul_pd(jac_blending_1_2,jac_blending_2_0),_mm256_mul_pd(_mm256_mul_pd(jac_blending_1_0,jac_blending_2_2),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)))); + const __m256d jac_blending_inv_1_1 = _mm256_mul_pd(tmp_qloop_65,_mm256_add_pd(_mm256_mul_pd(jac_blending_0_0,jac_blending_2_2),_mm256_mul_pd(_mm256_mul_pd(jac_blending_0_2,jac_blending_2_0),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)))); + const __m256d jac_blending_inv_1_2 = _mm256_mul_pd(tmp_qloop_65,_mm256_add_pd(_mm256_mul_pd(jac_blending_0_2,jac_blending_1_0),_mm256_mul_pd(_mm256_mul_pd(jac_blending_0_0,jac_blending_1_2),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)))); + const __m256d jac_blending_inv_2_0 = _mm256_mul_pd(tmp_qloop_65,_mm256_add_pd(_mm256_mul_pd(jac_blending_1_0,jac_blending_2_1),_mm256_mul_pd(_mm256_mul_pd(jac_blending_1_1,jac_blending_2_0),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)))); + const __m256d tmp_qloop_126 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_121,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_0,_mm256_set_pd(jac_affine_inv_0_0_WHITE_DOWN,jac_affine_inv_0_0_WHITE_DOWN,jac_affine_inv_0_0_WHITE_DOWN,jac_affine_inv_0_0_WHITE_DOWN)),_mm256_mul_pd(jac_blending_inv_1_0,_mm256_set_pd(jac_affine_inv_0_1_WHITE_DOWN,jac_affine_inv_0_1_WHITE_DOWN,jac_affine_inv_0_1_WHITE_DOWN,jac_affine_inv_0_1_WHITE_DOWN))),_mm256_mul_pd(jac_blending_inv_2_0,_mm256_set_pd(jac_affine_inv_0_2_WHITE_DOWN,jac_affine_inv_0_2_WHITE_DOWN,jac_affine_inv_0_2_WHITE_DOWN,jac_affine_inv_0_2_WHITE_DOWN)))),_mm256_mul_pd(tmp_qloop_124,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_0,_mm256_set_pd(jac_affine_inv_1_0_WHITE_DOWN,jac_affine_inv_1_0_WHITE_DOWN,jac_affine_inv_1_0_WHITE_DOWN,jac_affine_inv_1_0_WHITE_DOWN)),_mm256_mul_pd(jac_blending_inv_1_0,_mm256_set_pd(jac_affine_inv_1_1_WHITE_DOWN,jac_affine_inv_1_1_WHITE_DOWN,jac_affine_inv_1_1_WHITE_DOWN,jac_affine_inv_1_1_WHITE_DOWN))),_mm256_mul_pd(jac_blending_inv_2_0,_mm256_set_pd(jac_affine_inv_1_2_WHITE_DOWN,jac_affine_inv_1_2_WHITE_DOWN,jac_affine_inv_1_2_WHITE_DOWN,jac_affine_inv_1_2_WHITE_DOWN))))),_mm256_mul_pd(tmp_qloop_125,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_0,_mm256_set_pd(jac_affine_inv_2_0_WHITE_DOWN,jac_affine_inv_2_0_WHITE_DOWN,jac_affine_inv_2_0_WHITE_DOWN,jac_affine_inv_2_0_WHITE_DOWN)),_mm256_mul_pd(jac_blending_inv_1_0,_mm256_set_pd(jac_affine_inv_2_1_WHITE_DOWN,jac_affine_inv_2_1_WHITE_DOWN,jac_affine_inv_2_1_WHITE_DOWN,jac_affine_inv_2_1_WHITE_DOWN))),_mm256_mul_pd(jac_blending_inv_2_0,_mm256_set_pd(jac_affine_inv_2_2_WHITE_DOWN,jac_affine_inv_2_2_WHITE_DOWN,jac_affine_inv_2_2_WHITE_DOWN,jac_affine_inv_2_2_WHITE_DOWN))))); + const __m256d tmp_qloop_137 = _mm256_mul_pd(tmp_qloop_126,tmp_qloop_136); + const __m256d tmp_qloop_143 = _mm256_mul_pd(tmp_qloop_126,tmp_qloop_142); + const __m256d tmp_qloop_145 = _mm256_mul_pd(tmp_qloop_126,tmp_qloop_144); + const __m256d tmp_qloop_146 = _mm256_mul_pd(tmp_qloop_112,tmp_qloop_145); + const __m256d tmp_qloop_148 = _mm256_mul_pd(tmp_qloop_126,tmp_qloop_147); + const __m256d tmp_qloop_157 = _mm256_mul_pd(tmp_qloop_126,tmp_qloop_135); + const __m256d tmp_qloop_160 = _mm256_mul_pd(tmp_qloop_107,tmp_qloop_157); + const __m256d jac_blending_inv_2_1 = _mm256_mul_pd(tmp_qloop_65,_mm256_add_pd(_mm256_mul_pd(jac_blending_0_1,jac_blending_2_0),_mm256_mul_pd(_mm256_mul_pd(jac_blending_0_0,jac_blending_2_1),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)))); + const __m256d tmp_qloop_138 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_121,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_1,_mm256_set_pd(jac_affine_inv_0_0_WHITE_DOWN,jac_affine_inv_0_0_WHITE_DOWN,jac_affine_inv_0_0_WHITE_DOWN,jac_affine_inv_0_0_WHITE_DOWN)),_mm256_mul_pd(jac_blending_inv_1_1,_mm256_set_pd(jac_affine_inv_0_1_WHITE_DOWN,jac_affine_inv_0_1_WHITE_DOWN,jac_affine_inv_0_1_WHITE_DOWN,jac_affine_inv_0_1_WHITE_DOWN))),_mm256_mul_pd(jac_blending_inv_2_1,_mm256_set_pd(jac_affine_inv_0_2_WHITE_DOWN,jac_affine_inv_0_2_WHITE_DOWN,jac_affine_inv_0_2_WHITE_DOWN,jac_affine_inv_0_2_WHITE_DOWN)))),_mm256_mul_pd(tmp_qloop_124,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_1,_mm256_set_pd(jac_affine_inv_1_0_WHITE_DOWN,jac_affine_inv_1_0_WHITE_DOWN,jac_affine_inv_1_0_WHITE_DOWN,jac_affine_inv_1_0_WHITE_DOWN)),_mm256_mul_pd(jac_blending_inv_1_1,_mm256_set_pd(jac_affine_inv_1_1_WHITE_DOWN,jac_affine_inv_1_1_WHITE_DOWN,jac_affine_inv_1_1_WHITE_DOWN,jac_affine_inv_1_1_WHITE_DOWN))),_mm256_mul_pd(jac_blending_inv_2_1,_mm256_set_pd(jac_affine_inv_1_2_WHITE_DOWN,jac_affine_inv_1_2_WHITE_DOWN,jac_affine_inv_1_2_WHITE_DOWN,jac_affine_inv_1_2_WHITE_DOWN))))),_mm256_mul_pd(tmp_qloop_125,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_1,_mm256_set_pd(jac_affine_inv_2_0_WHITE_DOWN,jac_affine_inv_2_0_WHITE_DOWN,jac_affine_inv_2_0_WHITE_DOWN,jac_affine_inv_2_0_WHITE_DOWN)),_mm256_mul_pd(jac_blending_inv_1_1,_mm256_set_pd(jac_affine_inv_2_1_WHITE_DOWN,jac_affine_inv_2_1_WHITE_DOWN,jac_affine_inv_2_1_WHITE_DOWN,jac_affine_inv_2_1_WHITE_DOWN))),_mm256_mul_pd(jac_blending_inv_2_1,_mm256_set_pd(jac_affine_inv_2_2_WHITE_DOWN,jac_affine_inv_2_2_WHITE_DOWN,jac_affine_inv_2_2_WHITE_DOWN,jac_affine_inv_2_2_WHITE_DOWN))))); + const __m256d tmp_qloop_139 = _mm256_mul_pd(tmp_qloop_136,tmp_qloop_138); + const __m256d tmp_qloop_149 = _mm256_mul_pd(tmp_qloop_138,tmp_qloop_142); + const __m256d tmp_qloop_150 = _mm256_mul_pd(tmp_qloop_138,tmp_qloop_144); + const __m256d tmp_qloop_151 = _mm256_mul_pd(tmp_qloop_112,tmp_qloop_150); + const __m256d tmp_qloop_152 = _mm256_mul_pd(tmp_qloop_138,tmp_qloop_147); + const __m256d tmp_qloop_162 = _mm256_mul_pd(tmp_qloop_138,tmp_qloop_161); + const __m256d jac_blending_inv_2_2 = _mm256_mul_pd(tmp_qloop_65,_mm256_add_pd(_mm256_mul_pd(jac_blending_0_0,jac_blending_1_1),_mm256_mul_pd(_mm256_mul_pd(jac_blending_0_1,jac_blending_1_0),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)))); + const __m256d tmp_qloop_140 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_121,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_2,_mm256_set_pd(jac_affine_inv_0_0_WHITE_DOWN,jac_affine_inv_0_0_WHITE_DOWN,jac_affine_inv_0_0_WHITE_DOWN,jac_affine_inv_0_0_WHITE_DOWN)),_mm256_mul_pd(jac_blending_inv_1_2,_mm256_set_pd(jac_affine_inv_0_1_WHITE_DOWN,jac_affine_inv_0_1_WHITE_DOWN,jac_affine_inv_0_1_WHITE_DOWN,jac_affine_inv_0_1_WHITE_DOWN))),_mm256_mul_pd(jac_blending_inv_2_2,_mm256_set_pd(jac_affine_inv_0_2_WHITE_DOWN,jac_affine_inv_0_2_WHITE_DOWN,jac_affine_inv_0_2_WHITE_DOWN,jac_affine_inv_0_2_WHITE_DOWN)))),_mm256_mul_pd(tmp_qloop_124,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_2,_mm256_set_pd(jac_affine_inv_1_0_WHITE_DOWN,jac_affine_inv_1_0_WHITE_DOWN,jac_affine_inv_1_0_WHITE_DOWN,jac_affine_inv_1_0_WHITE_DOWN)),_mm256_mul_pd(jac_blending_inv_1_2,_mm256_set_pd(jac_affine_inv_1_1_WHITE_DOWN,jac_affine_inv_1_1_WHITE_DOWN,jac_affine_inv_1_1_WHITE_DOWN,jac_affine_inv_1_1_WHITE_DOWN))),_mm256_mul_pd(jac_blending_inv_2_2,_mm256_set_pd(jac_affine_inv_1_2_WHITE_DOWN,jac_affine_inv_1_2_WHITE_DOWN,jac_affine_inv_1_2_WHITE_DOWN,jac_affine_inv_1_2_WHITE_DOWN))))),_mm256_mul_pd(tmp_qloop_125,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_2,_mm256_set_pd(jac_affine_inv_2_0_WHITE_DOWN,jac_affine_inv_2_0_WHITE_DOWN,jac_affine_inv_2_0_WHITE_DOWN,jac_affine_inv_2_0_WHITE_DOWN)),_mm256_mul_pd(jac_blending_inv_1_2,_mm256_set_pd(jac_affine_inv_2_1_WHITE_DOWN,jac_affine_inv_2_1_WHITE_DOWN,jac_affine_inv_2_1_WHITE_DOWN,jac_affine_inv_2_1_WHITE_DOWN))),_mm256_mul_pd(jac_blending_inv_2_2,_mm256_set_pd(jac_affine_inv_2_2_WHITE_DOWN,jac_affine_inv_2_2_WHITE_DOWN,jac_affine_inv_2_2_WHITE_DOWN,jac_affine_inv_2_2_WHITE_DOWN))))); + const __m256d tmp_qloop_141 = _mm256_mul_pd(tmp_qloop_136,tmp_qloop_140); + const __m256d tmp_qloop_153 = _mm256_mul_pd(tmp_qloop_140,tmp_qloop_142); + const __m256d tmp_qloop_154 = _mm256_mul_pd(tmp_qloop_140,tmp_qloop_144); + const __m256d tmp_qloop_155 = _mm256_mul_pd(tmp_qloop_112,tmp_qloop_154); + const __m256d tmp_qloop_156 = _mm256_mul_pd(tmp_qloop_140,tmp_qloop_147); + const __m256d tmp_qloop_163 = _mm256_mul_pd(tmp_qloop_140,tmp_qloop_161); + const __m256d hessian_blending_0_0_0 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_18,tmp_qloop_67),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_37,_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_31,_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(tmp_qloop_68,tmp_qloop_68,tmp_qloop_68,tmp_qloop_68)),_mm256_set_pd(tmp_qloop_33,tmp_qloop_33,tmp_qloop_33,tmp_qloop_33))),_mm256_mul_pd(tmp_qloop_19,_mm256_set_pd(tmp_qloop_68,tmp_qloop_68,tmp_qloop_68,tmp_qloop_68))),_mm256_mul_pd(tmp_qloop_35,tmp_qloop_69)),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_28,tmp_qloop_69),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_17,tmp_qloop_17,tmp_qloop_17,tmp_qloop_17))); + const __m256d hessian_blending_1_0_0 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_25,tmp_qloop_67),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_37,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_19,_mm256_set_pd(tmp_qloop_40,tmp_qloop_40,tmp_qloop_40,tmp_qloop_40)),_mm256_mul_pd(tmp_qloop_25,tmp_qloop_71)),tmp_qloop_70)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_17,tmp_qloop_17,tmp_qloop_17,tmp_qloop_17))); + const __m256d hessian_blending_2_0_0 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_22,tmp_qloop_67),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_37,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_19,_mm256_set_pd(tmp_qloop_72,tmp_qloop_72,tmp_qloop_72,tmp_qloop_72)),_mm256_mul_pd(tmp_qloop_22,tmp_qloop_71)),tmp_qloop_73)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_17,tmp_qloop_17,tmp_qloop_17,tmp_qloop_17))); + const __m256d hessian_blending_0_0_1 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_18,tmp_qloop_78),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_25,tmp_qloop_37),tmp_qloop_75),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_17,tmp_qloop_17,tmp_qloop_17,tmp_qloop_17))); + const __m256d hessian_blending_1_0_1 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_80,tmp_qloop_81),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_29,tmp_qloop_77),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_52); + const __m256d hessian_blending_2_0_1 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_81,tmp_qloop_83),tmp_qloop_84); + const __m256d hessian_blending_0_0_2 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_77,tmp_qloop_85),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_22,tmp_qloop_37),tmp_qloop_75),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_17,tmp_qloop_17,tmp_qloop_17,tmp_qloop_17))); + const __m256d hessian_blending_1_0_2 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_80,tmp_qloop_86),tmp_qloop_84); + const __m256d hessian_blending_2_0_2 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_83,tmp_qloop_86),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_30,tmp_qloop_77),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_52); + const __m256d hessian_blending_0_1_0 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_87,tmp_qloop_88),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_19,tmp_qloop_89),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_44); + const __m256d hessian_blending_1_1_0 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_18,tmp_qloop_91),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_18,tmp_qloop_37),tmp_qloop_90),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_17,tmp_qloop_17,tmp_qloop_17,tmp_qloop_17))); + const __m256d hessian_blending_2_1_0 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_93,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_88,tmp_qloop_92)); + const __m256d hessian_blending_0_1_1 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_18,tmp_qloop_95),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_37,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_29,_mm256_set_pd(tmp_qloop_68,tmp_qloop_68,tmp_qloop_68,tmp_qloop_68)),_mm256_mul_pd(tmp_qloop_54,tmp_qloop_69)),tmp_qloop_94)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_17,tmp_qloop_17,tmp_qloop_17,tmp_qloop_17))); + const __m256d hessian_blending_1_1_1 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_25,tmp_qloop_95),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_37,_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_31,_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(tmp_qloop_40,tmp_qloop_40,tmp_qloop_40,tmp_qloop_40)),_mm256_set_pd(tmp_qloop_53,tmp_qloop_53,tmp_qloop_53,tmp_qloop_53))),_mm256_mul_pd(tmp_qloop_29,_mm256_set_pd(tmp_qloop_40,tmp_qloop_40,tmp_qloop_40,tmp_qloop_40))),_mm256_mul_pd(tmp_qloop_47,tmp_qloop_79)),_mm256_mul_pd(tmp_qloop_54,tmp_qloop_79))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_17,tmp_qloop_17,tmp_qloop_17,tmp_qloop_17))); + const __m256d hessian_blending_2_1_1 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_22,tmp_qloop_95),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_37,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_29,_mm256_set_pd(tmp_qloop_72,tmp_qloop_72,tmp_qloop_72,tmp_qloop_72)),_mm256_mul_pd(tmp_qloop_54,tmp_qloop_82)),tmp_qloop_73)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_17,tmp_qloop_17,tmp_qloop_17,tmp_qloop_17))); + const __m256d hessian_blending_0_1_2 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_93,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_22,tmp_qloop_37),tmp_qloop_87),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_17,tmp_qloop_17,tmp_qloop_17,tmp_qloop_17))); + const __m256d hessian_blending_1_1_2 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_22,tmp_qloop_91),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_22,tmp_qloop_37),tmp_qloop_90),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_17,tmp_qloop_17,tmp_qloop_17,tmp_qloop_17))); + const __m256d hessian_blending_2_1_2 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_86,tmp_qloop_92),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_30,tmp_qloop_89),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_44); + const __m256d hessian_blending_0_2_0 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_88,tmp_qloop_96),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_19,tmp_qloop_97),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_50); + const __m256d hessian_blending_1_2_0 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_100,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_18,tmp_qloop_37),tmp_qloop_98),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_17,tmp_qloop_17,tmp_qloop_17,tmp_qloop_17))); + const __m256d hessian_blending_2_2_0 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_85,tmp_qloop_97),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_101,tmp_qloop_18),tmp_qloop_37),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_17,tmp_qloop_17,tmp_qloop_17,tmp_qloop_17))); + const __m256d hessian_blending_0_2_1 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_100,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_25,tmp_qloop_37),tmp_qloop_96),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_17,tmp_qloop_17,tmp_qloop_17,tmp_qloop_17))); + const __m256d hessian_blending_1_2_1 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_81,tmp_qloop_98),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_29,tmp_qloop_97),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_50); + const __m256d hessian_blending_2_2_1 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_22,tmp_qloop_99),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_101,tmp_qloop_25),tmp_qloop_37),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_17,tmp_qloop_17,tmp_qloop_17,tmp_qloop_17))); + const __m256d hessian_blending_0_2_2 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_102,tmp_qloop_18),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_37,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_30,_mm256_set_pd(tmp_qloop_68,tmp_qloop_68,tmp_qloop_68,tmp_qloop_68)),_mm256_mul_pd(tmp_qloop_57,tmp_qloop_69)),tmp_qloop_94)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_17,tmp_qloop_17,tmp_qloop_17,tmp_qloop_17))); + const __m256d hessian_blending_1_2_2 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_102,tmp_qloop_25),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_37,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_30,_mm256_set_pd(tmp_qloop_40,tmp_qloop_40,tmp_qloop_40,tmp_qloop_40)),_mm256_mul_pd(tmp_qloop_57,tmp_qloop_79)),tmp_qloop_70)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_17,tmp_qloop_17,tmp_qloop_17,tmp_qloop_17))); + const __m256d hessian_blending_2_2_2 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_102,tmp_qloop_22),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_37,_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_31,_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(tmp_qloop_72,tmp_qloop_72,tmp_qloop_72,tmp_qloop_72)),_mm256_set_pd(tmp_qloop_46,tmp_qloop_46,tmp_qloop_46,tmp_qloop_46))),_mm256_mul_pd(tmp_qloop_48,_mm256_set_pd(2.0,2.0,2.0,2.0))),_mm256_mul_pd(tmp_qloop_30,_mm256_set_pd(tmp_qloop_72,tmp_qloop_72,tmp_qloop_72,tmp_qloop_72))),_mm256_mul_pd(tmp_qloop_57,tmp_qloop_82))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_17,tmp_qloop_17,tmp_qloop_17,tmp_qloop_17))); + const __m256d q_tmp_0_0 = _mm256_mul_pd(tmp_qloop_115,tmp_qloop_137); + const __m256d q_tmp_0_1 = _mm256_mul_pd(tmp_qloop_127,tmp_qloop_137); + const __m256d q_tmp_0_2 = _mm256_mul_pd(tmp_qloop_128,tmp_qloop_137); + const __m256d q_tmp_0_3 = _mm256_mul_pd(tmp_qloop_129,tmp_qloop_137); + const __m256d q_tmp_0_4 = _mm256_mul_pd(tmp_qloop_110,tmp_qloop_137); + const __m256d q_tmp_0_5 = _mm256_mul_pd(tmp_qloop_113,tmp_qloop_137); + const __m256d q_tmp_0_6 = _mm256_mul_pd(tmp_qloop_112,tmp_qloop_137); + const __m256d q_tmp_0_7 = _mm256_mul_pd(tmp_qloop_130,tmp_qloop_137); + const __m256d q_tmp_0_8 = _mm256_mul_pd(tmp_qloop_132,tmp_qloop_137); + const __m256d q_tmp_0_9 = _mm256_mul_pd(tmp_qloop_134,tmp_qloop_137); + const __m256d q_tmp_0_10 = _mm256_mul_pd(tmp_qloop_115,tmp_qloop_139); + const __m256d q_tmp_0_11 = _mm256_mul_pd(tmp_qloop_127,tmp_qloop_139); + const __m256d q_tmp_0_12 = _mm256_mul_pd(tmp_qloop_128,tmp_qloop_139); + const __m256d q_tmp_0_13 = _mm256_mul_pd(tmp_qloop_129,tmp_qloop_139); + const __m256d q_tmp_0_14 = _mm256_mul_pd(tmp_qloop_110,tmp_qloop_139); + const __m256d q_tmp_0_15 = _mm256_mul_pd(tmp_qloop_113,tmp_qloop_139); + const __m256d q_tmp_0_16 = _mm256_mul_pd(tmp_qloop_112,tmp_qloop_139); + const __m256d q_tmp_0_17 = _mm256_mul_pd(tmp_qloop_130,tmp_qloop_139); + const __m256d q_tmp_0_18 = _mm256_mul_pd(tmp_qloop_132,tmp_qloop_139); + const __m256d q_tmp_0_19 = _mm256_mul_pd(tmp_qloop_134,tmp_qloop_139); + const __m256d q_tmp_0_20 = _mm256_mul_pd(tmp_qloop_115,tmp_qloop_141); + const __m256d q_tmp_0_21 = _mm256_mul_pd(tmp_qloop_127,tmp_qloop_141); + const __m256d q_tmp_0_22 = _mm256_mul_pd(tmp_qloop_128,tmp_qloop_141); + const __m256d q_tmp_0_23 = _mm256_mul_pd(tmp_qloop_129,tmp_qloop_141); + const __m256d q_tmp_0_24 = _mm256_mul_pd(tmp_qloop_110,tmp_qloop_141); + const __m256d q_tmp_0_25 = _mm256_mul_pd(tmp_qloop_113,tmp_qloop_141); + const __m256d q_tmp_0_26 = _mm256_mul_pd(tmp_qloop_112,tmp_qloop_141); + const __m256d q_tmp_0_27 = _mm256_mul_pd(tmp_qloop_130,tmp_qloop_141); + const __m256d q_tmp_0_28 = _mm256_mul_pd(tmp_qloop_132,tmp_qloop_141); + const __m256d q_tmp_0_29 = _mm256_mul_pd(tmp_qloop_134,tmp_qloop_141); + const __m256d q_tmp_1_0 = _mm256_mul_pd(tmp_qloop_115,tmp_qloop_143); + const __m256d q_tmp_1_1 = _mm256_mul_pd(tmp_qloop_127,tmp_qloop_143); + const __m256d q_tmp_1_2 = _mm256_mul_pd(tmp_qloop_128,tmp_qloop_143); + const __m256d q_tmp_1_3 = _mm256_mul_pd(tmp_qloop_129,tmp_qloop_143); + const __m256d q_tmp_1_4 = tmp_qloop_146; + const __m256d q_tmp_1_5 = _mm256_mul_pd(tmp_qloop_133,tmp_qloop_145); + const __m256d q_tmp_1_6 = _mm256_mul_pd(tmp_qloop_133,tmp_qloop_148); + const __m256d q_tmp_1_7 = _mm256_mul_pd(tmp_qloop_130,tmp_qloop_143); + const __m256d q_tmp_1_8 = _mm256_mul_pd(tmp_qloop_132,tmp_qloop_143); + const __m256d q_tmp_1_9 = _mm256_mul_pd(tmp_qloop_134,tmp_qloop_143); + const __m256d q_tmp_1_10 = _mm256_mul_pd(tmp_qloop_115,tmp_qloop_149); + const __m256d q_tmp_1_11 = _mm256_mul_pd(tmp_qloop_127,tmp_qloop_149); + const __m256d q_tmp_1_12 = _mm256_mul_pd(tmp_qloop_128,tmp_qloop_149); + const __m256d q_tmp_1_13 = _mm256_mul_pd(tmp_qloop_129,tmp_qloop_149); + const __m256d q_tmp_1_14 = tmp_qloop_151; + const __m256d q_tmp_1_15 = _mm256_mul_pd(tmp_qloop_133,tmp_qloop_150); + const __m256d q_tmp_1_16 = _mm256_mul_pd(tmp_qloop_133,tmp_qloop_152); + const __m256d q_tmp_1_17 = _mm256_mul_pd(tmp_qloop_130,tmp_qloop_149); + const __m256d q_tmp_1_18 = _mm256_mul_pd(tmp_qloop_132,tmp_qloop_149); + const __m256d q_tmp_1_19 = _mm256_mul_pd(tmp_qloop_134,tmp_qloop_149); + const __m256d q_tmp_1_20 = _mm256_mul_pd(tmp_qloop_115,tmp_qloop_153); + const __m256d q_tmp_1_21 = _mm256_mul_pd(tmp_qloop_127,tmp_qloop_153); + const __m256d q_tmp_1_22 = _mm256_mul_pd(tmp_qloop_128,tmp_qloop_153); + const __m256d q_tmp_1_23 = _mm256_mul_pd(tmp_qloop_129,tmp_qloop_153); + const __m256d q_tmp_1_24 = tmp_qloop_155; + const __m256d q_tmp_1_25 = _mm256_mul_pd(tmp_qloop_133,tmp_qloop_154); + const __m256d q_tmp_1_26 = _mm256_mul_pd(tmp_qloop_133,tmp_qloop_156); + const __m256d q_tmp_1_27 = _mm256_mul_pd(tmp_qloop_130,tmp_qloop_153); + const __m256d q_tmp_1_28 = _mm256_mul_pd(tmp_qloop_132,tmp_qloop_153); + const __m256d q_tmp_1_29 = _mm256_mul_pd(tmp_qloop_134,tmp_qloop_153); + const __m256d q_tmp_2_0 = _mm256_mul_pd(tmp_qloop_115,tmp_qloop_148); + const __m256d q_tmp_2_1 = _mm256_mul_pd(tmp_qloop_127,tmp_qloop_148); + const __m256d q_tmp_2_2 = _mm256_mul_pd(tmp_qloop_128,tmp_qloop_148); + const __m256d q_tmp_2_3 = _mm256_mul_pd(tmp_qloop_129,tmp_qloop_148); + const __m256d q_tmp_2_4 = _mm256_mul_pd(tmp_qloop_131,tmp_qloop_145); + const __m256d q_tmp_2_5 = tmp_qloop_146; + const __m256d q_tmp_2_6 = _mm256_mul_pd(tmp_qloop_157,tmp_qloop_158); + const __m256d q_tmp_2_7 = _mm256_mul_pd(tmp_qloop_130,tmp_qloop_148); + const __m256d q_tmp_2_8 = _mm256_mul_pd(tmp_qloop_132,tmp_qloop_148); + const __m256d q_tmp_2_9 = _mm256_mul_pd(tmp_qloop_134,tmp_qloop_148); + const __m256d q_tmp_2_10 = _mm256_mul_pd(tmp_qloop_115,tmp_qloop_152); + const __m256d q_tmp_2_11 = _mm256_mul_pd(tmp_qloop_127,tmp_qloop_152); + const __m256d q_tmp_2_12 = _mm256_mul_pd(tmp_qloop_128,tmp_qloop_152); + const __m256d q_tmp_2_13 = _mm256_mul_pd(tmp_qloop_129,tmp_qloop_152); + const __m256d q_tmp_2_14 = _mm256_mul_pd(tmp_qloop_131,tmp_qloop_150); + const __m256d q_tmp_2_15 = tmp_qloop_151; + const __m256d q_tmp_2_16 = _mm256_mul_pd(tmp_qloop_138,tmp_qloop_159); + const __m256d q_tmp_2_17 = _mm256_mul_pd(tmp_qloop_130,tmp_qloop_152); + const __m256d q_tmp_2_18 = _mm256_mul_pd(tmp_qloop_132,tmp_qloop_152); + const __m256d q_tmp_2_19 = _mm256_mul_pd(tmp_qloop_134,tmp_qloop_152); + const __m256d q_tmp_2_20 = _mm256_mul_pd(tmp_qloop_115,tmp_qloop_156); + const __m256d q_tmp_2_21 = _mm256_mul_pd(tmp_qloop_127,tmp_qloop_156); + const __m256d q_tmp_2_22 = _mm256_mul_pd(tmp_qloop_128,tmp_qloop_156); + const __m256d q_tmp_2_23 = _mm256_mul_pd(tmp_qloop_129,tmp_qloop_156); + const __m256d q_tmp_2_24 = _mm256_mul_pd(tmp_qloop_131,tmp_qloop_154); + const __m256d q_tmp_2_25 = tmp_qloop_155; + const __m256d q_tmp_2_26 = _mm256_mul_pd(tmp_qloop_140,tmp_qloop_159); + const __m256d q_tmp_2_27 = _mm256_mul_pd(tmp_qloop_130,tmp_qloop_156); + const __m256d q_tmp_2_28 = _mm256_mul_pd(tmp_qloop_132,tmp_qloop_156); + const __m256d q_tmp_2_29 = _mm256_mul_pd(tmp_qloop_134,tmp_qloop_156); + const __m256d q_tmp_3_0 = _mm256_mul_pd(tmp_qloop_115,tmp_qloop_145); + const __m256d q_tmp_3_1 = _mm256_mul_pd(tmp_qloop_127,tmp_qloop_145); + const __m256d q_tmp_3_2 = _mm256_mul_pd(tmp_qloop_128,tmp_qloop_145); + const __m256d q_tmp_3_3 = _mm256_mul_pd(tmp_qloop_129,tmp_qloop_145); + const __m256d q_tmp_3_4 = _mm256_mul_pd(tmp_qloop_109,tmp_qloop_160); + const __m256d q_tmp_3_5 = _mm256_mul_pd(tmp_qloop_111,tmp_qloop_160); + const __m256d q_tmp_3_6 = tmp_qloop_146; + const __m256d q_tmp_3_7 = _mm256_mul_pd(tmp_qloop_130,tmp_qloop_145); + const __m256d q_tmp_3_8 = _mm256_mul_pd(tmp_qloop_132,tmp_qloop_145); + const __m256d q_tmp_3_9 = _mm256_mul_pd(tmp_qloop_134,tmp_qloop_145); + const __m256d q_tmp_3_10 = _mm256_mul_pd(tmp_qloop_115,tmp_qloop_150); + const __m256d q_tmp_3_11 = _mm256_mul_pd(tmp_qloop_127,tmp_qloop_150); + const __m256d q_tmp_3_12 = _mm256_mul_pd(tmp_qloop_128,tmp_qloop_150); + const __m256d q_tmp_3_13 = _mm256_mul_pd(tmp_qloop_129,tmp_qloop_150); + const __m256d q_tmp_3_14 = _mm256_mul_pd(tmp_qloop_109,tmp_qloop_162); + const __m256d q_tmp_3_15 = _mm256_mul_pd(tmp_qloop_111,tmp_qloop_162); + const __m256d q_tmp_3_16 = tmp_qloop_151; + const __m256d q_tmp_3_17 = _mm256_mul_pd(tmp_qloop_130,tmp_qloop_150); + const __m256d q_tmp_3_18 = _mm256_mul_pd(tmp_qloop_132,tmp_qloop_150); + const __m256d q_tmp_3_19 = _mm256_mul_pd(tmp_qloop_134,tmp_qloop_150); + const __m256d q_tmp_3_20 = _mm256_mul_pd(tmp_qloop_115,tmp_qloop_154); + const __m256d q_tmp_3_21 = _mm256_mul_pd(tmp_qloop_127,tmp_qloop_154); + const __m256d q_tmp_3_22 = _mm256_mul_pd(tmp_qloop_128,tmp_qloop_154); + const __m256d q_tmp_3_23 = _mm256_mul_pd(tmp_qloop_129,tmp_qloop_154); + const __m256d q_tmp_3_24 = _mm256_mul_pd(tmp_qloop_109,tmp_qloop_163); + const __m256d q_tmp_3_25 = _mm256_mul_pd(tmp_qloop_111,tmp_qloop_163); + const __m256d q_tmp_3_26 = tmp_qloop_155; + const __m256d q_tmp_3_27 = _mm256_mul_pd(tmp_qloop_130,tmp_qloop_154); + const __m256d q_tmp_3_28 = _mm256_mul_pd(tmp_qloop_132,tmp_qloop_154); + const __m256d q_tmp_3_29 = _mm256_mul_pd(tmp_qloop_134,tmp_qloop_154); + q_acc_0_0 = _mm256_add_pd(q_acc_0_0,q_tmp_0_0); + q_acc_0_1 = _mm256_add_pd(q_acc_0_1,q_tmp_0_1); + q_acc_0_2 = _mm256_add_pd(q_acc_0_2,q_tmp_0_2); + q_acc_0_3 = _mm256_add_pd(q_acc_0_3,q_tmp_0_3); + q_acc_0_4 = _mm256_add_pd(q_acc_0_4,q_tmp_0_4); + q_acc_0_5 = _mm256_add_pd(q_acc_0_5,q_tmp_0_5); + q_acc_0_6 = _mm256_add_pd(q_acc_0_6,q_tmp_0_6); + q_acc_0_7 = _mm256_add_pd(q_acc_0_7,q_tmp_0_7); + q_acc_0_8 = _mm256_add_pd(q_acc_0_8,q_tmp_0_8); + q_acc_0_9 = _mm256_add_pd(q_acc_0_9,q_tmp_0_9); + q_acc_0_10 = _mm256_add_pd(q_acc_0_10,q_tmp_0_10); + q_acc_0_11 = _mm256_add_pd(q_acc_0_11,q_tmp_0_11); + q_acc_0_12 = _mm256_add_pd(q_acc_0_12,q_tmp_0_12); + q_acc_0_13 = _mm256_add_pd(q_acc_0_13,q_tmp_0_13); + q_acc_0_14 = _mm256_add_pd(q_acc_0_14,q_tmp_0_14); + q_acc_0_15 = _mm256_add_pd(q_acc_0_15,q_tmp_0_15); + q_acc_0_16 = _mm256_add_pd(q_acc_0_16,q_tmp_0_16); + q_acc_0_17 = _mm256_add_pd(q_acc_0_17,q_tmp_0_17); + q_acc_0_18 = _mm256_add_pd(q_acc_0_18,q_tmp_0_18); + q_acc_0_19 = _mm256_add_pd(q_acc_0_19,q_tmp_0_19); + q_acc_0_20 = _mm256_add_pd(q_acc_0_20,q_tmp_0_20); + q_acc_0_21 = _mm256_add_pd(q_acc_0_21,q_tmp_0_21); + q_acc_0_22 = _mm256_add_pd(q_acc_0_22,q_tmp_0_22); + q_acc_0_23 = _mm256_add_pd(q_acc_0_23,q_tmp_0_23); + q_acc_0_24 = _mm256_add_pd(q_acc_0_24,q_tmp_0_24); + q_acc_0_25 = _mm256_add_pd(q_acc_0_25,q_tmp_0_25); + q_acc_0_26 = _mm256_add_pd(q_acc_0_26,q_tmp_0_26); + q_acc_0_27 = _mm256_add_pd(q_acc_0_27,q_tmp_0_27); + q_acc_0_28 = _mm256_add_pd(q_acc_0_28,q_tmp_0_28); + q_acc_0_29 = _mm256_add_pd(q_acc_0_29,q_tmp_0_29); + q_acc_1_0 = _mm256_add_pd(q_acc_1_0,q_tmp_1_0); + q_acc_1_1 = _mm256_add_pd(q_acc_1_1,q_tmp_1_1); + q_acc_1_2 = _mm256_add_pd(q_acc_1_2,q_tmp_1_2); + q_acc_1_3 = _mm256_add_pd(q_acc_1_3,q_tmp_1_3); + q_acc_1_4 = _mm256_add_pd(q_acc_1_4,q_tmp_1_4); + q_acc_1_5 = _mm256_add_pd(q_acc_1_5,q_tmp_1_5); + q_acc_1_6 = _mm256_add_pd(q_acc_1_6,q_tmp_1_6); + q_acc_1_7 = _mm256_add_pd(q_acc_1_7,q_tmp_1_7); + q_acc_1_8 = _mm256_add_pd(q_acc_1_8,q_tmp_1_8); + q_acc_1_9 = _mm256_add_pd(q_acc_1_9,q_tmp_1_9); + q_acc_1_10 = _mm256_add_pd(q_acc_1_10,q_tmp_1_10); + q_acc_1_11 = _mm256_add_pd(q_acc_1_11,q_tmp_1_11); + q_acc_1_12 = _mm256_add_pd(q_acc_1_12,q_tmp_1_12); + q_acc_1_13 = _mm256_add_pd(q_acc_1_13,q_tmp_1_13); + q_acc_1_14 = _mm256_add_pd(q_acc_1_14,q_tmp_1_14); + q_acc_1_15 = _mm256_add_pd(q_acc_1_15,q_tmp_1_15); + q_acc_1_16 = _mm256_add_pd(q_acc_1_16,q_tmp_1_16); + q_acc_1_17 = _mm256_add_pd(q_acc_1_17,q_tmp_1_17); + q_acc_1_18 = _mm256_add_pd(q_acc_1_18,q_tmp_1_18); + q_acc_1_19 = _mm256_add_pd(q_acc_1_19,q_tmp_1_19); + q_acc_1_20 = _mm256_add_pd(q_acc_1_20,q_tmp_1_20); + q_acc_1_21 = _mm256_add_pd(q_acc_1_21,q_tmp_1_21); + q_acc_1_22 = _mm256_add_pd(q_acc_1_22,q_tmp_1_22); + q_acc_1_23 = _mm256_add_pd(q_acc_1_23,q_tmp_1_23); + q_acc_1_24 = _mm256_add_pd(q_acc_1_24,q_tmp_1_24); + q_acc_1_25 = _mm256_add_pd(q_acc_1_25,q_tmp_1_25); + q_acc_1_26 = _mm256_add_pd(q_acc_1_26,q_tmp_1_26); + q_acc_1_27 = _mm256_add_pd(q_acc_1_27,q_tmp_1_27); + q_acc_1_28 = _mm256_add_pd(q_acc_1_28,q_tmp_1_28); + q_acc_1_29 = _mm256_add_pd(q_acc_1_29,q_tmp_1_29); + q_acc_2_0 = _mm256_add_pd(q_acc_2_0,q_tmp_2_0); + q_acc_2_1 = _mm256_add_pd(q_acc_2_1,q_tmp_2_1); + q_acc_2_2 = _mm256_add_pd(q_acc_2_2,q_tmp_2_2); + q_acc_2_3 = _mm256_add_pd(q_acc_2_3,q_tmp_2_3); + q_acc_2_4 = _mm256_add_pd(q_acc_2_4,q_tmp_2_4); + q_acc_2_5 = _mm256_add_pd(q_acc_2_5,q_tmp_2_5); + q_acc_2_6 = _mm256_add_pd(q_acc_2_6,q_tmp_2_6); + q_acc_2_7 = _mm256_add_pd(q_acc_2_7,q_tmp_2_7); + q_acc_2_8 = _mm256_add_pd(q_acc_2_8,q_tmp_2_8); + q_acc_2_9 = _mm256_add_pd(q_acc_2_9,q_tmp_2_9); + q_acc_2_10 = _mm256_add_pd(q_acc_2_10,q_tmp_2_10); + q_acc_2_11 = _mm256_add_pd(q_acc_2_11,q_tmp_2_11); + q_acc_2_12 = _mm256_add_pd(q_acc_2_12,q_tmp_2_12); + q_acc_2_13 = _mm256_add_pd(q_acc_2_13,q_tmp_2_13); + q_acc_2_14 = _mm256_add_pd(q_acc_2_14,q_tmp_2_14); + q_acc_2_15 = _mm256_add_pd(q_acc_2_15,q_tmp_2_15); + q_acc_2_16 = _mm256_add_pd(q_acc_2_16,q_tmp_2_16); + q_acc_2_17 = _mm256_add_pd(q_acc_2_17,q_tmp_2_17); + q_acc_2_18 = _mm256_add_pd(q_acc_2_18,q_tmp_2_18); + q_acc_2_19 = _mm256_add_pd(q_acc_2_19,q_tmp_2_19); + q_acc_2_20 = _mm256_add_pd(q_acc_2_20,q_tmp_2_20); + q_acc_2_21 = _mm256_add_pd(q_acc_2_21,q_tmp_2_21); + q_acc_2_22 = _mm256_add_pd(q_acc_2_22,q_tmp_2_22); + q_acc_2_23 = _mm256_add_pd(q_acc_2_23,q_tmp_2_23); + q_acc_2_24 = _mm256_add_pd(q_acc_2_24,q_tmp_2_24); + q_acc_2_25 = _mm256_add_pd(q_acc_2_25,q_tmp_2_25); + q_acc_2_26 = _mm256_add_pd(q_acc_2_26,q_tmp_2_26); + q_acc_2_27 = _mm256_add_pd(q_acc_2_27,q_tmp_2_27); + q_acc_2_28 = _mm256_add_pd(q_acc_2_28,q_tmp_2_28); + q_acc_2_29 = _mm256_add_pd(q_acc_2_29,q_tmp_2_29); + q_acc_3_0 = _mm256_add_pd(q_acc_3_0,q_tmp_3_0); + q_acc_3_1 = _mm256_add_pd(q_acc_3_1,q_tmp_3_1); + q_acc_3_2 = _mm256_add_pd(q_acc_3_2,q_tmp_3_2); + q_acc_3_3 = _mm256_add_pd(q_acc_3_3,q_tmp_3_3); + q_acc_3_4 = _mm256_add_pd(q_acc_3_4,q_tmp_3_4); + q_acc_3_5 = _mm256_add_pd(q_acc_3_5,q_tmp_3_5); + q_acc_3_6 = _mm256_add_pd(q_acc_3_6,q_tmp_3_6); + q_acc_3_7 = _mm256_add_pd(q_acc_3_7,q_tmp_3_7); + q_acc_3_8 = _mm256_add_pd(q_acc_3_8,q_tmp_3_8); + q_acc_3_9 = _mm256_add_pd(q_acc_3_9,q_tmp_3_9); + q_acc_3_10 = _mm256_add_pd(q_acc_3_10,q_tmp_3_10); + q_acc_3_11 = _mm256_add_pd(q_acc_3_11,q_tmp_3_11); + q_acc_3_12 = _mm256_add_pd(q_acc_3_12,q_tmp_3_12); + q_acc_3_13 = _mm256_add_pd(q_acc_3_13,q_tmp_3_13); + q_acc_3_14 = _mm256_add_pd(q_acc_3_14,q_tmp_3_14); + q_acc_3_15 = _mm256_add_pd(q_acc_3_15,q_tmp_3_15); + q_acc_3_16 = _mm256_add_pd(q_acc_3_16,q_tmp_3_16); + q_acc_3_17 = _mm256_add_pd(q_acc_3_17,q_tmp_3_17); + q_acc_3_18 = _mm256_add_pd(q_acc_3_18,q_tmp_3_18); + q_acc_3_19 = _mm256_add_pd(q_acc_3_19,q_tmp_3_19); + q_acc_3_20 = _mm256_add_pd(q_acc_3_20,q_tmp_3_20); + q_acc_3_21 = _mm256_add_pd(q_acc_3_21,q_tmp_3_21); + q_acc_3_22 = _mm256_add_pd(q_acc_3_22,q_tmp_3_22); + q_acc_3_23 = _mm256_add_pd(q_acc_3_23,q_tmp_3_23); + q_acc_3_24 = _mm256_add_pd(q_acc_3_24,q_tmp_3_24); + q_acc_3_25 = _mm256_add_pd(q_acc_3_25,q_tmp_3_25); + q_acc_3_26 = _mm256_add_pd(q_acc_3_26,q_tmp_3_26); + q_acc_3_27 = _mm256_add_pd(q_acc_3_27,q_tmp_3_27); + q_acc_3_28 = _mm256_add_pd(q_acc_3_28,q_tmp_3_28); + q_acc_3_29 = _mm256_add_pd(q_acc_3_29,q_tmp_3_29); + } + const __m256d elMatVec_0 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(q_acc_0_0,src_dof_0),_mm256_mul_pd(q_acc_0_1,src_dof_1)),_mm256_mul_pd(q_acc_0_10,src_dof_10)),_mm256_mul_pd(q_acc_0_11,src_dof_11)),_mm256_mul_pd(q_acc_0_12,src_dof_12)),_mm256_mul_pd(q_acc_0_13,src_dof_13)),_mm256_mul_pd(q_acc_0_14,src_dof_14)),_mm256_mul_pd(q_acc_0_15,src_dof_15)),_mm256_mul_pd(q_acc_0_16,src_dof_16)),_mm256_mul_pd(q_acc_0_17,src_dof_17)),_mm256_mul_pd(q_acc_0_18,src_dof_18)),_mm256_mul_pd(q_acc_0_19,src_dof_19)),_mm256_mul_pd(q_acc_0_2,src_dof_2)),_mm256_mul_pd(q_acc_0_20,src_dof_20)),_mm256_mul_pd(q_acc_0_21,src_dof_21)),_mm256_mul_pd(q_acc_0_22,src_dof_22)),_mm256_mul_pd(q_acc_0_23,src_dof_23)),_mm256_mul_pd(q_acc_0_24,src_dof_24)),_mm256_mul_pd(q_acc_0_25,src_dof_25)),_mm256_mul_pd(q_acc_0_26,src_dof_26)),_mm256_mul_pd(q_acc_0_27,src_dof_27)),_mm256_mul_pd(q_acc_0_28,src_dof_28)),_mm256_mul_pd(q_acc_0_29,src_dof_29)),_mm256_mul_pd(q_acc_0_3,src_dof_3)),_mm256_mul_pd(q_acc_0_4,src_dof_4)),_mm256_mul_pd(q_acc_0_5,src_dof_5)),_mm256_mul_pd(q_acc_0_6,src_dof_6)),_mm256_mul_pd(q_acc_0_7,src_dof_7)),_mm256_mul_pd(q_acc_0_8,src_dof_8)),_mm256_mul_pd(q_acc_0_9,src_dof_9)); + const __m256d elMatVec_1 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(q_acc_1_0,src_dof_0),_mm256_mul_pd(q_acc_1_1,src_dof_1)),_mm256_mul_pd(q_acc_1_10,src_dof_10)),_mm256_mul_pd(q_acc_1_11,src_dof_11)),_mm256_mul_pd(q_acc_1_12,src_dof_12)),_mm256_mul_pd(q_acc_1_13,src_dof_13)),_mm256_mul_pd(q_acc_1_14,src_dof_14)),_mm256_mul_pd(q_acc_1_15,src_dof_15)),_mm256_mul_pd(q_acc_1_16,src_dof_16)),_mm256_mul_pd(q_acc_1_17,src_dof_17)),_mm256_mul_pd(q_acc_1_18,src_dof_18)),_mm256_mul_pd(q_acc_1_19,src_dof_19)),_mm256_mul_pd(q_acc_1_2,src_dof_2)),_mm256_mul_pd(q_acc_1_20,src_dof_20)),_mm256_mul_pd(q_acc_1_21,src_dof_21)),_mm256_mul_pd(q_acc_1_22,src_dof_22)),_mm256_mul_pd(q_acc_1_23,src_dof_23)),_mm256_mul_pd(q_acc_1_24,src_dof_24)),_mm256_mul_pd(q_acc_1_25,src_dof_25)),_mm256_mul_pd(q_acc_1_26,src_dof_26)),_mm256_mul_pd(q_acc_1_27,src_dof_27)),_mm256_mul_pd(q_acc_1_28,src_dof_28)),_mm256_mul_pd(q_acc_1_29,src_dof_29)),_mm256_mul_pd(q_acc_1_3,src_dof_3)),_mm256_mul_pd(q_acc_1_4,src_dof_4)),_mm256_mul_pd(q_acc_1_5,src_dof_5)),_mm256_mul_pd(q_acc_1_6,src_dof_6)),_mm256_mul_pd(q_acc_1_7,src_dof_7)),_mm256_mul_pd(q_acc_1_8,src_dof_8)),_mm256_mul_pd(q_acc_1_9,src_dof_9)); + const __m256d elMatVec_2 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(q_acc_2_0,src_dof_0),_mm256_mul_pd(q_acc_2_1,src_dof_1)),_mm256_mul_pd(q_acc_2_10,src_dof_10)),_mm256_mul_pd(q_acc_2_11,src_dof_11)),_mm256_mul_pd(q_acc_2_12,src_dof_12)),_mm256_mul_pd(q_acc_2_13,src_dof_13)),_mm256_mul_pd(q_acc_2_14,src_dof_14)),_mm256_mul_pd(q_acc_2_15,src_dof_15)),_mm256_mul_pd(q_acc_2_16,src_dof_16)),_mm256_mul_pd(q_acc_2_17,src_dof_17)),_mm256_mul_pd(q_acc_2_18,src_dof_18)),_mm256_mul_pd(q_acc_2_19,src_dof_19)),_mm256_mul_pd(q_acc_2_2,src_dof_2)),_mm256_mul_pd(q_acc_2_20,src_dof_20)),_mm256_mul_pd(q_acc_2_21,src_dof_21)),_mm256_mul_pd(q_acc_2_22,src_dof_22)),_mm256_mul_pd(q_acc_2_23,src_dof_23)),_mm256_mul_pd(q_acc_2_24,src_dof_24)),_mm256_mul_pd(q_acc_2_25,src_dof_25)),_mm256_mul_pd(q_acc_2_26,src_dof_26)),_mm256_mul_pd(q_acc_2_27,src_dof_27)),_mm256_mul_pd(q_acc_2_28,src_dof_28)),_mm256_mul_pd(q_acc_2_29,src_dof_29)),_mm256_mul_pd(q_acc_2_3,src_dof_3)),_mm256_mul_pd(q_acc_2_4,src_dof_4)),_mm256_mul_pd(q_acc_2_5,src_dof_5)),_mm256_mul_pd(q_acc_2_6,src_dof_6)),_mm256_mul_pd(q_acc_2_7,src_dof_7)),_mm256_mul_pd(q_acc_2_8,src_dof_8)),_mm256_mul_pd(q_acc_2_9,src_dof_9)); + const __m256d elMatVec_3 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(q_acc_3_0,src_dof_0),_mm256_mul_pd(q_acc_3_1,src_dof_1)),_mm256_mul_pd(q_acc_3_10,src_dof_10)),_mm256_mul_pd(q_acc_3_11,src_dof_11)),_mm256_mul_pd(q_acc_3_12,src_dof_12)),_mm256_mul_pd(q_acc_3_13,src_dof_13)),_mm256_mul_pd(q_acc_3_14,src_dof_14)),_mm256_mul_pd(q_acc_3_15,src_dof_15)),_mm256_mul_pd(q_acc_3_16,src_dof_16)),_mm256_mul_pd(q_acc_3_17,src_dof_17)),_mm256_mul_pd(q_acc_3_18,src_dof_18)),_mm256_mul_pd(q_acc_3_19,src_dof_19)),_mm256_mul_pd(q_acc_3_2,src_dof_2)),_mm256_mul_pd(q_acc_3_20,src_dof_20)),_mm256_mul_pd(q_acc_3_21,src_dof_21)),_mm256_mul_pd(q_acc_3_22,src_dof_22)),_mm256_mul_pd(q_acc_3_23,src_dof_23)),_mm256_mul_pd(q_acc_3_24,src_dof_24)),_mm256_mul_pd(q_acc_3_25,src_dof_25)),_mm256_mul_pd(q_acc_3_26,src_dof_26)),_mm256_mul_pd(q_acc_3_27,src_dof_27)),_mm256_mul_pd(q_acc_3_28,src_dof_28)),_mm256_mul_pd(q_acc_3_29,src_dof_29)),_mm256_mul_pd(q_acc_3_3,src_dof_3)),_mm256_mul_pd(q_acc_3_4,src_dof_4)),_mm256_mul_pd(q_acc_3_5,src_dof_5)),_mm256_mul_pd(q_acc_3_6,src_dof_6)),_mm256_mul_pd(q_acc_3_7,src_dof_7)),_mm256_mul_pd(q_acc_3_8,src_dof_8)),_mm256_mul_pd(q_acc_3_9,src_dof_9)); + _mm256_storeu_pd(&_data_dst[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1],_mm256_add_pd(elMatVec_0,_mm256_loadu_pd(& _data_dst[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1]))); + _mm256_storeu_pd(&_data_dst[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1],_mm256_add_pd(elMatVec_1,_mm256_loadu_pd(& _data_dst[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1]))); + _mm256_storeu_pd(&_data_dst[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))],_mm256_add_pd(elMatVec_2,_mm256_loadu_pd(& _data_dst[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))]))); + _mm256_storeu_pd(&_data_dst[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1],_mm256_add_pd(elMatVec_3,_mm256_loadu_pd(& _data_dst[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1]))); + } + for (int64_t ctr_0 = (int64_t)((-ctr_1 - ctr_2 + micro_edges_per_macro_edge - 2) / (4)) * (4); ctr_0 < -ctr_1 - ctr_2 + micro_edges_per_macro_edge - 2; ctr_0 += 1) + { + + const int64_t phantom_ctr_0 = ctr_0; + real_t _data_float_loop_ctr_array_dim_0[4]; + _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0; + _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1; + _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2; + _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3; + real_t _data_float_loop_ctr_array_dim_1[4]; + _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1; + real_t _data_float_loop_ctr_array_dim_2[4]; + _data_float_loop_ctr_array_dim_2[0] = (real_t) ctr_2; + _data_float_loop_ctr_array_dim_2[1] = (real_t) ctr_2; + _data_float_loop_ctr_array_dim_2[2] = (real_t) ctr_2; + _data_float_loop_ctr_array_dim_2[3] = (real_t) ctr_2; + + const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]; + const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]; + const real_t p_affine_0_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]; + const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_1_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_2_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_3_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_3_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_3_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0; + const real_t src_dof_0 = _data_src_vertex_0[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1]; + const real_t src_dof_1 = _data_src_vertex_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1]; + const real_t src_dof_2 = _data_src_vertex_0[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))]; + const real_t src_dof_3 = _data_src_vertex_0[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1]; + const real_t src_dof_4 = _data_src_edge_0[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))]; + const real_t src_dof_5 = _data_src_edge_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6)) + 1]; + const real_t src_dof_6 = _data_src_edge_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + 4*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))]; + const real_t src_dof_7 = _data_src_edge_0[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 3*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + 1]; + const real_t src_dof_8 = _data_src_edge_0[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 5*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]; + const real_t src_dof_9 = _data_src_edge_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + 1]; + const real_t src_dof_10 = _data_src_vertex_1[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1]; + const real_t src_dof_11 = _data_src_vertex_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1]; + const real_t src_dof_12 = _data_src_vertex_1[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))]; + const real_t src_dof_13 = _data_src_vertex_1[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1]; + const real_t src_dof_14 = _data_src_edge_1[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))]; + const real_t src_dof_15 = _data_src_edge_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6)) + 1]; + const real_t src_dof_16 = _data_src_edge_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + 4*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))]; + const real_t src_dof_17 = _data_src_edge_1[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 3*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + 1]; + const real_t src_dof_18 = _data_src_edge_1[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 5*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]; + const real_t src_dof_19 = _data_src_edge_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + 1]; + const real_t src_dof_20 = _data_src_vertex_2[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1]; + const real_t src_dof_21 = _data_src_vertex_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1]; + const real_t src_dof_22 = _data_src_vertex_2[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))]; + const real_t src_dof_23 = _data_src_vertex_2[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1]; + const real_t src_dof_24 = _data_src_edge_2[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))]; + const real_t src_dof_25 = _data_src_edge_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6)) + 1]; + const real_t src_dof_26 = _data_src_edge_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + 4*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))]; + const real_t src_dof_27 = _data_src_edge_2[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 3*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + 1]; + const real_t src_dof_28 = _data_src_edge_2[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 5*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]; + const real_t src_dof_29 = _data_src_edge_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + 1]; + const real_t rho_dof_0 = _data_rhoVertex[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1]; + const real_t rho_dof_1 = _data_rhoVertex[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1]; + const real_t rho_dof_2 = _data_rhoVertex[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))]; + const real_t rho_dof_3 = _data_rhoVertex[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1]; + const real_t rho_dof_4 = _data_rhoEdge[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))]; + const real_t rho_dof_5 = _data_rhoEdge[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6)) + 1]; + const real_t rho_dof_6 = _data_rhoEdge[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + 4*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))]; + const real_t rho_dof_7 = _data_rhoEdge[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 3*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + 1]; + const real_t rho_dof_8 = _data_rhoEdge[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 5*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]; + const real_t rho_dof_9 = _data_rhoEdge[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + 1]; + real_t q_acc_0_0 = 0.0; + real_t q_acc_0_1 = 0.0; + real_t q_acc_0_2 = 0.0; + real_t q_acc_0_3 = 0.0; + real_t q_acc_0_4 = 0.0; + real_t q_acc_0_5 = 0.0; + real_t q_acc_0_6 = 0.0; + real_t q_acc_0_7 = 0.0; + real_t q_acc_0_8 = 0.0; + real_t q_acc_0_9 = 0.0; + real_t q_acc_0_10 = 0.0; + real_t q_acc_0_11 = 0.0; + real_t q_acc_0_12 = 0.0; + real_t q_acc_0_13 = 0.0; + real_t q_acc_0_14 = 0.0; + real_t q_acc_0_15 = 0.0; + real_t q_acc_0_16 = 0.0; + real_t q_acc_0_17 = 0.0; + real_t q_acc_0_18 = 0.0; + real_t q_acc_0_19 = 0.0; + real_t q_acc_0_20 = 0.0; + real_t q_acc_0_21 = 0.0; + real_t q_acc_0_22 = 0.0; + real_t q_acc_0_23 = 0.0; + real_t q_acc_0_24 = 0.0; + real_t q_acc_0_25 = 0.0; + real_t q_acc_0_26 = 0.0; + real_t q_acc_0_27 = 0.0; + real_t q_acc_0_28 = 0.0; + real_t q_acc_0_29 = 0.0; + real_t q_acc_1_0 = 0.0; + real_t q_acc_1_1 = 0.0; + real_t q_acc_1_2 = 0.0; + real_t q_acc_1_3 = 0.0; + real_t q_acc_1_4 = 0.0; + real_t q_acc_1_5 = 0.0; + real_t q_acc_1_6 = 0.0; + real_t q_acc_1_7 = 0.0; + real_t q_acc_1_8 = 0.0; + real_t q_acc_1_9 = 0.0; + real_t q_acc_1_10 = 0.0; + real_t q_acc_1_11 = 0.0; + real_t q_acc_1_12 = 0.0; + real_t q_acc_1_13 = 0.0; + real_t q_acc_1_14 = 0.0; + real_t q_acc_1_15 = 0.0; + real_t q_acc_1_16 = 0.0; + real_t q_acc_1_17 = 0.0; + real_t q_acc_1_18 = 0.0; + real_t q_acc_1_19 = 0.0; + real_t q_acc_1_20 = 0.0; + real_t q_acc_1_21 = 0.0; + real_t q_acc_1_22 = 0.0; + real_t q_acc_1_23 = 0.0; + real_t q_acc_1_24 = 0.0; + real_t q_acc_1_25 = 0.0; + real_t q_acc_1_26 = 0.0; + real_t q_acc_1_27 = 0.0; + real_t q_acc_1_28 = 0.0; + real_t q_acc_1_29 = 0.0; + real_t q_acc_2_0 = 0.0; + real_t q_acc_2_1 = 0.0; + real_t q_acc_2_2 = 0.0; + real_t q_acc_2_3 = 0.0; + real_t q_acc_2_4 = 0.0; + real_t q_acc_2_5 = 0.0; + real_t q_acc_2_6 = 0.0; + real_t q_acc_2_7 = 0.0; + real_t q_acc_2_8 = 0.0; + real_t q_acc_2_9 = 0.0; + real_t q_acc_2_10 = 0.0; + real_t q_acc_2_11 = 0.0; + real_t q_acc_2_12 = 0.0; + real_t q_acc_2_13 = 0.0; + real_t q_acc_2_14 = 0.0; + real_t q_acc_2_15 = 0.0; + real_t q_acc_2_16 = 0.0; + real_t q_acc_2_17 = 0.0; + real_t q_acc_2_18 = 0.0; + real_t q_acc_2_19 = 0.0; + real_t q_acc_2_20 = 0.0; + real_t q_acc_2_21 = 0.0; + real_t q_acc_2_22 = 0.0; + real_t q_acc_2_23 = 0.0; + real_t q_acc_2_24 = 0.0; + real_t q_acc_2_25 = 0.0; + real_t q_acc_2_26 = 0.0; + real_t q_acc_2_27 = 0.0; + real_t q_acc_2_28 = 0.0; + real_t q_acc_2_29 = 0.0; + real_t q_acc_3_0 = 0.0; + real_t q_acc_3_1 = 0.0; + real_t q_acc_3_2 = 0.0; + real_t q_acc_3_3 = 0.0; + real_t q_acc_3_4 = 0.0; + real_t q_acc_3_5 = 0.0; + real_t q_acc_3_6 = 0.0; + real_t q_acc_3_7 = 0.0; + real_t q_acc_3_8 = 0.0; + real_t q_acc_3_9 = 0.0; + real_t q_acc_3_10 = 0.0; + real_t q_acc_3_11 = 0.0; + real_t q_acc_3_12 = 0.0; + real_t q_acc_3_13 = 0.0; + real_t q_acc_3_14 = 0.0; + real_t q_acc_3_15 = 0.0; + real_t q_acc_3_16 = 0.0; + real_t q_acc_3_17 = 0.0; + real_t q_acc_3_18 = 0.0; + real_t q_acc_3_19 = 0.0; + real_t q_acc_3_20 = 0.0; + real_t q_acc_3_21 = 0.0; + real_t q_acc_3_22 = 0.0; + real_t q_acc_3_23 = 0.0; + real_t q_acc_3_24 = 0.0; + real_t q_acc_3_25 = 0.0; + real_t q_acc_3_26 = 0.0; + real_t q_acc_3_27 = 0.0; + real_t q_acc_3_28 = 0.0; + real_t q_acc_3_29 = 0.0; + for (int64_t q = 0; q < 5; q += 1) + { + const real_t tmp_qloop_18 = p_affine_0_0 + (-p_affine_0_0 + p_affine_1_0)*_data_q_p_0[q] + (-p_affine_0_0 + p_affine_2_0)*_data_q_p_1[q] + (-p_affine_0_0 + p_affine_3_0)*_data_q_p_2[q]; + const real_t tmp_qloop_19 = (tmp_qloop_18*tmp_qloop_18); + const real_t tmp_qloop_22 = p_affine_0_2 + (-p_affine_0_2 + p_affine_1_2)*_data_q_p_0[q] + (-p_affine_0_2 + p_affine_2_2)*_data_q_p_1[q] + (-p_affine_0_2 + p_affine_3_2)*_data_q_p_2[q]; + const real_t tmp_qloop_23 = -tmp_qloop_12 - tmp_qloop_22; + const real_t tmp_qloop_24 = -tmp_qloop_1 - tmp_qloop_18; + const real_t tmp_qloop_25 = p_affine_0_1 + (-p_affine_0_1 + p_affine_1_1)*_data_q_p_0[q] + (-p_affine_0_1 + p_affine_2_1)*_data_q_p_1[q] + (-p_affine_0_1 + p_affine_3_1)*_data_q_p_2[q]; + const real_t tmp_qloop_26 = -tmp_qloop_25 - tmp_qloop_8; + const real_t tmp_qloop_27 = tmp_qloop_21*(tmp_qloop_10*tmp_qloop_23*tmp_qloop_9 + tmp_qloop_13*tmp_qloop_24*tmp_qloop_6 - tmp_qloop_14*tmp_qloop_24 - tmp_qloop_15*tmp_qloop_26 + tmp_qloop_2*tmp_qloop_26*tmp_qloop_3 - tmp_qloop_23*tmp_qloop_7); + const real_t tmp_qloop_28 = tmp_qloop_20 - tmp_qloop_27; + const real_t tmp_qloop_29 = (tmp_qloop_25*tmp_qloop_25); + const real_t tmp_qloop_30 = (tmp_qloop_22*tmp_qloop_22); + const real_t tmp_qloop_31 = tmp_qloop_19 + tmp_qloop_29 + tmp_qloop_30; + const real_t tmp_qloop_34 = tmp_qloop_18*tmp_qloop_33; + const real_t tmp_qloop_35 = tmp_qloop_28 + tmp_qloop_34; + const real_t tmp_qloop_36 = tmp_qloop_17*(-tmp_qloop_19*tmp_qloop_28 + tmp_qloop_31*tmp_qloop_35); + const real_t tmp_qloop_37 = pow(tmp_qloop_31, -1.5000000000000000); + const real_t tmp_qloop_38 = tmp_qloop_37*1.0; + const real_t tmp_qloop_41 = tmp_qloop_31*tmp_qloop_40; + const real_t tmp_qloop_42 = -tmp_qloop_25*tmp_qloop_28 - tmp_qloop_41; + const real_t tmp_qloop_43 = tmp_qloop_17*tmp_qloop_38; + const real_t tmp_qloop_44 = tmp_qloop_42*tmp_qloop_43; + const real_t tmp_qloop_47 = -tmp_qloop_28; + const real_t tmp_qloop_48 = tmp_qloop_22*tmp_qloop_47; + const real_t tmp_qloop_49 = tmp_qloop_31*tmp_qloop_46 + tmp_qloop_48; + const real_t tmp_qloop_50 = tmp_qloop_43*tmp_qloop_49; + const real_t tmp_qloop_51 = tmp_qloop_18*tmp_qloop_47 + tmp_qloop_31*tmp_qloop_33; + const real_t tmp_qloop_52 = tmp_qloop_43*tmp_qloop_51; + const real_t tmp_qloop_54 = tmp_qloop_25*tmp_qloop_53 + tmp_qloop_28; + const real_t tmp_qloop_55 = tmp_qloop_29*tmp_qloop_47 + tmp_qloop_31*tmp_qloop_54; + const real_t tmp_qloop_56 = tmp_qloop_22*tmp_qloop_46; + const real_t tmp_qloop_57 = tmp_qloop_28 + tmp_qloop_56; + const real_t tmp_qloop_58 = tmp_qloop_30*tmp_qloop_47 + tmp_qloop_31*tmp_qloop_57; + const real_t tmp_qloop_66 = pow(tmp_qloop_31, -2.5000000000000000)*3.0; + const real_t tmp_qloop_67 = tmp_qloop_36*tmp_qloop_66; + const real_t tmp_qloop_69 = tmp_qloop_18*2.0; + const real_t tmp_qloop_70 = -tmp_qloop_41; + const real_t tmp_qloop_71 = tmp_qloop_35*2.0; + const real_t tmp_qloop_73 = -tmp_qloop_31*tmp_qloop_72; + const real_t tmp_qloop_74 = -tmp_qloop_20 + tmp_qloop_27; + const real_t tmp_qloop_75 = tmp_qloop_18*tmp_qloop_68 + tmp_qloop_34*2.0 + tmp_qloop_74; + const real_t tmp_qloop_76 = tmp_qloop_17*tmp_qloop_66; + const real_t tmp_qloop_77 = tmp_qloop_51*tmp_qloop_76; + const real_t tmp_qloop_78 = tmp_qloop_25*tmp_qloop_77; + const real_t tmp_qloop_79 = tmp_qloop_25*2.0; + const real_t tmp_qloop_80 = tmp_qloop_18*tmp_qloop_40 + tmp_qloop_33*tmp_qloop_79; + const real_t tmp_qloop_81 = tmp_qloop_25*tmp_qloop_43; + const real_t tmp_qloop_82 = tmp_qloop_22*2.0; + const real_t tmp_qloop_83 = tmp_qloop_18*tmp_qloop_72 + tmp_qloop_33*tmp_qloop_82; + const real_t tmp_qloop_84 = -tmp_qloop_22*tmp_qloop_78; + const real_t tmp_qloop_85 = tmp_qloop_18*tmp_qloop_22; + const real_t tmp_qloop_86 = tmp_qloop_22*tmp_qloop_43; + const real_t tmp_qloop_87 = tmp_qloop_21*tmp_qloop_25*tmp_qloop_32 - tmp_qloop_40*tmp_qloop_69; + const real_t tmp_qloop_88 = tmp_qloop_18*tmp_qloop_43; + const real_t tmp_qloop_89 = tmp_qloop_42*tmp_qloop_76; + const real_t tmp_qloop_90 = -tmp_qloop_25*tmp_qloop_40 - tmp_qloop_28; + const real_t tmp_qloop_91 = tmp_qloop_25*tmp_qloop_89; + const real_t tmp_qloop_92 = tmp_qloop_25*tmp_qloop_72 - tmp_qloop_40*tmp_qloop_82; + const real_t tmp_qloop_93 = tmp_qloop_85*tmp_qloop_89; + const real_t tmp_qloop_94 = -tmp_qloop_31*tmp_qloop_68; + const real_t tmp_qloop_95 = tmp_qloop_55*tmp_qloop_76; + const real_t tmp_qloop_96 = tmp_qloop_22*tmp_qloop_68 + tmp_qloop_46*tmp_qloop_69; + const real_t tmp_qloop_97 = tmp_qloop_49*tmp_qloop_76; + const real_t tmp_qloop_98 = tmp_qloop_22*tmp_qloop_40 + tmp_qloop_46*tmp_qloop_79; + const real_t tmp_qloop_99 = tmp_qloop_25*tmp_qloop_97; + const real_t tmp_qloop_100 = tmp_qloop_18*tmp_qloop_99; + const real_t tmp_qloop_101 = tmp_qloop_22*tmp_qloop_72 + tmp_qloop_56*2.0 + tmp_qloop_74; + const real_t tmp_qloop_102 = tmp_qloop_58*tmp_qloop_76; + const real_t tmp_qloop_103 = (_data_q_p_0[q]*_data_q_p_0[q]); + const real_t tmp_qloop_104 = tmp_qloop_103*2.0; + const real_t tmp_qloop_105 = (_data_q_p_1[q]*_data_q_p_1[q]); + const real_t tmp_qloop_106 = tmp_qloop_105*2.0; + const real_t tmp_qloop_107 = (_data_q_p_2[q]*_data_q_p_2[q]); + const real_t tmp_qloop_108 = tmp_qloop_107*2.0; + const real_t tmp_qloop_109 = 4.0*_data_q_p_1[q]; + const real_t tmp_qloop_110 = tmp_qloop_109*_data_q_p_2[q]; + const real_t tmp_qloop_111 = 4.0*_data_q_p_0[q]; + const real_t tmp_qloop_112 = tmp_qloop_111*_data_q_p_1[q]; + const real_t tmp_qloop_113 = tmp_qloop_111*_data_q_p_2[q]; + const real_t tmp_qloop_114 = tmp_qloop_112 + tmp_qloop_113; + const real_t tmp_qloop_115 = tmp_qloop_104 + tmp_qloop_106 + tmp_qloop_108 + tmp_qloop_110 + tmp_qloop_114 - 3.0*_data_q_p_0[q] - 3.0*_data_q_p_1[q] - 3.0*_data_q_p_2[q] + 1.0; + const real_t tmp_qloop_116 = 4.0*_data_q_p_2[q]; + const real_t tmp_qloop_117 = tmp_qloop_109 + tmp_qloop_116; + const real_t tmp_qloop_118 = -rho_dof_8*tmp_qloop_109; + const real_t tmp_qloop_119 = rho_dof_0*(tmp_qloop_111 + tmp_qloop_117 - 3.0); + const real_t tmp_qloop_120 = -rho_dof_7*tmp_qloop_116 + tmp_qloop_119; + const real_t tmp_qloop_121 = rho_dof_1*(tmp_qloop_111 - 1.0) + rho_dof_5*tmp_qloop_116 + rho_dof_6*tmp_qloop_109 + rho_dof_9*(-tmp_qloop_117 - 8.0*_data_q_p_0[q] + 4.0) + tmp_qloop_118 + tmp_qloop_120; + const real_t tmp_qloop_122 = tmp_qloop_111 - 4.0; + const real_t tmp_qloop_123 = -rho_dof_9*tmp_qloop_111; + const real_t tmp_qloop_124 = rho_dof_2*(tmp_qloop_109 - 1.0) + rho_dof_4*tmp_qloop_116 + rho_dof_6*tmp_qloop_111 + rho_dof_8*(-tmp_qloop_116 - tmp_qloop_122 - 8.0*_data_q_p_1[q]) + tmp_qloop_120 + tmp_qloop_123; + const real_t tmp_qloop_125 = rho_dof_3*(tmp_qloop_116 - 1.0) + rho_dof_4*tmp_qloop_109 + rho_dof_5*tmp_qloop_111 + rho_dof_7*(-tmp_qloop_109 - tmp_qloop_122 - 8.0*_data_q_p_2[q]) + tmp_qloop_118 + tmp_qloop_119 + tmp_qloop_123; + const real_t tmp_qloop_127 = tmp_qloop_104 - _data_q_p_0[q]; + const real_t tmp_qloop_128 = tmp_qloop_106 - _data_q_p_1[q]; + const real_t tmp_qloop_129 = tmp_qloop_108 - _data_q_p_2[q]; + const real_t tmp_qloop_130 = tmp_qloop_107*-4.0 - tmp_qloop_110 - tmp_qloop_113 + tmp_qloop_116; + const real_t tmp_qloop_131 = tmp_qloop_105*4.0; + const real_t tmp_qloop_132 = tmp_qloop_109 - tmp_qloop_110 - tmp_qloop_112 - tmp_qloop_131; + const real_t tmp_qloop_133 = tmp_qloop_103*4.0; + const real_t tmp_qloop_134 = tmp_qloop_111 - tmp_qloop_114 - tmp_qloop_133; + const real_t tmp_qloop_158 = tmp_qloop_105*tmp_qloop_111; + const real_t jac_blending_0_0 = tmp_qloop_36*tmp_qloop_38; + const real_t jac_blending_0_1 = tmp_qloop_18*tmp_qloop_44; + const real_t jac_blending_0_2 = tmp_qloop_18*tmp_qloop_50; + const real_t jac_blending_1_0 = tmp_qloop_25*tmp_qloop_52; + const real_t jac_blending_1_1 = tmp_qloop_43*tmp_qloop_55; + const real_t tmp_qloop_63 = jac_blending_0_2*jac_blending_1_1; + const real_t jac_blending_1_2 = tmp_qloop_25*tmp_qloop_50; + const real_t tmp_qloop_60 = jac_blending_0_1*jac_blending_1_2; + const real_t jac_blending_2_0 = tmp_qloop_22*tmp_qloop_52; + const real_t jac_blending_2_1 = tmp_qloop_22*tmp_qloop_44; + const real_t tmp_qloop_61 = jac_blending_1_2*jac_blending_2_1; + const real_t jac_blending_2_2 = tmp_qloop_43*tmp_qloop_58; + const real_t tmp_qloop_59 = jac_blending_1_1*jac_blending_2_2; + const real_t tmp_qloop_62 = jac_blending_0_1*jac_blending_2_2; + const real_t tmp_qloop_64 = jac_blending_0_0*tmp_qloop_59 - jac_blending_0_0*tmp_qloop_61 + jac_blending_0_2*jac_blending_1_0*jac_blending_2_1 - jac_blending_1_0*tmp_qloop_62 + jac_blending_2_0*tmp_qloop_60 - jac_blending_2_0*tmp_qloop_63; + const real_t tmp_qloop_65 = 1.0 / (tmp_qloop_64); + const real_t abs_det_jac_blending = tmp_qloop_64; + const real_t tmp_qloop_135 = abs_det_jac_affine_WHITE_DOWN*abs_det_jac_blending*1.0 / (rho_dof_0*tmp_qloop_115 + rho_dof_1*tmp_qloop_127 + rho_dof_2*tmp_qloop_128 + rho_dof_3*tmp_qloop_129 + rho_dof_4*tmp_qloop_110 + rho_dof_5*tmp_qloop_113 + rho_dof_6*tmp_qloop_112 + rho_dof_7*tmp_qloop_130 + rho_dof_8*tmp_qloop_132 + rho_dof_9*tmp_qloop_134)*_data_q_w[q]; + const real_t tmp_qloop_136 = tmp_qloop_135*(1.0 - _data_q_p_0[q] - _data_q_p_1[q] - _data_q_p_2[q]); + const real_t tmp_qloop_142 = tmp_qloop_135*_data_q_p_0[q]; + const real_t tmp_qloop_144 = tmp_qloop_135*_data_q_p_2[q]; + const real_t tmp_qloop_147 = tmp_qloop_135*_data_q_p_1[q]; + const real_t tmp_qloop_159 = tmp_qloop_135*tmp_qloop_158; + const real_t tmp_qloop_161 = tmp_qloop_107*tmp_qloop_135; + const real_t jac_blending_inv_0_0 = tmp_qloop_65*(tmp_qloop_59 - tmp_qloop_61); + const real_t jac_blending_inv_0_1 = tmp_qloop_65*(jac_blending_0_2*jac_blending_2_1 - tmp_qloop_62); + const real_t jac_blending_inv_0_2 = tmp_qloop_65*(tmp_qloop_60 - tmp_qloop_63); + const real_t jac_blending_inv_1_0 = tmp_qloop_65*(-jac_blending_1_0*jac_blending_2_2 + jac_blending_1_2*jac_blending_2_0); + const real_t jac_blending_inv_1_1 = tmp_qloop_65*(jac_blending_0_0*jac_blending_2_2 - jac_blending_0_2*jac_blending_2_0); + const real_t jac_blending_inv_1_2 = tmp_qloop_65*(-jac_blending_0_0*jac_blending_1_2 + jac_blending_0_2*jac_blending_1_0); + const real_t jac_blending_inv_2_0 = tmp_qloop_65*(jac_blending_1_0*jac_blending_2_1 - jac_blending_1_1*jac_blending_2_0); + const real_t tmp_qloop_126 = tmp_qloop_121*(jac_affine_inv_0_0_WHITE_DOWN*jac_blending_inv_0_0 + jac_affine_inv_0_1_WHITE_DOWN*jac_blending_inv_1_0 + jac_affine_inv_0_2_WHITE_DOWN*jac_blending_inv_2_0) + tmp_qloop_124*(jac_affine_inv_1_0_WHITE_DOWN*jac_blending_inv_0_0 + jac_affine_inv_1_1_WHITE_DOWN*jac_blending_inv_1_0 + jac_affine_inv_1_2_WHITE_DOWN*jac_blending_inv_2_0) + tmp_qloop_125*(jac_affine_inv_2_0_WHITE_DOWN*jac_blending_inv_0_0 + jac_affine_inv_2_1_WHITE_DOWN*jac_blending_inv_1_0 + jac_affine_inv_2_2_WHITE_DOWN*jac_blending_inv_2_0); + const real_t tmp_qloop_137 = tmp_qloop_126*tmp_qloop_136; + const real_t tmp_qloop_143 = tmp_qloop_126*tmp_qloop_142; + const real_t tmp_qloop_145 = tmp_qloop_126*tmp_qloop_144; + const real_t tmp_qloop_146 = tmp_qloop_112*tmp_qloop_145; + const real_t tmp_qloop_148 = tmp_qloop_126*tmp_qloop_147; + const real_t tmp_qloop_157 = tmp_qloop_126*tmp_qloop_135; + const real_t tmp_qloop_160 = tmp_qloop_107*tmp_qloop_157; + const real_t jac_blending_inv_2_1 = tmp_qloop_65*(-jac_blending_0_0*jac_blending_2_1 + jac_blending_0_1*jac_blending_2_0); + const real_t tmp_qloop_138 = tmp_qloop_121*(jac_affine_inv_0_0_WHITE_DOWN*jac_blending_inv_0_1 + jac_affine_inv_0_1_WHITE_DOWN*jac_blending_inv_1_1 + jac_affine_inv_0_2_WHITE_DOWN*jac_blending_inv_2_1) + tmp_qloop_124*(jac_affine_inv_1_0_WHITE_DOWN*jac_blending_inv_0_1 + jac_affine_inv_1_1_WHITE_DOWN*jac_blending_inv_1_1 + jac_affine_inv_1_2_WHITE_DOWN*jac_blending_inv_2_1) + tmp_qloop_125*(jac_affine_inv_2_0_WHITE_DOWN*jac_blending_inv_0_1 + jac_affine_inv_2_1_WHITE_DOWN*jac_blending_inv_1_1 + jac_affine_inv_2_2_WHITE_DOWN*jac_blending_inv_2_1); + const real_t tmp_qloop_139 = tmp_qloop_136*tmp_qloop_138; + const real_t tmp_qloop_149 = tmp_qloop_138*tmp_qloop_142; + const real_t tmp_qloop_150 = tmp_qloop_138*tmp_qloop_144; + const real_t tmp_qloop_151 = tmp_qloop_112*tmp_qloop_150; + const real_t tmp_qloop_152 = tmp_qloop_138*tmp_qloop_147; + const real_t tmp_qloop_162 = tmp_qloop_138*tmp_qloop_161; + const real_t jac_blending_inv_2_2 = tmp_qloop_65*(jac_blending_0_0*jac_blending_1_1 - jac_blending_0_1*jac_blending_1_0); + const real_t tmp_qloop_140 = tmp_qloop_121*(jac_affine_inv_0_0_WHITE_DOWN*jac_blending_inv_0_2 + jac_affine_inv_0_1_WHITE_DOWN*jac_blending_inv_1_2 + jac_affine_inv_0_2_WHITE_DOWN*jac_blending_inv_2_2) + tmp_qloop_124*(jac_affine_inv_1_0_WHITE_DOWN*jac_blending_inv_0_2 + jac_affine_inv_1_1_WHITE_DOWN*jac_blending_inv_1_2 + jac_affine_inv_1_2_WHITE_DOWN*jac_blending_inv_2_2) + tmp_qloop_125*(jac_affine_inv_2_0_WHITE_DOWN*jac_blending_inv_0_2 + jac_affine_inv_2_1_WHITE_DOWN*jac_blending_inv_1_2 + jac_affine_inv_2_2_WHITE_DOWN*jac_blending_inv_2_2); + const real_t tmp_qloop_141 = tmp_qloop_136*tmp_qloop_140; + const real_t tmp_qloop_153 = tmp_qloop_140*tmp_qloop_142; + const real_t tmp_qloop_154 = tmp_qloop_140*tmp_qloop_144; + const real_t tmp_qloop_155 = tmp_qloop_112*tmp_qloop_154; + const real_t tmp_qloop_156 = tmp_qloop_140*tmp_qloop_147; + const real_t tmp_qloop_163 = tmp_qloop_140*tmp_qloop_161; + const real_t hessian_blending_0_0_0 = tmp_qloop_17*tmp_qloop_37*(tmp_qloop_19*tmp_qloop_68 - tmp_qloop_28*tmp_qloop_69 + tmp_qloop_31*(tmp_qloop_33 - tmp_qloop_68) + tmp_qloop_35*tmp_qloop_69)*1.0 - tmp_qloop_18*tmp_qloop_67; + const real_t hessian_blending_1_0_0 = tmp_qloop_17*tmp_qloop_37*(tmp_qloop_19*tmp_qloop_40 + tmp_qloop_25*tmp_qloop_71 + tmp_qloop_70)*1.0 - tmp_qloop_25*tmp_qloop_67; + const real_t hessian_blending_2_0_0 = tmp_qloop_17*tmp_qloop_37*(tmp_qloop_19*tmp_qloop_72 + tmp_qloop_22*tmp_qloop_71 + tmp_qloop_73)*1.0 - tmp_qloop_22*tmp_qloop_67; + const real_t hessian_blending_0_0_1 = tmp_qloop_17*tmp_qloop_25*tmp_qloop_37*tmp_qloop_75*1.0 - tmp_qloop_18*tmp_qloop_78; + const real_t hessian_blending_1_0_1 = -tmp_qloop_29*tmp_qloop_77 + tmp_qloop_52 + tmp_qloop_80*tmp_qloop_81; + const real_t hessian_blending_2_0_1 = tmp_qloop_81*tmp_qloop_83 + tmp_qloop_84; + const real_t hessian_blending_0_0_2 = tmp_qloop_17*tmp_qloop_22*tmp_qloop_37*tmp_qloop_75*1.0 - tmp_qloop_77*tmp_qloop_85; + const real_t hessian_blending_1_0_2 = tmp_qloop_80*tmp_qloop_86 + tmp_qloop_84; + const real_t hessian_blending_2_0_2 = -tmp_qloop_30*tmp_qloop_77 + tmp_qloop_52 + tmp_qloop_83*tmp_qloop_86; + const real_t hessian_blending_0_1_0 = -tmp_qloop_19*tmp_qloop_89 + tmp_qloop_44 + tmp_qloop_87*tmp_qloop_88; + const real_t hessian_blending_1_1_0 = tmp_qloop_17*tmp_qloop_18*tmp_qloop_37*tmp_qloop_90*1.0 - tmp_qloop_18*tmp_qloop_91; + const real_t hessian_blending_2_1_0 = tmp_qloop_88*tmp_qloop_92 - tmp_qloop_93; + const real_t hessian_blending_0_1_1 = tmp_qloop_17*tmp_qloop_37*(tmp_qloop_29*tmp_qloop_68 + tmp_qloop_54*tmp_qloop_69 + tmp_qloop_94)*1.0 - tmp_qloop_18*tmp_qloop_95; + const real_t hessian_blending_1_1_1 = tmp_qloop_17*tmp_qloop_37*(tmp_qloop_29*tmp_qloop_40 + tmp_qloop_31*(-tmp_qloop_40 + tmp_qloop_53) + tmp_qloop_47*tmp_qloop_79 + tmp_qloop_54*tmp_qloop_79)*1.0 - tmp_qloop_25*tmp_qloop_95; + const real_t hessian_blending_2_1_1 = tmp_qloop_17*tmp_qloop_37*(tmp_qloop_29*tmp_qloop_72 + tmp_qloop_54*tmp_qloop_82 + tmp_qloop_73)*1.0 - tmp_qloop_22*tmp_qloop_95; + const real_t hessian_blending_0_1_2 = tmp_qloop_17*tmp_qloop_22*tmp_qloop_37*tmp_qloop_87*1.0 - tmp_qloop_93; + const real_t hessian_blending_1_1_2 = tmp_qloop_17*tmp_qloop_22*tmp_qloop_37*tmp_qloop_90*1.0 - tmp_qloop_22*tmp_qloop_91; + const real_t hessian_blending_2_1_2 = -tmp_qloop_30*tmp_qloop_89 + tmp_qloop_44 + tmp_qloop_86*tmp_qloop_92; + const real_t hessian_blending_0_2_0 = -tmp_qloop_19*tmp_qloop_97 + tmp_qloop_50 + tmp_qloop_88*tmp_qloop_96; + const real_t hessian_blending_1_2_0 = -tmp_qloop_100 + tmp_qloop_17*tmp_qloop_18*tmp_qloop_37*tmp_qloop_98*1.0; + const real_t hessian_blending_2_2_0 = tmp_qloop_101*tmp_qloop_17*tmp_qloop_18*tmp_qloop_37*1.0 - tmp_qloop_85*tmp_qloop_97; + const real_t hessian_blending_0_2_1 = -tmp_qloop_100 + tmp_qloop_17*tmp_qloop_25*tmp_qloop_37*tmp_qloop_96*1.0; + const real_t hessian_blending_1_2_1 = -tmp_qloop_29*tmp_qloop_97 + tmp_qloop_50 + tmp_qloop_81*tmp_qloop_98; + const real_t hessian_blending_2_2_1 = tmp_qloop_101*tmp_qloop_17*tmp_qloop_25*tmp_qloop_37*1.0 - tmp_qloop_22*tmp_qloop_99; + const real_t hessian_blending_0_2_2 = -tmp_qloop_102*tmp_qloop_18 + tmp_qloop_17*tmp_qloop_37*(tmp_qloop_30*tmp_qloop_68 + tmp_qloop_57*tmp_qloop_69 + tmp_qloop_94)*1.0; + const real_t hessian_blending_1_2_2 = -tmp_qloop_102*tmp_qloop_25 + tmp_qloop_17*tmp_qloop_37*(tmp_qloop_30*tmp_qloop_40 + tmp_qloop_57*tmp_qloop_79 + tmp_qloop_70)*1.0; + const real_t hessian_blending_2_2_2 = -tmp_qloop_102*tmp_qloop_22 + tmp_qloop_17*tmp_qloop_37*(tmp_qloop_30*tmp_qloop_72 + tmp_qloop_31*(tmp_qloop_46 - tmp_qloop_72) + tmp_qloop_48*2.0 + tmp_qloop_57*tmp_qloop_82)*1.0; + const real_t q_tmp_0_0 = tmp_qloop_115*tmp_qloop_137; + const real_t q_tmp_0_1 = tmp_qloop_127*tmp_qloop_137; + const real_t q_tmp_0_2 = tmp_qloop_128*tmp_qloop_137; + const real_t q_tmp_0_3 = tmp_qloop_129*tmp_qloop_137; + const real_t q_tmp_0_4 = tmp_qloop_110*tmp_qloop_137; + const real_t q_tmp_0_5 = tmp_qloop_113*tmp_qloop_137; + const real_t q_tmp_0_6 = tmp_qloop_112*tmp_qloop_137; + const real_t q_tmp_0_7 = tmp_qloop_130*tmp_qloop_137; + const real_t q_tmp_0_8 = tmp_qloop_132*tmp_qloop_137; + const real_t q_tmp_0_9 = tmp_qloop_134*tmp_qloop_137; + const real_t q_tmp_0_10 = tmp_qloop_115*tmp_qloop_139; + const real_t q_tmp_0_11 = tmp_qloop_127*tmp_qloop_139; + const real_t q_tmp_0_12 = tmp_qloop_128*tmp_qloop_139; + const real_t q_tmp_0_13 = tmp_qloop_129*tmp_qloop_139; + const real_t q_tmp_0_14 = tmp_qloop_110*tmp_qloop_139; + const real_t q_tmp_0_15 = tmp_qloop_113*tmp_qloop_139; + const real_t q_tmp_0_16 = tmp_qloop_112*tmp_qloop_139; + const real_t q_tmp_0_17 = tmp_qloop_130*tmp_qloop_139; + const real_t q_tmp_0_18 = tmp_qloop_132*tmp_qloop_139; + const real_t q_tmp_0_19 = tmp_qloop_134*tmp_qloop_139; + const real_t q_tmp_0_20 = tmp_qloop_115*tmp_qloop_141; + const real_t q_tmp_0_21 = tmp_qloop_127*tmp_qloop_141; + const real_t q_tmp_0_22 = tmp_qloop_128*tmp_qloop_141; + const real_t q_tmp_0_23 = tmp_qloop_129*tmp_qloop_141; + const real_t q_tmp_0_24 = tmp_qloop_110*tmp_qloop_141; + const real_t q_tmp_0_25 = tmp_qloop_113*tmp_qloop_141; + const real_t q_tmp_0_26 = tmp_qloop_112*tmp_qloop_141; + const real_t q_tmp_0_27 = tmp_qloop_130*tmp_qloop_141; + const real_t q_tmp_0_28 = tmp_qloop_132*tmp_qloop_141; + const real_t q_tmp_0_29 = tmp_qloop_134*tmp_qloop_141; + const real_t q_tmp_1_0 = tmp_qloop_115*tmp_qloop_143; + const real_t q_tmp_1_1 = tmp_qloop_127*tmp_qloop_143; + const real_t q_tmp_1_2 = tmp_qloop_128*tmp_qloop_143; + const real_t q_tmp_1_3 = tmp_qloop_129*tmp_qloop_143; + const real_t q_tmp_1_4 = tmp_qloop_146; + const real_t q_tmp_1_5 = tmp_qloop_133*tmp_qloop_145; + const real_t q_tmp_1_6 = tmp_qloop_133*tmp_qloop_148; + const real_t q_tmp_1_7 = tmp_qloop_130*tmp_qloop_143; + const real_t q_tmp_1_8 = tmp_qloop_132*tmp_qloop_143; + const real_t q_tmp_1_9 = tmp_qloop_134*tmp_qloop_143; + const real_t q_tmp_1_10 = tmp_qloop_115*tmp_qloop_149; + const real_t q_tmp_1_11 = tmp_qloop_127*tmp_qloop_149; + const real_t q_tmp_1_12 = tmp_qloop_128*tmp_qloop_149; + const real_t q_tmp_1_13 = tmp_qloop_129*tmp_qloop_149; + const real_t q_tmp_1_14 = tmp_qloop_151; + const real_t q_tmp_1_15 = tmp_qloop_133*tmp_qloop_150; + const real_t q_tmp_1_16 = tmp_qloop_133*tmp_qloop_152; + const real_t q_tmp_1_17 = tmp_qloop_130*tmp_qloop_149; + const real_t q_tmp_1_18 = tmp_qloop_132*tmp_qloop_149; + const real_t q_tmp_1_19 = tmp_qloop_134*tmp_qloop_149; + const real_t q_tmp_1_20 = tmp_qloop_115*tmp_qloop_153; + const real_t q_tmp_1_21 = tmp_qloop_127*tmp_qloop_153; + const real_t q_tmp_1_22 = tmp_qloop_128*tmp_qloop_153; + const real_t q_tmp_1_23 = tmp_qloop_129*tmp_qloop_153; + const real_t q_tmp_1_24 = tmp_qloop_155; + const real_t q_tmp_1_25 = tmp_qloop_133*tmp_qloop_154; + const real_t q_tmp_1_26 = tmp_qloop_133*tmp_qloop_156; + const real_t q_tmp_1_27 = tmp_qloop_130*tmp_qloop_153; + const real_t q_tmp_1_28 = tmp_qloop_132*tmp_qloop_153; + const real_t q_tmp_1_29 = tmp_qloop_134*tmp_qloop_153; + const real_t q_tmp_2_0 = tmp_qloop_115*tmp_qloop_148; + const real_t q_tmp_2_1 = tmp_qloop_127*tmp_qloop_148; + const real_t q_tmp_2_2 = tmp_qloop_128*tmp_qloop_148; + const real_t q_tmp_2_3 = tmp_qloop_129*tmp_qloop_148; + const real_t q_tmp_2_4 = tmp_qloop_131*tmp_qloop_145; + const real_t q_tmp_2_5 = tmp_qloop_146; + const real_t q_tmp_2_6 = tmp_qloop_157*tmp_qloop_158; + const real_t q_tmp_2_7 = tmp_qloop_130*tmp_qloop_148; + const real_t q_tmp_2_8 = tmp_qloop_132*tmp_qloop_148; + const real_t q_tmp_2_9 = tmp_qloop_134*tmp_qloop_148; + const real_t q_tmp_2_10 = tmp_qloop_115*tmp_qloop_152; + const real_t q_tmp_2_11 = tmp_qloop_127*tmp_qloop_152; + const real_t q_tmp_2_12 = tmp_qloop_128*tmp_qloop_152; + const real_t q_tmp_2_13 = tmp_qloop_129*tmp_qloop_152; + const real_t q_tmp_2_14 = tmp_qloop_131*tmp_qloop_150; + const real_t q_tmp_2_15 = tmp_qloop_151; + const real_t q_tmp_2_16 = tmp_qloop_138*tmp_qloop_159; + const real_t q_tmp_2_17 = tmp_qloop_130*tmp_qloop_152; + const real_t q_tmp_2_18 = tmp_qloop_132*tmp_qloop_152; + const real_t q_tmp_2_19 = tmp_qloop_134*tmp_qloop_152; + const real_t q_tmp_2_20 = tmp_qloop_115*tmp_qloop_156; + const real_t q_tmp_2_21 = tmp_qloop_127*tmp_qloop_156; + const real_t q_tmp_2_22 = tmp_qloop_128*tmp_qloop_156; + const real_t q_tmp_2_23 = tmp_qloop_129*tmp_qloop_156; + const real_t q_tmp_2_24 = tmp_qloop_131*tmp_qloop_154; + const real_t q_tmp_2_25 = tmp_qloop_155; + const real_t q_tmp_2_26 = tmp_qloop_140*tmp_qloop_159; + const real_t q_tmp_2_27 = tmp_qloop_130*tmp_qloop_156; + const real_t q_tmp_2_28 = tmp_qloop_132*tmp_qloop_156; + const real_t q_tmp_2_29 = tmp_qloop_134*tmp_qloop_156; + const real_t q_tmp_3_0 = tmp_qloop_115*tmp_qloop_145; + const real_t q_tmp_3_1 = tmp_qloop_127*tmp_qloop_145; + const real_t q_tmp_3_2 = tmp_qloop_128*tmp_qloop_145; + const real_t q_tmp_3_3 = tmp_qloop_129*tmp_qloop_145; + const real_t q_tmp_3_4 = tmp_qloop_109*tmp_qloop_160; + const real_t q_tmp_3_5 = tmp_qloop_111*tmp_qloop_160; + const real_t q_tmp_3_6 = tmp_qloop_146; + const real_t q_tmp_3_7 = tmp_qloop_130*tmp_qloop_145; + const real_t q_tmp_3_8 = tmp_qloop_132*tmp_qloop_145; + const real_t q_tmp_3_9 = tmp_qloop_134*tmp_qloop_145; + const real_t q_tmp_3_10 = tmp_qloop_115*tmp_qloop_150; + const real_t q_tmp_3_11 = tmp_qloop_127*tmp_qloop_150; + const real_t q_tmp_3_12 = tmp_qloop_128*tmp_qloop_150; + const real_t q_tmp_3_13 = tmp_qloop_129*tmp_qloop_150; + const real_t q_tmp_3_14 = tmp_qloop_109*tmp_qloop_162; + const real_t q_tmp_3_15 = tmp_qloop_111*tmp_qloop_162; + const real_t q_tmp_3_16 = tmp_qloop_151; + const real_t q_tmp_3_17 = tmp_qloop_130*tmp_qloop_150; + const real_t q_tmp_3_18 = tmp_qloop_132*tmp_qloop_150; + const real_t q_tmp_3_19 = tmp_qloop_134*tmp_qloop_150; + const real_t q_tmp_3_20 = tmp_qloop_115*tmp_qloop_154; + const real_t q_tmp_3_21 = tmp_qloop_127*tmp_qloop_154; + const real_t q_tmp_3_22 = tmp_qloop_128*tmp_qloop_154; + const real_t q_tmp_3_23 = tmp_qloop_129*tmp_qloop_154; + const real_t q_tmp_3_24 = tmp_qloop_109*tmp_qloop_163; + const real_t q_tmp_3_25 = tmp_qloop_111*tmp_qloop_163; + const real_t q_tmp_3_26 = tmp_qloop_155; + const real_t q_tmp_3_27 = tmp_qloop_130*tmp_qloop_154; + const real_t q_tmp_3_28 = tmp_qloop_132*tmp_qloop_154; + const real_t q_tmp_3_29 = tmp_qloop_134*tmp_qloop_154; + q_acc_0_0 = q_acc_0_0 + q_tmp_0_0; + q_acc_0_1 = q_acc_0_1 + q_tmp_0_1; + q_acc_0_2 = q_acc_0_2 + q_tmp_0_2; + q_acc_0_3 = q_acc_0_3 + q_tmp_0_3; + q_acc_0_4 = q_acc_0_4 + q_tmp_0_4; + q_acc_0_5 = q_acc_0_5 + q_tmp_0_5; + q_acc_0_6 = q_acc_0_6 + q_tmp_0_6; + q_acc_0_7 = q_acc_0_7 + q_tmp_0_7; + q_acc_0_8 = q_acc_0_8 + q_tmp_0_8; + q_acc_0_9 = q_acc_0_9 + q_tmp_0_9; + q_acc_0_10 = q_acc_0_10 + q_tmp_0_10; + q_acc_0_11 = q_acc_0_11 + q_tmp_0_11; + q_acc_0_12 = q_acc_0_12 + q_tmp_0_12; + q_acc_0_13 = q_acc_0_13 + q_tmp_0_13; + q_acc_0_14 = q_acc_0_14 + q_tmp_0_14; + q_acc_0_15 = q_acc_0_15 + q_tmp_0_15; + q_acc_0_16 = q_acc_0_16 + q_tmp_0_16; + q_acc_0_17 = q_acc_0_17 + q_tmp_0_17; + q_acc_0_18 = q_acc_0_18 + q_tmp_0_18; + q_acc_0_19 = q_acc_0_19 + q_tmp_0_19; + q_acc_0_20 = q_acc_0_20 + q_tmp_0_20; + q_acc_0_21 = q_acc_0_21 + q_tmp_0_21; + q_acc_0_22 = q_acc_0_22 + q_tmp_0_22; + q_acc_0_23 = q_acc_0_23 + q_tmp_0_23; + q_acc_0_24 = q_acc_0_24 + q_tmp_0_24; + q_acc_0_25 = q_acc_0_25 + q_tmp_0_25; + q_acc_0_26 = q_acc_0_26 + q_tmp_0_26; + q_acc_0_27 = q_acc_0_27 + q_tmp_0_27; + q_acc_0_28 = q_acc_0_28 + q_tmp_0_28; + q_acc_0_29 = q_acc_0_29 + q_tmp_0_29; + q_acc_1_0 = q_acc_1_0 + q_tmp_1_0; + q_acc_1_1 = q_acc_1_1 + q_tmp_1_1; + q_acc_1_2 = q_acc_1_2 + q_tmp_1_2; + q_acc_1_3 = q_acc_1_3 + q_tmp_1_3; + q_acc_1_4 = q_acc_1_4 + q_tmp_1_4; + q_acc_1_5 = q_acc_1_5 + q_tmp_1_5; + q_acc_1_6 = q_acc_1_6 + q_tmp_1_6; + q_acc_1_7 = q_acc_1_7 + q_tmp_1_7; + q_acc_1_8 = q_acc_1_8 + q_tmp_1_8; + q_acc_1_9 = q_acc_1_9 + q_tmp_1_9; + q_acc_1_10 = q_acc_1_10 + q_tmp_1_10; + q_acc_1_11 = q_acc_1_11 + q_tmp_1_11; + q_acc_1_12 = q_acc_1_12 + q_tmp_1_12; + q_acc_1_13 = q_acc_1_13 + q_tmp_1_13; + q_acc_1_14 = q_acc_1_14 + q_tmp_1_14; + q_acc_1_15 = q_acc_1_15 + q_tmp_1_15; + q_acc_1_16 = q_acc_1_16 + q_tmp_1_16; + q_acc_1_17 = q_acc_1_17 + q_tmp_1_17; + q_acc_1_18 = q_acc_1_18 + q_tmp_1_18; + q_acc_1_19 = q_acc_1_19 + q_tmp_1_19; + q_acc_1_20 = q_acc_1_20 + q_tmp_1_20; + q_acc_1_21 = q_acc_1_21 + q_tmp_1_21; + q_acc_1_22 = q_acc_1_22 + q_tmp_1_22; + q_acc_1_23 = q_acc_1_23 + q_tmp_1_23; + q_acc_1_24 = q_acc_1_24 + q_tmp_1_24; + q_acc_1_25 = q_acc_1_25 + q_tmp_1_25; + q_acc_1_26 = q_acc_1_26 + q_tmp_1_26; + q_acc_1_27 = q_acc_1_27 + q_tmp_1_27; + q_acc_1_28 = q_acc_1_28 + q_tmp_1_28; + q_acc_1_29 = q_acc_1_29 + q_tmp_1_29; + q_acc_2_0 = q_acc_2_0 + q_tmp_2_0; + q_acc_2_1 = q_acc_2_1 + q_tmp_2_1; + q_acc_2_2 = q_acc_2_2 + q_tmp_2_2; + q_acc_2_3 = q_acc_2_3 + q_tmp_2_3; + q_acc_2_4 = q_acc_2_4 + q_tmp_2_4; + q_acc_2_5 = q_acc_2_5 + q_tmp_2_5; + q_acc_2_6 = q_acc_2_6 + q_tmp_2_6; + q_acc_2_7 = q_acc_2_7 + q_tmp_2_7; + q_acc_2_8 = q_acc_2_8 + q_tmp_2_8; + q_acc_2_9 = q_acc_2_9 + q_tmp_2_9; + q_acc_2_10 = q_acc_2_10 + q_tmp_2_10; + q_acc_2_11 = q_acc_2_11 + q_tmp_2_11; + q_acc_2_12 = q_acc_2_12 + q_tmp_2_12; + q_acc_2_13 = q_acc_2_13 + q_tmp_2_13; + q_acc_2_14 = q_acc_2_14 + q_tmp_2_14; + q_acc_2_15 = q_acc_2_15 + q_tmp_2_15; + q_acc_2_16 = q_acc_2_16 + q_tmp_2_16; + q_acc_2_17 = q_acc_2_17 + q_tmp_2_17; + q_acc_2_18 = q_acc_2_18 + q_tmp_2_18; + q_acc_2_19 = q_acc_2_19 + q_tmp_2_19; + q_acc_2_20 = q_acc_2_20 + q_tmp_2_20; + q_acc_2_21 = q_acc_2_21 + q_tmp_2_21; + q_acc_2_22 = q_acc_2_22 + q_tmp_2_22; + q_acc_2_23 = q_acc_2_23 + q_tmp_2_23; + q_acc_2_24 = q_acc_2_24 + q_tmp_2_24; + q_acc_2_25 = q_acc_2_25 + q_tmp_2_25; + q_acc_2_26 = q_acc_2_26 + q_tmp_2_26; + q_acc_2_27 = q_acc_2_27 + q_tmp_2_27; + q_acc_2_28 = q_acc_2_28 + q_tmp_2_28; + q_acc_2_29 = q_acc_2_29 + q_tmp_2_29; + q_acc_3_0 = q_acc_3_0 + q_tmp_3_0; + q_acc_3_1 = q_acc_3_1 + q_tmp_3_1; + q_acc_3_2 = q_acc_3_2 + q_tmp_3_2; + q_acc_3_3 = q_acc_3_3 + q_tmp_3_3; + q_acc_3_4 = q_acc_3_4 + q_tmp_3_4; + q_acc_3_5 = q_acc_3_5 + q_tmp_3_5; + q_acc_3_6 = q_acc_3_6 + q_tmp_3_6; + q_acc_3_7 = q_acc_3_7 + q_tmp_3_7; + q_acc_3_8 = q_acc_3_8 + q_tmp_3_8; + q_acc_3_9 = q_acc_3_9 + q_tmp_3_9; + q_acc_3_10 = q_acc_3_10 + q_tmp_3_10; + q_acc_3_11 = q_acc_3_11 + q_tmp_3_11; + q_acc_3_12 = q_acc_3_12 + q_tmp_3_12; + q_acc_3_13 = q_acc_3_13 + q_tmp_3_13; + q_acc_3_14 = q_acc_3_14 + q_tmp_3_14; + q_acc_3_15 = q_acc_3_15 + q_tmp_3_15; + q_acc_3_16 = q_acc_3_16 + q_tmp_3_16; + q_acc_3_17 = q_acc_3_17 + q_tmp_3_17; + q_acc_3_18 = q_acc_3_18 + q_tmp_3_18; + q_acc_3_19 = q_acc_3_19 + q_tmp_3_19; + q_acc_3_20 = q_acc_3_20 + q_tmp_3_20; + q_acc_3_21 = q_acc_3_21 + q_tmp_3_21; + q_acc_3_22 = q_acc_3_22 + q_tmp_3_22; + q_acc_3_23 = q_acc_3_23 + q_tmp_3_23; + q_acc_3_24 = q_acc_3_24 + q_tmp_3_24; + q_acc_3_25 = q_acc_3_25 + q_tmp_3_25; + q_acc_3_26 = q_acc_3_26 + q_tmp_3_26; + q_acc_3_27 = q_acc_3_27 + q_tmp_3_27; + q_acc_3_28 = q_acc_3_28 + q_tmp_3_28; + q_acc_3_29 = q_acc_3_29 + q_tmp_3_29; + } + const real_t elMatVec_0 = q_acc_0_0*src_dof_0 + q_acc_0_1*src_dof_1 + q_acc_0_10*src_dof_10 + q_acc_0_11*src_dof_11 + q_acc_0_12*src_dof_12 + q_acc_0_13*src_dof_13 + q_acc_0_14*src_dof_14 + q_acc_0_15*src_dof_15 + q_acc_0_16*src_dof_16 + q_acc_0_17*src_dof_17 + q_acc_0_18*src_dof_18 + q_acc_0_19*src_dof_19 + q_acc_0_2*src_dof_2 + q_acc_0_20*src_dof_20 + q_acc_0_21*src_dof_21 + q_acc_0_22*src_dof_22 + q_acc_0_23*src_dof_23 + q_acc_0_24*src_dof_24 + q_acc_0_25*src_dof_25 + q_acc_0_26*src_dof_26 + q_acc_0_27*src_dof_27 + q_acc_0_28*src_dof_28 + q_acc_0_29*src_dof_29 + q_acc_0_3*src_dof_3 + q_acc_0_4*src_dof_4 + q_acc_0_5*src_dof_5 + q_acc_0_6*src_dof_6 + q_acc_0_7*src_dof_7 + q_acc_0_8*src_dof_8 + q_acc_0_9*src_dof_9; + const real_t elMatVec_1 = q_acc_1_0*src_dof_0 + q_acc_1_1*src_dof_1 + q_acc_1_10*src_dof_10 + q_acc_1_11*src_dof_11 + q_acc_1_12*src_dof_12 + q_acc_1_13*src_dof_13 + q_acc_1_14*src_dof_14 + q_acc_1_15*src_dof_15 + q_acc_1_16*src_dof_16 + q_acc_1_17*src_dof_17 + q_acc_1_18*src_dof_18 + q_acc_1_19*src_dof_19 + q_acc_1_2*src_dof_2 + q_acc_1_20*src_dof_20 + q_acc_1_21*src_dof_21 + q_acc_1_22*src_dof_22 + q_acc_1_23*src_dof_23 + q_acc_1_24*src_dof_24 + q_acc_1_25*src_dof_25 + q_acc_1_26*src_dof_26 + q_acc_1_27*src_dof_27 + q_acc_1_28*src_dof_28 + q_acc_1_29*src_dof_29 + q_acc_1_3*src_dof_3 + q_acc_1_4*src_dof_4 + q_acc_1_5*src_dof_5 + q_acc_1_6*src_dof_6 + q_acc_1_7*src_dof_7 + q_acc_1_8*src_dof_8 + q_acc_1_9*src_dof_9; + const real_t elMatVec_2 = q_acc_2_0*src_dof_0 + q_acc_2_1*src_dof_1 + q_acc_2_10*src_dof_10 + q_acc_2_11*src_dof_11 + q_acc_2_12*src_dof_12 + q_acc_2_13*src_dof_13 + q_acc_2_14*src_dof_14 + q_acc_2_15*src_dof_15 + q_acc_2_16*src_dof_16 + q_acc_2_17*src_dof_17 + q_acc_2_18*src_dof_18 + q_acc_2_19*src_dof_19 + q_acc_2_2*src_dof_2 + q_acc_2_20*src_dof_20 + q_acc_2_21*src_dof_21 + q_acc_2_22*src_dof_22 + q_acc_2_23*src_dof_23 + q_acc_2_24*src_dof_24 + q_acc_2_25*src_dof_25 + q_acc_2_26*src_dof_26 + q_acc_2_27*src_dof_27 + q_acc_2_28*src_dof_28 + q_acc_2_29*src_dof_29 + q_acc_2_3*src_dof_3 + q_acc_2_4*src_dof_4 + q_acc_2_5*src_dof_5 + q_acc_2_6*src_dof_6 + q_acc_2_7*src_dof_7 + q_acc_2_8*src_dof_8 + q_acc_2_9*src_dof_9; + const real_t elMatVec_3 = q_acc_3_0*src_dof_0 + q_acc_3_1*src_dof_1 + q_acc_3_10*src_dof_10 + q_acc_3_11*src_dof_11 + q_acc_3_12*src_dof_12 + q_acc_3_13*src_dof_13 + q_acc_3_14*src_dof_14 + q_acc_3_15*src_dof_15 + q_acc_3_16*src_dof_16 + q_acc_3_17*src_dof_17 + q_acc_3_18*src_dof_18 + q_acc_3_19*src_dof_19 + q_acc_3_2*src_dof_2 + q_acc_3_20*src_dof_20 + q_acc_3_21*src_dof_21 + q_acc_3_22*src_dof_22 + q_acc_3_23*src_dof_23 + q_acc_3_24*src_dof_24 + q_acc_3_25*src_dof_25 + q_acc_3_26*src_dof_26 + q_acc_3_27*src_dof_27 + q_acc_3_28*src_dof_28 + q_acc_3_29*src_dof_29 + q_acc_3_3*src_dof_3 + q_acc_3_4*src_dof_4 + q_acc_3_5*src_dof_5 + q_acc_3_6*src_dof_6 + q_acc_3_7*src_dof_7 + q_acc_3_8*src_dof_8 + q_acc_3_9*src_dof_9; + _data_dst[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1] = elMatVec_0 + _data_dst[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1]; + _data_dst[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1] = elMatVec_1 + _data_dst[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1]; + _data_dst[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))] = elMatVec_2 + _data_dst[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))]; + _data_dst[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1] = elMatVec_3 + _data_dst[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1]; + } + } + } + const real_t tmp_coords_jac_0_BLUE_UP = 1.0 / (micro_edges_per_macro_edge_float)*1.0; + const real_t tmp_coords_jac_1_BLUE_UP = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_BLUE_UP*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0); + const real_t tmp_coords_jac_2_BLUE_UP = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_BLUE_UP*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1); + const real_t tmp_coords_jac_3_BLUE_UP = macro_vertex_coord_id_0comp2 + tmp_coords_jac_0_BLUE_UP*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2); + const real_t tmp_coords_jac_4_BLUE_UP = tmp_coords_jac_0_BLUE_UP*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0); + const real_t tmp_coords_jac_5_BLUE_UP = tmp_coords_jac_0_BLUE_UP*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1); + const real_t tmp_coords_jac_6_BLUE_UP = tmp_coords_jac_0_BLUE_UP*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2); + const real_t p_affine_const_0_0_BLUE_UP = tmp_coords_jac_1_BLUE_UP; + const real_t p_affine_const_0_1_BLUE_UP = tmp_coords_jac_2_BLUE_UP; + const real_t p_affine_const_0_2_BLUE_UP = tmp_coords_jac_3_BLUE_UP; + const real_t p_affine_const_1_0_BLUE_UP = macro_vertex_coord_id_0comp0 + tmp_coords_jac_4_BLUE_UP; + const real_t p_affine_const_1_1_BLUE_UP = macro_vertex_coord_id_0comp1 + tmp_coords_jac_5_BLUE_UP; + const real_t p_affine_const_1_2_BLUE_UP = macro_vertex_coord_id_0comp2 + tmp_coords_jac_6_BLUE_UP; + const real_t p_affine_const_2_0_BLUE_UP = tmp_coords_jac_1_BLUE_UP + tmp_coords_jac_4_BLUE_UP; + const real_t p_affine_const_2_1_BLUE_UP = tmp_coords_jac_2_BLUE_UP + tmp_coords_jac_5_BLUE_UP; + const real_t p_affine_const_2_2_BLUE_UP = tmp_coords_jac_3_BLUE_UP + tmp_coords_jac_6_BLUE_UP; + const real_t p_affine_const_3_0_BLUE_UP = tmp_coords_jac_0_BLUE_UP*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0) + tmp_coords_jac_1_BLUE_UP; + const real_t p_affine_const_3_1_BLUE_UP = tmp_coords_jac_0_BLUE_UP*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1) + tmp_coords_jac_2_BLUE_UP; + const real_t p_affine_const_3_2_BLUE_UP = tmp_coords_jac_0_BLUE_UP*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2) + tmp_coords_jac_3_BLUE_UP; + const real_t jac_affine_0_0_BLUE_UP = -p_affine_const_0_0_BLUE_UP + p_affine_const_1_0_BLUE_UP; + const real_t jac_affine_0_1_BLUE_UP = -p_affine_const_0_0_BLUE_UP + p_affine_const_2_0_BLUE_UP; + const real_t jac_affine_0_2_BLUE_UP = -p_affine_const_0_0_BLUE_UP + p_affine_const_3_0_BLUE_UP; + const real_t jac_affine_1_0_BLUE_UP = -p_affine_const_0_1_BLUE_UP + p_affine_const_1_1_BLUE_UP; + const real_t jac_affine_1_1_BLUE_UP = -p_affine_const_0_1_BLUE_UP + p_affine_const_2_1_BLUE_UP; + const real_t tmp_coords_jac_11_BLUE_UP = jac_affine_0_2_BLUE_UP*jac_affine_1_1_BLUE_UP; + const real_t jac_affine_1_2_BLUE_UP = -p_affine_const_0_1_BLUE_UP + p_affine_const_3_1_BLUE_UP; + const real_t tmp_coords_jac_9_BLUE_UP = jac_affine_0_1_BLUE_UP*jac_affine_1_2_BLUE_UP; + const real_t jac_affine_2_0_BLUE_UP = -p_affine_const_0_2_BLUE_UP + p_affine_const_1_2_BLUE_UP; + const real_t jac_affine_2_1_BLUE_UP = -p_affine_const_0_2_BLUE_UP + p_affine_const_2_2_BLUE_UP; + const real_t tmp_coords_jac_8_BLUE_UP = jac_affine_1_2_BLUE_UP*jac_affine_2_1_BLUE_UP; + const real_t jac_affine_2_2_BLUE_UP = -p_affine_const_0_2_BLUE_UP + p_affine_const_3_2_BLUE_UP; + const real_t tmp_coords_jac_7_BLUE_UP = jac_affine_1_1_BLUE_UP*jac_affine_2_2_BLUE_UP; + const real_t tmp_coords_jac_10_BLUE_UP = jac_affine_0_1_BLUE_UP*jac_affine_2_2_BLUE_UP; + const real_t tmp_coords_jac_12_BLUE_UP = jac_affine_0_0_BLUE_UP*tmp_coords_jac_7_BLUE_UP - jac_affine_0_0_BLUE_UP*tmp_coords_jac_8_BLUE_UP + jac_affine_0_2_BLUE_UP*jac_affine_1_0_BLUE_UP*jac_affine_2_1_BLUE_UP - jac_affine_1_0_BLUE_UP*tmp_coords_jac_10_BLUE_UP - jac_affine_2_0_BLUE_UP*tmp_coords_jac_11_BLUE_UP + jac_affine_2_0_BLUE_UP*tmp_coords_jac_9_BLUE_UP; + const real_t tmp_coords_jac_13_BLUE_UP = 1.0 / (tmp_coords_jac_12_BLUE_UP); + const real_t jac_affine_inv_0_0_BLUE_UP = tmp_coords_jac_13_BLUE_UP*(tmp_coords_jac_7_BLUE_UP - tmp_coords_jac_8_BLUE_UP); + const real_t jac_affine_inv_0_1_BLUE_UP = tmp_coords_jac_13_BLUE_UP*(jac_affine_0_2_BLUE_UP*jac_affine_2_1_BLUE_UP - tmp_coords_jac_10_BLUE_UP); + const real_t jac_affine_inv_0_2_BLUE_UP = tmp_coords_jac_13_BLUE_UP*(-tmp_coords_jac_11_BLUE_UP + tmp_coords_jac_9_BLUE_UP); + const real_t jac_affine_inv_1_0_BLUE_UP = tmp_coords_jac_13_BLUE_UP*(-jac_affine_1_0_BLUE_UP*jac_affine_2_2_BLUE_UP + jac_affine_1_2_BLUE_UP*jac_affine_2_0_BLUE_UP); + const real_t jac_affine_inv_1_1_BLUE_UP = tmp_coords_jac_13_BLUE_UP*(jac_affine_0_0_BLUE_UP*jac_affine_2_2_BLUE_UP - jac_affine_0_2_BLUE_UP*jac_affine_2_0_BLUE_UP); + const real_t jac_affine_inv_1_2_BLUE_UP = tmp_coords_jac_13_BLUE_UP*(-jac_affine_0_0_BLUE_UP*jac_affine_1_2_BLUE_UP + jac_affine_0_2_BLUE_UP*jac_affine_1_0_BLUE_UP); + const real_t jac_affine_inv_2_0_BLUE_UP = tmp_coords_jac_13_BLUE_UP*(jac_affine_1_0_BLUE_UP*jac_affine_2_1_BLUE_UP - jac_affine_1_1_BLUE_UP*jac_affine_2_0_BLUE_UP); + const real_t jac_affine_inv_2_1_BLUE_UP = tmp_coords_jac_13_BLUE_UP*(-jac_affine_0_0_BLUE_UP*jac_affine_2_1_BLUE_UP + jac_affine_0_1_BLUE_UP*jac_affine_2_0_BLUE_UP); + const real_t jac_affine_inv_2_2_BLUE_UP = tmp_coords_jac_13_BLUE_UP*(jac_affine_0_0_BLUE_UP*jac_affine_1_1_BLUE_UP - jac_affine_0_1_BLUE_UP*jac_affine_1_0_BLUE_UP); + const real_t abs_det_jac_affine_BLUE_UP = abs(tmp_coords_jac_12_BLUE_UP); + { + /* CellType.BLUE_UP */ + for (int64_t ctr_2 = 0; ctr_2 < micro_edges_per_macro_edge; ctr_2 += 1) + for (int64_t ctr_1 = 0; ctr_1 < -ctr_2 + micro_edges_per_macro_edge; ctr_1 += 1) + { + for (int64_t ctr_0 = 0; ctr_0 < (int64_t)((-ctr_1 - ctr_2 + micro_edges_per_macro_edge - 1) / (4)) * (4); ctr_0 += 4) + { + + const int64_t phantom_ctr_0 = ctr_0; + real_t _data_float_loop_ctr_array_dim_0[4]; + _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0; + _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1; + _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2; + _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3; + real_t _data_float_loop_ctr_array_dim_1[4]; + _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1; + real_t _data_float_loop_ctr_array_dim_2[4]; + _data_float_loop_ctr_array_dim_2[0] = (real_t) ctr_2; + _data_float_loop_ctr_array_dim_2[1] = (real_t) ctr_2; + _data_float_loop_ctr_array_dim_2[2] = (real_t) ctr_2; + _data_float_loop_ctr_array_dim_2[3] = (real_t) ctr_2; + + const __m256d p_affine_0_0 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_3comp0,macro_vertex_coord_id_3comp0,macro_vertex_coord_id_3comp0,macro_vertex_coord_id_3comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)); + const __m256d p_affine_0_1 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_3comp1,macro_vertex_coord_id_3comp1,macro_vertex_coord_id_3comp1,macro_vertex_coord_id_3comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)); + const __m256d p_affine_0_2 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)),_mm256_set_pd(macro_vertex_coord_id_1comp2,macro_vertex_coord_id_1comp2,macro_vertex_coord_id_1comp2,macro_vertex_coord_id_1comp2)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)),_mm256_set_pd(macro_vertex_coord_id_2comp2,macro_vertex_coord_id_2comp2,macro_vertex_coord_id_2comp2,macro_vertex_coord_id_2comp2)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)),_mm256_set_pd(macro_vertex_coord_id_3comp2,macro_vertex_coord_id_3comp2,macro_vertex_coord_id_3comp2,macro_vertex_coord_id_3comp2)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)); + const __m256d p_affine_1_0 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_3comp0,macro_vertex_coord_id_3comp0,macro_vertex_coord_id_3comp0,macro_vertex_coord_id_3comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)); + const __m256d p_affine_1_1 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_3comp1,macro_vertex_coord_id_3comp1,macro_vertex_coord_id_3comp1,macro_vertex_coord_id_3comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)); + const __m256d p_affine_1_2 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)),_mm256_set_pd(macro_vertex_coord_id_1comp2,macro_vertex_coord_id_1comp2,macro_vertex_coord_id_1comp2,macro_vertex_coord_id_1comp2)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)),_mm256_set_pd(macro_vertex_coord_id_2comp2,macro_vertex_coord_id_2comp2,macro_vertex_coord_id_2comp2,macro_vertex_coord_id_2comp2)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)),_mm256_set_pd(macro_vertex_coord_id_3comp2,macro_vertex_coord_id_3comp2,macro_vertex_coord_id_3comp2,macro_vertex_coord_id_3comp2)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)); + const __m256d p_affine_2_0 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_3comp0,macro_vertex_coord_id_3comp0,macro_vertex_coord_id_3comp0,macro_vertex_coord_id_3comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)); + const __m256d p_affine_2_1 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_3comp1,macro_vertex_coord_id_3comp1,macro_vertex_coord_id_3comp1,macro_vertex_coord_id_3comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)); + const __m256d p_affine_2_2 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)),_mm256_set_pd(macro_vertex_coord_id_1comp2,macro_vertex_coord_id_1comp2,macro_vertex_coord_id_1comp2,macro_vertex_coord_id_1comp2)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)),_mm256_set_pd(macro_vertex_coord_id_2comp2,macro_vertex_coord_id_2comp2,macro_vertex_coord_id_2comp2,macro_vertex_coord_id_2comp2)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)),_mm256_set_pd(macro_vertex_coord_id_3comp2,macro_vertex_coord_id_3comp2,macro_vertex_coord_id_3comp2,macro_vertex_coord_id_3comp2)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)); + const __m256d p_affine_3_0 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_3comp0,macro_vertex_coord_id_3comp0,macro_vertex_coord_id_3comp0,macro_vertex_coord_id_3comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)); + const __m256d p_affine_3_1 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_3comp1,macro_vertex_coord_id_3comp1,macro_vertex_coord_id_3comp1,macro_vertex_coord_id_3comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)); + const __m256d p_affine_3_2 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)),_mm256_set_pd(macro_vertex_coord_id_1comp2,macro_vertex_coord_id_1comp2,macro_vertex_coord_id_1comp2,macro_vertex_coord_id_1comp2)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)),_mm256_set_pd(macro_vertex_coord_id_2comp2,macro_vertex_coord_id_2comp2,macro_vertex_coord_id_2comp2,macro_vertex_coord_id_2comp2)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)),_mm256_set_pd(macro_vertex_coord_id_3comp2,macro_vertex_coord_id_3comp2,macro_vertex_coord_id_3comp2,macro_vertex_coord_id_3comp2)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)); + const __m256d src_dof_0 = _mm256_loadu_pd(& _data_src_vertex_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1]); + const __m256d src_dof_1 = _mm256_loadu_pd(& _data_src_vertex_0[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))]); + const __m256d src_dof_2 = _mm256_loadu_pd(& _data_src_vertex_0[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1]); + const __m256d src_dof_3 = _mm256_loadu_pd(& _data_src_vertex_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1]); + const __m256d src_dof_4 = _mm256_loadu_pd(& _data_src_edge_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + 1]); + const __m256d src_dof_5 = _mm256_loadu_pd(& _data_src_edge_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge - 1)*(micro_edges_per_macro_edge + 1)) / (6)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))]); + const __m256d src_dof_6 = _mm256_loadu_pd(& _data_src_edge_0[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]); + const __m256d src_dof_7 = _mm256_loadu_pd(& _data_src_edge_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 3*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + 1]); + const __m256d src_dof_8 = _mm256_loadu_pd(& _data_src_edge_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + 1]); + const __m256d src_dof_9 = _mm256_loadu_pd(& _data_src_edge_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 4*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]); + const __m256d src_dof_10 = _mm256_loadu_pd(& _data_src_vertex_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1]); + const __m256d src_dof_11 = _mm256_loadu_pd(& _data_src_vertex_1[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))]); + const __m256d src_dof_12 = _mm256_loadu_pd(& _data_src_vertex_1[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1]); + const __m256d src_dof_13 = _mm256_loadu_pd(& _data_src_vertex_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1]); + const __m256d src_dof_14 = _mm256_loadu_pd(& _data_src_edge_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + 1]); + const __m256d src_dof_15 = _mm256_loadu_pd(& _data_src_edge_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge - 1)*(micro_edges_per_macro_edge + 1)) / (6)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))]); + const __m256d src_dof_16 = _mm256_loadu_pd(& _data_src_edge_1[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]); + const __m256d src_dof_17 = _mm256_loadu_pd(& _data_src_edge_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 3*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + 1]); + const __m256d src_dof_18 = _mm256_loadu_pd(& _data_src_edge_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + 1]); + const __m256d src_dof_19 = _mm256_loadu_pd(& _data_src_edge_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 4*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]); + const __m256d src_dof_20 = _mm256_loadu_pd(& _data_src_vertex_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1]); + const __m256d src_dof_21 = _mm256_loadu_pd(& _data_src_vertex_2[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))]); + const __m256d src_dof_22 = _mm256_loadu_pd(& _data_src_vertex_2[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1]); + const __m256d src_dof_23 = _mm256_loadu_pd(& _data_src_vertex_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1]); + const __m256d src_dof_24 = _mm256_loadu_pd(& _data_src_edge_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + 1]); + const __m256d src_dof_25 = _mm256_loadu_pd(& _data_src_edge_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge - 1)*(micro_edges_per_macro_edge + 1)) / (6)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))]); + const __m256d src_dof_26 = _mm256_loadu_pd(& _data_src_edge_2[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]); + const __m256d src_dof_27 = _mm256_loadu_pd(& _data_src_edge_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 3*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + 1]); + const __m256d src_dof_28 = _mm256_loadu_pd(& _data_src_edge_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + 1]); + const __m256d src_dof_29 = _mm256_loadu_pd(& _data_src_edge_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 4*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]); + const __m256d rho_dof_0 = _mm256_loadu_pd(& _data_rhoVertex[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1]); + const __m256d rho_dof_1 = _mm256_loadu_pd(& _data_rhoVertex[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))]); + const __m256d rho_dof_2 = _mm256_loadu_pd(& _data_rhoVertex[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1]); + const __m256d rho_dof_3 = _mm256_loadu_pd(& _data_rhoVertex[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1]); + const __m256d rho_dof_4 = _mm256_loadu_pd(& _data_rhoEdge[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + 1]); + const __m256d rho_dof_5 = _mm256_loadu_pd(& _data_rhoEdge[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge - 1)*(micro_edges_per_macro_edge + 1)) / (6)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))]); + const __m256d rho_dof_6 = _mm256_loadu_pd(& _data_rhoEdge[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]); + const __m256d rho_dof_7 = _mm256_loadu_pd(& _data_rhoEdge[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 3*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + 1]); + const __m256d rho_dof_8 = _mm256_loadu_pd(& _data_rhoEdge[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + 1]); + const __m256d rho_dof_9 = _mm256_loadu_pd(& _data_rhoEdge[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 4*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]); + __m256d q_acc_0_0 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_1 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_2 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_3 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_4 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_5 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_6 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_7 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_8 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_9 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_10 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_11 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_12 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_13 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_14 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_15 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_16 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_17 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_18 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_19 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_20 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_21 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_22 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_23 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_24 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_25 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_26 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_27 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_28 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_29 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_0 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_1 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_2 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_3 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_4 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_5 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_6 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_7 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_8 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_9 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_10 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_11 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_12 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_13 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_14 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_15 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_16 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_17 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_18 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_19 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_20 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_21 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_22 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_23 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_24 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_25 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_26 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_27 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_28 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_29 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_0 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_1 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_2 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_3 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_4 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_5 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_6 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_7 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_8 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_9 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_10 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_11 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_12 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_13 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_14 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_15 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_16 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_17 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_18 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_19 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_20 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_21 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_22 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_23 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_24 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_25 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_26 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_27 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_28 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_29 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_0 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_1 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_2 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_3 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_4 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_5 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_6 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_7 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_8 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_9 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_10 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_11 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_12 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_13 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_14 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_15 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_16 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_17 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_18 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_19 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_20 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_21 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_22 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_23 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_24 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_25 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_26 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_27 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_28 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_29 = _mm256_set_pd(0.0,0.0,0.0,0.0); + for (int64_t q = 0; q < 5; q += 1) + { + const __m256d tmp_qloop_18 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(p_affine_0_0,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),p_affine_1_0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(p_affine_0_0,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),p_affine_2_0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]))),_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(p_affine_0_0,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),p_affine_3_0),_mm256_set_pd(_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q]))),p_affine_0_0); + const __m256d tmp_qloop_19 = _mm256_mul_pd(tmp_qloop_18,tmp_qloop_18); + const __m256d tmp_qloop_22 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(p_affine_0_2,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),p_affine_1_2),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(p_affine_0_2,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),p_affine_2_2),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]))),_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(p_affine_0_2,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),p_affine_3_2),_mm256_set_pd(_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q]))),p_affine_0_2); + const __m256d tmp_qloop_23 = _mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(tmp_qloop_12,tmp_qloop_12,tmp_qloop_12,tmp_qloop_12)),_mm256_mul_pd(tmp_qloop_22,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))); + const __m256d tmp_qloop_24 = _mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(tmp_qloop_1,tmp_qloop_1,tmp_qloop_1,tmp_qloop_1)),_mm256_mul_pd(tmp_qloop_18,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))); + const __m256d tmp_qloop_25 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(p_affine_0_1,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),p_affine_1_1),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(p_affine_0_1,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),p_affine_2_1),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]))),_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(p_affine_0_1,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),p_affine_3_1),_mm256_set_pd(_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q]))),p_affine_0_1); + const __m256d tmp_qloop_26 = _mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(tmp_qloop_8,tmp_qloop_8,tmp_qloop_8,tmp_qloop_8)),_mm256_mul_pd(tmp_qloop_25,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))); + const __m256d tmp_qloop_27 = _mm256_mul_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_24,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_qloop_14,tmp_qloop_14,tmp_qloop_14,tmp_qloop_14)),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_26,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_qloop_15,tmp_qloop_15,tmp_qloop_15,tmp_qloop_15))),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_23,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_qloop_7,tmp_qloop_7,tmp_qloop_7,tmp_qloop_7))),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_23,_mm256_set_pd(tmp_qloop_10,tmp_qloop_10,tmp_qloop_10,tmp_qloop_10)),_mm256_set_pd(tmp_qloop_9,tmp_qloop_9,tmp_qloop_9,tmp_qloop_9))),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_24,_mm256_set_pd(tmp_qloop_13,tmp_qloop_13,tmp_qloop_13,tmp_qloop_13)),_mm256_set_pd(tmp_qloop_6,tmp_qloop_6,tmp_qloop_6,tmp_qloop_6))),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_26,_mm256_set_pd(tmp_qloop_2,tmp_qloop_2,tmp_qloop_2,tmp_qloop_2)),_mm256_set_pd(tmp_qloop_3,tmp_qloop_3,tmp_qloop_3,tmp_qloop_3))),_mm256_set_pd(tmp_qloop_21,tmp_qloop_21,tmp_qloop_21,tmp_qloop_21)); + const __m256d tmp_qloop_28 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_27,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_qloop_20,tmp_qloop_20,tmp_qloop_20,tmp_qloop_20)); + const __m256d tmp_qloop_29 = _mm256_mul_pd(tmp_qloop_25,tmp_qloop_25); + const __m256d tmp_qloop_30 = _mm256_mul_pd(tmp_qloop_22,tmp_qloop_22); + const __m256d tmp_qloop_31 = _mm256_add_pd(_mm256_add_pd(tmp_qloop_19,tmp_qloop_29),tmp_qloop_30); + const __m256d tmp_qloop_34 = _mm256_mul_pd(tmp_qloop_18,_mm256_set_pd(tmp_qloop_33,tmp_qloop_33,tmp_qloop_33,tmp_qloop_33)); + const __m256d tmp_qloop_35 = _mm256_add_pd(tmp_qloop_28,tmp_qloop_34); + const __m256d tmp_qloop_36 = _mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_31,tmp_qloop_35),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_19,tmp_qloop_28),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_set_pd(tmp_qloop_17,tmp_qloop_17,tmp_qloop_17,tmp_qloop_17)); + const __m256d tmp_qloop_37 = _mm256_div_pd(_mm256_sqrt_pd(tmp_qloop_31),_mm256_mul_pd(tmp_qloop_31,tmp_qloop_31)); + const __m256d tmp_qloop_38 = _mm256_mul_pd(tmp_qloop_37,_mm256_set_pd(1.0,1.0,1.0,1.0)); + const __m256d tmp_qloop_41 = _mm256_mul_pd(tmp_qloop_31,_mm256_set_pd(tmp_qloop_40,tmp_qloop_40,tmp_qloop_40,tmp_qloop_40)); + const __m256d tmp_qloop_42 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_41,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_25,tmp_qloop_28),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))); + const __m256d tmp_qloop_43 = _mm256_mul_pd(tmp_qloop_38,_mm256_set_pd(tmp_qloop_17,tmp_qloop_17,tmp_qloop_17,tmp_qloop_17)); + const __m256d tmp_qloop_44 = _mm256_mul_pd(tmp_qloop_42,tmp_qloop_43); + const __m256d tmp_qloop_47 = _mm256_mul_pd(tmp_qloop_28,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)); + const __m256d tmp_qloop_48 = _mm256_mul_pd(tmp_qloop_22,tmp_qloop_47); + const __m256d tmp_qloop_49 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_31,_mm256_set_pd(tmp_qloop_46,tmp_qloop_46,tmp_qloop_46,tmp_qloop_46)),tmp_qloop_48); + const __m256d tmp_qloop_50 = _mm256_mul_pd(tmp_qloop_43,tmp_qloop_49); + const __m256d tmp_qloop_51 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_31,_mm256_set_pd(tmp_qloop_33,tmp_qloop_33,tmp_qloop_33,tmp_qloop_33)),_mm256_mul_pd(tmp_qloop_18,tmp_qloop_47)); + const __m256d tmp_qloop_52 = _mm256_mul_pd(tmp_qloop_43,tmp_qloop_51); + const __m256d tmp_qloop_54 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_25,_mm256_set_pd(tmp_qloop_53,tmp_qloop_53,tmp_qloop_53,tmp_qloop_53)),tmp_qloop_28); + const __m256d tmp_qloop_55 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_29,tmp_qloop_47),_mm256_mul_pd(tmp_qloop_31,tmp_qloop_54)); + const __m256d tmp_qloop_56 = _mm256_mul_pd(tmp_qloop_22,_mm256_set_pd(tmp_qloop_46,tmp_qloop_46,tmp_qloop_46,tmp_qloop_46)); + const __m256d tmp_qloop_57 = _mm256_add_pd(tmp_qloop_28,tmp_qloop_56); + const __m256d tmp_qloop_58 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_30,tmp_qloop_47),_mm256_mul_pd(tmp_qloop_31,tmp_qloop_57)); + const __m256d tmp_qloop_66 = _mm256_mul_pd(_mm256_div_pd(_mm256_sqrt_pd(tmp_qloop_31),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_31,tmp_qloop_31),tmp_qloop_31)),_mm256_set_pd(3.0,3.0,3.0,3.0)); + const __m256d tmp_qloop_67 = _mm256_mul_pd(tmp_qloop_36,tmp_qloop_66); + const __m256d tmp_qloop_69 = _mm256_mul_pd(tmp_qloop_18,_mm256_set_pd(2.0,2.0,2.0,2.0)); + const __m256d tmp_qloop_70 = _mm256_mul_pd(tmp_qloop_41,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)); + const __m256d tmp_qloop_71 = _mm256_mul_pd(tmp_qloop_35,_mm256_set_pd(2.0,2.0,2.0,2.0)); + const __m256d tmp_qloop_73 = _mm256_mul_pd(_mm256_mul_pd(tmp_qloop_31,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_qloop_72,tmp_qloop_72,tmp_qloop_72,tmp_qloop_72)); + const __m256d tmp_qloop_74 = _mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(tmp_qloop_20,tmp_qloop_20,tmp_qloop_20,tmp_qloop_20)),tmp_qloop_27); + const __m256d tmp_qloop_75 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_34,_mm256_set_pd(2.0,2.0,2.0,2.0)),_mm256_mul_pd(tmp_qloop_18,_mm256_set_pd(tmp_qloop_68,tmp_qloop_68,tmp_qloop_68,tmp_qloop_68))),tmp_qloop_74); + const __m256d tmp_qloop_76 = _mm256_mul_pd(tmp_qloop_66,_mm256_set_pd(tmp_qloop_17,tmp_qloop_17,tmp_qloop_17,tmp_qloop_17)); + const __m256d tmp_qloop_77 = _mm256_mul_pd(tmp_qloop_51,tmp_qloop_76); + const __m256d tmp_qloop_78 = _mm256_mul_pd(tmp_qloop_25,tmp_qloop_77); + const __m256d tmp_qloop_79 = _mm256_mul_pd(tmp_qloop_25,_mm256_set_pd(2.0,2.0,2.0,2.0)); + const __m256d tmp_qloop_80 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_79,_mm256_set_pd(tmp_qloop_33,tmp_qloop_33,tmp_qloop_33,tmp_qloop_33)),_mm256_mul_pd(tmp_qloop_18,_mm256_set_pd(tmp_qloop_40,tmp_qloop_40,tmp_qloop_40,tmp_qloop_40))); + const __m256d tmp_qloop_81 = _mm256_mul_pd(tmp_qloop_25,tmp_qloop_43); + const __m256d tmp_qloop_82 = _mm256_mul_pd(tmp_qloop_22,_mm256_set_pd(2.0,2.0,2.0,2.0)); + const __m256d tmp_qloop_83 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_82,_mm256_set_pd(tmp_qloop_33,tmp_qloop_33,tmp_qloop_33,tmp_qloop_33)),_mm256_mul_pd(tmp_qloop_18,_mm256_set_pd(tmp_qloop_72,tmp_qloop_72,tmp_qloop_72,tmp_qloop_72))); + const __m256d tmp_qloop_84 = _mm256_mul_pd(_mm256_mul_pd(tmp_qloop_22,tmp_qloop_78),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)); + const __m256d tmp_qloop_85 = _mm256_mul_pd(tmp_qloop_18,tmp_qloop_22); + const __m256d tmp_qloop_86 = _mm256_mul_pd(tmp_qloop_22,tmp_qloop_43); + const __m256d tmp_qloop_87 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_69,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_qloop_40,tmp_qloop_40,tmp_qloop_40,tmp_qloop_40)),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_25,_mm256_set_pd(tmp_qloop_21,tmp_qloop_21,tmp_qloop_21,tmp_qloop_21)),_mm256_set_pd(tmp_qloop_32,tmp_qloop_32,tmp_qloop_32,tmp_qloop_32))); + const __m256d tmp_qloop_88 = _mm256_mul_pd(tmp_qloop_18,tmp_qloop_43); + const __m256d tmp_qloop_89 = _mm256_mul_pd(tmp_qloop_42,tmp_qloop_76); + const __m256d tmp_qloop_90 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_28,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_25,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_qloop_40,tmp_qloop_40,tmp_qloop_40,tmp_qloop_40))); + const __m256d tmp_qloop_91 = _mm256_mul_pd(tmp_qloop_25,tmp_qloop_89); + const __m256d tmp_qloop_92 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_25,_mm256_set_pd(tmp_qloop_72,tmp_qloop_72,tmp_qloop_72,tmp_qloop_72)),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_82,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_qloop_40,tmp_qloop_40,tmp_qloop_40,tmp_qloop_40))); + const __m256d tmp_qloop_93 = _mm256_mul_pd(tmp_qloop_85,tmp_qloop_89); + const __m256d tmp_qloop_94 = _mm256_mul_pd(_mm256_mul_pd(tmp_qloop_31,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_qloop_68,tmp_qloop_68,tmp_qloop_68,tmp_qloop_68)); + const __m256d tmp_qloop_95 = _mm256_mul_pd(tmp_qloop_55,tmp_qloop_76); + const __m256d tmp_qloop_96 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_69,_mm256_set_pd(tmp_qloop_46,tmp_qloop_46,tmp_qloop_46,tmp_qloop_46)),_mm256_mul_pd(tmp_qloop_22,_mm256_set_pd(tmp_qloop_68,tmp_qloop_68,tmp_qloop_68,tmp_qloop_68))); + const __m256d tmp_qloop_97 = _mm256_mul_pd(tmp_qloop_49,tmp_qloop_76); + const __m256d tmp_qloop_98 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_22,_mm256_set_pd(tmp_qloop_40,tmp_qloop_40,tmp_qloop_40,tmp_qloop_40)),_mm256_mul_pd(tmp_qloop_79,_mm256_set_pd(tmp_qloop_46,tmp_qloop_46,tmp_qloop_46,tmp_qloop_46))); + const __m256d tmp_qloop_99 = _mm256_mul_pd(tmp_qloop_25,tmp_qloop_97); + const __m256d tmp_qloop_100 = _mm256_mul_pd(tmp_qloop_18,tmp_qloop_99); + const __m256d tmp_qloop_101 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_56,_mm256_set_pd(2.0,2.0,2.0,2.0)),_mm256_mul_pd(tmp_qloop_22,_mm256_set_pd(tmp_qloop_72,tmp_qloop_72,tmp_qloop_72,tmp_qloop_72))),tmp_qloop_74); + const __m256d tmp_qloop_102 = _mm256_mul_pd(tmp_qloop_58,tmp_qloop_76); + const __m256d tmp_qloop_103 = _mm256_mul_pd(_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q]),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])); + const __m256d tmp_qloop_104 = _mm256_mul_pd(tmp_qloop_103,_mm256_set_pd(2.0,2.0,2.0,2.0)); + const __m256d tmp_qloop_105 = _mm256_mul_pd(_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q])); + const __m256d tmp_qloop_106 = _mm256_mul_pd(tmp_qloop_105,_mm256_set_pd(2.0,2.0,2.0,2.0)); + const __m256d tmp_qloop_107 = _mm256_mul_pd(_mm256_set_pd(_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q]),_mm256_set_pd(_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q])); + const __m256d tmp_qloop_108 = _mm256_mul_pd(tmp_qloop_107,_mm256_set_pd(2.0,2.0,2.0,2.0)); + const __m256d tmp_qloop_109 = _mm256_mul_pd(_mm256_set_pd(4.0,4.0,4.0,4.0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q])); + const __m256d tmp_qloop_110 = _mm256_mul_pd(tmp_qloop_109,_mm256_set_pd(_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q])); + const __m256d tmp_qloop_111 = _mm256_mul_pd(_mm256_set_pd(4.0,4.0,4.0,4.0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])); + const __m256d tmp_qloop_112 = _mm256_mul_pd(tmp_qloop_111,_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q])); + const __m256d tmp_qloop_113 = _mm256_mul_pd(tmp_qloop_111,_mm256_set_pd(_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q])); + const __m256d tmp_qloop_114 = _mm256_add_pd(tmp_qloop_112,tmp_qloop_113); + const __m256d tmp_qloop_115 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-3.0,-3.0,-3.0,-3.0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),_mm256_mul_pd(_mm256_set_pd(-3.0,-3.0,-3.0,-3.0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]))),_mm256_mul_pd(_mm256_set_pd(-3.0,-3.0,-3.0,-3.0),_mm256_set_pd(_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),tmp_qloop_104),tmp_qloop_106),tmp_qloop_108),tmp_qloop_110),tmp_qloop_114); + const __m256d tmp_qloop_116 = _mm256_mul_pd(_mm256_set_pd(4.0,4.0,4.0,4.0),_mm256_set_pd(_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q])); + const __m256d tmp_qloop_117 = _mm256_add_pd(tmp_qloop_109,tmp_qloop_116); + const __m256d tmp_qloop_118 = _mm256_mul_pd(_mm256_mul_pd(rho_dof_8,tmp_qloop_109),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)); + const __m256d tmp_qloop_119 = _mm256_mul_pd(rho_dof_0,_mm256_add_pd(_mm256_add_pd(_mm256_set_pd(-3.0,-3.0,-3.0,-3.0),tmp_qloop_111),tmp_qloop_117)); + const __m256d tmp_qloop_120 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(rho_dof_7,tmp_qloop_116),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),tmp_qloop_119); + const __m256d tmp_qloop_121 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(rho_dof_1,_mm256_add_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),tmp_qloop_111)),_mm256_mul_pd(rho_dof_9,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_117,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_set_pd(-8.0,-8.0,-8.0,-8.0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q]))),_mm256_set_pd(4.0,4.0,4.0,4.0)))),_mm256_mul_pd(rho_dof_5,tmp_qloop_116)),_mm256_mul_pd(rho_dof_6,tmp_qloop_109)),tmp_qloop_118),tmp_qloop_120); + const __m256d tmp_qloop_122 = _mm256_add_pd(_mm256_set_pd(-4.0,-4.0,-4.0,-4.0),tmp_qloop_111); + const __m256d tmp_qloop_123 = _mm256_mul_pd(_mm256_mul_pd(rho_dof_9,tmp_qloop_111),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)); + const __m256d tmp_qloop_124 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(rho_dof_2,_mm256_add_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),tmp_qloop_109)),_mm256_mul_pd(rho_dof_8,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_116,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_122,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_mul_pd(_mm256_set_pd(-8.0,-8.0,-8.0,-8.0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]))))),_mm256_mul_pd(rho_dof_4,tmp_qloop_116)),_mm256_mul_pd(rho_dof_6,tmp_qloop_111)),tmp_qloop_120),tmp_qloop_123); + const __m256d tmp_qloop_125 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(rho_dof_3,_mm256_add_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),tmp_qloop_116)),_mm256_mul_pd(rho_dof_7,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_109,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_122,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_mul_pd(_mm256_set_pd(-8.0,-8.0,-8.0,-8.0),_mm256_set_pd(_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q]))))),_mm256_mul_pd(rho_dof_4,tmp_qloop_109)),_mm256_mul_pd(rho_dof_5,tmp_qloop_111)),tmp_qloop_118),tmp_qloop_119),tmp_qloop_123); + const __m256d tmp_qloop_127 = _mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),tmp_qloop_104); + const __m256d tmp_qloop_128 = _mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q])),tmp_qloop_106); + const __m256d tmp_qloop_129 = _mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q])),tmp_qloop_108); + const __m256d tmp_qloop_130 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_110,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_113,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_mul_pd(tmp_qloop_107,_mm256_set_pd(-4.0,-4.0,-4.0,-4.0))),tmp_qloop_116); + const __m256d tmp_qloop_131 = _mm256_mul_pd(tmp_qloop_105,_mm256_set_pd(4.0,4.0,4.0,4.0)); + const __m256d tmp_qloop_132 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_110,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_112,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_mul_pd(tmp_qloop_131,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_109); + const __m256d tmp_qloop_133 = _mm256_mul_pd(tmp_qloop_103,_mm256_set_pd(4.0,4.0,4.0,4.0)); + const __m256d tmp_qloop_134 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_114,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_133,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_111); + const __m256d tmp_qloop_158 = _mm256_mul_pd(tmp_qloop_105,tmp_qloop_111); + const __m256d jac_blending_0_0 = _mm256_mul_pd(tmp_qloop_36,tmp_qloop_38); + const __m256d jac_blending_0_1 = _mm256_mul_pd(tmp_qloop_18,tmp_qloop_44); + const __m256d jac_blending_0_2 = _mm256_mul_pd(tmp_qloop_18,tmp_qloop_50); + const __m256d jac_blending_1_0 = _mm256_mul_pd(tmp_qloop_25,tmp_qloop_52); + const __m256d jac_blending_1_1 = _mm256_mul_pd(tmp_qloop_43,tmp_qloop_55); + const __m256d tmp_qloop_63 = _mm256_mul_pd(jac_blending_0_2,jac_blending_1_1); + const __m256d jac_blending_1_2 = _mm256_mul_pd(tmp_qloop_25,tmp_qloop_50); + const __m256d tmp_qloop_60 = _mm256_mul_pd(jac_blending_0_1,jac_blending_1_2); + const __m256d jac_blending_2_0 = _mm256_mul_pd(tmp_qloop_22,tmp_qloop_52); + const __m256d jac_blending_2_1 = _mm256_mul_pd(tmp_qloop_22,tmp_qloop_44); + const __m256d tmp_qloop_61 = _mm256_mul_pd(jac_blending_1_2,jac_blending_2_1); + const __m256d jac_blending_2_2 = _mm256_mul_pd(tmp_qloop_43,tmp_qloop_58); + const __m256d tmp_qloop_59 = _mm256_mul_pd(jac_blending_1_1,jac_blending_2_2); + const __m256d tmp_qloop_62 = _mm256_mul_pd(jac_blending_0_1,jac_blending_2_2); + const __m256d tmp_qloop_64 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(jac_blending_0_0,tmp_qloop_59),_mm256_mul_pd(jac_blending_2_0,tmp_qloop_60)),_mm256_mul_pd(_mm256_mul_pd(jac_blending_0_0,tmp_qloop_61),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_mul_pd(_mm256_mul_pd(jac_blending_1_0,tmp_qloop_62),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_mul_pd(_mm256_mul_pd(jac_blending_2_0,tmp_qloop_63),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_mul_pd(_mm256_mul_pd(jac_blending_0_2,jac_blending_1_0),jac_blending_2_1)); + const __m256d tmp_qloop_65 = _mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),tmp_qloop_64); + const __m256d abs_det_jac_blending = tmp_qloop_64; + const __m256d tmp_qloop_135 = _mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(abs_det_jac_blending,_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(rho_dof_0,tmp_qloop_115),_mm256_mul_pd(rho_dof_1,tmp_qloop_127)),_mm256_mul_pd(rho_dof_2,tmp_qloop_128)),_mm256_mul_pd(rho_dof_3,tmp_qloop_129)),_mm256_mul_pd(rho_dof_4,tmp_qloop_110)),_mm256_mul_pd(rho_dof_5,tmp_qloop_113)),_mm256_mul_pd(rho_dof_6,tmp_qloop_112)),_mm256_mul_pd(rho_dof_7,tmp_qloop_130)),_mm256_mul_pd(rho_dof_8,tmp_qloop_132)),_mm256_mul_pd(rho_dof_9,tmp_qloop_134)))),_mm256_set_pd(_data_q_w[q],_data_q_w[q],_data_q_w[q],_data_q_w[q])),_mm256_set_pd(abs_det_jac_affine_BLUE_UP,abs_det_jac_affine_BLUE_UP,abs_det_jac_affine_BLUE_UP,abs_det_jac_affine_BLUE_UP)); + const __m256d tmp_qloop_136 = _mm256_mul_pd(tmp_qloop_135,_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]))),_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q]))),_mm256_set_pd(1.0,1.0,1.0,1.0))); + const __m256d tmp_qloop_142 = _mm256_mul_pd(tmp_qloop_135,_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])); + const __m256d tmp_qloop_144 = _mm256_mul_pd(tmp_qloop_135,_mm256_set_pd(_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q])); + const __m256d tmp_qloop_147 = _mm256_mul_pd(tmp_qloop_135,_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q])); + const __m256d tmp_qloop_159 = _mm256_mul_pd(tmp_qloop_135,tmp_qloop_158); + const __m256d tmp_qloop_161 = _mm256_mul_pd(tmp_qloop_107,tmp_qloop_135); + const __m256d jac_blending_inv_0_0 = _mm256_mul_pd(tmp_qloop_65,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_61,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),tmp_qloop_59)); + const __m256d jac_blending_inv_0_1 = _mm256_mul_pd(tmp_qloop_65,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_62,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(jac_blending_0_2,jac_blending_2_1))); + const __m256d jac_blending_inv_0_2 = _mm256_mul_pd(tmp_qloop_65,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_63,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),tmp_qloop_60)); + const __m256d jac_blending_inv_1_0 = _mm256_mul_pd(tmp_qloop_65,_mm256_add_pd(_mm256_mul_pd(jac_blending_1_2,jac_blending_2_0),_mm256_mul_pd(_mm256_mul_pd(jac_blending_1_0,jac_blending_2_2),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)))); + const __m256d jac_blending_inv_1_1 = _mm256_mul_pd(tmp_qloop_65,_mm256_add_pd(_mm256_mul_pd(jac_blending_0_0,jac_blending_2_2),_mm256_mul_pd(_mm256_mul_pd(jac_blending_0_2,jac_blending_2_0),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)))); + const __m256d jac_blending_inv_1_2 = _mm256_mul_pd(tmp_qloop_65,_mm256_add_pd(_mm256_mul_pd(jac_blending_0_2,jac_blending_1_0),_mm256_mul_pd(_mm256_mul_pd(jac_blending_0_0,jac_blending_1_2),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)))); + const __m256d jac_blending_inv_2_0 = _mm256_mul_pd(tmp_qloop_65,_mm256_add_pd(_mm256_mul_pd(jac_blending_1_0,jac_blending_2_1),_mm256_mul_pd(_mm256_mul_pd(jac_blending_1_1,jac_blending_2_0),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)))); + const __m256d tmp_qloop_126 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_121,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_0,_mm256_set_pd(jac_affine_inv_0_0_BLUE_UP,jac_affine_inv_0_0_BLUE_UP,jac_affine_inv_0_0_BLUE_UP,jac_affine_inv_0_0_BLUE_UP)),_mm256_mul_pd(jac_blending_inv_1_0,_mm256_set_pd(jac_affine_inv_0_1_BLUE_UP,jac_affine_inv_0_1_BLUE_UP,jac_affine_inv_0_1_BLUE_UP,jac_affine_inv_0_1_BLUE_UP))),_mm256_mul_pd(jac_blending_inv_2_0,_mm256_set_pd(jac_affine_inv_0_2_BLUE_UP,jac_affine_inv_0_2_BLUE_UP,jac_affine_inv_0_2_BLUE_UP,jac_affine_inv_0_2_BLUE_UP)))),_mm256_mul_pd(tmp_qloop_124,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_0,_mm256_set_pd(jac_affine_inv_1_0_BLUE_UP,jac_affine_inv_1_0_BLUE_UP,jac_affine_inv_1_0_BLUE_UP,jac_affine_inv_1_0_BLUE_UP)),_mm256_mul_pd(jac_blending_inv_1_0,_mm256_set_pd(jac_affine_inv_1_1_BLUE_UP,jac_affine_inv_1_1_BLUE_UP,jac_affine_inv_1_1_BLUE_UP,jac_affine_inv_1_1_BLUE_UP))),_mm256_mul_pd(jac_blending_inv_2_0,_mm256_set_pd(jac_affine_inv_1_2_BLUE_UP,jac_affine_inv_1_2_BLUE_UP,jac_affine_inv_1_2_BLUE_UP,jac_affine_inv_1_2_BLUE_UP))))),_mm256_mul_pd(tmp_qloop_125,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_0,_mm256_set_pd(jac_affine_inv_2_0_BLUE_UP,jac_affine_inv_2_0_BLUE_UP,jac_affine_inv_2_0_BLUE_UP,jac_affine_inv_2_0_BLUE_UP)),_mm256_mul_pd(jac_blending_inv_1_0,_mm256_set_pd(jac_affine_inv_2_1_BLUE_UP,jac_affine_inv_2_1_BLUE_UP,jac_affine_inv_2_1_BLUE_UP,jac_affine_inv_2_1_BLUE_UP))),_mm256_mul_pd(jac_blending_inv_2_0,_mm256_set_pd(jac_affine_inv_2_2_BLUE_UP,jac_affine_inv_2_2_BLUE_UP,jac_affine_inv_2_2_BLUE_UP,jac_affine_inv_2_2_BLUE_UP))))); + const __m256d tmp_qloop_137 = _mm256_mul_pd(tmp_qloop_126,tmp_qloop_136); + const __m256d tmp_qloop_143 = _mm256_mul_pd(tmp_qloop_126,tmp_qloop_142); + const __m256d tmp_qloop_145 = _mm256_mul_pd(tmp_qloop_126,tmp_qloop_144); + const __m256d tmp_qloop_146 = _mm256_mul_pd(tmp_qloop_112,tmp_qloop_145); + const __m256d tmp_qloop_148 = _mm256_mul_pd(tmp_qloop_126,tmp_qloop_147); + const __m256d tmp_qloop_157 = _mm256_mul_pd(tmp_qloop_126,tmp_qloop_135); + const __m256d tmp_qloop_160 = _mm256_mul_pd(tmp_qloop_107,tmp_qloop_157); + const __m256d jac_blending_inv_2_1 = _mm256_mul_pd(tmp_qloop_65,_mm256_add_pd(_mm256_mul_pd(jac_blending_0_1,jac_blending_2_0),_mm256_mul_pd(_mm256_mul_pd(jac_blending_0_0,jac_blending_2_1),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)))); + const __m256d tmp_qloop_138 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_121,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_1,_mm256_set_pd(jac_affine_inv_0_0_BLUE_UP,jac_affine_inv_0_0_BLUE_UP,jac_affine_inv_0_0_BLUE_UP,jac_affine_inv_0_0_BLUE_UP)),_mm256_mul_pd(jac_blending_inv_1_1,_mm256_set_pd(jac_affine_inv_0_1_BLUE_UP,jac_affine_inv_0_1_BLUE_UP,jac_affine_inv_0_1_BLUE_UP,jac_affine_inv_0_1_BLUE_UP))),_mm256_mul_pd(jac_blending_inv_2_1,_mm256_set_pd(jac_affine_inv_0_2_BLUE_UP,jac_affine_inv_0_2_BLUE_UP,jac_affine_inv_0_2_BLUE_UP,jac_affine_inv_0_2_BLUE_UP)))),_mm256_mul_pd(tmp_qloop_124,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_1,_mm256_set_pd(jac_affine_inv_1_0_BLUE_UP,jac_affine_inv_1_0_BLUE_UP,jac_affine_inv_1_0_BLUE_UP,jac_affine_inv_1_0_BLUE_UP)),_mm256_mul_pd(jac_blending_inv_1_1,_mm256_set_pd(jac_affine_inv_1_1_BLUE_UP,jac_affine_inv_1_1_BLUE_UP,jac_affine_inv_1_1_BLUE_UP,jac_affine_inv_1_1_BLUE_UP))),_mm256_mul_pd(jac_blending_inv_2_1,_mm256_set_pd(jac_affine_inv_1_2_BLUE_UP,jac_affine_inv_1_2_BLUE_UP,jac_affine_inv_1_2_BLUE_UP,jac_affine_inv_1_2_BLUE_UP))))),_mm256_mul_pd(tmp_qloop_125,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_1,_mm256_set_pd(jac_affine_inv_2_0_BLUE_UP,jac_affine_inv_2_0_BLUE_UP,jac_affine_inv_2_0_BLUE_UP,jac_affine_inv_2_0_BLUE_UP)),_mm256_mul_pd(jac_blending_inv_1_1,_mm256_set_pd(jac_affine_inv_2_1_BLUE_UP,jac_affine_inv_2_1_BLUE_UP,jac_affine_inv_2_1_BLUE_UP,jac_affine_inv_2_1_BLUE_UP))),_mm256_mul_pd(jac_blending_inv_2_1,_mm256_set_pd(jac_affine_inv_2_2_BLUE_UP,jac_affine_inv_2_2_BLUE_UP,jac_affine_inv_2_2_BLUE_UP,jac_affine_inv_2_2_BLUE_UP))))); + const __m256d tmp_qloop_139 = _mm256_mul_pd(tmp_qloop_136,tmp_qloop_138); + const __m256d tmp_qloop_149 = _mm256_mul_pd(tmp_qloop_138,tmp_qloop_142); + const __m256d tmp_qloop_150 = _mm256_mul_pd(tmp_qloop_138,tmp_qloop_144); + const __m256d tmp_qloop_151 = _mm256_mul_pd(tmp_qloop_112,tmp_qloop_150); + const __m256d tmp_qloop_152 = _mm256_mul_pd(tmp_qloop_138,tmp_qloop_147); + const __m256d tmp_qloop_162 = _mm256_mul_pd(tmp_qloop_138,tmp_qloop_161); + const __m256d jac_blending_inv_2_2 = _mm256_mul_pd(tmp_qloop_65,_mm256_add_pd(_mm256_mul_pd(jac_blending_0_0,jac_blending_1_1),_mm256_mul_pd(_mm256_mul_pd(jac_blending_0_1,jac_blending_1_0),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)))); + const __m256d tmp_qloop_140 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_121,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_2,_mm256_set_pd(jac_affine_inv_0_0_BLUE_UP,jac_affine_inv_0_0_BLUE_UP,jac_affine_inv_0_0_BLUE_UP,jac_affine_inv_0_0_BLUE_UP)),_mm256_mul_pd(jac_blending_inv_1_2,_mm256_set_pd(jac_affine_inv_0_1_BLUE_UP,jac_affine_inv_0_1_BLUE_UP,jac_affine_inv_0_1_BLUE_UP,jac_affine_inv_0_1_BLUE_UP))),_mm256_mul_pd(jac_blending_inv_2_2,_mm256_set_pd(jac_affine_inv_0_2_BLUE_UP,jac_affine_inv_0_2_BLUE_UP,jac_affine_inv_0_2_BLUE_UP,jac_affine_inv_0_2_BLUE_UP)))),_mm256_mul_pd(tmp_qloop_124,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_2,_mm256_set_pd(jac_affine_inv_1_0_BLUE_UP,jac_affine_inv_1_0_BLUE_UP,jac_affine_inv_1_0_BLUE_UP,jac_affine_inv_1_0_BLUE_UP)),_mm256_mul_pd(jac_blending_inv_1_2,_mm256_set_pd(jac_affine_inv_1_1_BLUE_UP,jac_affine_inv_1_1_BLUE_UP,jac_affine_inv_1_1_BLUE_UP,jac_affine_inv_1_1_BLUE_UP))),_mm256_mul_pd(jac_blending_inv_2_2,_mm256_set_pd(jac_affine_inv_1_2_BLUE_UP,jac_affine_inv_1_2_BLUE_UP,jac_affine_inv_1_2_BLUE_UP,jac_affine_inv_1_2_BLUE_UP))))),_mm256_mul_pd(tmp_qloop_125,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_2,_mm256_set_pd(jac_affine_inv_2_0_BLUE_UP,jac_affine_inv_2_0_BLUE_UP,jac_affine_inv_2_0_BLUE_UP,jac_affine_inv_2_0_BLUE_UP)),_mm256_mul_pd(jac_blending_inv_1_2,_mm256_set_pd(jac_affine_inv_2_1_BLUE_UP,jac_affine_inv_2_1_BLUE_UP,jac_affine_inv_2_1_BLUE_UP,jac_affine_inv_2_1_BLUE_UP))),_mm256_mul_pd(jac_blending_inv_2_2,_mm256_set_pd(jac_affine_inv_2_2_BLUE_UP,jac_affine_inv_2_2_BLUE_UP,jac_affine_inv_2_2_BLUE_UP,jac_affine_inv_2_2_BLUE_UP))))); + const __m256d tmp_qloop_141 = _mm256_mul_pd(tmp_qloop_136,tmp_qloop_140); + const __m256d tmp_qloop_153 = _mm256_mul_pd(tmp_qloop_140,tmp_qloop_142); + const __m256d tmp_qloop_154 = _mm256_mul_pd(tmp_qloop_140,tmp_qloop_144); + const __m256d tmp_qloop_155 = _mm256_mul_pd(tmp_qloop_112,tmp_qloop_154); + const __m256d tmp_qloop_156 = _mm256_mul_pd(tmp_qloop_140,tmp_qloop_147); + const __m256d tmp_qloop_163 = _mm256_mul_pd(tmp_qloop_140,tmp_qloop_161); + const __m256d hessian_blending_0_0_0 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_18,tmp_qloop_67),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_37,_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_31,_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(tmp_qloop_68,tmp_qloop_68,tmp_qloop_68,tmp_qloop_68)),_mm256_set_pd(tmp_qloop_33,tmp_qloop_33,tmp_qloop_33,tmp_qloop_33))),_mm256_mul_pd(tmp_qloop_19,_mm256_set_pd(tmp_qloop_68,tmp_qloop_68,tmp_qloop_68,tmp_qloop_68))),_mm256_mul_pd(tmp_qloop_35,tmp_qloop_69)),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_28,tmp_qloop_69),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_17,tmp_qloop_17,tmp_qloop_17,tmp_qloop_17))); + const __m256d hessian_blending_1_0_0 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_25,tmp_qloop_67),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_37,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_19,_mm256_set_pd(tmp_qloop_40,tmp_qloop_40,tmp_qloop_40,tmp_qloop_40)),_mm256_mul_pd(tmp_qloop_25,tmp_qloop_71)),tmp_qloop_70)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_17,tmp_qloop_17,tmp_qloop_17,tmp_qloop_17))); + const __m256d hessian_blending_2_0_0 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_22,tmp_qloop_67),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_37,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_19,_mm256_set_pd(tmp_qloop_72,tmp_qloop_72,tmp_qloop_72,tmp_qloop_72)),_mm256_mul_pd(tmp_qloop_22,tmp_qloop_71)),tmp_qloop_73)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_17,tmp_qloop_17,tmp_qloop_17,tmp_qloop_17))); + const __m256d hessian_blending_0_0_1 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_18,tmp_qloop_78),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_25,tmp_qloop_37),tmp_qloop_75),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_17,tmp_qloop_17,tmp_qloop_17,tmp_qloop_17))); + const __m256d hessian_blending_1_0_1 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_80,tmp_qloop_81),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_29,tmp_qloop_77),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_52); + const __m256d hessian_blending_2_0_1 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_81,tmp_qloop_83),tmp_qloop_84); + const __m256d hessian_blending_0_0_2 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_77,tmp_qloop_85),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_22,tmp_qloop_37),tmp_qloop_75),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_17,tmp_qloop_17,tmp_qloop_17,tmp_qloop_17))); + const __m256d hessian_blending_1_0_2 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_80,tmp_qloop_86),tmp_qloop_84); + const __m256d hessian_blending_2_0_2 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_83,tmp_qloop_86),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_30,tmp_qloop_77),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_52); + const __m256d hessian_blending_0_1_0 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_87,tmp_qloop_88),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_19,tmp_qloop_89),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_44); + const __m256d hessian_blending_1_1_0 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_18,tmp_qloop_91),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_18,tmp_qloop_37),tmp_qloop_90),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_17,tmp_qloop_17,tmp_qloop_17,tmp_qloop_17))); + const __m256d hessian_blending_2_1_0 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_93,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_88,tmp_qloop_92)); + const __m256d hessian_blending_0_1_1 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_18,tmp_qloop_95),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_37,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_29,_mm256_set_pd(tmp_qloop_68,tmp_qloop_68,tmp_qloop_68,tmp_qloop_68)),_mm256_mul_pd(tmp_qloop_54,tmp_qloop_69)),tmp_qloop_94)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_17,tmp_qloop_17,tmp_qloop_17,tmp_qloop_17))); + const __m256d hessian_blending_1_1_1 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_25,tmp_qloop_95),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_37,_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_31,_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(tmp_qloop_40,tmp_qloop_40,tmp_qloop_40,tmp_qloop_40)),_mm256_set_pd(tmp_qloop_53,tmp_qloop_53,tmp_qloop_53,tmp_qloop_53))),_mm256_mul_pd(tmp_qloop_29,_mm256_set_pd(tmp_qloop_40,tmp_qloop_40,tmp_qloop_40,tmp_qloop_40))),_mm256_mul_pd(tmp_qloop_47,tmp_qloop_79)),_mm256_mul_pd(tmp_qloop_54,tmp_qloop_79))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_17,tmp_qloop_17,tmp_qloop_17,tmp_qloop_17))); + const __m256d hessian_blending_2_1_1 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_22,tmp_qloop_95),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_37,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_29,_mm256_set_pd(tmp_qloop_72,tmp_qloop_72,tmp_qloop_72,tmp_qloop_72)),_mm256_mul_pd(tmp_qloop_54,tmp_qloop_82)),tmp_qloop_73)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_17,tmp_qloop_17,tmp_qloop_17,tmp_qloop_17))); + const __m256d hessian_blending_0_1_2 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_93,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_22,tmp_qloop_37),tmp_qloop_87),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_17,tmp_qloop_17,tmp_qloop_17,tmp_qloop_17))); + const __m256d hessian_blending_1_1_2 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_22,tmp_qloop_91),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_22,tmp_qloop_37),tmp_qloop_90),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_17,tmp_qloop_17,tmp_qloop_17,tmp_qloop_17))); + const __m256d hessian_blending_2_1_2 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_86,tmp_qloop_92),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_30,tmp_qloop_89),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_44); + const __m256d hessian_blending_0_2_0 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_88,tmp_qloop_96),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_19,tmp_qloop_97),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_50); + const __m256d hessian_blending_1_2_0 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_100,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_18,tmp_qloop_37),tmp_qloop_98),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_17,tmp_qloop_17,tmp_qloop_17,tmp_qloop_17))); + const __m256d hessian_blending_2_2_0 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_85,tmp_qloop_97),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_101,tmp_qloop_18),tmp_qloop_37),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_17,tmp_qloop_17,tmp_qloop_17,tmp_qloop_17))); + const __m256d hessian_blending_0_2_1 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_100,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_25,tmp_qloop_37),tmp_qloop_96),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_17,tmp_qloop_17,tmp_qloop_17,tmp_qloop_17))); + const __m256d hessian_blending_1_2_1 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_81,tmp_qloop_98),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_29,tmp_qloop_97),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_50); + const __m256d hessian_blending_2_2_1 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_22,tmp_qloop_99),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_101,tmp_qloop_25),tmp_qloop_37),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_17,tmp_qloop_17,tmp_qloop_17,tmp_qloop_17))); + const __m256d hessian_blending_0_2_2 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_102,tmp_qloop_18),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_37,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_30,_mm256_set_pd(tmp_qloop_68,tmp_qloop_68,tmp_qloop_68,tmp_qloop_68)),_mm256_mul_pd(tmp_qloop_57,tmp_qloop_69)),tmp_qloop_94)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_17,tmp_qloop_17,tmp_qloop_17,tmp_qloop_17))); + const __m256d hessian_blending_1_2_2 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_102,tmp_qloop_25),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_37,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_30,_mm256_set_pd(tmp_qloop_40,tmp_qloop_40,tmp_qloop_40,tmp_qloop_40)),_mm256_mul_pd(tmp_qloop_57,tmp_qloop_79)),tmp_qloop_70)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_17,tmp_qloop_17,tmp_qloop_17,tmp_qloop_17))); + const __m256d hessian_blending_2_2_2 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_102,tmp_qloop_22),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_37,_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_31,_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(tmp_qloop_72,tmp_qloop_72,tmp_qloop_72,tmp_qloop_72)),_mm256_set_pd(tmp_qloop_46,tmp_qloop_46,tmp_qloop_46,tmp_qloop_46))),_mm256_mul_pd(tmp_qloop_48,_mm256_set_pd(2.0,2.0,2.0,2.0))),_mm256_mul_pd(tmp_qloop_30,_mm256_set_pd(tmp_qloop_72,tmp_qloop_72,tmp_qloop_72,tmp_qloop_72))),_mm256_mul_pd(tmp_qloop_57,tmp_qloop_82))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_17,tmp_qloop_17,tmp_qloop_17,tmp_qloop_17))); + const __m256d q_tmp_0_0 = _mm256_mul_pd(tmp_qloop_115,tmp_qloop_137); + const __m256d q_tmp_0_1 = _mm256_mul_pd(tmp_qloop_127,tmp_qloop_137); + const __m256d q_tmp_0_2 = _mm256_mul_pd(tmp_qloop_128,tmp_qloop_137); + const __m256d q_tmp_0_3 = _mm256_mul_pd(tmp_qloop_129,tmp_qloop_137); + const __m256d q_tmp_0_4 = _mm256_mul_pd(tmp_qloop_110,tmp_qloop_137); + const __m256d q_tmp_0_5 = _mm256_mul_pd(tmp_qloop_113,tmp_qloop_137); + const __m256d q_tmp_0_6 = _mm256_mul_pd(tmp_qloop_112,tmp_qloop_137); + const __m256d q_tmp_0_7 = _mm256_mul_pd(tmp_qloop_130,tmp_qloop_137); + const __m256d q_tmp_0_8 = _mm256_mul_pd(tmp_qloop_132,tmp_qloop_137); + const __m256d q_tmp_0_9 = _mm256_mul_pd(tmp_qloop_134,tmp_qloop_137); + const __m256d q_tmp_0_10 = _mm256_mul_pd(tmp_qloop_115,tmp_qloop_139); + const __m256d q_tmp_0_11 = _mm256_mul_pd(tmp_qloop_127,tmp_qloop_139); + const __m256d q_tmp_0_12 = _mm256_mul_pd(tmp_qloop_128,tmp_qloop_139); + const __m256d q_tmp_0_13 = _mm256_mul_pd(tmp_qloop_129,tmp_qloop_139); + const __m256d q_tmp_0_14 = _mm256_mul_pd(tmp_qloop_110,tmp_qloop_139); + const __m256d q_tmp_0_15 = _mm256_mul_pd(tmp_qloop_113,tmp_qloop_139); + const __m256d q_tmp_0_16 = _mm256_mul_pd(tmp_qloop_112,tmp_qloop_139); + const __m256d q_tmp_0_17 = _mm256_mul_pd(tmp_qloop_130,tmp_qloop_139); + const __m256d q_tmp_0_18 = _mm256_mul_pd(tmp_qloop_132,tmp_qloop_139); + const __m256d q_tmp_0_19 = _mm256_mul_pd(tmp_qloop_134,tmp_qloop_139); + const __m256d q_tmp_0_20 = _mm256_mul_pd(tmp_qloop_115,tmp_qloop_141); + const __m256d q_tmp_0_21 = _mm256_mul_pd(tmp_qloop_127,tmp_qloop_141); + const __m256d q_tmp_0_22 = _mm256_mul_pd(tmp_qloop_128,tmp_qloop_141); + const __m256d q_tmp_0_23 = _mm256_mul_pd(tmp_qloop_129,tmp_qloop_141); + const __m256d q_tmp_0_24 = _mm256_mul_pd(tmp_qloop_110,tmp_qloop_141); + const __m256d q_tmp_0_25 = _mm256_mul_pd(tmp_qloop_113,tmp_qloop_141); + const __m256d q_tmp_0_26 = _mm256_mul_pd(tmp_qloop_112,tmp_qloop_141); + const __m256d q_tmp_0_27 = _mm256_mul_pd(tmp_qloop_130,tmp_qloop_141); + const __m256d q_tmp_0_28 = _mm256_mul_pd(tmp_qloop_132,tmp_qloop_141); + const __m256d q_tmp_0_29 = _mm256_mul_pd(tmp_qloop_134,tmp_qloop_141); + const __m256d q_tmp_1_0 = _mm256_mul_pd(tmp_qloop_115,tmp_qloop_143); + const __m256d q_tmp_1_1 = _mm256_mul_pd(tmp_qloop_127,tmp_qloop_143); + const __m256d q_tmp_1_2 = _mm256_mul_pd(tmp_qloop_128,tmp_qloop_143); + const __m256d q_tmp_1_3 = _mm256_mul_pd(tmp_qloop_129,tmp_qloop_143); + const __m256d q_tmp_1_4 = tmp_qloop_146; + const __m256d q_tmp_1_5 = _mm256_mul_pd(tmp_qloop_133,tmp_qloop_145); + const __m256d q_tmp_1_6 = _mm256_mul_pd(tmp_qloop_133,tmp_qloop_148); + const __m256d q_tmp_1_7 = _mm256_mul_pd(tmp_qloop_130,tmp_qloop_143); + const __m256d q_tmp_1_8 = _mm256_mul_pd(tmp_qloop_132,tmp_qloop_143); + const __m256d q_tmp_1_9 = _mm256_mul_pd(tmp_qloop_134,tmp_qloop_143); + const __m256d q_tmp_1_10 = _mm256_mul_pd(tmp_qloop_115,tmp_qloop_149); + const __m256d q_tmp_1_11 = _mm256_mul_pd(tmp_qloop_127,tmp_qloop_149); + const __m256d q_tmp_1_12 = _mm256_mul_pd(tmp_qloop_128,tmp_qloop_149); + const __m256d q_tmp_1_13 = _mm256_mul_pd(tmp_qloop_129,tmp_qloop_149); + const __m256d q_tmp_1_14 = tmp_qloop_151; + const __m256d q_tmp_1_15 = _mm256_mul_pd(tmp_qloop_133,tmp_qloop_150); + const __m256d q_tmp_1_16 = _mm256_mul_pd(tmp_qloop_133,tmp_qloop_152); + const __m256d q_tmp_1_17 = _mm256_mul_pd(tmp_qloop_130,tmp_qloop_149); + const __m256d q_tmp_1_18 = _mm256_mul_pd(tmp_qloop_132,tmp_qloop_149); + const __m256d q_tmp_1_19 = _mm256_mul_pd(tmp_qloop_134,tmp_qloop_149); + const __m256d q_tmp_1_20 = _mm256_mul_pd(tmp_qloop_115,tmp_qloop_153); + const __m256d q_tmp_1_21 = _mm256_mul_pd(tmp_qloop_127,tmp_qloop_153); + const __m256d q_tmp_1_22 = _mm256_mul_pd(tmp_qloop_128,tmp_qloop_153); + const __m256d q_tmp_1_23 = _mm256_mul_pd(tmp_qloop_129,tmp_qloop_153); + const __m256d q_tmp_1_24 = tmp_qloop_155; + const __m256d q_tmp_1_25 = _mm256_mul_pd(tmp_qloop_133,tmp_qloop_154); + const __m256d q_tmp_1_26 = _mm256_mul_pd(tmp_qloop_133,tmp_qloop_156); + const __m256d q_tmp_1_27 = _mm256_mul_pd(tmp_qloop_130,tmp_qloop_153); + const __m256d q_tmp_1_28 = _mm256_mul_pd(tmp_qloop_132,tmp_qloop_153); + const __m256d q_tmp_1_29 = _mm256_mul_pd(tmp_qloop_134,tmp_qloop_153); + const __m256d q_tmp_2_0 = _mm256_mul_pd(tmp_qloop_115,tmp_qloop_148); + const __m256d q_tmp_2_1 = _mm256_mul_pd(tmp_qloop_127,tmp_qloop_148); + const __m256d q_tmp_2_2 = _mm256_mul_pd(tmp_qloop_128,tmp_qloop_148); + const __m256d q_tmp_2_3 = _mm256_mul_pd(tmp_qloop_129,tmp_qloop_148); + const __m256d q_tmp_2_4 = _mm256_mul_pd(tmp_qloop_131,tmp_qloop_145); + const __m256d q_tmp_2_5 = tmp_qloop_146; + const __m256d q_tmp_2_6 = _mm256_mul_pd(tmp_qloop_157,tmp_qloop_158); + const __m256d q_tmp_2_7 = _mm256_mul_pd(tmp_qloop_130,tmp_qloop_148); + const __m256d q_tmp_2_8 = _mm256_mul_pd(tmp_qloop_132,tmp_qloop_148); + const __m256d q_tmp_2_9 = _mm256_mul_pd(tmp_qloop_134,tmp_qloop_148); + const __m256d q_tmp_2_10 = _mm256_mul_pd(tmp_qloop_115,tmp_qloop_152); + const __m256d q_tmp_2_11 = _mm256_mul_pd(tmp_qloop_127,tmp_qloop_152); + const __m256d q_tmp_2_12 = _mm256_mul_pd(tmp_qloop_128,tmp_qloop_152); + const __m256d q_tmp_2_13 = _mm256_mul_pd(tmp_qloop_129,tmp_qloop_152); + const __m256d q_tmp_2_14 = _mm256_mul_pd(tmp_qloop_131,tmp_qloop_150); + const __m256d q_tmp_2_15 = tmp_qloop_151; + const __m256d q_tmp_2_16 = _mm256_mul_pd(tmp_qloop_138,tmp_qloop_159); + const __m256d q_tmp_2_17 = _mm256_mul_pd(tmp_qloop_130,tmp_qloop_152); + const __m256d q_tmp_2_18 = _mm256_mul_pd(tmp_qloop_132,tmp_qloop_152); + const __m256d q_tmp_2_19 = _mm256_mul_pd(tmp_qloop_134,tmp_qloop_152); + const __m256d q_tmp_2_20 = _mm256_mul_pd(tmp_qloop_115,tmp_qloop_156); + const __m256d q_tmp_2_21 = _mm256_mul_pd(tmp_qloop_127,tmp_qloop_156); + const __m256d q_tmp_2_22 = _mm256_mul_pd(tmp_qloop_128,tmp_qloop_156); + const __m256d q_tmp_2_23 = _mm256_mul_pd(tmp_qloop_129,tmp_qloop_156); + const __m256d q_tmp_2_24 = _mm256_mul_pd(tmp_qloop_131,tmp_qloop_154); + const __m256d q_tmp_2_25 = tmp_qloop_155; + const __m256d q_tmp_2_26 = _mm256_mul_pd(tmp_qloop_140,tmp_qloop_159); + const __m256d q_tmp_2_27 = _mm256_mul_pd(tmp_qloop_130,tmp_qloop_156); + const __m256d q_tmp_2_28 = _mm256_mul_pd(tmp_qloop_132,tmp_qloop_156); + const __m256d q_tmp_2_29 = _mm256_mul_pd(tmp_qloop_134,tmp_qloop_156); + const __m256d q_tmp_3_0 = _mm256_mul_pd(tmp_qloop_115,tmp_qloop_145); + const __m256d q_tmp_3_1 = _mm256_mul_pd(tmp_qloop_127,tmp_qloop_145); + const __m256d q_tmp_3_2 = _mm256_mul_pd(tmp_qloop_128,tmp_qloop_145); + const __m256d q_tmp_3_3 = _mm256_mul_pd(tmp_qloop_129,tmp_qloop_145); + const __m256d q_tmp_3_4 = _mm256_mul_pd(tmp_qloop_109,tmp_qloop_160); + const __m256d q_tmp_3_5 = _mm256_mul_pd(tmp_qloop_111,tmp_qloop_160); + const __m256d q_tmp_3_6 = tmp_qloop_146; + const __m256d q_tmp_3_7 = _mm256_mul_pd(tmp_qloop_130,tmp_qloop_145); + const __m256d q_tmp_3_8 = _mm256_mul_pd(tmp_qloop_132,tmp_qloop_145); + const __m256d q_tmp_3_9 = _mm256_mul_pd(tmp_qloop_134,tmp_qloop_145); + const __m256d q_tmp_3_10 = _mm256_mul_pd(tmp_qloop_115,tmp_qloop_150); + const __m256d q_tmp_3_11 = _mm256_mul_pd(tmp_qloop_127,tmp_qloop_150); + const __m256d q_tmp_3_12 = _mm256_mul_pd(tmp_qloop_128,tmp_qloop_150); + const __m256d q_tmp_3_13 = _mm256_mul_pd(tmp_qloop_129,tmp_qloop_150); + const __m256d q_tmp_3_14 = _mm256_mul_pd(tmp_qloop_109,tmp_qloop_162); + const __m256d q_tmp_3_15 = _mm256_mul_pd(tmp_qloop_111,tmp_qloop_162); + const __m256d q_tmp_3_16 = tmp_qloop_151; + const __m256d q_tmp_3_17 = _mm256_mul_pd(tmp_qloop_130,tmp_qloop_150); + const __m256d q_tmp_3_18 = _mm256_mul_pd(tmp_qloop_132,tmp_qloop_150); + const __m256d q_tmp_3_19 = _mm256_mul_pd(tmp_qloop_134,tmp_qloop_150); + const __m256d q_tmp_3_20 = _mm256_mul_pd(tmp_qloop_115,tmp_qloop_154); + const __m256d q_tmp_3_21 = _mm256_mul_pd(tmp_qloop_127,tmp_qloop_154); + const __m256d q_tmp_3_22 = _mm256_mul_pd(tmp_qloop_128,tmp_qloop_154); + const __m256d q_tmp_3_23 = _mm256_mul_pd(tmp_qloop_129,tmp_qloop_154); + const __m256d q_tmp_3_24 = _mm256_mul_pd(tmp_qloop_109,tmp_qloop_163); + const __m256d q_tmp_3_25 = _mm256_mul_pd(tmp_qloop_111,tmp_qloop_163); + const __m256d q_tmp_3_26 = tmp_qloop_155; + const __m256d q_tmp_3_27 = _mm256_mul_pd(tmp_qloop_130,tmp_qloop_154); + const __m256d q_tmp_3_28 = _mm256_mul_pd(tmp_qloop_132,tmp_qloop_154); + const __m256d q_tmp_3_29 = _mm256_mul_pd(tmp_qloop_134,tmp_qloop_154); + q_acc_0_0 = _mm256_add_pd(q_acc_0_0,q_tmp_0_0); + q_acc_0_1 = _mm256_add_pd(q_acc_0_1,q_tmp_0_1); + q_acc_0_2 = _mm256_add_pd(q_acc_0_2,q_tmp_0_2); + q_acc_0_3 = _mm256_add_pd(q_acc_0_3,q_tmp_0_3); + q_acc_0_4 = _mm256_add_pd(q_acc_0_4,q_tmp_0_4); + q_acc_0_5 = _mm256_add_pd(q_acc_0_5,q_tmp_0_5); + q_acc_0_6 = _mm256_add_pd(q_acc_0_6,q_tmp_0_6); + q_acc_0_7 = _mm256_add_pd(q_acc_0_7,q_tmp_0_7); + q_acc_0_8 = _mm256_add_pd(q_acc_0_8,q_tmp_0_8); + q_acc_0_9 = _mm256_add_pd(q_acc_0_9,q_tmp_0_9); + q_acc_0_10 = _mm256_add_pd(q_acc_0_10,q_tmp_0_10); + q_acc_0_11 = _mm256_add_pd(q_acc_0_11,q_tmp_0_11); + q_acc_0_12 = _mm256_add_pd(q_acc_0_12,q_tmp_0_12); + q_acc_0_13 = _mm256_add_pd(q_acc_0_13,q_tmp_0_13); + q_acc_0_14 = _mm256_add_pd(q_acc_0_14,q_tmp_0_14); + q_acc_0_15 = _mm256_add_pd(q_acc_0_15,q_tmp_0_15); + q_acc_0_16 = _mm256_add_pd(q_acc_0_16,q_tmp_0_16); + q_acc_0_17 = _mm256_add_pd(q_acc_0_17,q_tmp_0_17); + q_acc_0_18 = _mm256_add_pd(q_acc_0_18,q_tmp_0_18); + q_acc_0_19 = _mm256_add_pd(q_acc_0_19,q_tmp_0_19); + q_acc_0_20 = _mm256_add_pd(q_acc_0_20,q_tmp_0_20); + q_acc_0_21 = _mm256_add_pd(q_acc_0_21,q_tmp_0_21); + q_acc_0_22 = _mm256_add_pd(q_acc_0_22,q_tmp_0_22); + q_acc_0_23 = _mm256_add_pd(q_acc_0_23,q_tmp_0_23); + q_acc_0_24 = _mm256_add_pd(q_acc_0_24,q_tmp_0_24); + q_acc_0_25 = _mm256_add_pd(q_acc_0_25,q_tmp_0_25); + q_acc_0_26 = _mm256_add_pd(q_acc_0_26,q_tmp_0_26); + q_acc_0_27 = _mm256_add_pd(q_acc_0_27,q_tmp_0_27); + q_acc_0_28 = _mm256_add_pd(q_acc_0_28,q_tmp_0_28); + q_acc_0_29 = _mm256_add_pd(q_acc_0_29,q_tmp_0_29); + q_acc_1_0 = _mm256_add_pd(q_acc_1_0,q_tmp_1_0); + q_acc_1_1 = _mm256_add_pd(q_acc_1_1,q_tmp_1_1); + q_acc_1_2 = _mm256_add_pd(q_acc_1_2,q_tmp_1_2); + q_acc_1_3 = _mm256_add_pd(q_acc_1_3,q_tmp_1_3); + q_acc_1_4 = _mm256_add_pd(q_acc_1_4,q_tmp_1_4); + q_acc_1_5 = _mm256_add_pd(q_acc_1_5,q_tmp_1_5); + q_acc_1_6 = _mm256_add_pd(q_acc_1_6,q_tmp_1_6); + q_acc_1_7 = _mm256_add_pd(q_acc_1_7,q_tmp_1_7); + q_acc_1_8 = _mm256_add_pd(q_acc_1_8,q_tmp_1_8); + q_acc_1_9 = _mm256_add_pd(q_acc_1_9,q_tmp_1_9); + q_acc_1_10 = _mm256_add_pd(q_acc_1_10,q_tmp_1_10); + q_acc_1_11 = _mm256_add_pd(q_acc_1_11,q_tmp_1_11); + q_acc_1_12 = _mm256_add_pd(q_acc_1_12,q_tmp_1_12); + q_acc_1_13 = _mm256_add_pd(q_acc_1_13,q_tmp_1_13); + q_acc_1_14 = _mm256_add_pd(q_acc_1_14,q_tmp_1_14); + q_acc_1_15 = _mm256_add_pd(q_acc_1_15,q_tmp_1_15); + q_acc_1_16 = _mm256_add_pd(q_acc_1_16,q_tmp_1_16); + q_acc_1_17 = _mm256_add_pd(q_acc_1_17,q_tmp_1_17); + q_acc_1_18 = _mm256_add_pd(q_acc_1_18,q_tmp_1_18); + q_acc_1_19 = _mm256_add_pd(q_acc_1_19,q_tmp_1_19); + q_acc_1_20 = _mm256_add_pd(q_acc_1_20,q_tmp_1_20); + q_acc_1_21 = _mm256_add_pd(q_acc_1_21,q_tmp_1_21); + q_acc_1_22 = _mm256_add_pd(q_acc_1_22,q_tmp_1_22); + q_acc_1_23 = _mm256_add_pd(q_acc_1_23,q_tmp_1_23); + q_acc_1_24 = _mm256_add_pd(q_acc_1_24,q_tmp_1_24); + q_acc_1_25 = _mm256_add_pd(q_acc_1_25,q_tmp_1_25); + q_acc_1_26 = _mm256_add_pd(q_acc_1_26,q_tmp_1_26); + q_acc_1_27 = _mm256_add_pd(q_acc_1_27,q_tmp_1_27); + q_acc_1_28 = _mm256_add_pd(q_acc_1_28,q_tmp_1_28); + q_acc_1_29 = _mm256_add_pd(q_acc_1_29,q_tmp_1_29); + q_acc_2_0 = _mm256_add_pd(q_acc_2_0,q_tmp_2_0); + q_acc_2_1 = _mm256_add_pd(q_acc_2_1,q_tmp_2_1); + q_acc_2_2 = _mm256_add_pd(q_acc_2_2,q_tmp_2_2); + q_acc_2_3 = _mm256_add_pd(q_acc_2_3,q_tmp_2_3); + q_acc_2_4 = _mm256_add_pd(q_acc_2_4,q_tmp_2_4); + q_acc_2_5 = _mm256_add_pd(q_acc_2_5,q_tmp_2_5); + q_acc_2_6 = _mm256_add_pd(q_acc_2_6,q_tmp_2_6); + q_acc_2_7 = _mm256_add_pd(q_acc_2_7,q_tmp_2_7); + q_acc_2_8 = _mm256_add_pd(q_acc_2_8,q_tmp_2_8); + q_acc_2_9 = _mm256_add_pd(q_acc_2_9,q_tmp_2_9); + q_acc_2_10 = _mm256_add_pd(q_acc_2_10,q_tmp_2_10); + q_acc_2_11 = _mm256_add_pd(q_acc_2_11,q_tmp_2_11); + q_acc_2_12 = _mm256_add_pd(q_acc_2_12,q_tmp_2_12); + q_acc_2_13 = _mm256_add_pd(q_acc_2_13,q_tmp_2_13); + q_acc_2_14 = _mm256_add_pd(q_acc_2_14,q_tmp_2_14); + q_acc_2_15 = _mm256_add_pd(q_acc_2_15,q_tmp_2_15); + q_acc_2_16 = _mm256_add_pd(q_acc_2_16,q_tmp_2_16); + q_acc_2_17 = _mm256_add_pd(q_acc_2_17,q_tmp_2_17); + q_acc_2_18 = _mm256_add_pd(q_acc_2_18,q_tmp_2_18); + q_acc_2_19 = _mm256_add_pd(q_acc_2_19,q_tmp_2_19); + q_acc_2_20 = _mm256_add_pd(q_acc_2_20,q_tmp_2_20); + q_acc_2_21 = _mm256_add_pd(q_acc_2_21,q_tmp_2_21); + q_acc_2_22 = _mm256_add_pd(q_acc_2_22,q_tmp_2_22); + q_acc_2_23 = _mm256_add_pd(q_acc_2_23,q_tmp_2_23); + q_acc_2_24 = _mm256_add_pd(q_acc_2_24,q_tmp_2_24); + q_acc_2_25 = _mm256_add_pd(q_acc_2_25,q_tmp_2_25); + q_acc_2_26 = _mm256_add_pd(q_acc_2_26,q_tmp_2_26); + q_acc_2_27 = _mm256_add_pd(q_acc_2_27,q_tmp_2_27); + q_acc_2_28 = _mm256_add_pd(q_acc_2_28,q_tmp_2_28); + q_acc_2_29 = _mm256_add_pd(q_acc_2_29,q_tmp_2_29); + q_acc_3_0 = _mm256_add_pd(q_acc_3_0,q_tmp_3_0); + q_acc_3_1 = _mm256_add_pd(q_acc_3_1,q_tmp_3_1); + q_acc_3_2 = _mm256_add_pd(q_acc_3_2,q_tmp_3_2); + q_acc_3_3 = _mm256_add_pd(q_acc_3_3,q_tmp_3_3); + q_acc_3_4 = _mm256_add_pd(q_acc_3_4,q_tmp_3_4); + q_acc_3_5 = _mm256_add_pd(q_acc_3_5,q_tmp_3_5); + q_acc_3_6 = _mm256_add_pd(q_acc_3_6,q_tmp_3_6); + q_acc_3_7 = _mm256_add_pd(q_acc_3_7,q_tmp_3_7); + q_acc_3_8 = _mm256_add_pd(q_acc_3_8,q_tmp_3_8); + q_acc_3_9 = _mm256_add_pd(q_acc_3_9,q_tmp_3_9); + q_acc_3_10 = _mm256_add_pd(q_acc_3_10,q_tmp_3_10); + q_acc_3_11 = _mm256_add_pd(q_acc_3_11,q_tmp_3_11); + q_acc_3_12 = _mm256_add_pd(q_acc_3_12,q_tmp_3_12); + q_acc_3_13 = _mm256_add_pd(q_acc_3_13,q_tmp_3_13); + q_acc_3_14 = _mm256_add_pd(q_acc_3_14,q_tmp_3_14); + q_acc_3_15 = _mm256_add_pd(q_acc_3_15,q_tmp_3_15); + q_acc_3_16 = _mm256_add_pd(q_acc_3_16,q_tmp_3_16); + q_acc_3_17 = _mm256_add_pd(q_acc_3_17,q_tmp_3_17); + q_acc_3_18 = _mm256_add_pd(q_acc_3_18,q_tmp_3_18); + q_acc_3_19 = _mm256_add_pd(q_acc_3_19,q_tmp_3_19); + q_acc_3_20 = _mm256_add_pd(q_acc_3_20,q_tmp_3_20); + q_acc_3_21 = _mm256_add_pd(q_acc_3_21,q_tmp_3_21); + q_acc_3_22 = _mm256_add_pd(q_acc_3_22,q_tmp_3_22); + q_acc_3_23 = _mm256_add_pd(q_acc_3_23,q_tmp_3_23); + q_acc_3_24 = _mm256_add_pd(q_acc_3_24,q_tmp_3_24); + q_acc_3_25 = _mm256_add_pd(q_acc_3_25,q_tmp_3_25); + q_acc_3_26 = _mm256_add_pd(q_acc_3_26,q_tmp_3_26); + q_acc_3_27 = _mm256_add_pd(q_acc_3_27,q_tmp_3_27); + q_acc_3_28 = _mm256_add_pd(q_acc_3_28,q_tmp_3_28); + q_acc_3_29 = _mm256_add_pd(q_acc_3_29,q_tmp_3_29); + } + const __m256d elMatVec_0 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(q_acc_0_0,src_dof_0),_mm256_mul_pd(q_acc_0_1,src_dof_1)),_mm256_mul_pd(q_acc_0_10,src_dof_10)),_mm256_mul_pd(q_acc_0_11,src_dof_11)),_mm256_mul_pd(q_acc_0_12,src_dof_12)),_mm256_mul_pd(q_acc_0_13,src_dof_13)),_mm256_mul_pd(q_acc_0_14,src_dof_14)),_mm256_mul_pd(q_acc_0_15,src_dof_15)),_mm256_mul_pd(q_acc_0_16,src_dof_16)),_mm256_mul_pd(q_acc_0_17,src_dof_17)),_mm256_mul_pd(q_acc_0_18,src_dof_18)),_mm256_mul_pd(q_acc_0_19,src_dof_19)),_mm256_mul_pd(q_acc_0_2,src_dof_2)),_mm256_mul_pd(q_acc_0_20,src_dof_20)),_mm256_mul_pd(q_acc_0_21,src_dof_21)),_mm256_mul_pd(q_acc_0_22,src_dof_22)),_mm256_mul_pd(q_acc_0_23,src_dof_23)),_mm256_mul_pd(q_acc_0_24,src_dof_24)),_mm256_mul_pd(q_acc_0_25,src_dof_25)),_mm256_mul_pd(q_acc_0_26,src_dof_26)),_mm256_mul_pd(q_acc_0_27,src_dof_27)),_mm256_mul_pd(q_acc_0_28,src_dof_28)),_mm256_mul_pd(q_acc_0_29,src_dof_29)),_mm256_mul_pd(q_acc_0_3,src_dof_3)),_mm256_mul_pd(q_acc_0_4,src_dof_4)),_mm256_mul_pd(q_acc_0_5,src_dof_5)),_mm256_mul_pd(q_acc_0_6,src_dof_6)),_mm256_mul_pd(q_acc_0_7,src_dof_7)),_mm256_mul_pd(q_acc_0_8,src_dof_8)),_mm256_mul_pd(q_acc_0_9,src_dof_9)); + const __m256d elMatVec_1 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(q_acc_1_0,src_dof_0),_mm256_mul_pd(q_acc_1_1,src_dof_1)),_mm256_mul_pd(q_acc_1_10,src_dof_10)),_mm256_mul_pd(q_acc_1_11,src_dof_11)),_mm256_mul_pd(q_acc_1_12,src_dof_12)),_mm256_mul_pd(q_acc_1_13,src_dof_13)),_mm256_mul_pd(q_acc_1_14,src_dof_14)),_mm256_mul_pd(q_acc_1_15,src_dof_15)),_mm256_mul_pd(q_acc_1_16,src_dof_16)),_mm256_mul_pd(q_acc_1_17,src_dof_17)),_mm256_mul_pd(q_acc_1_18,src_dof_18)),_mm256_mul_pd(q_acc_1_19,src_dof_19)),_mm256_mul_pd(q_acc_1_2,src_dof_2)),_mm256_mul_pd(q_acc_1_20,src_dof_20)),_mm256_mul_pd(q_acc_1_21,src_dof_21)),_mm256_mul_pd(q_acc_1_22,src_dof_22)),_mm256_mul_pd(q_acc_1_23,src_dof_23)),_mm256_mul_pd(q_acc_1_24,src_dof_24)),_mm256_mul_pd(q_acc_1_25,src_dof_25)),_mm256_mul_pd(q_acc_1_26,src_dof_26)),_mm256_mul_pd(q_acc_1_27,src_dof_27)),_mm256_mul_pd(q_acc_1_28,src_dof_28)),_mm256_mul_pd(q_acc_1_29,src_dof_29)),_mm256_mul_pd(q_acc_1_3,src_dof_3)),_mm256_mul_pd(q_acc_1_4,src_dof_4)),_mm256_mul_pd(q_acc_1_5,src_dof_5)),_mm256_mul_pd(q_acc_1_6,src_dof_6)),_mm256_mul_pd(q_acc_1_7,src_dof_7)),_mm256_mul_pd(q_acc_1_8,src_dof_8)),_mm256_mul_pd(q_acc_1_9,src_dof_9)); + const __m256d elMatVec_2 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(q_acc_2_0,src_dof_0),_mm256_mul_pd(q_acc_2_1,src_dof_1)),_mm256_mul_pd(q_acc_2_10,src_dof_10)),_mm256_mul_pd(q_acc_2_11,src_dof_11)),_mm256_mul_pd(q_acc_2_12,src_dof_12)),_mm256_mul_pd(q_acc_2_13,src_dof_13)),_mm256_mul_pd(q_acc_2_14,src_dof_14)),_mm256_mul_pd(q_acc_2_15,src_dof_15)),_mm256_mul_pd(q_acc_2_16,src_dof_16)),_mm256_mul_pd(q_acc_2_17,src_dof_17)),_mm256_mul_pd(q_acc_2_18,src_dof_18)),_mm256_mul_pd(q_acc_2_19,src_dof_19)),_mm256_mul_pd(q_acc_2_2,src_dof_2)),_mm256_mul_pd(q_acc_2_20,src_dof_20)),_mm256_mul_pd(q_acc_2_21,src_dof_21)),_mm256_mul_pd(q_acc_2_22,src_dof_22)),_mm256_mul_pd(q_acc_2_23,src_dof_23)),_mm256_mul_pd(q_acc_2_24,src_dof_24)),_mm256_mul_pd(q_acc_2_25,src_dof_25)),_mm256_mul_pd(q_acc_2_26,src_dof_26)),_mm256_mul_pd(q_acc_2_27,src_dof_27)),_mm256_mul_pd(q_acc_2_28,src_dof_28)),_mm256_mul_pd(q_acc_2_29,src_dof_29)),_mm256_mul_pd(q_acc_2_3,src_dof_3)),_mm256_mul_pd(q_acc_2_4,src_dof_4)),_mm256_mul_pd(q_acc_2_5,src_dof_5)),_mm256_mul_pd(q_acc_2_6,src_dof_6)),_mm256_mul_pd(q_acc_2_7,src_dof_7)),_mm256_mul_pd(q_acc_2_8,src_dof_8)),_mm256_mul_pd(q_acc_2_9,src_dof_9)); + const __m256d elMatVec_3 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(q_acc_3_0,src_dof_0),_mm256_mul_pd(q_acc_3_1,src_dof_1)),_mm256_mul_pd(q_acc_3_10,src_dof_10)),_mm256_mul_pd(q_acc_3_11,src_dof_11)),_mm256_mul_pd(q_acc_3_12,src_dof_12)),_mm256_mul_pd(q_acc_3_13,src_dof_13)),_mm256_mul_pd(q_acc_3_14,src_dof_14)),_mm256_mul_pd(q_acc_3_15,src_dof_15)),_mm256_mul_pd(q_acc_3_16,src_dof_16)),_mm256_mul_pd(q_acc_3_17,src_dof_17)),_mm256_mul_pd(q_acc_3_18,src_dof_18)),_mm256_mul_pd(q_acc_3_19,src_dof_19)),_mm256_mul_pd(q_acc_3_2,src_dof_2)),_mm256_mul_pd(q_acc_3_20,src_dof_20)),_mm256_mul_pd(q_acc_3_21,src_dof_21)),_mm256_mul_pd(q_acc_3_22,src_dof_22)),_mm256_mul_pd(q_acc_3_23,src_dof_23)),_mm256_mul_pd(q_acc_3_24,src_dof_24)),_mm256_mul_pd(q_acc_3_25,src_dof_25)),_mm256_mul_pd(q_acc_3_26,src_dof_26)),_mm256_mul_pd(q_acc_3_27,src_dof_27)),_mm256_mul_pd(q_acc_3_28,src_dof_28)),_mm256_mul_pd(q_acc_3_29,src_dof_29)),_mm256_mul_pd(q_acc_3_3,src_dof_3)),_mm256_mul_pd(q_acc_3_4,src_dof_4)),_mm256_mul_pd(q_acc_3_5,src_dof_5)),_mm256_mul_pd(q_acc_3_6,src_dof_6)),_mm256_mul_pd(q_acc_3_7,src_dof_7)),_mm256_mul_pd(q_acc_3_8,src_dof_8)),_mm256_mul_pd(q_acc_3_9,src_dof_9)); + _mm256_storeu_pd(&_data_dst[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1],_mm256_add_pd(elMatVec_0,_mm256_loadu_pd(& _data_dst[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1]))); + _mm256_storeu_pd(&_data_dst[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))],_mm256_add_pd(elMatVec_1,_mm256_loadu_pd(& _data_dst[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))]))); + _mm256_storeu_pd(&_data_dst[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1],_mm256_add_pd(elMatVec_2,_mm256_loadu_pd(& _data_dst[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1]))); + _mm256_storeu_pd(&_data_dst[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1],_mm256_add_pd(elMatVec_3,_mm256_loadu_pd(& _data_dst[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1]))); + } + for (int64_t ctr_0 = (int64_t)((-ctr_1 - ctr_2 + micro_edges_per_macro_edge - 1) / (4)) * (4); ctr_0 < -ctr_1 - ctr_2 + micro_edges_per_macro_edge - 1; ctr_0 += 1) + { + + const int64_t phantom_ctr_0 = ctr_0; + real_t _data_float_loop_ctr_array_dim_0[4]; + _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0; + _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1; + _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2; + _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3; + real_t _data_float_loop_ctr_array_dim_1[4]; + _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1; + real_t _data_float_loop_ctr_array_dim_2[4]; + _data_float_loop_ctr_array_dim_2[0] = (real_t) ctr_2; + _data_float_loop_ctr_array_dim_2[1] = (real_t) ctr_2; + _data_float_loop_ctr_array_dim_2[2] = (real_t) ctr_2; + _data_float_loop_ctr_array_dim_2[3] = (real_t) ctr_2; + + const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]; + const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]; + const real_t p_affine_0_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]; + const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]; + const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]; + const real_t p_affine_1_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]; + const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]; + const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]; + const real_t p_affine_2_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]; + const real_t p_affine_3_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_3_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_3_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0; + const real_t src_dof_0 = _data_src_vertex_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1]; + const real_t src_dof_1 = _data_src_vertex_0[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))]; + const real_t src_dof_2 = _data_src_vertex_0[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1]; + const real_t src_dof_3 = _data_src_vertex_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1]; + const real_t src_dof_4 = _data_src_edge_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + 1]; + const real_t src_dof_5 = _data_src_edge_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge - 1)*(micro_edges_per_macro_edge + 1)) / (6)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))]; + const real_t src_dof_6 = _data_src_edge_0[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]; + const real_t src_dof_7 = _data_src_edge_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 3*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + 1]; + const real_t src_dof_8 = _data_src_edge_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + 1]; + const real_t src_dof_9 = _data_src_edge_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 4*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]; + const real_t src_dof_10 = _data_src_vertex_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1]; + const real_t src_dof_11 = _data_src_vertex_1[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))]; + const real_t src_dof_12 = _data_src_vertex_1[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1]; + const real_t src_dof_13 = _data_src_vertex_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1]; + const real_t src_dof_14 = _data_src_edge_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + 1]; + const real_t src_dof_15 = _data_src_edge_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge - 1)*(micro_edges_per_macro_edge + 1)) / (6)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))]; + const real_t src_dof_16 = _data_src_edge_1[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]; + const real_t src_dof_17 = _data_src_edge_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 3*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + 1]; + const real_t src_dof_18 = _data_src_edge_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + 1]; + const real_t src_dof_19 = _data_src_edge_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 4*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]; + const real_t src_dof_20 = _data_src_vertex_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1]; + const real_t src_dof_21 = _data_src_vertex_2[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))]; + const real_t src_dof_22 = _data_src_vertex_2[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1]; + const real_t src_dof_23 = _data_src_vertex_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1]; + const real_t src_dof_24 = _data_src_edge_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + 1]; + const real_t src_dof_25 = _data_src_edge_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge - 1)*(micro_edges_per_macro_edge + 1)) / (6)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))]; + const real_t src_dof_26 = _data_src_edge_2[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]; + const real_t src_dof_27 = _data_src_edge_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 3*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + 1]; + const real_t src_dof_28 = _data_src_edge_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + 1]; + const real_t src_dof_29 = _data_src_edge_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 4*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]; + const real_t rho_dof_0 = _data_rhoVertex[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1]; + const real_t rho_dof_1 = _data_rhoVertex[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))]; + const real_t rho_dof_2 = _data_rhoVertex[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1]; + const real_t rho_dof_3 = _data_rhoVertex[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1]; + const real_t rho_dof_4 = _data_rhoEdge[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + 1]; + const real_t rho_dof_5 = _data_rhoEdge[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge - 1)*(micro_edges_per_macro_edge + 1)) / (6)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))]; + const real_t rho_dof_6 = _data_rhoEdge[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]; + const real_t rho_dof_7 = _data_rhoEdge[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 3*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + 1]; + const real_t rho_dof_8 = _data_rhoEdge[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + 1]; + const real_t rho_dof_9 = _data_rhoEdge[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 4*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]; + real_t q_acc_0_0 = 0.0; + real_t q_acc_0_1 = 0.0; + real_t q_acc_0_2 = 0.0; + real_t q_acc_0_3 = 0.0; + real_t q_acc_0_4 = 0.0; + real_t q_acc_0_5 = 0.0; + real_t q_acc_0_6 = 0.0; + real_t q_acc_0_7 = 0.0; + real_t q_acc_0_8 = 0.0; + real_t q_acc_0_9 = 0.0; + real_t q_acc_0_10 = 0.0; + real_t q_acc_0_11 = 0.0; + real_t q_acc_0_12 = 0.0; + real_t q_acc_0_13 = 0.0; + real_t q_acc_0_14 = 0.0; + real_t q_acc_0_15 = 0.0; + real_t q_acc_0_16 = 0.0; + real_t q_acc_0_17 = 0.0; + real_t q_acc_0_18 = 0.0; + real_t q_acc_0_19 = 0.0; + real_t q_acc_0_20 = 0.0; + real_t q_acc_0_21 = 0.0; + real_t q_acc_0_22 = 0.0; + real_t q_acc_0_23 = 0.0; + real_t q_acc_0_24 = 0.0; + real_t q_acc_0_25 = 0.0; + real_t q_acc_0_26 = 0.0; + real_t q_acc_0_27 = 0.0; + real_t q_acc_0_28 = 0.0; + real_t q_acc_0_29 = 0.0; + real_t q_acc_1_0 = 0.0; + real_t q_acc_1_1 = 0.0; + real_t q_acc_1_2 = 0.0; + real_t q_acc_1_3 = 0.0; + real_t q_acc_1_4 = 0.0; + real_t q_acc_1_5 = 0.0; + real_t q_acc_1_6 = 0.0; + real_t q_acc_1_7 = 0.0; + real_t q_acc_1_8 = 0.0; + real_t q_acc_1_9 = 0.0; + real_t q_acc_1_10 = 0.0; + real_t q_acc_1_11 = 0.0; + real_t q_acc_1_12 = 0.0; + real_t q_acc_1_13 = 0.0; + real_t q_acc_1_14 = 0.0; + real_t q_acc_1_15 = 0.0; + real_t q_acc_1_16 = 0.0; + real_t q_acc_1_17 = 0.0; + real_t q_acc_1_18 = 0.0; + real_t q_acc_1_19 = 0.0; + real_t q_acc_1_20 = 0.0; + real_t q_acc_1_21 = 0.0; + real_t q_acc_1_22 = 0.0; + real_t q_acc_1_23 = 0.0; + real_t q_acc_1_24 = 0.0; + real_t q_acc_1_25 = 0.0; + real_t q_acc_1_26 = 0.0; + real_t q_acc_1_27 = 0.0; + real_t q_acc_1_28 = 0.0; + real_t q_acc_1_29 = 0.0; + real_t q_acc_2_0 = 0.0; + real_t q_acc_2_1 = 0.0; + real_t q_acc_2_2 = 0.0; + real_t q_acc_2_3 = 0.0; + real_t q_acc_2_4 = 0.0; + real_t q_acc_2_5 = 0.0; + real_t q_acc_2_6 = 0.0; + real_t q_acc_2_7 = 0.0; + real_t q_acc_2_8 = 0.0; + real_t q_acc_2_9 = 0.0; + real_t q_acc_2_10 = 0.0; + real_t q_acc_2_11 = 0.0; + real_t q_acc_2_12 = 0.0; + real_t q_acc_2_13 = 0.0; + real_t q_acc_2_14 = 0.0; + real_t q_acc_2_15 = 0.0; + real_t q_acc_2_16 = 0.0; + real_t q_acc_2_17 = 0.0; + real_t q_acc_2_18 = 0.0; + real_t q_acc_2_19 = 0.0; + real_t q_acc_2_20 = 0.0; + real_t q_acc_2_21 = 0.0; + real_t q_acc_2_22 = 0.0; + real_t q_acc_2_23 = 0.0; + real_t q_acc_2_24 = 0.0; + real_t q_acc_2_25 = 0.0; + real_t q_acc_2_26 = 0.0; + real_t q_acc_2_27 = 0.0; + real_t q_acc_2_28 = 0.0; + real_t q_acc_2_29 = 0.0; + real_t q_acc_3_0 = 0.0; + real_t q_acc_3_1 = 0.0; + real_t q_acc_3_2 = 0.0; + real_t q_acc_3_3 = 0.0; + real_t q_acc_3_4 = 0.0; + real_t q_acc_3_5 = 0.0; + real_t q_acc_3_6 = 0.0; + real_t q_acc_3_7 = 0.0; + real_t q_acc_3_8 = 0.0; + real_t q_acc_3_9 = 0.0; + real_t q_acc_3_10 = 0.0; + real_t q_acc_3_11 = 0.0; + real_t q_acc_3_12 = 0.0; + real_t q_acc_3_13 = 0.0; + real_t q_acc_3_14 = 0.0; + real_t q_acc_3_15 = 0.0; + real_t q_acc_3_16 = 0.0; + real_t q_acc_3_17 = 0.0; + real_t q_acc_3_18 = 0.0; + real_t q_acc_3_19 = 0.0; + real_t q_acc_3_20 = 0.0; + real_t q_acc_3_21 = 0.0; + real_t q_acc_3_22 = 0.0; + real_t q_acc_3_23 = 0.0; + real_t q_acc_3_24 = 0.0; + real_t q_acc_3_25 = 0.0; + real_t q_acc_3_26 = 0.0; + real_t q_acc_3_27 = 0.0; + real_t q_acc_3_28 = 0.0; + real_t q_acc_3_29 = 0.0; + for (int64_t q = 0; q < 5; q += 1) + { + const real_t tmp_qloop_18 = p_affine_0_0 + (-p_affine_0_0 + p_affine_1_0)*_data_q_p_0[q] + (-p_affine_0_0 + p_affine_2_0)*_data_q_p_1[q] + (-p_affine_0_0 + p_affine_3_0)*_data_q_p_2[q]; + const real_t tmp_qloop_19 = (tmp_qloop_18*tmp_qloop_18); + const real_t tmp_qloop_22 = p_affine_0_2 + (-p_affine_0_2 + p_affine_1_2)*_data_q_p_0[q] + (-p_affine_0_2 + p_affine_2_2)*_data_q_p_1[q] + (-p_affine_0_2 + p_affine_3_2)*_data_q_p_2[q]; + const real_t tmp_qloop_23 = -tmp_qloop_12 - tmp_qloop_22; + const real_t tmp_qloop_24 = -tmp_qloop_1 - tmp_qloop_18; + const real_t tmp_qloop_25 = p_affine_0_1 + (-p_affine_0_1 + p_affine_1_1)*_data_q_p_0[q] + (-p_affine_0_1 + p_affine_2_1)*_data_q_p_1[q] + (-p_affine_0_1 + p_affine_3_1)*_data_q_p_2[q]; + const real_t tmp_qloop_26 = -tmp_qloop_25 - tmp_qloop_8; + const real_t tmp_qloop_27 = tmp_qloop_21*(tmp_qloop_10*tmp_qloop_23*tmp_qloop_9 + tmp_qloop_13*tmp_qloop_24*tmp_qloop_6 - tmp_qloop_14*tmp_qloop_24 - tmp_qloop_15*tmp_qloop_26 + tmp_qloop_2*tmp_qloop_26*tmp_qloop_3 - tmp_qloop_23*tmp_qloop_7); + const real_t tmp_qloop_28 = tmp_qloop_20 - tmp_qloop_27; + const real_t tmp_qloop_29 = (tmp_qloop_25*tmp_qloop_25); + const real_t tmp_qloop_30 = (tmp_qloop_22*tmp_qloop_22); + const real_t tmp_qloop_31 = tmp_qloop_19 + tmp_qloop_29 + tmp_qloop_30; + const real_t tmp_qloop_34 = tmp_qloop_18*tmp_qloop_33; + const real_t tmp_qloop_35 = tmp_qloop_28 + tmp_qloop_34; + const real_t tmp_qloop_36 = tmp_qloop_17*(-tmp_qloop_19*tmp_qloop_28 + tmp_qloop_31*tmp_qloop_35); + const real_t tmp_qloop_37 = pow(tmp_qloop_31, -1.5000000000000000); + const real_t tmp_qloop_38 = tmp_qloop_37*1.0; + const real_t tmp_qloop_41 = tmp_qloop_31*tmp_qloop_40; + const real_t tmp_qloop_42 = -tmp_qloop_25*tmp_qloop_28 - tmp_qloop_41; + const real_t tmp_qloop_43 = tmp_qloop_17*tmp_qloop_38; + const real_t tmp_qloop_44 = tmp_qloop_42*tmp_qloop_43; + const real_t tmp_qloop_47 = -tmp_qloop_28; + const real_t tmp_qloop_48 = tmp_qloop_22*tmp_qloop_47; + const real_t tmp_qloop_49 = tmp_qloop_31*tmp_qloop_46 + tmp_qloop_48; + const real_t tmp_qloop_50 = tmp_qloop_43*tmp_qloop_49; + const real_t tmp_qloop_51 = tmp_qloop_18*tmp_qloop_47 + tmp_qloop_31*tmp_qloop_33; + const real_t tmp_qloop_52 = tmp_qloop_43*tmp_qloop_51; + const real_t tmp_qloop_54 = tmp_qloop_25*tmp_qloop_53 + tmp_qloop_28; + const real_t tmp_qloop_55 = tmp_qloop_29*tmp_qloop_47 + tmp_qloop_31*tmp_qloop_54; + const real_t tmp_qloop_56 = tmp_qloop_22*tmp_qloop_46; + const real_t tmp_qloop_57 = tmp_qloop_28 + tmp_qloop_56; + const real_t tmp_qloop_58 = tmp_qloop_30*tmp_qloop_47 + tmp_qloop_31*tmp_qloop_57; + const real_t tmp_qloop_66 = pow(tmp_qloop_31, -2.5000000000000000)*3.0; + const real_t tmp_qloop_67 = tmp_qloop_36*tmp_qloop_66; + const real_t tmp_qloop_69 = tmp_qloop_18*2.0; + const real_t tmp_qloop_70 = -tmp_qloop_41; + const real_t tmp_qloop_71 = tmp_qloop_35*2.0; + const real_t tmp_qloop_73 = -tmp_qloop_31*tmp_qloop_72; + const real_t tmp_qloop_74 = -tmp_qloop_20 + tmp_qloop_27; + const real_t tmp_qloop_75 = tmp_qloop_18*tmp_qloop_68 + tmp_qloop_34*2.0 + tmp_qloop_74; + const real_t tmp_qloop_76 = tmp_qloop_17*tmp_qloop_66; + const real_t tmp_qloop_77 = tmp_qloop_51*tmp_qloop_76; + const real_t tmp_qloop_78 = tmp_qloop_25*tmp_qloop_77; + const real_t tmp_qloop_79 = tmp_qloop_25*2.0; + const real_t tmp_qloop_80 = tmp_qloop_18*tmp_qloop_40 + tmp_qloop_33*tmp_qloop_79; + const real_t tmp_qloop_81 = tmp_qloop_25*tmp_qloop_43; + const real_t tmp_qloop_82 = tmp_qloop_22*2.0; + const real_t tmp_qloop_83 = tmp_qloop_18*tmp_qloop_72 + tmp_qloop_33*tmp_qloop_82; + const real_t tmp_qloop_84 = -tmp_qloop_22*tmp_qloop_78; + const real_t tmp_qloop_85 = tmp_qloop_18*tmp_qloop_22; + const real_t tmp_qloop_86 = tmp_qloop_22*tmp_qloop_43; + const real_t tmp_qloop_87 = tmp_qloop_21*tmp_qloop_25*tmp_qloop_32 - tmp_qloop_40*tmp_qloop_69; + const real_t tmp_qloop_88 = tmp_qloop_18*tmp_qloop_43; + const real_t tmp_qloop_89 = tmp_qloop_42*tmp_qloop_76; + const real_t tmp_qloop_90 = -tmp_qloop_25*tmp_qloop_40 - tmp_qloop_28; + const real_t tmp_qloop_91 = tmp_qloop_25*tmp_qloop_89; + const real_t tmp_qloop_92 = tmp_qloop_25*tmp_qloop_72 - tmp_qloop_40*tmp_qloop_82; + const real_t tmp_qloop_93 = tmp_qloop_85*tmp_qloop_89; + const real_t tmp_qloop_94 = -tmp_qloop_31*tmp_qloop_68; + const real_t tmp_qloop_95 = tmp_qloop_55*tmp_qloop_76; + const real_t tmp_qloop_96 = tmp_qloop_22*tmp_qloop_68 + tmp_qloop_46*tmp_qloop_69; + const real_t tmp_qloop_97 = tmp_qloop_49*tmp_qloop_76; + const real_t tmp_qloop_98 = tmp_qloop_22*tmp_qloop_40 + tmp_qloop_46*tmp_qloop_79; + const real_t tmp_qloop_99 = tmp_qloop_25*tmp_qloop_97; + const real_t tmp_qloop_100 = tmp_qloop_18*tmp_qloop_99; + const real_t tmp_qloop_101 = tmp_qloop_22*tmp_qloop_72 + tmp_qloop_56*2.0 + tmp_qloop_74; + const real_t tmp_qloop_102 = tmp_qloop_58*tmp_qloop_76; + const real_t tmp_qloop_103 = (_data_q_p_0[q]*_data_q_p_0[q]); + const real_t tmp_qloop_104 = tmp_qloop_103*2.0; + const real_t tmp_qloop_105 = (_data_q_p_1[q]*_data_q_p_1[q]); + const real_t tmp_qloop_106 = tmp_qloop_105*2.0; + const real_t tmp_qloop_107 = (_data_q_p_2[q]*_data_q_p_2[q]); + const real_t tmp_qloop_108 = tmp_qloop_107*2.0; + const real_t tmp_qloop_109 = 4.0*_data_q_p_1[q]; + const real_t tmp_qloop_110 = tmp_qloop_109*_data_q_p_2[q]; + const real_t tmp_qloop_111 = 4.0*_data_q_p_0[q]; + const real_t tmp_qloop_112 = tmp_qloop_111*_data_q_p_1[q]; + const real_t tmp_qloop_113 = tmp_qloop_111*_data_q_p_2[q]; + const real_t tmp_qloop_114 = tmp_qloop_112 + tmp_qloop_113; + const real_t tmp_qloop_115 = tmp_qloop_104 + tmp_qloop_106 + tmp_qloop_108 + tmp_qloop_110 + tmp_qloop_114 - 3.0*_data_q_p_0[q] - 3.0*_data_q_p_1[q] - 3.0*_data_q_p_2[q] + 1.0; + const real_t tmp_qloop_116 = 4.0*_data_q_p_2[q]; + const real_t tmp_qloop_117 = tmp_qloop_109 + tmp_qloop_116; + const real_t tmp_qloop_118 = -rho_dof_8*tmp_qloop_109; + const real_t tmp_qloop_119 = rho_dof_0*(tmp_qloop_111 + tmp_qloop_117 - 3.0); + const real_t tmp_qloop_120 = -rho_dof_7*tmp_qloop_116 + tmp_qloop_119; + const real_t tmp_qloop_121 = rho_dof_1*(tmp_qloop_111 - 1.0) + rho_dof_5*tmp_qloop_116 + rho_dof_6*tmp_qloop_109 + rho_dof_9*(-tmp_qloop_117 - 8.0*_data_q_p_0[q] + 4.0) + tmp_qloop_118 + tmp_qloop_120; + const real_t tmp_qloop_122 = tmp_qloop_111 - 4.0; + const real_t tmp_qloop_123 = -rho_dof_9*tmp_qloop_111; + const real_t tmp_qloop_124 = rho_dof_2*(tmp_qloop_109 - 1.0) + rho_dof_4*tmp_qloop_116 + rho_dof_6*tmp_qloop_111 + rho_dof_8*(-tmp_qloop_116 - tmp_qloop_122 - 8.0*_data_q_p_1[q]) + tmp_qloop_120 + tmp_qloop_123; + const real_t tmp_qloop_125 = rho_dof_3*(tmp_qloop_116 - 1.0) + rho_dof_4*tmp_qloop_109 + rho_dof_5*tmp_qloop_111 + rho_dof_7*(-tmp_qloop_109 - tmp_qloop_122 - 8.0*_data_q_p_2[q]) + tmp_qloop_118 + tmp_qloop_119 + tmp_qloop_123; + const real_t tmp_qloop_127 = tmp_qloop_104 - _data_q_p_0[q]; + const real_t tmp_qloop_128 = tmp_qloop_106 - _data_q_p_1[q]; + const real_t tmp_qloop_129 = tmp_qloop_108 - _data_q_p_2[q]; + const real_t tmp_qloop_130 = tmp_qloop_107*-4.0 - tmp_qloop_110 - tmp_qloop_113 + tmp_qloop_116; + const real_t tmp_qloop_131 = tmp_qloop_105*4.0; + const real_t tmp_qloop_132 = tmp_qloop_109 - tmp_qloop_110 - tmp_qloop_112 - tmp_qloop_131; + const real_t tmp_qloop_133 = tmp_qloop_103*4.0; + const real_t tmp_qloop_134 = tmp_qloop_111 - tmp_qloop_114 - tmp_qloop_133; + const real_t tmp_qloop_158 = tmp_qloop_105*tmp_qloop_111; + const real_t jac_blending_0_0 = tmp_qloop_36*tmp_qloop_38; + const real_t jac_blending_0_1 = tmp_qloop_18*tmp_qloop_44; + const real_t jac_blending_0_2 = tmp_qloop_18*tmp_qloop_50; + const real_t jac_blending_1_0 = tmp_qloop_25*tmp_qloop_52; + const real_t jac_blending_1_1 = tmp_qloop_43*tmp_qloop_55; + const real_t tmp_qloop_63 = jac_blending_0_2*jac_blending_1_1; + const real_t jac_blending_1_2 = tmp_qloop_25*tmp_qloop_50; + const real_t tmp_qloop_60 = jac_blending_0_1*jac_blending_1_2; + const real_t jac_blending_2_0 = tmp_qloop_22*tmp_qloop_52; + const real_t jac_blending_2_1 = tmp_qloop_22*tmp_qloop_44; + const real_t tmp_qloop_61 = jac_blending_1_2*jac_blending_2_1; + const real_t jac_blending_2_2 = tmp_qloop_43*tmp_qloop_58; + const real_t tmp_qloop_59 = jac_blending_1_1*jac_blending_2_2; + const real_t tmp_qloop_62 = jac_blending_0_1*jac_blending_2_2; + const real_t tmp_qloop_64 = jac_blending_0_0*tmp_qloop_59 - jac_blending_0_0*tmp_qloop_61 + jac_blending_0_2*jac_blending_1_0*jac_blending_2_1 - jac_blending_1_0*tmp_qloop_62 + jac_blending_2_0*tmp_qloop_60 - jac_blending_2_0*tmp_qloop_63; + const real_t tmp_qloop_65 = 1.0 / (tmp_qloop_64); + const real_t abs_det_jac_blending = tmp_qloop_64; + const real_t tmp_qloop_135 = abs_det_jac_affine_BLUE_UP*abs_det_jac_blending*1.0 / (rho_dof_0*tmp_qloop_115 + rho_dof_1*tmp_qloop_127 + rho_dof_2*tmp_qloop_128 + rho_dof_3*tmp_qloop_129 + rho_dof_4*tmp_qloop_110 + rho_dof_5*tmp_qloop_113 + rho_dof_6*tmp_qloop_112 + rho_dof_7*tmp_qloop_130 + rho_dof_8*tmp_qloop_132 + rho_dof_9*tmp_qloop_134)*_data_q_w[q]; + const real_t tmp_qloop_136 = tmp_qloop_135*(1.0 - _data_q_p_0[q] - _data_q_p_1[q] - _data_q_p_2[q]); + const real_t tmp_qloop_142 = tmp_qloop_135*_data_q_p_0[q]; + const real_t tmp_qloop_144 = tmp_qloop_135*_data_q_p_2[q]; + const real_t tmp_qloop_147 = tmp_qloop_135*_data_q_p_1[q]; + const real_t tmp_qloop_159 = tmp_qloop_135*tmp_qloop_158; + const real_t tmp_qloop_161 = tmp_qloop_107*tmp_qloop_135; + const real_t jac_blending_inv_0_0 = tmp_qloop_65*(tmp_qloop_59 - tmp_qloop_61); + const real_t jac_blending_inv_0_1 = tmp_qloop_65*(jac_blending_0_2*jac_blending_2_1 - tmp_qloop_62); + const real_t jac_blending_inv_0_2 = tmp_qloop_65*(tmp_qloop_60 - tmp_qloop_63); + const real_t jac_blending_inv_1_0 = tmp_qloop_65*(-jac_blending_1_0*jac_blending_2_2 + jac_blending_1_2*jac_blending_2_0); + const real_t jac_blending_inv_1_1 = tmp_qloop_65*(jac_blending_0_0*jac_blending_2_2 - jac_blending_0_2*jac_blending_2_0); + const real_t jac_blending_inv_1_2 = tmp_qloop_65*(-jac_blending_0_0*jac_blending_1_2 + jac_blending_0_2*jac_blending_1_0); + const real_t jac_blending_inv_2_0 = tmp_qloop_65*(jac_blending_1_0*jac_blending_2_1 - jac_blending_1_1*jac_blending_2_0); + const real_t tmp_qloop_126 = tmp_qloop_121*(jac_affine_inv_0_0_BLUE_UP*jac_blending_inv_0_0 + jac_affine_inv_0_1_BLUE_UP*jac_blending_inv_1_0 + jac_affine_inv_0_2_BLUE_UP*jac_blending_inv_2_0) + tmp_qloop_124*(jac_affine_inv_1_0_BLUE_UP*jac_blending_inv_0_0 + jac_affine_inv_1_1_BLUE_UP*jac_blending_inv_1_0 + jac_affine_inv_1_2_BLUE_UP*jac_blending_inv_2_0) + tmp_qloop_125*(jac_affine_inv_2_0_BLUE_UP*jac_blending_inv_0_0 + jac_affine_inv_2_1_BLUE_UP*jac_blending_inv_1_0 + jac_affine_inv_2_2_BLUE_UP*jac_blending_inv_2_0); + const real_t tmp_qloop_137 = tmp_qloop_126*tmp_qloop_136; + const real_t tmp_qloop_143 = tmp_qloop_126*tmp_qloop_142; + const real_t tmp_qloop_145 = tmp_qloop_126*tmp_qloop_144; + const real_t tmp_qloop_146 = tmp_qloop_112*tmp_qloop_145; + const real_t tmp_qloop_148 = tmp_qloop_126*tmp_qloop_147; + const real_t tmp_qloop_157 = tmp_qloop_126*tmp_qloop_135; + const real_t tmp_qloop_160 = tmp_qloop_107*tmp_qloop_157; + const real_t jac_blending_inv_2_1 = tmp_qloop_65*(-jac_blending_0_0*jac_blending_2_1 + jac_blending_0_1*jac_blending_2_0); + const real_t tmp_qloop_138 = tmp_qloop_121*(jac_affine_inv_0_0_BLUE_UP*jac_blending_inv_0_1 + jac_affine_inv_0_1_BLUE_UP*jac_blending_inv_1_1 + jac_affine_inv_0_2_BLUE_UP*jac_blending_inv_2_1) + tmp_qloop_124*(jac_affine_inv_1_0_BLUE_UP*jac_blending_inv_0_1 + jac_affine_inv_1_1_BLUE_UP*jac_blending_inv_1_1 + jac_affine_inv_1_2_BLUE_UP*jac_blending_inv_2_1) + tmp_qloop_125*(jac_affine_inv_2_0_BLUE_UP*jac_blending_inv_0_1 + jac_affine_inv_2_1_BLUE_UP*jac_blending_inv_1_1 + jac_affine_inv_2_2_BLUE_UP*jac_blending_inv_2_1); + const real_t tmp_qloop_139 = tmp_qloop_136*tmp_qloop_138; + const real_t tmp_qloop_149 = tmp_qloop_138*tmp_qloop_142; + const real_t tmp_qloop_150 = tmp_qloop_138*tmp_qloop_144; + const real_t tmp_qloop_151 = tmp_qloop_112*tmp_qloop_150; + const real_t tmp_qloop_152 = tmp_qloop_138*tmp_qloop_147; + const real_t tmp_qloop_162 = tmp_qloop_138*tmp_qloop_161; + const real_t jac_blending_inv_2_2 = tmp_qloop_65*(jac_blending_0_0*jac_blending_1_1 - jac_blending_0_1*jac_blending_1_0); + const real_t tmp_qloop_140 = tmp_qloop_121*(jac_affine_inv_0_0_BLUE_UP*jac_blending_inv_0_2 + jac_affine_inv_0_1_BLUE_UP*jac_blending_inv_1_2 + jac_affine_inv_0_2_BLUE_UP*jac_blending_inv_2_2) + tmp_qloop_124*(jac_affine_inv_1_0_BLUE_UP*jac_blending_inv_0_2 + jac_affine_inv_1_1_BLUE_UP*jac_blending_inv_1_2 + jac_affine_inv_1_2_BLUE_UP*jac_blending_inv_2_2) + tmp_qloop_125*(jac_affine_inv_2_0_BLUE_UP*jac_blending_inv_0_2 + jac_affine_inv_2_1_BLUE_UP*jac_blending_inv_1_2 + jac_affine_inv_2_2_BLUE_UP*jac_blending_inv_2_2); + const real_t tmp_qloop_141 = tmp_qloop_136*tmp_qloop_140; + const real_t tmp_qloop_153 = tmp_qloop_140*tmp_qloop_142; + const real_t tmp_qloop_154 = tmp_qloop_140*tmp_qloop_144; + const real_t tmp_qloop_155 = tmp_qloop_112*tmp_qloop_154; + const real_t tmp_qloop_156 = tmp_qloop_140*tmp_qloop_147; + const real_t tmp_qloop_163 = tmp_qloop_140*tmp_qloop_161; + const real_t hessian_blending_0_0_0 = tmp_qloop_17*tmp_qloop_37*(tmp_qloop_19*tmp_qloop_68 - tmp_qloop_28*tmp_qloop_69 + tmp_qloop_31*(tmp_qloop_33 - tmp_qloop_68) + tmp_qloop_35*tmp_qloop_69)*1.0 - tmp_qloop_18*tmp_qloop_67; + const real_t hessian_blending_1_0_0 = tmp_qloop_17*tmp_qloop_37*(tmp_qloop_19*tmp_qloop_40 + tmp_qloop_25*tmp_qloop_71 + tmp_qloop_70)*1.0 - tmp_qloop_25*tmp_qloop_67; + const real_t hessian_blending_2_0_0 = tmp_qloop_17*tmp_qloop_37*(tmp_qloop_19*tmp_qloop_72 + tmp_qloop_22*tmp_qloop_71 + tmp_qloop_73)*1.0 - tmp_qloop_22*tmp_qloop_67; + const real_t hessian_blending_0_0_1 = tmp_qloop_17*tmp_qloop_25*tmp_qloop_37*tmp_qloop_75*1.0 - tmp_qloop_18*tmp_qloop_78; + const real_t hessian_blending_1_0_1 = -tmp_qloop_29*tmp_qloop_77 + tmp_qloop_52 + tmp_qloop_80*tmp_qloop_81; + const real_t hessian_blending_2_0_1 = tmp_qloop_81*tmp_qloop_83 + tmp_qloop_84; + const real_t hessian_blending_0_0_2 = tmp_qloop_17*tmp_qloop_22*tmp_qloop_37*tmp_qloop_75*1.0 - tmp_qloop_77*tmp_qloop_85; + const real_t hessian_blending_1_0_2 = tmp_qloop_80*tmp_qloop_86 + tmp_qloop_84; + const real_t hessian_blending_2_0_2 = -tmp_qloop_30*tmp_qloop_77 + tmp_qloop_52 + tmp_qloop_83*tmp_qloop_86; + const real_t hessian_blending_0_1_0 = -tmp_qloop_19*tmp_qloop_89 + tmp_qloop_44 + tmp_qloop_87*tmp_qloop_88; + const real_t hessian_blending_1_1_0 = tmp_qloop_17*tmp_qloop_18*tmp_qloop_37*tmp_qloop_90*1.0 - tmp_qloop_18*tmp_qloop_91; + const real_t hessian_blending_2_1_0 = tmp_qloop_88*tmp_qloop_92 - tmp_qloop_93; + const real_t hessian_blending_0_1_1 = tmp_qloop_17*tmp_qloop_37*(tmp_qloop_29*tmp_qloop_68 + tmp_qloop_54*tmp_qloop_69 + tmp_qloop_94)*1.0 - tmp_qloop_18*tmp_qloop_95; + const real_t hessian_blending_1_1_1 = tmp_qloop_17*tmp_qloop_37*(tmp_qloop_29*tmp_qloop_40 + tmp_qloop_31*(-tmp_qloop_40 + tmp_qloop_53) + tmp_qloop_47*tmp_qloop_79 + tmp_qloop_54*tmp_qloop_79)*1.0 - tmp_qloop_25*tmp_qloop_95; + const real_t hessian_blending_2_1_1 = tmp_qloop_17*tmp_qloop_37*(tmp_qloop_29*tmp_qloop_72 + tmp_qloop_54*tmp_qloop_82 + tmp_qloop_73)*1.0 - tmp_qloop_22*tmp_qloop_95; + const real_t hessian_blending_0_1_2 = tmp_qloop_17*tmp_qloop_22*tmp_qloop_37*tmp_qloop_87*1.0 - tmp_qloop_93; + const real_t hessian_blending_1_1_2 = tmp_qloop_17*tmp_qloop_22*tmp_qloop_37*tmp_qloop_90*1.0 - tmp_qloop_22*tmp_qloop_91; + const real_t hessian_blending_2_1_2 = -tmp_qloop_30*tmp_qloop_89 + tmp_qloop_44 + tmp_qloop_86*tmp_qloop_92; + const real_t hessian_blending_0_2_0 = -tmp_qloop_19*tmp_qloop_97 + tmp_qloop_50 + tmp_qloop_88*tmp_qloop_96; + const real_t hessian_blending_1_2_0 = -tmp_qloop_100 + tmp_qloop_17*tmp_qloop_18*tmp_qloop_37*tmp_qloop_98*1.0; + const real_t hessian_blending_2_2_0 = tmp_qloop_101*tmp_qloop_17*tmp_qloop_18*tmp_qloop_37*1.0 - tmp_qloop_85*tmp_qloop_97; + const real_t hessian_blending_0_2_1 = -tmp_qloop_100 + tmp_qloop_17*tmp_qloop_25*tmp_qloop_37*tmp_qloop_96*1.0; + const real_t hessian_blending_1_2_1 = -tmp_qloop_29*tmp_qloop_97 + tmp_qloop_50 + tmp_qloop_81*tmp_qloop_98; + const real_t hessian_blending_2_2_1 = tmp_qloop_101*tmp_qloop_17*tmp_qloop_25*tmp_qloop_37*1.0 - tmp_qloop_22*tmp_qloop_99; + const real_t hessian_blending_0_2_2 = -tmp_qloop_102*tmp_qloop_18 + tmp_qloop_17*tmp_qloop_37*(tmp_qloop_30*tmp_qloop_68 + tmp_qloop_57*tmp_qloop_69 + tmp_qloop_94)*1.0; + const real_t hessian_blending_1_2_2 = -tmp_qloop_102*tmp_qloop_25 + tmp_qloop_17*tmp_qloop_37*(tmp_qloop_30*tmp_qloop_40 + tmp_qloop_57*tmp_qloop_79 + tmp_qloop_70)*1.0; + const real_t hessian_blending_2_2_2 = -tmp_qloop_102*tmp_qloop_22 + tmp_qloop_17*tmp_qloop_37*(tmp_qloop_30*tmp_qloop_72 + tmp_qloop_31*(tmp_qloop_46 - tmp_qloop_72) + tmp_qloop_48*2.0 + tmp_qloop_57*tmp_qloop_82)*1.0; + const real_t q_tmp_0_0 = tmp_qloop_115*tmp_qloop_137; + const real_t q_tmp_0_1 = tmp_qloop_127*tmp_qloop_137; + const real_t q_tmp_0_2 = tmp_qloop_128*tmp_qloop_137; + const real_t q_tmp_0_3 = tmp_qloop_129*tmp_qloop_137; + const real_t q_tmp_0_4 = tmp_qloop_110*tmp_qloop_137; + const real_t q_tmp_0_5 = tmp_qloop_113*tmp_qloop_137; + const real_t q_tmp_0_6 = tmp_qloop_112*tmp_qloop_137; + const real_t q_tmp_0_7 = tmp_qloop_130*tmp_qloop_137; + const real_t q_tmp_0_8 = tmp_qloop_132*tmp_qloop_137; + const real_t q_tmp_0_9 = tmp_qloop_134*tmp_qloop_137; + const real_t q_tmp_0_10 = tmp_qloop_115*tmp_qloop_139; + const real_t q_tmp_0_11 = tmp_qloop_127*tmp_qloop_139; + const real_t q_tmp_0_12 = tmp_qloop_128*tmp_qloop_139; + const real_t q_tmp_0_13 = tmp_qloop_129*tmp_qloop_139; + const real_t q_tmp_0_14 = tmp_qloop_110*tmp_qloop_139; + const real_t q_tmp_0_15 = tmp_qloop_113*tmp_qloop_139; + const real_t q_tmp_0_16 = tmp_qloop_112*tmp_qloop_139; + const real_t q_tmp_0_17 = tmp_qloop_130*tmp_qloop_139; + const real_t q_tmp_0_18 = tmp_qloop_132*tmp_qloop_139; + const real_t q_tmp_0_19 = tmp_qloop_134*tmp_qloop_139; + const real_t q_tmp_0_20 = tmp_qloop_115*tmp_qloop_141; + const real_t q_tmp_0_21 = tmp_qloop_127*tmp_qloop_141; + const real_t q_tmp_0_22 = tmp_qloop_128*tmp_qloop_141; + const real_t q_tmp_0_23 = tmp_qloop_129*tmp_qloop_141; + const real_t q_tmp_0_24 = tmp_qloop_110*tmp_qloop_141; + const real_t q_tmp_0_25 = tmp_qloop_113*tmp_qloop_141; + const real_t q_tmp_0_26 = tmp_qloop_112*tmp_qloop_141; + const real_t q_tmp_0_27 = tmp_qloop_130*tmp_qloop_141; + const real_t q_tmp_0_28 = tmp_qloop_132*tmp_qloop_141; + const real_t q_tmp_0_29 = tmp_qloop_134*tmp_qloop_141; + const real_t q_tmp_1_0 = tmp_qloop_115*tmp_qloop_143; + const real_t q_tmp_1_1 = tmp_qloop_127*tmp_qloop_143; + const real_t q_tmp_1_2 = tmp_qloop_128*tmp_qloop_143; + const real_t q_tmp_1_3 = tmp_qloop_129*tmp_qloop_143; + const real_t q_tmp_1_4 = tmp_qloop_146; + const real_t q_tmp_1_5 = tmp_qloop_133*tmp_qloop_145; + const real_t q_tmp_1_6 = tmp_qloop_133*tmp_qloop_148; + const real_t q_tmp_1_7 = tmp_qloop_130*tmp_qloop_143; + const real_t q_tmp_1_8 = tmp_qloop_132*tmp_qloop_143; + const real_t q_tmp_1_9 = tmp_qloop_134*tmp_qloop_143; + const real_t q_tmp_1_10 = tmp_qloop_115*tmp_qloop_149; + const real_t q_tmp_1_11 = tmp_qloop_127*tmp_qloop_149; + const real_t q_tmp_1_12 = tmp_qloop_128*tmp_qloop_149; + const real_t q_tmp_1_13 = tmp_qloop_129*tmp_qloop_149; + const real_t q_tmp_1_14 = tmp_qloop_151; + const real_t q_tmp_1_15 = tmp_qloop_133*tmp_qloop_150; + const real_t q_tmp_1_16 = tmp_qloop_133*tmp_qloop_152; + const real_t q_tmp_1_17 = tmp_qloop_130*tmp_qloop_149; + const real_t q_tmp_1_18 = tmp_qloop_132*tmp_qloop_149; + const real_t q_tmp_1_19 = tmp_qloop_134*tmp_qloop_149; + const real_t q_tmp_1_20 = tmp_qloop_115*tmp_qloop_153; + const real_t q_tmp_1_21 = tmp_qloop_127*tmp_qloop_153; + const real_t q_tmp_1_22 = tmp_qloop_128*tmp_qloop_153; + const real_t q_tmp_1_23 = tmp_qloop_129*tmp_qloop_153; + const real_t q_tmp_1_24 = tmp_qloop_155; + const real_t q_tmp_1_25 = tmp_qloop_133*tmp_qloop_154; + const real_t q_tmp_1_26 = tmp_qloop_133*tmp_qloop_156; + const real_t q_tmp_1_27 = tmp_qloop_130*tmp_qloop_153; + const real_t q_tmp_1_28 = tmp_qloop_132*tmp_qloop_153; + const real_t q_tmp_1_29 = tmp_qloop_134*tmp_qloop_153; + const real_t q_tmp_2_0 = tmp_qloop_115*tmp_qloop_148; + const real_t q_tmp_2_1 = tmp_qloop_127*tmp_qloop_148; + const real_t q_tmp_2_2 = tmp_qloop_128*tmp_qloop_148; + const real_t q_tmp_2_3 = tmp_qloop_129*tmp_qloop_148; + const real_t q_tmp_2_4 = tmp_qloop_131*tmp_qloop_145; + const real_t q_tmp_2_5 = tmp_qloop_146; + const real_t q_tmp_2_6 = tmp_qloop_157*tmp_qloop_158; + const real_t q_tmp_2_7 = tmp_qloop_130*tmp_qloop_148; + const real_t q_tmp_2_8 = tmp_qloop_132*tmp_qloop_148; + const real_t q_tmp_2_9 = tmp_qloop_134*tmp_qloop_148; + const real_t q_tmp_2_10 = tmp_qloop_115*tmp_qloop_152; + const real_t q_tmp_2_11 = tmp_qloop_127*tmp_qloop_152; + const real_t q_tmp_2_12 = tmp_qloop_128*tmp_qloop_152; + const real_t q_tmp_2_13 = tmp_qloop_129*tmp_qloop_152; + const real_t q_tmp_2_14 = tmp_qloop_131*tmp_qloop_150; + const real_t q_tmp_2_15 = tmp_qloop_151; + const real_t q_tmp_2_16 = tmp_qloop_138*tmp_qloop_159; + const real_t q_tmp_2_17 = tmp_qloop_130*tmp_qloop_152; + const real_t q_tmp_2_18 = tmp_qloop_132*tmp_qloop_152; + const real_t q_tmp_2_19 = tmp_qloop_134*tmp_qloop_152; + const real_t q_tmp_2_20 = tmp_qloop_115*tmp_qloop_156; + const real_t q_tmp_2_21 = tmp_qloop_127*tmp_qloop_156; + const real_t q_tmp_2_22 = tmp_qloop_128*tmp_qloop_156; + const real_t q_tmp_2_23 = tmp_qloop_129*tmp_qloop_156; + const real_t q_tmp_2_24 = tmp_qloop_131*tmp_qloop_154; + const real_t q_tmp_2_25 = tmp_qloop_155; + const real_t q_tmp_2_26 = tmp_qloop_140*tmp_qloop_159; + const real_t q_tmp_2_27 = tmp_qloop_130*tmp_qloop_156; + const real_t q_tmp_2_28 = tmp_qloop_132*tmp_qloop_156; + const real_t q_tmp_2_29 = tmp_qloop_134*tmp_qloop_156; + const real_t q_tmp_3_0 = tmp_qloop_115*tmp_qloop_145; + const real_t q_tmp_3_1 = tmp_qloop_127*tmp_qloop_145; + const real_t q_tmp_3_2 = tmp_qloop_128*tmp_qloop_145; + const real_t q_tmp_3_3 = tmp_qloop_129*tmp_qloop_145; + const real_t q_tmp_3_4 = tmp_qloop_109*tmp_qloop_160; + const real_t q_tmp_3_5 = tmp_qloop_111*tmp_qloop_160; + const real_t q_tmp_3_6 = tmp_qloop_146; + const real_t q_tmp_3_7 = tmp_qloop_130*tmp_qloop_145; + const real_t q_tmp_3_8 = tmp_qloop_132*tmp_qloop_145; + const real_t q_tmp_3_9 = tmp_qloop_134*tmp_qloop_145; + const real_t q_tmp_3_10 = tmp_qloop_115*tmp_qloop_150; + const real_t q_tmp_3_11 = tmp_qloop_127*tmp_qloop_150; + const real_t q_tmp_3_12 = tmp_qloop_128*tmp_qloop_150; + const real_t q_tmp_3_13 = tmp_qloop_129*tmp_qloop_150; + const real_t q_tmp_3_14 = tmp_qloop_109*tmp_qloop_162; + const real_t q_tmp_3_15 = tmp_qloop_111*tmp_qloop_162; + const real_t q_tmp_3_16 = tmp_qloop_151; + const real_t q_tmp_3_17 = tmp_qloop_130*tmp_qloop_150; + const real_t q_tmp_3_18 = tmp_qloop_132*tmp_qloop_150; + const real_t q_tmp_3_19 = tmp_qloop_134*tmp_qloop_150; + const real_t q_tmp_3_20 = tmp_qloop_115*tmp_qloop_154; + const real_t q_tmp_3_21 = tmp_qloop_127*tmp_qloop_154; + const real_t q_tmp_3_22 = tmp_qloop_128*tmp_qloop_154; + const real_t q_tmp_3_23 = tmp_qloop_129*tmp_qloop_154; + const real_t q_tmp_3_24 = tmp_qloop_109*tmp_qloop_163; + const real_t q_tmp_3_25 = tmp_qloop_111*tmp_qloop_163; + const real_t q_tmp_3_26 = tmp_qloop_155; + const real_t q_tmp_3_27 = tmp_qloop_130*tmp_qloop_154; + const real_t q_tmp_3_28 = tmp_qloop_132*tmp_qloop_154; + const real_t q_tmp_3_29 = tmp_qloop_134*tmp_qloop_154; + q_acc_0_0 = q_acc_0_0 + q_tmp_0_0; + q_acc_0_1 = q_acc_0_1 + q_tmp_0_1; + q_acc_0_2 = q_acc_0_2 + q_tmp_0_2; + q_acc_0_3 = q_acc_0_3 + q_tmp_0_3; + q_acc_0_4 = q_acc_0_4 + q_tmp_0_4; + q_acc_0_5 = q_acc_0_5 + q_tmp_0_5; + q_acc_0_6 = q_acc_0_6 + q_tmp_0_6; + q_acc_0_7 = q_acc_0_7 + q_tmp_0_7; + q_acc_0_8 = q_acc_0_8 + q_tmp_0_8; + q_acc_0_9 = q_acc_0_9 + q_tmp_0_9; + q_acc_0_10 = q_acc_0_10 + q_tmp_0_10; + q_acc_0_11 = q_acc_0_11 + q_tmp_0_11; + q_acc_0_12 = q_acc_0_12 + q_tmp_0_12; + q_acc_0_13 = q_acc_0_13 + q_tmp_0_13; + q_acc_0_14 = q_acc_0_14 + q_tmp_0_14; + q_acc_0_15 = q_acc_0_15 + q_tmp_0_15; + q_acc_0_16 = q_acc_0_16 + q_tmp_0_16; + q_acc_0_17 = q_acc_0_17 + q_tmp_0_17; + q_acc_0_18 = q_acc_0_18 + q_tmp_0_18; + q_acc_0_19 = q_acc_0_19 + q_tmp_0_19; + q_acc_0_20 = q_acc_0_20 + q_tmp_0_20; + q_acc_0_21 = q_acc_0_21 + q_tmp_0_21; + q_acc_0_22 = q_acc_0_22 + q_tmp_0_22; + q_acc_0_23 = q_acc_0_23 + q_tmp_0_23; + q_acc_0_24 = q_acc_0_24 + q_tmp_0_24; + q_acc_0_25 = q_acc_0_25 + q_tmp_0_25; + q_acc_0_26 = q_acc_0_26 + q_tmp_0_26; + q_acc_0_27 = q_acc_0_27 + q_tmp_0_27; + q_acc_0_28 = q_acc_0_28 + q_tmp_0_28; + q_acc_0_29 = q_acc_0_29 + q_tmp_0_29; + q_acc_1_0 = q_acc_1_0 + q_tmp_1_0; + q_acc_1_1 = q_acc_1_1 + q_tmp_1_1; + q_acc_1_2 = q_acc_1_2 + q_tmp_1_2; + q_acc_1_3 = q_acc_1_3 + q_tmp_1_3; + q_acc_1_4 = q_acc_1_4 + q_tmp_1_4; + q_acc_1_5 = q_acc_1_5 + q_tmp_1_5; + q_acc_1_6 = q_acc_1_6 + q_tmp_1_6; + q_acc_1_7 = q_acc_1_7 + q_tmp_1_7; + q_acc_1_8 = q_acc_1_8 + q_tmp_1_8; + q_acc_1_9 = q_acc_1_9 + q_tmp_1_9; + q_acc_1_10 = q_acc_1_10 + q_tmp_1_10; + q_acc_1_11 = q_acc_1_11 + q_tmp_1_11; + q_acc_1_12 = q_acc_1_12 + q_tmp_1_12; + q_acc_1_13 = q_acc_1_13 + q_tmp_1_13; + q_acc_1_14 = q_acc_1_14 + q_tmp_1_14; + q_acc_1_15 = q_acc_1_15 + q_tmp_1_15; + q_acc_1_16 = q_acc_1_16 + q_tmp_1_16; + q_acc_1_17 = q_acc_1_17 + q_tmp_1_17; + q_acc_1_18 = q_acc_1_18 + q_tmp_1_18; + q_acc_1_19 = q_acc_1_19 + q_tmp_1_19; + q_acc_1_20 = q_acc_1_20 + q_tmp_1_20; + q_acc_1_21 = q_acc_1_21 + q_tmp_1_21; + q_acc_1_22 = q_acc_1_22 + q_tmp_1_22; + q_acc_1_23 = q_acc_1_23 + q_tmp_1_23; + q_acc_1_24 = q_acc_1_24 + q_tmp_1_24; + q_acc_1_25 = q_acc_1_25 + q_tmp_1_25; + q_acc_1_26 = q_acc_1_26 + q_tmp_1_26; + q_acc_1_27 = q_acc_1_27 + q_tmp_1_27; + q_acc_1_28 = q_acc_1_28 + q_tmp_1_28; + q_acc_1_29 = q_acc_1_29 + q_tmp_1_29; + q_acc_2_0 = q_acc_2_0 + q_tmp_2_0; + q_acc_2_1 = q_acc_2_1 + q_tmp_2_1; + q_acc_2_2 = q_acc_2_2 + q_tmp_2_2; + q_acc_2_3 = q_acc_2_3 + q_tmp_2_3; + q_acc_2_4 = q_acc_2_4 + q_tmp_2_4; + q_acc_2_5 = q_acc_2_5 + q_tmp_2_5; + q_acc_2_6 = q_acc_2_6 + q_tmp_2_6; + q_acc_2_7 = q_acc_2_7 + q_tmp_2_7; + q_acc_2_8 = q_acc_2_8 + q_tmp_2_8; + q_acc_2_9 = q_acc_2_9 + q_tmp_2_9; + q_acc_2_10 = q_acc_2_10 + q_tmp_2_10; + q_acc_2_11 = q_acc_2_11 + q_tmp_2_11; + q_acc_2_12 = q_acc_2_12 + q_tmp_2_12; + q_acc_2_13 = q_acc_2_13 + q_tmp_2_13; + q_acc_2_14 = q_acc_2_14 + q_tmp_2_14; + q_acc_2_15 = q_acc_2_15 + q_tmp_2_15; + q_acc_2_16 = q_acc_2_16 + q_tmp_2_16; + q_acc_2_17 = q_acc_2_17 + q_tmp_2_17; + q_acc_2_18 = q_acc_2_18 + q_tmp_2_18; + q_acc_2_19 = q_acc_2_19 + q_tmp_2_19; + q_acc_2_20 = q_acc_2_20 + q_tmp_2_20; + q_acc_2_21 = q_acc_2_21 + q_tmp_2_21; + q_acc_2_22 = q_acc_2_22 + q_tmp_2_22; + q_acc_2_23 = q_acc_2_23 + q_tmp_2_23; + q_acc_2_24 = q_acc_2_24 + q_tmp_2_24; + q_acc_2_25 = q_acc_2_25 + q_tmp_2_25; + q_acc_2_26 = q_acc_2_26 + q_tmp_2_26; + q_acc_2_27 = q_acc_2_27 + q_tmp_2_27; + q_acc_2_28 = q_acc_2_28 + q_tmp_2_28; + q_acc_2_29 = q_acc_2_29 + q_tmp_2_29; + q_acc_3_0 = q_acc_3_0 + q_tmp_3_0; + q_acc_3_1 = q_acc_3_1 + q_tmp_3_1; + q_acc_3_2 = q_acc_3_2 + q_tmp_3_2; + q_acc_3_3 = q_acc_3_3 + q_tmp_3_3; + q_acc_3_4 = q_acc_3_4 + q_tmp_3_4; + q_acc_3_5 = q_acc_3_5 + q_tmp_3_5; + q_acc_3_6 = q_acc_3_6 + q_tmp_3_6; + q_acc_3_7 = q_acc_3_7 + q_tmp_3_7; + q_acc_3_8 = q_acc_3_8 + q_tmp_3_8; + q_acc_3_9 = q_acc_3_9 + q_tmp_3_9; + q_acc_3_10 = q_acc_3_10 + q_tmp_3_10; + q_acc_3_11 = q_acc_3_11 + q_tmp_3_11; + q_acc_3_12 = q_acc_3_12 + q_tmp_3_12; + q_acc_3_13 = q_acc_3_13 + q_tmp_3_13; + q_acc_3_14 = q_acc_3_14 + q_tmp_3_14; + q_acc_3_15 = q_acc_3_15 + q_tmp_3_15; + q_acc_3_16 = q_acc_3_16 + q_tmp_3_16; + q_acc_3_17 = q_acc_3_17 + q_tmp_3_17; + q_acc_3_18 = q_acc_3_18 + q_tmp_3_18; + q_acc_3_19 = q_acc_3_19 + q_tmp_3_19; + q_acc_3_20 = q_acc_3_20 + q_tmp_3_20; + q_acc_3_21 = q_acc_3_21 + q_tmp_3_21; + q_acc_3_22 = q_acc_3_22 + q_tmp_3_22; + q_acc_3_23 = q_acc_3_23 + q_tmp_3_23; + q_acc_3_24 = q_acc_3_24 + q_tmp_3_24; + q_acc_3_25 = q_acc_3_25 + q_tmp_3_25; + q_acc_3_26 = q_acc_3_26 + q_tmp_3_26; + q_acc_3_27 = q_acc_3_27 + q_tmp_3_27; + q_acc_3_28 = q_acc_3_28 + q_tmp_3_28; + q_acc_3_29 = q_acc_3_29 + q_tmp_3_29; + } + const real_t elMatVec_0 = q_acc_0_0*src_dof_0 + q_acc_0_1*src_dof_1 + q_acc_0_10*src_dof_10 + q_acc_0_11*src_dof_11 + q_acc_0_12*src_dof_12 + q_acc_0_13*src_dof_13 + q_acc_0_14*src_dof_14 + q_acc_0_15*src_dof_15 + q_acc_0_16*src_dof_16 + q_acc_0_17*src_dof_17 + q_acc_0_18*src_dof_18 + q_acc_0_19*src_dof_19 + q_acc_0_2*src_dof_2 + q_acc_0_20*src_dof_20 + q_acc_0_21*src_dof_21 + q_acc_0_22*src_dof_22 + q_acc_0_23*src_dof_23 + q_acc_0_24*src_dof_24 + q_acc_0_25*src_dof_25 + q_acc_0_26*src_dof_26 + q_acc_0_27*src_dof_27 + q_acc_0_28*src_dof_28 + q_acc_0_29*src_dof_29 + q_acc_0_3*src_dof_3 + q_acc_0_4*src_dof_4 + q_acc_0_5*src_dof_5 + q_acc_0_6*src_dof_6 + q_acc_0_7*src_dof_7 + q_acc_0_8*src_dof_8 + q_acc_0_9*src_dof_9; + const real_t elMatVec_1 = q_acc_1_0*src_dof_0 + q_acc_1_1*src_dof_1 + q_acc_1_10*src_dof_10 + q_acc_1_11*src_dof_11 + q_acc_1_12*src_dof_12 + q_acc_1_13*src_dof_13 + q_acc_1_14*src_dof_14 + q_acc_1_15*src_dof_15 + q_acc_1_16*src_dof_16 + q_acc_1_17*src_dof_17 + q_acc_1_18*src_dof_18 + q_acc_1_19*src_dof_19 + q_acc_1_2*src_dof_2 + q_acc_1_20*src_dof_20 + q_acc_1_21*src_dof_21 + q_acc_1_22*src_dof_22 + q_acc_1_23*src_dof_23 + q_acc_1_24*src_dof_24 + q_acc_1_25*src_dof_25 + q_acc_1_26*src_dof_26 + q_acc_1_27*src_dof_27 + q_acc_1_28*src_dof_28 + q_acc_1_29*src_dof_29 + q_acc_1_3*src_dof_3 + q_acc_1_4*src_dof_4 + q_acc_1_5*src_dof_5 + q_acc_1_6*src_dof_6 + q_acc_1_7*src_dof_7 + q_acc_1_8*src_dof_8 + q_acc_1_9*src_dof_9; + const real_t elMatVec_2 = q_acc_2_0*src_dof_0 + q_acc_2_1*src_dof_1 + q_acc_2_10*src_dof_10 + q_acc_2_11*src_dof_11 + q_acc_2_12*src_dof_12 + q_acc_2_13*src_dof_13 + q_acc_2_14*src_dof_14 + q_acc_2_15*src_dof_15 + q_acc_2_16*src_dof_16 + q_acc_2_17*src_dof_17 + q_acc_2_18*src_dof_18 + q_acc_2_19*src_dof_19 + q_acc_2_2*src_dof_2 + q_acc_2_20*src_dof_20 + q_acc_2_21*src_dof_21 + q_acc_2_22*src_dof_22 + q_acc_2_23*src_dof_23 + q_acc_2_24*src_dof_24 + q_acc_2_25*src_dof_25 + q_acc_2_26*src_dof_26 + q_acc_2_27*src_dof_27 + q_acc_2_28*src_dof_28 + q_acc_2_29*src_dof_29 + q_acc_2_3*src_dof_3 + q_acc_2_4*src_dof_4 + q_acc_2_5*src_dof_5 + q_acc_2_6*src_dof_6 + q_acc_2_7*src_dof_7 + q_acc_2_8*src_dof_8 + q_acc_2_9*src_dof_9; + const real_t elMatVec_3 = q_acc_3_0*src_dof_0 + q_acc_3_1*src_dof_1 + q_acc_3_10*src_dof_10 + q_acc_3_11*src_dof_11 + q_acc_3_12*src_dof_12 + q_acc_3_13*src_dof_13 + q_acc_3_14*src_dof_14 + q_acc_3_15*src_dof_15 + q_acc_3_16*src_dof_16 + q_acc_3_17*src_dof_17 + q_acc_3_18*src_dof_18 + q_acc_3_19*src_dof_19 + q_acc_3_2*src_dof_2 + q_acc_3_20*src_dof_20 + q_acc_3_21*src_dof_21 + q_acc_3_22*src_dof_22 + q_acc_3_23*src_dof_23 + q_acc_3_24*src_dof_24 + q_acc_3_25*src_dof_25 + q_acc_3_26*src_dof_26 + q_acc_3_27*src_dof_27 + q_acc_3_28*src_dof_28 + q_acc_3_29*src_dof_29 + q_acc_3_3*src_dof_3 + q_acc_3_4*src_dof_4 + q_acc_3_5*src_dof_5 + q_acc_3_6*src_dof_6 + q_acc_3_7*src_dof_7 + q_acc_3_8*src_dof_8 + q_acc_3_9*src_dof_9; + _data_dst[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1] = elMatVec_0 + _data_dst[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1]; + _data_dst[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))] = elMatVec_1 + _data_dst[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))]; + _data_dst[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1] = elMatVec_2 + _data_dst[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1]; + _data_dst[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1] = elMatVec_3 + _data_dst[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1]; + } + } + } + const real_t tmp_coords_jac_0_BLUE_DOWN = 1.0 / (micro_edges_per_macro_edge_float)*1.0; + const real_t tmp_coords_jac_1_BLUE_DOWN = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_BLUE_DOWN*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0); + const real_t tmp_coords_jac_2_BLUE_DOWN = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_BLUE_DOWN*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1); + const real_t tmp_coords_jac_3_BLUE_DOWN = macro_vertex_coord_id_0comp2 + tmp_coords_jac_0_BLUE_DOWN*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2); + const real_t tmp_coords_jac_4_BLUE_DOWN = tmp_coords_jac_0_BLUE_DOWN*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0); + const real_t tmp_coords_jac_5_BLUE_DOWN = macro_vertex_coord_id_0comp0 + tmp_coords_jac_4_BLUE_DOWN; + const real_t tmp_coords_jac_6_BLUE_DOWN = tmp_coords_jac_0_BLUE_DOWN*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1); + const real_t tmp_coords_jac_7_BLUE_DOWN = macro_vertex_coord_id_0comp1 + tmp_coords_jac_6_BLUE_DOWN; + const real_t tmp_coords_jac_8_BLUE_DOWN = tmp_coords_jac_0_BLUE_DOWN*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2); + const real_t tmp_coords_jac_9_BLUE_DOWN = macro_vertex_coord_id_0comp2 + tmp_coords_jac_8_BLUE_DOWN; + const real_t p_affine_const_0_0_BLUE_DOWN = tmp_coords_jac_1_BLUE_DOWN; + const real_t p_affine_const_0_1_BLUE_DOWN = tmp_coords_jac_2_BLUE_DOWN; + const real_t p_affine_const_0_2_BLUE_DOWN = tmp_coords_jac_3_BLUE_DOWN; + const real_t p_affine_const_1_0_BLUE_DOWN = tmp_coords_jac_5_BLUE_DOWN; + const real_t p_affine_const_1_1_BLUE_DOWN = tmp_coords_jac_7_BLUE_DOWN; + const real_t p_affine_const_1_2_BLUE_DOWN = tmp_coords_jac_9_BLUE_DOWN; + const real_t p_affine_const_2_0_BLUE_DOWN = tmp_coords_jac_0_BLUE_DOWN*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0) + tmp_coords_jac_5_BLUE_DOWN; + const real_t p_affine_const_2_1_BLUE_DOWN = tmp_coords_jac_0_BLUE_DOWN*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1) + tmp_coords_jac_7_BLUE_DOWN; + const real_t p_affine_const_2_2_BLUE_DOWN = tmp_coords_jac_0_BLUE_DOWN*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2) + tmp_coords_jac_9_BLUE_DOWN; + const real_t p_affine_const_3_0_BLUE_DOWN = tmp_coords_jac_1_BLUE_DOWN + tmp_coords_jac_4_BLUE_DOWN; + const real_t p_affine_const_3_1_BLUE_DOWN = tmp_coords_jac_2_BLUE_DOWN + tmp_coords_jac_6_BLUE_DOWN; + const real_t p_affine_const_3_2_BLUE_DOWN = tmp_coords_jac_3_BLUE_DOWN + tmp_coords_jac_8_BLUE_DOWN; + const real_t jac_affine_0_0_BLUE_DOWN = -p_affine_const_0_0_BLUE_DOWN + p_affine_const_1_0_BLUE_DOWN; + const real_t jac_affine_0_1_BLUE_DOWN = -p_affine_const_0_0_BLUE_DOWN + p_affine_const_2_0_BLUE_DOWN; + const real_t jac_affine_0_2_BLUE_DOWN = -p_affine_const_0_0_BLUE_DOWN + p_affine_const_3_0_BLUE_DOWN; + const real_t jac_affine_1_0_BLUE_DOWN = -p_affine_const_0_1_BLUE_DOWN + p_affine_const_1_1_BLUE_DOWN; + const real_t jac_affine_1_1_BLUE_DOWN = -p_affine_const_0_1_BLUE_DOWN + p_affine_const_2_1_BLUE_DOWN; + const real_t tmp_coords_jac_14_BLUE_DOWN = jac_affine_0_2_BLUE_DOWN*jac_affine_1_1_BLUE_DOWN; + const real_t jac_affine_1_2_BLUE_DOWN = -p_affine_const_0_1_BLUE_DOWN + p_affine_const_3_1_BLUE_DOWN; + const real_t tmp_coords_jac_12_BLUE_DOWN = jac_affine_0_1_BLUE_DOWN*jac_affine_1_2_BLUE_DOWN; + const real_t jac_affine_2_0_BLUE_DOWN = -p_affine_const_0_2_BLUE_DOWN + p_affine_const_1_2_BLUE_DOWN; + const real_t jac_affine_2_1_BLUE_DOWN = -p_affine_const_0_2_BLUE_DOWN + p_affine_const_2_2_BLUE_DOWN; + const real_t tmp_coords_jac_11_BLUE_DOWN = jac_affine_1_2_BLUE_DOWN*jac_affine_2_1_BLUE_DOWN; + const real_t jac_affine_2_2_BLUE_DOWN = -p_affine_const_0_2_BLUE_DOWN + p_affine_const_3_2_BLUE_DOWN; + const real_t tmp_coords_jac_10_BLUE_DOWN = jac_affine_1_1_BLUE_DOWN*jac_affine_2_2_BLUE_DOWN; + const real_t tmp_coords_jac_13_BLUE_DOWN = jac_affine_0_1_BLUE_DOWN*jac_affine_2_2_BLUE_DOWN; + const real_t tmp_coords_jac_15_BLUE_DOWN = jac_affine_0_0_BLUE_DOWN*tmp_coords_jac_10_BLUE_DOWN - jac_affine_0_0_BLUE_DOWN*tmp_coords_jac_11_BLUE_DOWN + jac_affine_0_2_BLUE_DOWN*jac_affine_1_0_BLUE_DOWN*jac_affine_2_1_BLUE_DOWN - jac_affine_1_0_BLUE_DOWN*tmp_coords_jac_13_BLUE_DOWN + jac_affine_2_0_BLUE_DOWN*tmp_coords_jac_12_BLUE_DOWN - jac_affine_2_0_BLUE_DOWN*tmp_coords_jac_14_BLUE_DOWN; + const real_t tmp_coords_jac_16_BLUE_DOWN = 1.0 / (tmp_coords_jac_15_BLUE_DOWN); + const real_t jac_affine_inv_0_0_BLUE_DOWN = tmp_coords_jac_16_BLUE_DOWN*(tmp_coords_jac_10_BLUE_DOWN - tmp_coords_jac_11_BLUE_DOWN); + const real_t jac_affine_inv_0_1_BLUE_DOWN = tmp_coords_jac_16_BLUE_DOWN*(jac_affine_0_2_BLUE_DOWN*jac_affine_2_1_BLUE_DOWN - tmp_coords_jac_13_BLUE_DOWN); + const real_t jac_affine_inv_0_2_BLUE_DOWN = tmp_coords_jac_16_BLUE_DOWN*(tmp_coords_jac_12_BLUE_DOWN - tmp_coords_jac_14_BLUE_DOWN); + const real_t jac_affine_inv_1_0_BLUE_DOWN = tmp_coords_jac_16_BLUE_DOWN*(-jac_affine_1_0_BLUE_DOWN*jac_affine_2_2_BLUE_DOWN + jac_affine_1_2_BLUE_DOWN*jac_affine_2_0_BLUE_DOWN); + const real_t jac_affine_inv_1_1_BLUE_DOWN = tmp_coords_jac_16_BLUE_DOWN*(jac_affine_0_0_BLUE_DOWN*jac_affine_2_2_BLUE_DOWN - jac_affine_0_2_BLUE_DOWN*jac_affine_2_0_BLUE_DOWN); + const real_t jac_affine_inv_1_2_BLUE_DOWN = tmp_coords_jac_16_BLUE_DOWN*(-jac_affine_0_0_BLUE_DOWN*jac_affine_1_2_BLUE_DOWN + jac_affine_0_2_BLUE_DOWN*jac_affine_1_0_BLUE_DOWN); + const real_t jac_affine_inv_2_0_BLUE_DOWN = tmp_coords_jac_16_BLUE_DOWN*(jac_affine_1_0_BLUE_DOWN*jac_affine_2_1_BLUE_DOWN - jac_affine_1_1_BLUE_DOWN*jac_affine_2_0_BLUE_DOWN); + const real_t jac_affine_inv_2_1_BLUE_DOWN = tmp_coords_jac_16_BLUE_DOWN*(-jac_affine_0_0_BLUE_DOWN*jac_affine_2_1_BLUE_DOWN + jac_affine_0_1_BLUE_DOWN*jac_affine_2_0_BLUE_DOWN); + const real_t jac_affine_inv_2_2_BLUE_DOWN = tmp_coords_jac_16_BLUE_DOWN*(jac_affine_0_0_BLUE_DOWN*jac_affine_1_1_BLUE_DOWN - jac_affine_0_1_BLUE_DOWN*jac_affine_1_0_BLUE_DOWN); + const real_t abs_det_jac_affine_BLUE_DOWN = abs(tmp_coords_jac_15_BLUE_DOWN); + { + /* CellType.BLUE_DOWN */ + for (int64_t ctr_2 = 0; ctr_2 < micro_edges_per_macro_edge; ctr_2 += 1) + for (int64_t ctr_1 = 0; ctr_1 < -ctr_2 + micro_edges_per_macro_edge; ctr_1 += 1) + { + for (int64_t ctr_0 = 0; ctr_0 < (int64_t)((-ctr_1 - ctr_2 + micro_edges_per_macro_edge - 1) / (4)) * (4); ctr_0 += 4) + { + + const int64_t phantom_ctr_0 = ctr_0; + real_t _data_float_loop_ctr_array_dim_0[4]; + _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0; + _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1; + _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2; + _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3; + real_t _data_float_loop_ctr_array_dim_1[4]; + _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1; + real_t _data_float_loop_ctr_array_dim_2[4]; + _data_float_loop_ctr_array_dim_2[0] = (real_t) ctr_2; + _data_float_loop_ctr_array_dim_2[1] = (real_t) ctr_2; + _data_float_loop_ctr_array_dim_2[2] = (real_t) ctr_2; + _data_float_loop_ctr_array_dim_2[3] = (real_t) ctr_2; + + const __m256d p_affine_0_0 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_3comp0,macro_vertex_coord_id_3comp0,macro_vertex_coord_id_3comp0,macro_vertex_coord_id_3comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)); + const __m256d p_affine_0_1 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_3comp1,macro_vertex_coord_id_3comp1,macro_vertex_coord_id_3comp1,macro_vertex_coord_id_3comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)); + const __m256d p_affine_0_2 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)),_mm256_set_pd(macro_vertex_coord_id_1comp2,macro_vertex_coord_id_1comp2,macro_vertex_coord_id_1comp2,macro_vertex_coord_id_1comp2)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)),_mm256_set_pd(macro_vertex_coord_id_2comp2,macro_vertex_coord_id_2comp2,macro_vertex_coord_id_2comp2,macro_vertex_coord_id_2comp2)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)),_mm256_set_pd(macro_vertex_coord_id_3comp2,macro_vertex_coord_id_3comp2,macro_vertex_coord_id_3comp2,macro_vertex_coord_id_3comp2)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)); + const __m256d p_affine_1_0 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_3comp0,macro_vertex_coord_id_3comp0,macro_vertex_coord_id_3comp0,macro_vertex_coord_id_3comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)); + const __m256d p_affine_1_1 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_3comp1,macro_vertex_coord_id_3comp1,macro_vertex_coord_id_3comp1,macro_vertex_coord_id_3comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)); + const __m256d p_affine_1_2 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)),_mm256_set_pd(macro_vertex_coord_id_1comp2,macro_vertex_coord_id_1comp2,macro_vertex_coord_id_1comp2,macro_vertex_coord_id_1comp2)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)),_mm256_set_pd(macro_vertex_coord_id_2comp2,macro_vertex_coord_id_2comp2,macro_vertex_coord_id_2comp2,macro_vertex_coord_id_2comp2)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)),_mm256_set_pd(macro_vertex_coord_id_3comp2,macro_vertex_coord_id_3comp2,macro_vertex_coord_id_3comp2,macro_vertex_coord_id_3comp2)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)); + const __m256d p_affine_2_0 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_3comp0,macro_vertex_coord_id_3comp0,macro_vertex_coord_id_3comp0,macro_vertex_coord_id_3comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)); + const __m256d p_affine_2_1 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_3comp1,macro_vertex_coord_id_3comp1,macro_vertex_coord_id_3comp1,macro_vertex_coord_id_3comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)); + const __m256d p_affine_2_2 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)),_mm256_set_pd(macro_vertex_coord_id_1comp2,macro_vertex_coord_id_1comp2,macro_vertex_coord_id_1comp2,macro_vertex_coord_id_1comp2)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)),_mm256_set_pd(macro_vertex_coord_id_2comp2,macro_vertex_coord_id_2comp2,macro_vertex_coord_id_2comp2,macro_vertex_coord_id_2comp2)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)),_mm256_set_pd(macro_vertex_coord_id_3comp2,macro_vertex_coord_id_3comp2,macro_vertex_coord_id_3comp2,macro_vertex_coord_id_3comp2)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)); + const __m256d p_affine_3_0 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_3comp0,macro_vertex_coord_id_3comp0,macro_vertex_coord_id_3comp0,macro_vertex_coord_id_3comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)); + const __m256d p_affine_3_1 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_3comp1,macro_vertex_coord_id_3comp1,macro_vertex_coord_id_3comp1,macro_vertex_coord_id_3comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)); + const __m256d p_affine_3_2 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)),_mm256_set_pd(macro_vertex_coord_id_1comp2,macro_vertex_coord_id_1comp2,macro_vertex_coord_id_1comp2,macro_vertex_coord_id_1comp2)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)),_mm256_set_pd(macro_vertex_coord_id_2comp2,macro_vertex_coord_id_2comp2,macro_vertex_coord_id_2comp2,macro_vertex_coord_id_2comp2)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)),_mm256_set_pd(macro_vertex_coord_id_3comp2,macro_vertex_coord_id_3comp2,macro_vertex_coord_id_3comp2,macro_vertex_coord_id_3comp2)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)); + const __m256d src_dof_0 = _mm256_loadu_pd(& _data_src_vertex_0[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))]); + const __m256d src_dof_1 = _mm256_loadu_pd(& _data_src_vertex_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))]); + const __m256d src_dof_2 = _mm256_loadu_pd(& _data_src_vertex_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1]); + const __m256d src_dof_3 = _mm256_loadu_pd(& _data_src_vertex_0[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))]); + const __m256d src_dof_4 = _mm256_loadu_pd(& _data_src_edge_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + 4*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))]); + const __m256d src_dof_5 = _mm256_loadu_pd(& _data_src_edge_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))]); + const __m256d src_dof_6 = _mm256_loadu_pd(& _data_src_edge_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))]); + const __m256d src_dof_7 = _mm256_loadu_pd(& _data_src_edge_0[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 3*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]); + const __m256d src_dof_8 = _mm256_loadu_pd(& _data_src_edge_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge - 1)*(micro_edges_per_macro_edge + 1)) / (6)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))]); + const __m256d src_dof_9 = _mm256_loadu_pd(& _data_src_edge_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]); + const __m256d src_dof_10 = _mm256_loadu_pd(& _data_src_vertex_1[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))]); + const __m256d src_dof_11 = _mm256_loadu_pd(& _data_src_vertex_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))]); + const __m256d src_dof_12 = _mm256_loadu_pd(& _data_src_vertex_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1]); + const __m256d src_dof_13 = _mm256_loadu_pd(& _data_src_vertex_1[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))]); + const __m256d src_dof_14 = _mm256_loadu_pd(& _data_src_edge_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + 4*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))]); + const __m256d src_dof_15 = _mm256_loadu_pd(& _data_src_edge_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))]); + const __m256d src_dof_16 = _mm256_loadu_pd(& _data_src_edge_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))]); + const __m256d src_dof_17 = _mm256_loadu_pd(& _data_src_edge_1[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 3*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]); + const __m256d src_dof_18 = _mm256_loadu_pd(& _data_src_edge_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge - 1)*(micro_edges_per_macro_edge + 1)) / (6)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))]); + const __m256d src_dof_19 = _mm256_loadu_pd(& _data_src_edge_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]); + const __m256d src_dof_20 = _mm256_loadu_pd(& _data_src_vertex_2[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))]); + const __m256d src_dof_21 = _mm256_loadu_pd(& _data_src_vertex_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))]); + const __m256d src_dof_22 = _mm256_loadu_pd(& _data_src_vertex_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1]); + const __m256d src_dof_23 = _mm256_loadu_pd(& _data_src_vertex_2[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))]); + const __m256d src_dof_24 = _mm256_loadu_pd(& _data_src_edge_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + 4*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))]); + const __m256d src_dof_25 = _mm256_loadu_pd(& _data_src_edge_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))]); + const __m256d src_dof_26 = _mm256_loadu_pd(& _data_src_edge_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))]); + const __m256d src_dof_27 = _mm256_loadu_pd(& _data_src_edge_2[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 3*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]); + const __m256d src_dof_28 = _mm256_loadu_pd(& _data_src_edge_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge - 1)*(micro_edges_per_macro_edge + 1)) / (6)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))]); + const __m256d src_dof_29 = _mm256_loadu_pd(& _data_src_edge_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]); + const __m256d rho_dof_0 = _mm256_loadu_pd(& _data_rhoVertex[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))]); + const __m256d rho_dof_1 = _mm256_loadu_pd(& _data_rhoVertex[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))]); + const __m256d rho_dof_2 = _mm256_loadu_pd(& _data_rhoVertex[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1]); + const __m256d rho_dof_3 = _mm256_loadu_pd(& _data_rhoVertex[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))]); + const __m256d rho_dof_4 = _mm256_loadu_pd(& _data_rhoEdge[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + 4*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))]); + const __m256d rho_dof_5 = _mm256_loadu_pd(& _data_rhoEdge[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))]); + const __m256d rho_dof_6 = _mm256_loadu_pd(& _data_rhoEdge[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))]); + const __m256d rho_dof_7 = _mm256_loadu_pd(& _data_rhoEdge[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 3*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]); + const __m256d rho_dof_8 = _mm256_loadu_pd(& _data_rhoEdge[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge - 1)*(micro_edges_per_macro_edge + 1)) / (6)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))]); + const __m256d rho_dof_9 = _mm256_loadu_pd(& _data_rhoEdge[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]); + __m256d q_acc_0_0 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_1 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_2 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_3 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_4 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_5 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_6 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_7 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_8 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_9 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_10 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_11 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_12 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_13 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_14 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_15 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_16 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_17 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_18 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_19 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_20 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_21 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_22 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_23 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_24 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_25 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_26 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_27 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_28 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_29 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_0 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_1 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_2 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_3 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_4 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_5 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_6 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_7 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_8 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_9 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_10 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_11 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_12 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_13 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_14 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_15 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_16 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_17 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_18 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_19 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_20 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_21 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_22 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_23 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_24 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_25 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_26 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_27 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_28 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_29 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_0 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_1 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_2 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_3 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_4 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_5 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_6 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_7 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_8 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_9 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_10 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_11 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_12 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_13 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_14 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_15 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_16 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_17 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_18 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_19 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_20 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_21 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_22 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_23 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_24 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_25 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_26 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_27 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_28 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_29 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_0 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_1 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_2 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_3 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_4 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_5 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_6 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_7 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_8 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_9 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_10 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_11 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_12 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_13 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_14 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_15 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_16 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_17 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_18 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_19 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_20 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_21 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_22 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_23 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_24 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_25 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_26 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_27 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_28 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_29 = _mm256_set_pd(0.0,0.0,0.0,0.0); + for (int64_t q = 0; q < 5; q += 1) + { + const __m256d tmp_qloop_18 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(p_affine_0_0,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),p_affine_1_0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(p_affine_0_0,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),p_affine_2_0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]))),_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(p_affine_0_0,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),p_affine_3_0),_mm256_set_pd(_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q]))),p_affine_0_0); + const __m256d tmp_qloop_19 = _mm256_mul_pd(tmp_qloop_18,tmp_qloop_18); + const __m256d tmp_qloop_22 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(p_affine_0_2,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),p_affine_1_2),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(p_affine_0_2,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),p_affine_2_2),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]))),_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(p_affine_0_2,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),p_affine_3_2),_mm256_set_pd(_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q]))),p_affine_0_2); + const __m256d tmp_qloop_23 = _mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(tmp_qloop_12,tmp_qloop_12,tmp_qloop_12,tmp_qloop_12)),_mm256_mul_pd(tmp_qloop_22,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))); + const __m256d tmp_qloop_24 = _mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(tmp_qloop_1,tmp_qloop_1,tmp_qloop_1,tmp_qloop_1)),_mm256_mul_pd(tmp_qloop_18,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))); + const __m256d tmp_qloop_25 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(p_affine_0_1,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),p_affine_1_1),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(p_affine_0_1,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),p_affine_2_1),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]))),_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(p_affine_0_1,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),p_affine_3_1),_mm256_set_pd(_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q]))),p_affine_0_1); + const __m256d tmp_qloop_26 = _mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(tmp_qloop_8,tmp_qloop_8,tmp_qloop_8,tmp_qloop_8)),_mm256_mul_pd(tmp_qloop_25,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))); + const __m256d tmp_qloop_27 = _mm256_mul_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_24,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_qloop_14,tmp_qloop_14,tmp_qloop_14,tmp_qloop_14)),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_26,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_qloop_15,tmp_qloop_15,tmp_qloop_15,tmp_qloop_15))),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_23,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_qloop_7,tmp_qloop_7,tmp_qloop_7,tmp_qloop_7))),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_23,_mm256_set_pd(tmp_qloop_10,tmp_qloop_10,tmp_qloop_10,tmp_qloop_10)),_mm256_set_pd(tmp_qloop_9,tmp_qloop_9,tmp_qloop_9,tmp_qloop_9))),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_24,_mm256_set_pd(tmp_qloop_13,tmp_qloop_13,tmp_qloop_13,tmp_qloop_13)),_mm256_set_pd(tmp_qloop_6,tmp_qloop_6,tmp_qloop_6,tmp_qloop_6))),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_26,_mm256_set_pd(tmp_qloop_2,tmp_qloop_2,tmp_qloop_2,tmp_qloop_2)),_mm256_set_pd(tmp_qloop_3,tmp_qloop_3,tmp_qloop_3,tmp_qloop_3))),_mm256_set_pd(tmp_qloop_21,tmp_qloop_21,tmp_qloop_21,tmp_qloop_21)); + const __m256d tmp_qloop_28 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_27,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_qloop_20,tmp_qloop_20,tmp_qloop_20,tmp_qloop_20)); + const __m256d tmp_qloop_29 = _mm256_mul_pd(tmp_qloop_25,tmp_qloop_25); + const __m256d tmp_qloop_30 = _mm256_mul_pd(tmp_qloop_22,tmp_qloop_22); + const __m256d tmp_qloop_31 = _mm256_add_pd(_mm256_add_pd(tmp_qloop_19,tmp_qloop_29),tmp_qloop_30); + const __m256d tmp_qloop_34 = _mm256_mul_pd(tmp_qloop_18,_mm256_set_pd(tmp_qloop_33,tmp_qloop_33,tmp_qloop_33,tmp_qloop_33)); + const __m256d tmp_qloop_35 = _mm256_add_pd(tmp_qloop_28,tmp_qloop_34); + const __m256d tmp_qloop_36 = _mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_31,tmp_qloop_35),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_19,tmp_qloop_28),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_set_pd(tmp_qloop_17,tmp_qloop_17,tmp_qloop_17,tmp_qloop_17)); + const __m256d tmp_qloop_37 = _mm256_div_pd(_mm256_sqrt_pd(tmp_qloop_31),_mm256_mul_pd(tmp_qloop_31,tmp_qloop_31)); + const __m256d tmp_qloop_38 = _mm256_mul_pd(tmp_qloop_37,_mm256_set_pd(1.0,1.0,1.0,1.0)); + const __m256d tmp_qloop_41 = _mm256_mul_pd(tmp_qloop_31,_mm256_set_pd(tmp_qloop_40,tmp_qloop_40,tmp_qloop_40,tmp_qloop_40)); + const __m256d tmp_qloop_42 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_41,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_25,tmp_qloop_28),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))); + const __m256d tmp_qloop_43 = _mm256_mul_pd(tmp_qloop_38,_mm256_set_pd(tmp_qloop_17,tmp_qloop_17,tmp_qloop_17,tmp_qloop_17)); + const __m256d tmp_qloop_44 = _mm256_mul_pd(tmp_qloop_42,tmp_qloop_43); + const __m256d tmp_qloop_47 = _mm256_mul_pd(tmp_qloop_28,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)); + const __m256d tmp_qloop_48 = _mm256_mul_pd(tmp_qloop_22,tmp_qloop_47); + const __m256d tmp_qloop_49 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_31,_mm256_set_pd(tmp_qloop_46,tmp_qloop_46,tmp_qloop_46,tmp_qloop_46)),tmp_qloop_48); + const __m256d tmp_qloop_50 = _mm256_mul_pd(tmp_qloop_43,tmp_qloop_49); + const __m256d tmp_qloop_51 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_31,_mm256_set_pd(tmp_qloop_33,tmp_qloop_33,tmp_qloop_33,tmp_qloop_33)),_mm256_mul_pd(tmp_qloop_18,tmp_qloop_47)); + const __m256d tmp_qloop_52 = _mm256_mul_pd(tmp_qloop_43,tmp_qloop_51); + const __m256d tmp_qloop_54 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_25,_mm256_set_pd(tmp_qloop_53,tmp_qloop_53,tmp_qloop_53,tmp_qloop_53)),tmp_qloop_28); + const __m256d tmp_qloop_55 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_29,tmp_qloop_47),_mm256_mul_pd(tmp_qloop_31,tmp_qloop_54)); + const __m256d tmp_qloop_56 = _mm256_mul_pd(tmp_qloop_22,_mm256_set_pd(tmp_qloop_46,tmp_qloop_46,tmp_qloop_46,tmp_qloop_46)); + const __m256d tmp_qloop_57 = _mm256_add_pd(tmp_qloop_28,tmp_qloop_56); + const __m256d tmp_qloop_58 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_30,tmp_qloop_47),_mm256_mul_pd(tmp_qloop_31,tmp_qloop_57)); + const __m256d tmp_qloop_66 = _mm256_mul_pd(_mm256_div_pd(_mm256_sqrt_pd(tmp_qloop_31),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_31,tmp_qloop_31),tmp_qloop_31)),_mm256_set_pd(3.0,3.0,3.0,3.0)); + const __m256d tmp_qloop_67 = _mm256_mul_pd(tmp_qloop_36,tmp_qloop_66); + const __m256d tmp_qloop_69 = _mm256_mul_pd(tmp_qloop_18,_mm256_set_pd(2.0,2.0,2.0,2.0)); + const __m256d tmp_qloop_70 = _mm256_mul_pd(tmp_qloop_41,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)); + const __m256d tmp_qloop_71 = _mm256_mul_pd(tmp_qloop_35,_mm256_set_pd(2.0,2.0,2.0,2.0)); + const __m256d tmp_qloop_73 = _mm256_mul_pd(_mm256_mul_pd(tmp_qloop_31,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_qloop_72,tmp_qloop_72,tmp_qloop_72,tmp_qloop_72)); + const __m256d tmp_qloop_74 = _mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(tmp_qloop_20,tmp_qloop_20,tmp_qloop_20,tmp_qloop_20)),tmp_qloop_27); + const __m256d tmp_qloop_75 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_34,_mm256_set_pd(2.0,2.0,2.0,2.0)),_mm256_mul_pd(tmp_qloop_18,_mm256_set_pd(tmp_qloop_68,tmp_qloop_68,tmp_qloop_68,tmp_qloop_68))),tmp_qloop_74); + const __m256d tmp_qloop_76 = _mm256_mul_pd(tmp_qloop_66,_mm256_set_pd(tmp_qloop_17,tmp_qloop_17,tmp_qloop_17,tmp_qloop_17)); + const __m256d tmp_qloop_77 = _mm256_mul_pd(tmp_qloop_51,tmp_qloop_76); + const __m256d tmp_qloop_78 = _mm256_mul_pd(tmp_qloop_25,tmp_qloop_77); + const __m256d tmp_qloop_79 = _mm256_mul_pd(tmp_qloop_25,_mm256_set_pd(2.0,2.0,2.0,2.0)); + const __m256d tmp_qloop_80 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_79,_mm256_set_pd(tmp_qloop_33,tmp_qloop_33,tmp_qloop_33,tmp_qloop_33)),_mm256_mul_pd(tmp_qloop_18,_mm256_set_pd(tmp_qloop_40,tmp_qloop_40,tmp_qloop_40,tmp_qloop_40))); + const __m256d tmp_qloop_81 = _mm256_mul_pd(tmp_qloop_25,tmp_qloop_43); + const __m256d tmp_qloop_82 = _mm256_mul_pd(tmp_qloop_22,_mm256_set_pd(2.0,2.0,2.0,2.0)); + const __m256d tmp_qloop_83 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_82,_mm256_set_pd(tmp_qloop_33,tmp_qloop_33,tmp_qloop_33,tmp_qloop_33)),_mm256_mul_pd(tmp_qloop_18,_mm256_set_pd(tmp_qloop_72,tmp_qloop_72,tmp_qloop_72,tmp_qloop_72))); + const __m256d tmp_qloop_84 = _mm256_mul_pd(_mm256_mul_pd(tmp_qloop_22,tmp_qloop_78),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)); + const __m256d tmp_qloop_85 = _mm256_mul_pd(tmp_qloop_18,tmp_qloop_22); + const __m256d tmp_qloop_86 = _mm256_mul_pd(tmp_qloop_22,tmp_qloop_43); + const __m256d tmp_qloop_87 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_69,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_qloop_40,tmp_qloop_40,tmp_qloop_40,tmp_qloop_40)),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_25,_mm256_set_pd(tmp_qloop_21,tmp_qloop_21,tmp_qloop_21,tmp_qloop_21)),_mm256_set_pd(tmp_qloop_32,tmp_qloop_32,tmp_qloop_32,tmp_qloop_32))); + const __m256d tmp_qloop_88 = _mm256_mul_pd(tmp_qloop_18,tmp_qloop_43); + const __m256d tmp_qloop_89 = _mm256_mul_pd(tmp_qloop_42,tmp_qloop_76); + const __m256d tmp_qloop_90 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_28,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_25,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_qloop_40,tmp_qloop_40,tmp_qloop_40,tmp_qloop_40))); + const __m256d tmp_qloop_91 = _mm256_mul_pd(tmp_qloop_25,tmp_qloop_89); + const __m256d tmp_qloop_92 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_25,_mm256_set_pd(tmp_qloop_72,tmp_qloop_72,tmp_qloop_72,tmp_qloop_72)),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_82,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_qloop_40,tmp_qloop_40,tmp_qloop_40,tmp_qloop_40))); + const __m256d tmp_qloop_93 = _mm256_mul_pd(tmp_qloop_85,tmp_qloop_89); + const __m256d tmp_qloop_94 = _mm256_mul_pd(_mm256_mul_pd(tmp_qloop_31,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_qloop_68,tmp_qloop_68,tmp_qloop_68,tmp_qloop_68)); + const __m256d tmp_qloop_95 = _mm256_mul_pd(tmp_qloop_55,tmp_qloop_76); + const __m256d tmp_qloop_96 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_69,_mm256_set_pd(tmp_qloop_46,tmp_qloop_46,tmp_qloop_46,tmp_qloop_46)),_mm256_mul_pd(tmp_qloop_22,_mm256_set_pd(tmp_qloop_68,tmp_qloop_68,tmp_qloop_68,tmp_qloop_68))); + const __m256d tmp_qloop_97 = _mm256_mul_pd(tmp_qloop_49,tmp_qloop_76); + const __m256d tmp_qloop_98 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_22,_mm256_set_pd(tmp_qloop_40,tmp_qloop_40,tmp_qloop_40,tmp_qloop_40)),_mm256_mul_pd(tmp_qloop_79,_mm256_set_pd(tmp_qloop_46,tmp_qloop_46,tmp_qloop_46,tmp_qloop_46))); + const __m256d tmp_qloop_99 = _mm256_mul_pd(tmp_qloop_25,tmp_qloop_97); + const __m256d tmp_qloop_100 = _mm256_mul_pd(tmp_qloop_18,tmp_qloop_99); + const __m256d tmp_qloop_101 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_56,_mm256_set_pd(2.0,2.0,2.0,2.0)),_mm256_mul_pd(tmp_qloop_22,_mm256_set_pd(tmp_qloop_72,tmp_qloop_72,tmp_qloop_72,tmp_qloop_72))),tmp_qloop_74); + const __m256d tmp_qloop_102 = _mm256_mul_pd(tmp_qloop_58,tmp_qloop_76); + const __m256d tmp_qloop_103 = _mm256_mul_pd(_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q]),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])); + const __m256d tmp_qloop_104 = _mm256_mul_pd(tmp_qloop_103,_mm256_set_pd(2.0,2.0,2.0,2.0)); + const __m256d tmp_qloop_105 = _mm256_mul_pd(_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q])); + const __m256d tmp_qloop_106 = _mm256_mul_pd(tmp_qloop_105,_mm256_set_pd(2.0,2.0,2.0,2.0)); + const __m256d tmp_qloop_107 = _mm256_mul_pd(_mm256_set_pd(_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q]),_mm256_set_pd(_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q])); + const __m256d tmp_qloop_108 = _mm256_mul_pd(tmp_qloop_107,_mm256_set_pd(2.0,2.0,2.0,2.0)); + const __m256d tmp_qloop_109 = _mm256_mul_pd(_mm256_set_pd(4.0,4.0,4.0,4.0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q])); + const __m256d tmp_qloop_110 = _mm256_mul_pd(tmp_qloop_109,_mm256_set_pd(_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q])); + const __m256d tmp_qloop_111 = _mm256_mul_pd(_mm256_set_pd(4.0,4.0,4.0,4.0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])); + const __m256d tmp_qloop_112 = _mm256_mul_pd(tmp_qloop_111,_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q])); + const __m256d tmp_qloop_113 = _mm256_mul_pd(tmp_qloop_111,_mm256_set_pd(_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q])); + const __m256d tmp_qloop_114 = _mm256_add_pd(tmp_qloop_112,tmp_qloop_113); + const __m256d tmp_qloop_115 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-3.0,-3.0,-3.0,-3.0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),_mm256_mul_pd(_mm256_set_pd(-3.0,-3.0,-3.0,-3.0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]))),_mm256_mul_pd(_mm256_set_pd(-3.0,-3.0,-3.0,-3.0),_mm256_set_pd(_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),tmp_qloop_104),tmp_qloop_106),tmp_qloop_108),tmp_qloop_110),tmp_qloop_114); + const __m256d tmp_qloop_116 = _mm256_mul_pd(_mm256_set_pd(4.0,4.0,4.0,4.0),_mm256_set_pd(_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q])); + const __m256d tmp_qloop_117 = _mm256_add_pd(tmp_qloop_109,tmp_qloop_116); + const __m256d tmp_qloop_118 = _mm256_mul_pd(_mm256_mul_pd(rho_dof_8,tmp_qloop_109),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)); + const __m256d tmp_qloop_119 = _mm256_mul_pd(rho_dof_0,_mm256_add_pd(_mm256_add_pd(_mm256_set_pd(-3.0,-3.0,-3.0,-3.0),tmp_qloop_111),tmp_qloop_117)); + const __m256d tmp_qloop_120 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(rho_dof_7,tmp_qloop_116),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),tmp_qloop_119); + const __m256d tmp_qloop_121 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(rho_dof_1,_mm256_add_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),tmp_qloop_111)),_mm256_mul_pd(rho_dof_9,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_117,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_set_pd(-8.0,-8.0,-8.0,-8.0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q]))),_mm256_set_pd(4.0,4.0,4.0,4.0)))),_mm256_mul_pd(rho_dof_5,tmp_qloop_116)),_mm256_mul_pd(rho_dof_6,tmp_qloop_109)),tmp_qloop_118),tmp_qloop_120); + const __m256d tmp_qloop_122 = _mm256_add_pd(_mm256_set_pd(-4.0,-4.0,-4.0,-4.0),tmp_qloop_111); + const __m256d tmp_qloop_123 = _mm256_mul_pd(_mm256_mul_pd(rho_dof_9,tmp_qloop_111),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)); + const __m256d tmp_qloop_124 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(rho_dof_2,_mm256_add_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),tmp_qloop_109)),_mm256_mul_pd(rho_dof_8,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_116,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_122,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_mul_pd(_mm256_set_pd(-8.0,-8.0,-8.0,-8.0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]))))),_mm256_mul_pd(rho_dof_4,tmp_qloop_116)),_mm256_mul_pd(rho_dof_6,tmp_qloop_111)),tmp_qloop_120),tmp_qloop_123); + const __m256d tmp_qloop_125 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(rho_dof_3,_mm256_add_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),tmp_qloop_116)),_mm256_mul_pd(rho_dof_7,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_109,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_122,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_mul_pd(_mm256_set_pd(-8.0,-8.0,-8.0,-8.0),_mm256_set_pd(_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q]))))),_mm256_mul_pd(rho_dof_4,tmp_qloop_109)),_mm256_mul_pd(rho_dof_5,tmp_qloop_111)),tmp_qloop_118),tmp_qloop_119),tmp_qloop_123); + const __m256d tmp_qloop_127 = _mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),tmp_qloop_104); + const __m256d tmp_qloop_128 = _mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q])),tmp_qloop_106); + const __m256d tmp_qloop_129 = _mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q])),tmp_qloop_108); + const __m256d tmp_qloop_130 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_110,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_113,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_mul_pd(tmp_qloop_107,_mm256_set_pd(-4.0,-4.0,-4.0,-4.0))),tmp_qloop_116); + const __m256d tmp_qloop_131 = _mm256_mul_pd(tmp_qloop_105,_mm256_set_pd(4.0,4.0,4.0,4.0)); + const __m256d tmp_qloop_132 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_110,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_112,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_mul_pd(tmp_qloop_131,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_109); + const __m256d tmp_qloop_133 = _mm256_mul_pd(tmp_qloop_103,_mm256_set_pd(4.0,4.0,4.0,4.0)); + const __m256d tmp_qloop_134 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_114,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_133,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_111); + const __m256d tmp_qloop_158 = _mm256_mul_pd(tmp_qloop_105,tmp_qloop_111); + const __m256d jac_blending_0_0 = _mm256_mul_pd(tmp_qloop_36,tmp_qloop_38); + const __m256d jac_blending_0_1 = _mm256_mul_pd(tmp_qloop_18,tmp_qloop_44); + const __m256d jac_blending_0_2 = _mm256_mul_pd(tmp_qloop_18,tmp_qloop_50); + const __m256d jac_blending_1_0 = _mm256_mul_pd(tmp_qloop_25,tmp_qloop_52); + const __m256d jac_blending_1_1 = _mm256_mul_pd(tmp_qloop_43,tmp_qloop_55); + const __m256d tmp_qloop_63 = _mm256_mul_pd(jac_blending_0_2,jac_blending_1_1); + const __m256d jac_blending_1_2 = _mm256_mul_pd(tmp_qloop_25,tmp_qloop_50); + const __m256d tmp_qloop_60 = _mm256_mul_pd(jac_blending_0_1,jac_blending_1_2); + const __m256d jac_blending_2_0 = _mm256_mul_pd(tmp_qloop_22,tmp_qloop_52); + const __m256d jac_blending_2_1 = _mm256_mul_pd(tmp_qloop_22,tmp_qloop_44); + const __m256d tmp_qloop_61 = _mm256_mul_pd(jac_blending_1_2,jac_blending_2_1); + const __m256d jac_blending_2_2 = _mm256_mul_pd(tmp_qloop_43,tmp_qloop_58); + const __m256d tmp_qloop_59 = _mm256_mul_pd(jac_blending_1_1,jac_blending_2_2); + const __m256d tmp_qloop_62 = _mm256_mul_pd(jac_blending_0_1,jac_blending_2_2); + const __m256d tmp_qloop_64 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(jac_blending_0_0,tmp_qloop_59),_mm256_mul_pd(jac_blending_2_0,tmp_qloop_60)),_mm256_mul_pd(_mm256_mul_pd(jac_blending_0_0,tmp_qloop_61),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_mul_pd(_mm256_mul_pd(jac_blending_1_0,tmp_qloop_62),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_mul_pd(_mm256_mul_pd(jac_blending_2_0,tmp_qloop_63),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_mul_pd(_mm256_mul_pd(jac_blending_0_2,jac_blending_1_0),jac_blending_2_1)); + const __m256d tmp_qloop_65 = _mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),tmp_qloop_64); + const __m256d abs_det_jac_blending = tmp_qloop_64; + const __m256d tmp_qloop_135 = _mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(abs_det_jac_blending,_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(rho_dof_0,tmp_qloop_115),_mm256_mul_pd(rho_dof_1,tmp_qloop_127)),_mm256_mul_pd(rho_dof_2,tmp_qloop_128)),_mm256_mul_pd(rho_dof_3,tmp_qloop_129)),_mm256_mul_pd(rho_dof_4,tmp_qloop_110)),_mm256_mul_pd(rho_dof_5,tmp_qloop_113)),_mm256_mul_pd(rho_dof_6,tmp_qloop_112)),_mm256_mul_pd(rho_dof_7,tmp_qloop_130)),_mm256_mul_pd(rho_dof_8,tmp_qloop_132)),_mm256_mul_pd(rho_dof_9,tmp_qloop_134)))),_mm256_set_pd(_data_q_w[q],_data_q_w[q],_data_q_w[q],_data_q_w[q])),_mm256_set_pd(abs_det_jac_affine_BLUE_DOWN,abs_det_jac_affine_BLUE_DOWN,abs_det_jac_affine_BLUE_DOWN,abs_det_jac_affine_BLUE_DOWN)); + const __m256d tmp_qloop_136 = _mm256_mul_pd(tmp_qloop_135,_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]))),_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q]))),_mm256_set_pd(1.0,1.0,1.0,1.0))); + const __m256d tmp_qloop_142 = _mm256_mul_pd(tmp_qloop_135,_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])); + const __m256d tmp_qloop_144 = _mm256_mul_pd(tmp_qloop_135,_mm256_set_pd(_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q])); + const __m256d tmp_qloop_147 = _mm256_mul_pd(tmp_qloop_135,_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q])); + const __m256d tmp_qloop_159 = _mm256_mul_pd(tmp_qloop_135,tmp_qloop_158); + const __m256d tmp_qloop_161 = _mm256_mul_pd(tmp_qloop_107,tmp_qloop_135); + const __m256d jac_blending_inv_0_0 = _mm256_mul_pd(tmp_qloop_65,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_61,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),tmp_qloop_59)); + const __m256d jac_blending_inv_0_1 = _mm256_mul_pd(tmp_qloop_65,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_62,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(jac_blending_0_2,jac_blending_2_1))); + const __m256d jac_blending_inv_0_2 = _mm256_mul_pd(tmp_qloop_65,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_63,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),tmp_qloop_60)); + const __m256d jac_blending_inv_1_0 = _mm256_mul_pd(tmp_qloop_65,_mm256_add_pd(_mm256_mul_pd(jac_blending_1_2,jac_blending_2_0),_mm256_mul_pd(_mm256_mul_pd(jac_blending_1_0,jac_blending_2_2),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)))); + const __m256d jac_blending_inv_1_1 = _mm256_mul_pd(tmp_qloop_65,_mm256_add_pd(_mm256_mul_pd(jac_blending_0_0,jac_blending_2_2),_mm256_mul_pd(_mm256_mul_pd(jac_blending_0_2,jac_blending_2_0),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)))); + const __m256d jac_blending_inv_1_2 = _mm256_mul_pd(tmp_qloop_65,_mm256_add_pd(_mm256_mul_pd(jac_blending_0_2,jac_blending_1_0),_mm256_mul_pd(_mm256_mul_pd(jac_blending_0_0,jac_blending_1_2),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)))); + const __m256d jac_blending_inv_2_0 = _mm256_mul_pd(tmp_qloop_65,_mm256_add_pd(_mm256_mul_pd(jac_blending_1_0,jac_blending_2_1),_mm256_mul_pd(_mm256_mul_pd(jac_blending_1_1,jac_blending_2_0),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)))); + const __m256d tmp_qloop_126 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_121,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_0,_mm256_set_pd(jac_affine_inv_0_0_BLUE_DOWN,jac_affine_inv_0_0_BLUE_DOWN,jac_affine_inv_0_0_BLUE_DOWN,jac_affine_inv_0_0_BLUE_DOWN)),_mm256_mul_pd(jac_blending_inv_1_0,_mm256_set_pd(jac_affine_inv_0_1_BLUE_DOWN,jac_affine_inv_0_1_BLUE_DOWN,jac_affine_inv_0_1_BLUE_DOWN,jac_affine_inv_0_1_BLUE_DOWN))),_mm256_mul_pd(jac_blending_inv_2_0,_mm256_set_pd(jac_affine_inv_0_2_BLUE_DOWN,jac_affine_inv_0_2_BLUE_DOWN,jac_affine_inv_0_2_BLUE_DOWN,jac_affine_inv_0_2_BLUE_DOWN)))),_mm256_mul_pd(tmp_qloop_124,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_0,_mm256_set_pd(jac_affine_inv_1_0_BLUE_DOWN,jac_affine_inv_1_0_BLUE_DOWN,jac_affine_inv_1_0_BLUE_DOWN,jac_affine_inv_1_0_BLUE_DOWN)),_mm256_mul_pd(jac_blending_inv_1_0,_mm256_set_pd(jac_affine_inv_1_1_BLUE_DOWN,jac_affine_inv_1_1_BLUE_DOWN,jac_affine_inv_1_1_BLUE_DOWN,jac_affine_inv_1_1_BLUE_DOWN))),_mm256_mul_pd(jac_blending_inv_2_0,_mm256_set_pd(jac_affine_inv_1_2_BLUE_DOWN,jac_affine_inv_1_2_BLUE_DOWN,jac_affine_inv_1_2_BLUE_DOWN,jac_affine_inv_1_2_BLUE_DOWN))))),_mm256_mul_pd(tmp_qloop_125,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_0,_mm256_set_pd(jac_affine_inv_2_0_BLUE_DOWN,jac_affine_inv_2_0_BLUE_DOWN,jac_affine_inv_2_0_BLUE_DOWN,jac_affine_inv_2_0_BLUE_DOWN)),_mm256_mul_pd(jac_blending_inv_1_0,_mm256_set_pd(jac_affine_inv_2_1_BLUE_DOWN,jac_affine_inv_2_1_BLUE_DOWN,jac_affine_inv_2_1_BLUE_DOWN,jac_affine_inv_2_1_BLUE_DOWN))),_mm256_mul_pd(jac_blending_inv_2_0,_mm256_set_pd(jac_affine_inv_2_2_BLUE_DOWN,jac_affine_inv_2_2_BLUE_DOWN,jac_affine_inv_2_2_BLUE_DOWN,jac_affine_inv_2_2_BLUE_DOWN))))); + const __m256d tmp_qloop_137 = _mm256_mul_pd(tmp_qloop_126,tmp_qloop_136); + const __m256d tmp_qloop_143 = _mm256_mul_pd(tmp_qloop_126,tmp_qloop_142); + const __m256d tmp_qloop_145 = _mm256_mul_pd(tmp_qloop_126,tmp_qloop_144); + const __m256d tmp_qloop_146 = _mm256_mul_pd(tmp_qloop_112,tmp_qloop_145); + const __m256d tmp_qloop_148 = _mm256_mul_pd(tmp_qloop_126,tmp_qloop_147); + const __m256d tmp_qloop_157 = _mm256_mul_pd(tmp_qloop_126,tmp_qloop_135); + const __m256d tmp_qloop_160 = _mm256_mul_pd(tmp_qloop_107,tmp_qloop_157); + const __m256d jac_blending_inv_2_1 = _mm256_mul_pd(tmp_qloop_65,_mm256_add_pd(_mm256_mul_pd(jac_blending_0_1,jac_blending_2_0),_mm256_mul_pd(_mm256_mul_pd(jac_blending_0_0,jac_blending_2_1),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)))); + const __m256d tmp_qloop_138 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_121,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_1,_mm256_set_pd(jac_affine_inv_0_0_BLUE_DOWN,jac_affine_inv_0_0_BLUE_DOWN,jac_affine_inv_0_0_BLUE_DOWN,jac_affine_inv_0_0_BLUE_DOWN)),_mm256_mul_pd(jac_blending_inv_1_1,_mm256_set_pd(jac_affine_inv_0_1_BLUE_DOWN,jac_affine_inv_0_1_BLUE_DOWN,jac_affine_inv_0_1_BLUE_DOWN,jac_affine_inv_0_1_BLUE_DOWN))),_mm256_mul_pd(jac_blending_inv_2_1,_mm256_set_pd(jac_affine_inv_0_2_BLUE_DOWN,jac_affine_inv_0_2_BLUE_DOWN,jac_affine_inv_0_2_BLUE_DOWN,jac_affine_inv_0_2_BLUE_DOWN)))),_mm256_mul_pd(tmp_qloop_124,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_1,_mm256_set_pd(jac_affine_inv_1_0_BLUE_DOWN,jac_affine_inv_1_0_BLUE_DOWN,jac_affine_inv_1_0_BLUE_DOWN,jac_affine_inv_1_0_BLUE_DOWN)),_mm256_mul_pd(jac_blending_inv_1_1,_mm256_set_pd(jac_affine_inv_1_1_BLUE_DOWN,jac_affine_inv_1_1_BLUE_DOWN,jac_affine_inv_1_1_BLUE_DOWN,jac_affine_inv_1_1_BLUE_DOWN))),_mm256_mul_pd(jac_blending_inv_2_1,_mm256_set_pd(jac_affine_inv_1_2_BLUE_DOWN,jac_affine_inv_1_2_BLUE_DOWN,jac_affine_inv_1_2_BLUE_DOWN,jac_affine_inv_1_2_BLUE_DOWN))))),_mm256_mul_pd(tmp_qloop_125,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_1,_mm256_set_pd(jac_affine_inv_2_0_BLUE_DOWN,jac_affine_inv_2_0_BLUE_DOWN,jac_affine_inv_2_0_BLUE_DOWN,jac_affine_inv_2_0_BLUE_DOWN)),_mm256_mul_pd(jac_blending_inv_1_1,_mm256_set_pd(jac_affine_inv_2_1_BLUE_DOWN,jac_affine_inv_2_1_BLUE_DOWN,jac_affine_inv_2_1_BLUE_DOWN,jac_affine_inv_2_1_BLUE_DOWN))),_mm256_mul_pd(jac_blending_inv_2_1,_mm256_set_pd(jac_affine_inv_2_2_BLUE_DOWN,jac_affine_inv_2_2_BLUE_DOWN,jac_affine_inv_2_2_BLUE_DOWN,jac_affine_inv_2_2_BLUE_DOWN))))); + const __m256d tmp_qloop_139 = _mm256_mul_pd(tmp_qloop_136,tmp_qloop_138); + const __m256d tmp_qloop_149 = _mm256_mul_pd(tmp_qloop_138,tmp_qloop_142); + const __m256d tmp_qloop_150 = _mm256_mul_pd(tmp_qloop_138,tmp_qloop_144); + const __m256d tmp_qloop_151 = _mm256_mul_pd(tmp_qloop_112,tmp_qloop_150); + const __m256d tmp_qloop_152 = _mm256_mul_pd(tmp_qloop_138,tmp_qloop_147); + const __m256d tmp_qloop_162 = _mm256_mul_pd(tmp_qloop_138,tmp_qloop_161); + const __m256d jac_blending_inv_2_2 = _mm256_mul_pd(tmp_qloop_65,_mm256_add_pd(_mm256_mul_pd(jac_blending_0_0,jac_blending_1_1),_mm256_mul_pd(_mm256_mul_pd(jac_blending_0_1,jac_blending_1_0),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)))); + const __m256d tmp_qloop_140 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_121,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_2,_mm256_set_pd(jac_affine_inv_0_0_BLUE_DOWN,jac_affine_inv_0_0_BLUE_DOWN,jac_affine_inv_0_0_BLUE_DOWN,jac_affine_inv_0_0_BLUE_DOWN)),_mm256_mul_pd(jac_blending_inv_1_2,_mm256_set_pd(jac_affine_inv_0_1_BLUE_DOWN,jac_affine_inv_0_1_BLUE_DOWN,jac_affine_inv_0_1_BLUE_DOWN,jac_affine_inv_0_1_BLUE_DOWN))),_mm256_mul_pd(jac_blending_inv_2_2,_mm256_set_pd(jac_affine_inv_0_2_BLUE_DOWN,jac_affine_inv_0_2_BLUE_DOWN,jac_affine_inv_0_2_BLUE_DOWN,jac_affine_inv_0_2_BLUE_DOWN)))),_mm256_mul_pd(tmp_qloop_124,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_2,_mm256_set_pd(jac_affine_inv_1_0_BLUE_DOWN,jac_affine_inv_1_0_BLUE_DOWN,jac_affine_inv_1_0_BLUE_DOWN,jac_affine_inv_1_0_BLUE_DOWN)),_mm256_mul_pd(jac_blending_inv_1_2,_mm256_set_pd(jac_affine_inv_1_1_BLUE_DOWN,jac_affine_inv_1_1_BLUE_DOWN,jac_affine_inv_1_1_BLUE_DOWN,jac_affine_inv_1_1_BLUE_DOWN))),_mm256_mul_pd(jac_blending_inv_2_2,_mm256_set_pd(jac_affine_inv_1_2_BLUE_DOWN,jac_affine_inv_1_2_BLUE_DOWN,jac_affine_inv_1_2_BLUE_DOWN,jac_affine_inv_1_2_BLUE_DOWN))))),_mm256_mul_pd(tmp_qloop_125,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_2,_mm256_set_pd(jac_affine_inv_2_0_BLUE_DOWN,jac_affine_inv_2_0_BLUE_DOWN,jac_affine_inv_2_0_BLUE_DOWN,jac_affine_inv_2_0_BLUE_DOWN)),_mm256_mul_pd(jac_blending_inv_1_2,_mm256_set_pd(jac_affine_inv_2_1_BLUE_DOWN,jac_affine_inv_2_1_BLUE_DOWN,jac_affine_inv_2_1_BLUE_DOWN,jac_affine_inv_2_1_BLUE_DOWN))),_mm256_mul_pd(jac_blending_inv_2_2,_mm256_set_pd(jac_affine_inv_2_2_BLUE_DOWN,jac_affine_inv_2_2_BLUE_DOWN,jac_affine_inv_2_2_BLUE_DOWN,jac_affine_inv_2_2_BLUE_DOWN))))); + const __m256d tmp_qloop_141 = _mm256_mul_pd(tmp_qloop_136,tmp_qloop_140); + const __m256d tmp_qloop_153 = _mm256_mul_pd(tmp_qloop_140,tmp_qloop_142); + const __m256d tmp_qloop_154 = _mm256_mul_pd(tmp_qloop_140,tmp_qloop_144); + const __m256d tmp_qloop_155 = _mm256_mul_pd(tmp_qloop_112,tmp_qloop_154); + const __m256d tmp_qloop_156 = _mm256_mul_pd(tmp_qloop_140,tmp_qloop_147); + const __m256d tmp_qloop_163 = _mm256_mul_pd(tmp_qloop_140,tmp_qloop_161); + const __m256d hessian_blending_0_0_0 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_18,tmp_qloop_67),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_37,_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_31,_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(tmp_qloop_68,tmp_qloop_68,tmp_qloop_68,tmp_qloop_68)),_mm256_set_pd(tmp_qloop_33,tmp_qloop_33,tmp_qloop_33,tmp_qloop_33))),_mm256_mul_pd(tmp_qloop_19,_mm256_set_pd(tmp_qloop_68,tmp_qloop_68,tmp_qloop_68,tmp_qloop_68))),_mm256_mul_pd(tmp_qloop_35,tmp_qloop_69)),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_28,tmp_qloop_69),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_17,tmp_qloop_17,tmp_qloop_17,tmp_qloop_17))); + const __m256d hessian_blending_1_0_0 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_25,tmp_qloop_67),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_37,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_19,_mm256_set_pd(tmp_qloop_40,tmp_qloop_40,tmp_qloop_40,tmp_qloop_40)),_mm256_mul_pd(tmp_qloop_25,tmp_qloop_71)),tmp_qloop_70)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_17,tmp_qloop_17,tmp_qloop_17,tmp_qloop_17))); + const __m256d hessian_blending_2_0_0 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_22,tmp_qloop_67),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_37,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_19,_mm256_set_pd(tmp_qloop_72,tmp_qloop_72,tmp_qloop_72,tmp_qloop_72)),_mm256_mul_pd(tmp_qloop_22,tmp_qloop_71)),tmp_qloop_73)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_17,tmp_qloop_17,tmp_qloop_17,tmp_qloop_17))); + const __m256d hessian_blending_0_0_1 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_18,tmp_qloop_78),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_25,tmp_qloop_37),tmp_qloop_75),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_17,tmp_qloop_17,tmp_qloop_17,tmp_qloop_17))); + const __m256d hessian_blending_1_0_1 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_80,tmp_qloop_81),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_29,tmp_qloop_77),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_52); + const __m256d hessian_blending_2_0_1 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_81,tmp_qloop_83),tmp_qloop_84); + const __m256d hessian_blending_0_0_2 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_77,tmp_qloop_85),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_22,tmp_qloop_37),tmp_qloop_75),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_17,tmp_qloop_17,tmp_qloop_17,tmp_qloop_17))); + const __m256d hessian_blending_1_0_2 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_80,tmp_qloop_86),tmp_qloop_84); + const __m256d hessian_blending_2_0_2 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_83,tmp_qloop_86),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_30,tmp_qloop_77),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_52); + const __m256d hessian_blending_0_1_0 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_87,tmp_qloop_88),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_19,tmp_qloop_89),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_44); + const __m256d hessian_blending_1_1_0 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_18,tmp_qloop_91),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_18,tmp_qloop_37),tmp_qloop_90),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_17,tmp_qloop_17,tmp_qloop_17,tmp_qloop_17))); + const __m256d hessian_blending_2_1_0 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_93,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_88,tmp_qloop_92)); + const __m256d hessian_blending_0_1_1 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_18,tmp_qloop_95),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_37,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_29,_mm256_set_pd(tmp_qloop_68,tmp_qloop_68,tmp_qloop_68,tmp_qloop_68)),_mm256_mul_pd(tmp_qloop_54,tmp_qloop_69)),tmp_qloop_94)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_17,tmp_qloop_17,tmp_qloop_17,tmp_qloop_17))); + const __m256d hessian_blending_1_1_1 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_25,tmp_qloop_95),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_37,_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_31,_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(tmp_qloop_40,tmp_qloop_40,tmp_qloop_40,tmp_qloop_40)),_mm256_set_pd(tmp_qloop_53,tmp_qloop_53,tmp_qloop_53,tmp_qloop_53))),_mm256_mul_pd(tmp_qloop_29,_mm256_set_pd(tmp_qloop_40,tmp_qloop_40,tmp_qloop_40,tmp_qloop_40))),_mm256_mul_pd(tmp_qloop_47,tmp_qloop_79)),_mm256_mul_pd(tmp_qloop_54,tmp_qloop_79))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_17,tmp_qloop_17,tmp_qloop_17,tmp_qloop_17))); + const __m256d hessian_blending_2_1_1 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_22,tmp_qloop_95),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_37,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_29,_mm256_set_pd(tmp_qloop_72,tmp_qloop_72,tmp_qloop_72,tmp_qloop_72)),_mm256_mul_pd(tmp_qloop_54,tmp_qloop_82)),tmp_qloop_73)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_17,tmp_qloop_17,tmp_qloop_17,tmp_qloop_17))); + const __m256d hessian_blending_0_1_2 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_93,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_22,tmp_qloop_37),tmp_qloop_87),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_17,tmp_qloop_17,tmp_qloop_17,tmp_qloop_17))); + const __m256d hessian_blending_1_1_2 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_22,tmp_qloop_91),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_22,tmp_qloop_37),tmp_qloop_90),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_17,tmp_qloop_17,tmp_qloop_17,tmp_qloop_17))); + const __m256d hessian_blending_2_1_2 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_86,tmp_qloop_92),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_30,tmp_qloop_89),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_44); + const __m256d hessian_blending_0_2_0 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_88,tmp_qloop_96),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_19,tmp_qloop_97),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_50); + const __m256d hessian_blending_1_2_0 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_100,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_18,tmp_qloop_37),tmp_qloop_98),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_17,tmp_qloop_17,tmp_qloop_17,tmp_qloop_17))); + const __m256d hessian_blending_2_2_0 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_85,tmp_qloop_97),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_101,tmp_qloop_18),tmp_qloop_37),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_17,tmp_qloop_17,tmp_qloop_17,tmp_qloop_17))); + const __m256d hessian_blending_0_2_1 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_100,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_25,tmp_qloop_37),tmp_qloop_96),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_17,tmp_qloop_17,tmp_qloop_17,tmp_qloop_17))); + const __m256d hessian_blending_1_2_1 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_81,tmp_qloop_98),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_29,tmp_qloop_97),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_50); + const __m256d hessian_blending_2_2_1 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_22,tmp_qloop_99),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_101,tmp_qloop_25),tmp_qloop_37),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_17,tmp_qloop_17,tmp_qloop_17,tmp_qloop_17))); + const __m256d hessian_blending_0_2_2 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_102,tmp_qloop_18),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_37,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_30,_mm256_set_pd(tmp_qloop_68,tmp_qloop_68,tmp_qloop_68,tmp_qloop_68)),_mm256_mul_pd(tmp_qloop_57,tmp_qloop_69)),tmp_qloop_94)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_17,tmp_qloop_17,tmp_qloop_17,tmp_qloop_17))); + const __m256d hessian_blending_1_2_2 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_102,tmp_qloop_25),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_37,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_30,_mm256_set_pd(tmp_qloop_40,tmp_qloop_40,tmp_qloop_40,tmp_qloop_40)),_mm256_mul_pd(tmp_qloop_57,tmp_qloop_79)),tmp_qloop_70)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_17,tmp_qloop_17,tmp_qloop_17,tmp_qloop_17))); + const __m256d hessian_blending_2_2_2 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_102,tmp_qloop_22),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_37,_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_31,_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(tmp_qloop_72,tmp_qloop_72,tmp_qloop_72,tmp_qloop_72)),_mm256_set_pd(tmp_qloop_46,tmp_qloop_46,tmp_qloop_46,tmp_qloop_46))),_mm256_mul_pd(tmp_qloop_48,_mm256_set_pd(2.0,2.0,2.0,2.0))),_mm256_mul_pd(tmp_qloop_30,_mm256_set_pd(tmp_qloop_72,tmp_qloop_72,tmp_qloop_72,tmp_qloop_72))),_mm256_mul_pd(tmp_qloop_57,tmp_qloop_82))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_17,tmp_qloop_17,tmp_qloop_17,tmp_qloop_17))); + const __m256d q_tmp_0_0 = _mm256_mul_pd(tmp_qloop_115,tmp_qloop_137); + const __m256d q_tmp_0_1 = _mm256_mul_pd(tmp_qloop_127,tmp_qloop_137); + const __m256d q_tmp_0_2 = _mm256_mul_pd(tmp_qloop_128,tmp_qloop_137); + const __m256d q_tmp_0_3 = _mm256_mul_pd(tmp_qloop_129,tmp_qloop_137); + const __m256d q_tmp_0_4 = _mm256_mul_pd(tmp_qloop_110,tmp_qloop_137); + const __m256d q_tmp_0_5 = _mm256_mul_pd(tmp_qloop_113,tmp_qloop_137); + const __m256d q_tmp_0_6 = _mm256_mul_pd(tmp_qloop_112,tmp_qloop_137); + const __m256d q_tmp_0_7 = _mm256_mul_pd(tmp_qloop_130,tmp_qloop_137); + const __m256d q_tmp_0_8 = _mm256_mul_pd(tmp_qloop_132,tmp_qloop_137); + const __m256d q_tmp_0_9 = _mm256_mul_pd(tmp_qloop_134,tmp_qloop_137); + const __m256d q_tmp_0_10 = _mm256_mul_pd(tmp_qloop_115,tmp_qloop_139); + const __m256d q_tmp_0_11 = _mm256_mul_pd(tmp_qloop_127,tmp_qloop_139); + const __m256d q_tmp_0_12 = _mm256_mul_pd(tmp_qloop_128,tmp_qloop_139); + const __m256d q_tmp_0_13 = _mm256_mul_pd(tmp_qloop_129,tmp_qloop_139); + const __m256d q_tmp_0_14 = _mm256_mul_pd(tmp_qloop_110,tmp_qloop_139); + const __m256d q_tmp_0_15 = _mm256_mul_pd(tmp_qloop_113,tmp_qloop_139); + const __m256d q_tmp_0_16 = _mm256_mul_pd(tmp_qloop_112,tmp_qloop_139); + const __m256d q_tmp_0_17 = _mm256_mul_pd(tmp_qloop_130,tmp_qloop_139); + const __m256d q_tmp_0_18 = _mm256_mul_pd(tmp_qloop_132,tmp_qloop_139); + const __m256d q_tmp_0_19 = _mm256_mul_pd(tmp_qloop_134,tmp_qloop_139); + const __m256d q_tmp_0_20 = _mm256_mul_pd(tmp_qloop_115,tmp_qloop_141); + const __m256d q_tmp_0_21 = _mm256_mul_pd(tmp_qloop_127,tmp_qloop_141); + const __m256d q_tmp_0_22 = _mm256_mul_pd(tmp_qloop_128,tmp_qloop_141); + const __m256d q_tmp_0_23 = _mm256_mul_pd(tmp_qloop_129,tmp_qloop_141); + const __m256d q_tmp_0_24 = _mm256_mul_pd(tmp_qloop_110,tmp_qloop_141); + const __m256d q_tmp_0_25 = _mm256_mul_pd(tmp_qloop_113,tmp_qloop_141); + const __m256d q_tmp_0_26 = _mm256_mul_pd(tmp_qloop_112,tmp_qloop_141); + const __m256d q_tmp_0_27 = _mm256_mul_pd(tmp_qloop_130,tmp_qloop_141); + const __m256d q_tmp_0_28 = _mm256_mul_pd(tmp_qloop_132,tmp_qloop_141); + const __m256d q_tmp_0_29 = _mm256_mul_pd(tmp_qloop_134,tmp_qloop_141); + const __m256d q_tmp_1_0 = _mm256_mul_pd(tmp_qloop_115,tmp_qloop_143); + const __m256d q_tmp_1_1 = _mm256_mul_pd(tmp_qloop_127,tmp_qloop_143); + const __m256d q_tmp_1_2 = _mm256_mul_pd(tmp_qloop_128,tmp_qloop_143); + const __m256d q_tmp_1_3 = _mm256_mul_pd(tmp_qloop_129,tmp_qloop_143); + const __m256d q_tmp_1_4 = tmp_qloop_146; + const __m256d q_tmp_1_5 = _mm256_mul_pd(tmp_qloop_133,tmp_qloop_145); + const __m256d q_tmp_1_6 = _mm256_mul_pd(tmp_qloop_133,tmp_qloop_148); + const __m256d q_tmp_1_7 = _mm256_mul_pd(tmp_qloop_130,tmp_qloop_143); + const __m256d q_tmp_1_8 = _mm256_mul_pd(tmp_qloop_132,tmp_qloop_143); + const __m256d q_tmp_1_9 = _mm256_mul_pd(tmp_qloop_134,tmp_qloop_143); + const __m256d q_tmp_1_10 = _mm256_mul_pd(tmp_qloop_115,tmp_qloop_149); + const __m256d q_tmp_1_11 = _mm256_mul_pd(tmp_qloop_127,tmp_qloop_149); + const __m256d q_tmp_1_12 = _mm256_mul_pd(tmp_qloop_128,tmp_qloop_149); + const __m256d q_tmp_1_13 = _mm256_mul_pd(tmp_qloop_129,tmp_qloop_149); + const __m256d q_tmp_1_14 = tmp_qloop_151; + const __m256d q_tmp_1_15 = _mm256_mul_pd(tmp_qloop_133,tmp_qloop_150); + const __m256d q_tmp_1_16 = _mm256_mul_pd(tmp_qloop_133,tmp_qloop_152); + const __m256d q_tmp_1_17 = _mm256_mul_pd(tmp_qloop_130,tmp_qloop_149); + const __m256d q_tmp_1_18 = _mm256_mul_pd(tmp_qloop_132,tmp_qloop_149); + const __m256d q_tmp_1_19 = _mm256_mul_pd(tmp_qloop_134,tmp_qloop_149); + const __m256d q_tmp_1_20 = _mm256_mul_pd(tmp_qloop_115,tmp_qloop_153); + const __m256d q_tmp_1_21 = _mm256_mul_pd(tmp_qloop_127,tmp_qloop_153); + const __m256d q_tmp_1_22 = _mm256_mul_pd(tmp_qloop_128,tmp_qloop_153); + const __m256d q_tmp_1_23 = _mm256_mul_pd(tmp_qloop_129,tmp_qloop_153); + const __m256d q_tmp_1_24 = tmp_qloop_155; + const __m256d q_tmp_1_25 = _mm256_mul_pd(tmp_qloop_133,tmp_qloop_154); + const __m256d q_tmp_1_26 = _mm256_mul_pd(tmp_qloop_133,tmp_qloop_156); + const __m256d q_tmp_1_27 = _mm256_mul_pd(tmp_qloop_130,tmp_qloop_153); + const __m256d q_tmp_1_28 = _mm256_mul_pd(tmp_qloop_132,tmp_qloop_153); + const __m256d q_tmp_1_29 = _mm256_mul_pd(tmp_qloop_134,tmp_qloop_153); + const __m256d q_tmp_2_0 = _mm256_mul_pd(tmp_qloop_115,tmp_qloop_148); + const __m256d q_tmp_2_1 = _mm256_mul_pd(tmp_qloop_127,tmp_qloop_148); + const __m256d q_tmp_2_2 = _mm256_mul_pd(tmp_qloop_128,tmp_qloop_148); + const __m256d q_tmp_2_3 = _mm256_mul_pd(tmp_qloop_129,tmp_qloop_148); + const __m256d q_tmp_2_4 = _mm256_mul_pd(tmp_qloop_131,tmp_qloop_145); + const __m256d q_tmp_2_5 = tmp_qloop_146; + const __m256d q_tmp_2_6 = _mm256_mul_pd(tmp_qloop_157,tmp_qloop_158); + const __m256d q_tmp_2_7 = _mm256_mul_pd(tmp_qloop_130,tmp_qloop_148); + const __m256d q_tmp_2_8 = _mm256_mul_pd(tmp_qloop_132,tmp_qloop_148); + const __m256d q_tmp_2_9 = _mm256_mul_pd(tmp_qloop_134,tmp_qloop_148); + const __m256d q_tmp_2_10 = _mm256_mul_pd(tmp_qloop_115,tmp_qloop_152); + const __m256d q_tmp_2_11 = _mm256_mul_pd(tmp_qloop_127,tmp_qloop_152); + const __m256d q_tmp_2_12 = _mm256_mul_pd(tmp_qloop_128,tmp_qloop_152); + const __m256d q_tmp_2_13 = _mm256_mul_pd(tmp_qloop_129,tmp_qloop_152); + const __m256d q_tmp_2_14 = _mm256_mul_pd(tmp_qloop_131,tmp_qloop_150); + const __m256d q_tmp_2_15 = tmp_qloop_151; + const __m256d q_tmp_2_16 = _mm256_mul_pd(tmp_qloop_138,tmp_qloop_159); + const __m256d q_tmp_2_17 = _mm256_mul_pd(tmp_qloop_130,tmp_qloop_152); + const __m256d q_tmp_2_18 = _mm256_mul_pd(tmp_qloop_132,tmp_qloop_152); + const __m256d q_tmp_2_19 = _mm256_mul_pd(tmp_qloop_134,tmp_qloop_152); + const __m256d q_tmp_2_20 = _mm256_mul_pd(tmp_qloop_115,tmp_qloop_156); + const __m256d q_tmp_2_21 = _mm256_mul_pd(tmp_qloop_127,tmp_qloop_156); + const __m256d q_tmp_2_22 = _mm256_mul_pd(tmp_qloop_128,tmp_qloop_156); + const __m256d q_tmp_2_23 = _mm256_mul_pd(tmp_qloop_129,tmp_qloop_156); + const __m256d q_tmp_2_24 = _mm256_mul_pd(tmp_qloop_131,tmp_qloop_154); + const __m256d q_tmp_2_25 = tmp_qloop_155; + const __m256d q_tmp_2_26 = _mm256_mul_pd(tmp_qloop_140,tmp_qloop_159); + const __m256d q_tmp_2_27 = _mm256_mul_pd(tmp_qloop_130,tmp_qloop_156); + const __m256d q_tmp_2_28 = _mm256_mul_pd(tmp_qloop_132,tmp_qloop_156); + const __m256d q_tmp_2_29 = _mm256_mul_pd(tmp_qloop_134,tmp_qloop_156); + const __m256d q_tmp_3_0 = _mm256_mul_pd(tmp_qloop_115,tmp_qloop_145); + const __m256d q_tmp_3_1 = _mm256_mul_pd(tmp_qloop_127,tmp_qloop_145); + const __m256d q_tmp_3_2 = _mm256_mul_pd(tmp_qloop_128,tmp_qloop_145); + const __m256d q_tmp_3_3 = _mm256_mul_pd(tmp_qloop_129,tmp_qloop_145); + const __m256d q_tmp_3_4 = _mm256_mul_pd(tmp_qloop_109,tmp_qloop_160); + const __m256d q_tmp_3_5 = _mm256_mul_pd(tmp_qloop_111,tmp_qloop_160); + const __m256d q_tmp_3_6 = tmp_qloop_146; + const __m256d q_tmp_3_7 = _mm256_mul_pd(tmp_qloop_130,tmp_qloop_145); + const __m256d q_tmp_3_8 = _mm256_mul_pd(tmp_qloop_132,tmp_qloop_145); + const __m256d q_tmp_3_9 = _mm256_mul_pd(tmp_qloop_134,tmp_qloop_145); + const __m256d q_tmp_3_10 = _mm256_mul_pd(tmp_qloop_115,tmp_qloop_150); + const __m256d q_tmp_3_11 = _mm256_mul_pd(tmp_qloop_127,tmp_qloop_150); + const __m256d q_tmp_3_12 = _mm256_mul_pd(tmp_qloop_128,tmp_qloop_150); + const __m256d q_tmp_3_13 = _mm256_mul_pd(tmp_qloop_129,tmp_qloop_150); + const __m256d q_tmp_3_14 = _mm256_mul_pd(tmp_qloop_109,tmp_qloop_162); + const __m256d q_tmp_3_15 = _mm256_mul_pd(tmp_qloop_111,tmp_qloop_162); + const __m256d q_tmp_3_16 = tmp_qloop_151; + const __m256d q_tmp_3_17 = _mm256_mul_pd(tmp_qloop_130,tmp_qloop_150); + const __m256d q_tmp_3_18 = _mm256_mul_pd(tmp_qloop_132,tmp_qloop_150); + const __m256d q_tmp_3_19 = _mm256_mul_pd(tmp_qloop_134,tmp_qloop_150); + const __m256d q_tmp_3_20 = _mm256_mul_pd(tmp_qloop_115,tmp_qloop_154); + const __m256d q_tmp_3_21 = _mm256_mul_pd(tmp_qloop_127,tmp_qloop_154); + const __m256d q_tmp_3_22 = _mm256_mul_pd(tmp_qloop_128,tmp_qloop_154); + const __m256d q_tmp_3_23 = _mm256_mul_pd(tmp_qloop_129,tmp_qloop_154); + const __m256d q_tmp_3_24 = _mm256_mul_pd(tmp_qloop_109,tmp_qloop_163); + const __m256d q_tmp_3_25 = _mm256_mul_pd(tmp_qloop_111,tmp_qloop_163); + const __m256d q_tmp_3_26 = tmp_qloop_155; + const __m256d q_tmp_3_27 = _mm256_mul_pd(tmp_qloop_130,tmp_qloop_154); + const __m256d q_tmp_3_28 = _mm256_mul_pd(tmp_qloop_132,tmp_qloop_154); + const __m256d q_tmp_3_29 = _mm256_mul_pd(tmp_qloop_134,tmp_qloop_154); + q_acc_0_0 = _mm256_add_pd(q_acc_0_0,q_tmp_0_0); + q_acc_0_1 = _mm256_add_pd(q_acc_0_1,q_tmp_0_1); + q_acc_0_2 = _mm256_add_pd(q_acc_0_2,q_tmp_0_2); + q_acc_0_3 = _mm256_add_pd(q_acc_0_3,q_tmp_0_3); + q_acc_0_4 = _mm256_add_pd(q_acc_0_4,q_tmp_0_4); + q_acc_0_5 = _mm256_add_pd(q_acc_0_5,q_tmp_0_5); + q_acc_0_6 = _mm256_add_pd(q_acc_0_6,q_tmp_0_6); + q_acc_0_7 = _mm256_add_pd(q_acc_0_7,q_tmp_0_7); + q_acc_0_8 = _mm256_add_pd(q_acc_0_8,q_tmp_0_8); + q_acc_0_9 = _mm256_add_pd(q_acc_0_9,q_tmp_0_9); + q_acc_0_10 = _mm256_add_pd(q_acc_0_10,q_tmp_0_10); + q_acc_0_11 = _mm256_add_pd(q_acc_0_11,q_tmp_0_11); + q_acc_0_12 = _mm256_add_pd(q_acc_0_12,q_tmp_0_12); + q_acc_0_13 = _mm256_add_pd(q_acc_0_13,q_tmp_0_13); + q_acc_0_14 = _mm256_add_pd(q_acc_0_14,q_tmp_0_14); + q_acc_0_15 = _mm256_add_pd(q_acc_0_15,q_tmp_0_15); + q_acc_0_16 = _mm256_add_pd(q_acc_0_16,q_tmp_0_16); + q_acc_0_17 = _mm256_add_pd(q_acc_0_17,q_tmp_0_17); + q_acc_0_18 = _mm256_add_pd(q_acc_0_18,q_tmp_0_18); + q_acc_0_19 = _mm256_add_pd(q_acc_0_19,q_tmp_0_19); + q_acc_0_20 = _mm256_add_pd(q_acc_0_20,q_tmp_0_20); + q_acc_0_21 = _mm256_add_pd(q_acc_0_21,q_tmp_0_21); + q_acc_0_22 = _mm256_add_pd(q_acc_0_22,q_tmp_0_22); + q_acc_0_23 = _mm256_add_pd(q_acc_0_23,q_tmp_0_23); + q_acc_0_24 = _mm256_add_pd(q_acc_0_24,q_tmp_0_24); + q_acc_0_25 = _mm256_add_pd(q_acc_0_25,q_tmp_0_25); + q_acc_0_26 = _mm256_add_pd(q_acc_0_26,q_tmp_0_26); + q_acc_0_27 = _mm256_add_pd(q_acc_0_27,q_tmp_0_27); + q_acc_0_28 = _mm256_add_pd(q_acc_0_28,q_tmp_0_28); + q_acc_0_29 = _mm256_add_pd(q_acc_0_29,q_tmp_0_29); + q_acc_1_0 = _mm256_add_pd(q_acc_1_0,q_tmp_1_0); + q_acc_1_1 = _mm256_add_pd(q_acc_1_1,q_tmp_1_1); + q_acc_1_2 = _mm256_add_pd(q_acc_1_2,q_tmp_1_2); + q_acc_1_3 = _mm256_add_pd(q_acc_1_3,q_tmp_1_3); + q_acc_1_4 = _mm256_add_pd(q_acc_1_4,q_tmp_1_4); + q_acc_1_5 = _mm256_add_pd(q_acc_1_5,q_tmp_1_5); + q_acc_1_6 = _mm256_add_pd(q_acc_1_6,q_tmp_1_6); + q_acc_1_7 = _mm256_add_pd(q_acc_1_7,q_tmp_1_7); + q_acc_1_8 = _mm256_add_pd(q_acc_1_8,q_tmp_1_8); + q_acc_1_9 = _mm256_add_pd(q_acc_1_9,q_tmp_1_9); + q_acc_1_10 = _mm256_add_pd(q_acc_1_10,q_tmp_1_10); + q_acc_1_11 = _mm256_add_pd(q_acc_1_11,q_tmp_1_11); + q_acc_1_12 = _mm256_add_pd(q_acc_1_12,q_tmp_1_12); + q_acc_1_13 = _mm256_add_pd(q_acc_1_13,q_tmp_1_13); + q_acc_1_14 = _mm256_add_pd(q_acc_1_14,q_tmp_1_14); + q_acc_1_15 = _mm256_add_pd(q_acc_1_15,q_tmp_1_15); + q_acc_1_16 = _mm256_add_pd(q_acc_1_16,q_tmp_1_16); + q_acc_1_17 = _mm256_add_pd(q_acc_1_17,q_tmp_1_17); + q_acc_1_18 = _mm256_add_pd(q_acc_1_18,q_tmp_1_18); + q_acc_1_19 = _mm256_add_pd(q_acc_1_19,q_tmp_1_19); + q_acc_1_20 = _mm256_add_pd(q_acc_1_20,q_tmp_1_20); + q_acc_1_21 = _mm256_add_pd(q_acc_1_21,q_tmp_1_21); + q_acc_1_22 = _mm256_add_pd(q_acc_1_22,q_tmp_1_22); + q_acc_1_23 = _mm256_add_pd(q_acc_1_23,q_tmp_1_23); + q_acc_1_24 = _mm256_add_pd(q_acc_1_24,q_tmp_1_24); + q_acc_1_25 = _mm256_add_pd(q_acc_1_25,q_tmp_1_25); + q_acc_1_26 = _mm256_add_pd(q_acc_1_26,q_tmp_1_26); + q_acc_1_27 = _mm256_add_pd(q_acc_1_27,q_tmp_1_27); + q_acc_1_28 = _mm256_add_pd(q_acc_1_28,q_tmp_1_28); + q_acc_1_29 = _mm256_add_pd(q_acc_1_29,q_tmp_1_29); + q_acc_2_0 = _mm256_add_pd(q_acc_2_0,q_tmp_2_0); + q_acc_2_1 = _mm256_add_pd(q_acc_2_1,q_tmp_2_1); + q_acc_2_2 = _mm256_add_pd(q_acc_2_2,q_tmp_2_2); + q_acc_2_3 = _mm256_add_pd(q_acc_2_3,q_tmp_2_3); + q_acc_2_4 = _mm256_add_pd(q_acc_2_4,q_tmp_2_4); + q_acc_2_5 = _mm256_add_pd(q_acc_2_5,q_tmp_2_5); + q_acc_2_6 = _mm256_add_pd(q_acc_2_6,q_tmp_2_6); + q_acc_2_7 = _mm256_add_pd(q_acc_2_7,q_tmp_2_7); + q_acc_2_8 = _mm256_add_pd(q_acc_2_8,q_tmp_2_8); + q_acc_2_9 = _mm256_add_pd(q_acc_2_9,q_tmp_2_9); + q_acc_2_10 = _mm256_add_pd(q_acc_2_10,q_tmp_2_10); + q_acc_2_11 = _mm256_add_pd(q_acc_2_11,q_tmp_2_11); + q_acc_2_12 = _mm256_add_pd(q_acc_2_12,q_tmp_2_12); + q_acc_2_13 = _mm256_add_pd(q_acc_2_13,q_tmp_2_13); + q_acc_2_14 = _mm256_add_pd(q_acc_2_14,q_tmp_2_14); + q_acc_2_15 = _mm256_add_pd(q_acc_2_15,q_tmp_2_15); + q_acc_2_16 = _mm256_add_pd(q_acc_2_16,q_tmp_2_16); + q_acc_2_17 = _mm256_add_pd(q_acc_2_17,q_tmp_2_17); + q_acc_2_18 = _mm256_add_pd(q_acc_2_18,q_tmp_2_18); + q_acc_2_19 = _mm256_add_pd(q_acc_2_19,q_tmp_2_19); + q_acc_2_20 = _mm256_add_pd(q_acc_2_20,q_tmp_2_20); + q_acc_2_21 = _mm256_add_pd(q_acc_2_21,q_tmp_2_21); + q_acc_2_22 = _mm256_add_pd(q_acc_2_22,q_tmp_2_22); + q_acc_2_23 = _mm256_add_pd(q_acc_2_23,q_tmp_2_23); + q_acc_2_24 = _mm256_add_pd(q_acc_2_24,q_tmp_2_24); + q_acc_2_25 = _mm256_add_pd(q_acc_2_25,q_tmp_2_25); + q_acc_2_26 = _mm256_add_pd(q_acc_2_26,q_tmp_2_26); + q_acc_2_27 = _mm256_add_pd(q_acc_2_27,q_tmp_2_27); + q_acc_2_28 = _mm256_add_pd(q_acc_2_28,q_tmp_2_28); + q_acc_2_29 = _mm256_add_pd(q_acc_2_29,q_tmp_2_29); + q_acc_3_0 = _mm256_add_pd(q_acc_3_0,q_tmp_3_0); + q_acc_3_1 = _mm256_add_pd(q_acc_3_1,q_tmp_3_1); + q_acc_3_2 = _mm256_add_pd(q_acc_3_2,q_tmp_3_2); + q_acc_3_3 = _mm256_add_pd(q_acc_3_3,q_tmp_3_3); + q_acc_3_4 = _mm256_add_pd(q_acc_3_4,q_tmp_3_4); + q_acc_3_5 = _mm256_add_pd(q_acc_3_5,q_tmp_3_5); + q_acc_3_6 = _mm256_add_pd(q_acc_3_6,q_tmp_3_6); + q_acc_3_7 = _mm256_add_pd(q_acc_3_7,q_tmp_3_7); + q_acc_3_8 = _mm256_add_pd(q_acc_3_8,q_tmp_3_8); + q_acc_3_9 = _mm256_add_pd(q_acc_3_9,q_tmp_3_9); + q_acc_3_10 = _mm256_add_pd(q_acc_3_10,q_tmp_3_10); + q_acc_3_11 = _mm256_add_pd(q_acc_3_11,q_tmp_3_11); + q_acc_3_12 = _mm256_add_pd(q_acc_3_12,q_tmp_3_12); + q_acc_3_13 = _mm256_add_pd(q_acc_3_13,q_tmp_3_13); + q_acc_3_14 = _mm256_add_pd(q_acc_3_14,q_tmp_3_14); + q_acc_3_15 = _mm256_add_pd(q_acc_3_15,q_tmp_3_15); + q_acc_3_16 = _mm256_add_pd(q_acc_3_16,q_tmp_3_16); + q_acc_3_17 = _mm256_add_pd(q_acc_3_17,q_tmp_3_17); + q_acc_3_18 = _mm256_add_pd(q_acc_3_18,q_tmp_3_18); + q_acc_3_19 = _mm256_add_pd(q_acc_3_19,q_tmp_3_19); + q_acc_3_20 = _mm256_add_pd(q_acc_3_20,q_tmp_3_20); + q_acc_3_21 = _mm256_add_pd(q_acc_3_21,q_tmp_3_21); + q_acc_3_22 = _mm256_add_pd(q_acc_3_22,q_tmp_3_22); + q_acc_3_23 = _mm256_add_pd(q_acc_3_23,q_tmp_3_23); + q_acc_3_24 = _mm256_add_pd(q_acc_3_24,q_tmp_3_24); + q_acc_3_25 = _mm256_add_pd(q_acc_3_25,q_tmp_3_25); + q_acc_3_26 = _mm256_add_pd(q_acc_3_26,q_tmp_3_26); + q_acc_3_27 = _mm256_add_pd(q_acc_3_27,q_tmp_3_27); + q_acc_3_28 = _mm256_add_pd(q_acc_3_28,q_tmp_3_28); + q_acc_3_29 = _mm256_add_pd(q_acc_3_29,q_tmp_3_29); + } + const __m256d elMatVec_0 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(q_acc_0_0,src_dof_0),_mm256_mul_pd(q_acc_0_1,src_dof_1)),_mm256_mul_pd(q_acc_0_10,src_dof_10)),_mm256_mul_pd(q_acc_0_11,src_dof_11)),_mm256_mul_pd(q_acc_0_12,src_dof_12)),_mm256_mul_pd(q_acc_0_13,src_dof_13)),_mm256_mul_pd(q_acc_0_14,src_dof_14)),_mm256_mul_pd(q_acc_0_15,src_dof_15)),_mm256_mul_pd(q_acc_0_16,src_dof_16)),_mm256_mul_pd(q_acc_0_17,src_dof_17)),_mm256_mul_pd(q_acc_0_18,src_dof_18)),_mm256_mul_pd(q_acc_0_19,src_dof_19)),_mm256_mul_pd(q_acc_0_2,src_dof_2)),_mm256_mul_pd(q_acc_0_20,src_dof_20)),_mm256_mul_pd(q_acc_0_21,src_dof_21)),_mm256_mul_pd(q_acc_0_22,src_dof_22)),_mm256_mul_pd(q_acc_0_23,src_dof_23)),_mm256_mul_pd(q_acc_0_24,src_dof_24)),_mm256_mul_pd(q_acc_0_25,src_dof_25)),_mm256_mul_pd(q_acc_0_26,src_dof_26)),_mm256_mul_pd(q_acc_0_27,src_dof_27)),_mm256_mul_pd(q_acc_0_28,src_dof_28)),_mm256_mul_pd(q_acc_0_29,src_dof_29)),_mm256_mul_pd(q_acc_0_3,src_dof_3)),_mm256_mul_pd(q_acc_0_4,src_dof_4)),_mm256_mul_pd(q_acc_0_5,src_dof_5)),_mm256_mul_pd(q_acc_0_6,src_dof_6)),_mm256_mul_pd(q_acc_0_7,src_dof_7)),_mm256_mul_pd(q_acc_0_8,src_dof_8)),_mm256_mul_pd(q_acc_0_9,src_dof_9)); + const __m256d elMatVec_1 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(q_acc_1_0,src_dof_0),_mm256_mul_pd(q_acc_1_1,src_dof_1)),_mm256_mul_pd(q_acc_1_10,src_dof_10)),_mm256_mul_pd(q_acc_1_11,src_dof_11)),_mm256_mul_pd(q_acc_1_12,src_dof_12)),_mm256_mul_pd(q_acc_1_13,src_dof_13)),_mm256_mul_pd(q_acc_1_14,src_dof_14)),_mm256_mul_pd(q_acc_1_15,src_dof_15)),_mm256_mul_pd(q_acc_1_16,src_dof_16)),_mm256_mul_pd(q_acc_1_17,src_dof_17)),_mm256_mul_pd(q_acc_1_18,src_dof_18)),_mm256_mul_pd(q_acc_1_19,src_dof_19)),_mm256_mul_pd(q_acc_1_2,src_dof_2)),_mm256_mul_pd(q_acc_1_20,src_dof_20)),_mm256_mul_pd(q_acc_1_21,src_dof_21)),_mm256_mul_pd(q_acc_1_22,src_dof_22)),_mm256_mul_pd(q_acc_1_23,src_dof_23)),_mm256_mul_pd(q_acc_1_24,src_dof_24)),_mm256_mul_pd(q_acc_1_25,src_dof_25)),_mm256_mul_pd(q_acc_1_26,src_dof_26)),_mm256_mul_pd(q_acc_1_27,src_dof_27)),_mm256_mul_pd(q_acc_1_28,src_dof_28)),_mm256_mul_pd(q_acc_1_29,src_dof_29)),_mm256_mul_pd(q_acc_1_3,src_dof_3)),_mm256_mul_pd(q_acc_1_4,src_dof_4)),_mm256_mul_pd(q_acc_1_5,src_dof_5)),_mm256_mul_pd(q_acc_1_6,src_dof_6)),_mm256_mul_pd(q_acc_1_7,src_dof_7)),_mm256_mul_pd(q_acc_1_8,src_dof_8)),_mm256_mul_pd(q_acc_1_9,src_dof_9)); + const __m256d elMatVec_2 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(q_acc_2_0,src_dof_0),_mm256_mul_pd(q_acc_2_1,src_dof_1)),_mm256_mul_pd(q_acc_2_10,src_dof_10)),_mm256_mul_pd(q_acc_2_11,src_dof_11)),_mm256_mul_pd(q_acc_2_12,src_dof_12)),_mm256_mul_pd(q_acc_2_13,src_dof_13)),_mm256_mul_pd(q_acc_2_14,src_dof_14)),_mm256_mul_pd(q_acc_2_15,src_dof_15)),_mm256_mul_pd(q_acc_2_16,src_dof_16)),_mm256_mul_pd(q_acc_2_17,src_dof_17)),_mm256_mul_pd(q_acc_2_18,src_dof_18)),_mm256_mul_pd(q_acc_2_19,src_dof_19)),_mm256_mul_pd(q_acc_2_2,src_dof_2)),_mm256_mul_pd(q_acc_2_20,src_dof_20)),_mm256_mul_pd(q_acc_2_21,src_dof_21)),_mm256_mul_pd(q_acc_2_22,src_dof_22)),_mm256_mul_pd(q_acc_2_23,src_dof_23)),_mm256_mul_pd(q_acc_2_24,src_dof_24)),_mm256_mul_pd(q_acc_2_25,src_dof_25)),_mm256_mul_pd(q_acc_2_26,src_dof_26)),_mm256_mul_pd(q_acc_2_27,src_dof_27)),_mm256_mul_pd(q_acc_2_28,src_dof_28)),_mm256_mul_pd(q_acc_2_29,src_dof_29)),_mm256_mul_pd(q_acc_2_3,src_dof_3)),_mm256_mul_pd(q_acc_2_4,src_dof_4)),_mm256_mul_pd(q_acc_2_5,src_dof_5)),_mm256_mul_pd(q_acc_2_6,src_dof_6)),_mm256_mul_pd(q_acc_2_7,src_dof_7)),_mm256_mul_pd(q_acc_2_8,src_dof_8)),_mm256_mul_pd(q_acc_2_9,src_dof_9)); + const __m256d elMatVec_3 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(q_acc_3_0,src_dof_0),_mm256_mul_pd(q_acc_3_1,src_dof_1)),_mm256_mul_pd(q_acc_3_10,src_dof_10)),_mm256_mul_pd(q_acc_3_11,src_dof_11)),_mm256_mul_pd(q_acc_3_12,src_dof_12)),_mm256_mul_pd(q_acc_3_13,src_dof_13)),_mm256_mul_pd(q_acc_3_14,src_dof_14)),_mm256_mul_pd(q_acc_3_15,src_dof_15)),_mm256_mul_pd(q_acc_3_16,src_dof_16)),_mm256_mul_pd(q_acc_3_17,src_dof_17)),_mm256_mul_pd(q_acc_3_18,src_dof_18)),_mm256_mul_pd(q_acc_3_19,src_dof_19)),_mm256_mul_pd(q_acc_3_2,src_dof_2)),_mm256_mul_pd(q_acc_3_20,src_dof_20)),_mm256_mul_pd(q_acc_3_21,src_dof_21)),_mm256_mul_pd(q_acc_3_22,src_dof_22)),_mm256_mul_pd(q_acc_3_23,src_dof_23)),_mm256_mul_pd(q_acc_3_24,src_dof_24)),_mm256_mul_pd(q_acc_3_25,src_dof_25)),_mm256_mul_pd(q_acc_3_26,src_dof_26)),_mm256_mul_pd(q_acc_3_27,src_dof_27)),_mm256_mul_pd(q_acc_3_28,src_dof_28)),_mm256_mul_pd(q_acc_3_29,src_dof_29)),_mm256_mul_pd(q_acc_3_3,src_dof_3)),_mm256_mul_pd(q_acc_3_4,src_dof_4)),_mm256_mul_pd(q_acc_3_5,src_dof_5)),_mm256_mul_pd(q_acc_3_6,src_dof_6)),_mm256_mul_pd(q_acc_3_7,src_dof_7)),_mm256_mul_pd(q_acc_3_8,src_dof_8)),_mm256_mul_pd(q_acc_3_9,src_dof_9)); + _mm256_storeu_pd(&_data_dst[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))],_mm256_add_pd(elMatVec_0,_mm256_loadu_pd(& _data_dst[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))]))); + _mm256_storeu_pd(&_data_dst[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))],_mm256_add_pd(elMatVec_1,_mm256_loadu_pd(& _data_dst[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))]))); + _mm256_storeu_pd(&_data_dst[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1],_mm256_add_pd(elMatVec_2,_mm256_loadu_pd(& _data_dst[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1]))); + _mm256_storeu_pd(&_data_dst[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))],_mm256_add_pd(elMatVec_3,_mm256_loadu_pd(& _data_dst[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))]))); + } + for (int64_t ctr_0 = (int64_t)((-ctr_1 - ctr_2 + micro_edges_per_macro_edge - 1) / (4)) * (4); ctr_0 < -ctr_1 - ctr_2 + micro_edges_per_macro_edge - 1; ctr_0 += 1) + { + + const int64_t phantom_ctr_0 = ctr_0; + real_t _data_float_loop_ctr_array_dim_0[4]; + _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0; + _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1; + _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2; + _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3; + real_t _data_float_loop_ctr_array_dim_1[4]; + _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1; + real_t _data_float_loop_ctr_array_dim_2[4]; + _data_float_loop_ctr_array_dim_2[0] = (real_t) ctr_2; + _data_float_loop_ctr_array_dim_2[1] = (real_t) ctr_2; + _data_float_loop_ctr_array_dim_2[2] = (real_t) ctr_2; + _data_float_loop_ctr_array_dim_2[3] = (real_t) ctr_2; + + const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]; + const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]; + const real_t p_affine_0_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]; + const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_1_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_2_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_3_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_3_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_3_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0; + const real_t src_dof_0 = _data_src_vertex_0[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))]; + const real_t src_dof_1 = _data_src_vertex_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))]; + const real_t src_dof_2 = _data_src_vertex_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1]; + const real_t src_dof_3 = _data_src_vertex_0[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))]; + const real_t src_dof_4 = _data_src_edge_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + 4*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))]; + const real_t src_dof_5 = _data_src_edge_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))]; + const real_t src_dof_6 = _data_src_edge_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))]; + const real_t src_dof_7 = _data_src_edge_0[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 3*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]; + const real_t src_dof_8 = _data_src_edge_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge - 1)*(micro_edges_per_macro_edge + 1)) / (6)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))]; + const real_t src_dof_9 = _data_src_edge_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]; + const real_t src_dof_10 = _data_src_vertex_1[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))]; + const real_t src_dof_11 = _data_src_vertex_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))]; + const real_t src_dof_12 = _data_src_vertex_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1]; + const real_t src_dof_13 = _data_src_vertex_1[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))]; + const real_t src_dof_14 = _data_src_edge_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + 4*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))]; + const real_t src_dof_15 = _data_src_edge_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))]; + const real_t src_dof_16 = _data_src_edge_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))]; + const real_t src_dof_17 = _data_src_edge_1[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 3*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]; + const real_t src_dof_18 = _data_src_edge_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge - 1)*(micro_edges_per_macro_edge + 1)) / (6)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))]; + const real_t src_dof_19 = _data_src_edge_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]; + const real_t src_dof_20 = _data_src_vertex_2[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))]; + const real_t src_dof_21 = _data_src_vertex_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))]; + const real_t src_dof_22 = _data_src_vertex_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1]; + const real_t src_dof_23 = _data_src_vertex_2[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))]; + const real_t src_dof_24 = _data_src_edge_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + 4*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))]; + const real_t src_dof_25 = _data_src_edge_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))]; + const real_t src_dof_26 = _data_src_edge_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))]; + const real_t src_dof_27 = _data_src_edge_2[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 3*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]; + const real_t src_dof_28 = _data_src_edge_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge - 1)*(micro_edges_per_macro_edge + 1)) / (6)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))]; + const real_t src_dof_29 = _data_src_edge_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]; + const real_t rho_dof_0 = _data_rhoVertex[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))]; + const real_t rho_dof_1 = _data_rhoVertex[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))]; + const real_t rho_dof_2 = _data_rhoVertex[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1]; + const real_t rho_dof_3 = _data_rhoVertex[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))]; + const real_t rho_dof_4 = _data_rhoEdge[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + 4*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))]; + const real_t rho_dof_5 = _data_rhoEdge[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))]; + const real_t rho_dof_6 = _data_rhoEdge[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))]; + const real_t rho_dof_7 = _data_rhoEdge[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 3*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]; + const real_t rho_dof_8 = _data_rhoEdge[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge - 1)*(micro_edges_per_macro_edge + 1)) / (6)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))]; + const real_t rho_dof_9 = _data_rhoEdge[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]; + real_t q_acc_0_0 = 0.0; + real_t q_acc_0_1 = 0.0; + real_t q_acc_0_2 = 0.0; + real_t q_acc_0_3 = 0.0; + real_t q_acc_0_4 = 0.0; + real_t q_acc_0_5 = 0.0; + real_t q_acc_0_6 = 0.0; + real_t q_acc_0_7 = 0.0; + real_t q_acc_0_8 = 0.0; + real_t q_acc_0_9 = 0.0; + real_t q_acc_0_10 = 0.0; + real_t q_acc_0_11 = 0.0; + real_t q_acc_0_12 = 0.0; + real_t q_acc_0_13 = 0.0; + real_t q_acc_0_14 = 0.0; + real_t q_acc_0_15 = 0.0; + real_t q_acc_0_16 = 0.0; + real_t q_acc_0_17 = 0.0; + real_t q_acc_0_18 = 0.0; + real_t q_acc_0_19 = 0.0; + real_t q_acc_0_20 = 0.0; + real_t q_acc_0_21 = 0.0; + real_t q_acc_0_22 = 0.0; + real_t q_acc_0_23 = 0.0; + real_t q_acc_0_24 = 0.0; + real_t q_acc_0_25 = 0.0; + real_t q_acc_0_26 = 0.0; + real_t q_acc_0_27 = 0.0; + real_t q_acc_0_28 = 0.0; + real_t q_acc_0_29 = 0.0; + real_t q_acc_1_0 = 0.0; + real_t q_acc_1_1 = 0.0; + real_t q_acc_1_2 = 0.0; + real_t q_acc_1_3 = 0.0; + real_t q_acc_1_4 = 0.0; + real_t q_acc_1_5 = 0.0; + real_t q_acc_1_6 = 0.0; + real_t q_acc_1_7 = 0.0; + real_t q_acc_1_8 = 0.0; + real_t q_acc_1_9 = 0.0; + real_t q_acc_1_10 = 0.0; + real_t q_acc_1_11 = 0.0; + real_t q_acc_1_12 = 0.0; + real_t q_acc_1_13 = 0.0; + real_t q_acc_1_14 = 0.0; + real_t q_acc_1_15 = 0.0; + real_t q_acc_1_16 = 0.0; + real_t q_acc_1_17 = 0.0; + real_t q_acc_1_18 = 0.0; + real_t q_acc_1_19 = 0.0; + real_t q_acc_1_20 = 0.0; + real_t q_acc_1_21 = 0.0; + real_t q_acc_1_22 = 0.0; + real_t q_acc_1_23 = 0.0; + real_t q_acc_1_24 = 0.0; + real_t q_acc_1_25 = 0.0; + real_t q_acc_1_26 = 0.0; + real_t q_acc_1_27 = 0.0; + real_t q_acc_1_28 = 0.0; + real_t q_acc_1_29 = 0.0; + real_t q_acc_2_0 = 0.0; + real_t q_acc_2_1 = 0.0; + real_t q_acc_2_2 = 0.0; + real_t q_acc_2_3 = 0.0; + real_t q_acc_2_4 = 0.0; + real_t q_acc_2_5 = 0.0; + real_t q_acc_2_6 = 0.0; + real_t q_acc_2_7 = 0.0; + real_t q_acc_2_8 = 0.0; + real_t q_acc_2_9 = 0.0; + real_t q_acc_2_10 = 0.0; + real_t q_acc_2_11 = 0.0; + real_t q_acc_2_12 = 0.0; + real_t q_acc_2_13 = 0.0; + real_t q_acc_2_14 = 0.0; + real_t q_acc_2_15 = 0.0; + real_t q_acc_2_16 = 0.0; + real_t q_acc_2_17 = 0.0; + real_t q_acc_2_18 = 0.0; + real_t q_acc_2_19 = 0.0; + real_t q_acc_2_20 = 0.0; + real_t q_acc_2_21 = 0.0; + real_t q_acc_2_22 = 0.0; + real_t q_acc_2_23 = 0.0; + real_t q_acc_2_24 = 0.0; + real_t q_acc_2_25 = 0.0; + real_t q_acc_2_26 = 0.0; + real_t q_acc_2_27 = 0.0; + real_t q_acc_2_28 = 0.0; + real_t q_acc_2_29 = 0.0; + real_t q_acc_3_0 = 0.0; + real_t q_acc_3_1 = 0.0; + real_t q_acc_3_2 = 0.0; + real_t q_acc_3_3 = 0.0; + real_t q_acc_3_4 = 0.0; + real_t q_acc_3_5 = 0.0; + real_t q_acc_3_6 = 0.0; + real_t q_acc_3_7 = 0.0; + real_t q_acc_3_8 = 0.0; + real_t q_acc_3_9 = 0.0; + real_t q_acc_3_10 = 0.0; + real_t q_acc_3_11 = 0.0; + real_t q_acc_3_12 = 0.0; + real_t q_acc_3_13 = 0.0; + real_t q_acc_3_14 = 0.0; + real_t q_acc_3_15 = 0.0; + real_t q_acc_3_16 = 0.0; + real_t q_acc_3_17 = 0.0; + real_t q_acc_3_18 = 0.0; + real_t q_acc_3_19 = 0.0; + real_t q_acc_3_20 = 0.0; + real_t q_acc_3_21 = 0.0; + real_t q_acc_3_22 = 0.0; + real_t q_acc_3_23 = 0.0; + real_t q_acc_3_24 = 0.0; + real_t q_acc_3_25 = 0.0; + real_t q_acc_3_26 = 0.0; + real_t q_acc_3_27 = 0.0; + real_t q_acc_3_28 = 0.0; + real_t q_acc_3_29 = 0.0; + for (int64_t q = 0; q < 5; q += 1) + { + const real_t tmp_qloop_18 = p_affine_0_0 + (-p_affine_0_0 + p_affine_1_0)*_data_q_p_0[q] + (-p_affine_0_0 + p_affine_2_0)*_data_q_p_1[q] + (-p_affine_0_0 + p_affine_3_0)*_data_q_p_2[q]; + const real_t tmp_qloop_19 = (tmp_qloop_18*tmp_qloop_18); + const real_t tmp_qloop_22 = p_affine_0_2 + (-p_affine_0_2 + p_affine_1_2)*_data_q_p_0[q] + (-p_affine_0_2 + p_affine_2_2)*_data_q_p_1[q] + (-p_affine_0_2 + p_affine_3_2)*_data_q_p_2[q]; + const real_t tmp_qloop_23 = -tmp_qloop_12 - tmp_qloop_22; + const real_t tmp_qloop_24 = -tmp_qloop_1 - tmp_qloop_18; + const real_t tmp_qloop_25 = p_affine_0_1 + (-p_affine_0_1 + p_affine_1_1)*_data_q_p_0[q] + (-p_affine_0_1 + p_affine_2_1)*_data_q_p_1[q] + (-p_affine_0_1 + p_affine_3_1)*_data_q_p_2[q]; + const real_t tmp_qloop_26 = -tmp_qloop_25 - tmp_qloop_8; + const real_t tmp_qloop_27 = tmp_qloop_21*(tmp_qloop_10*tmp_qloop_23*tmp_qloop_9 + tmp_qloop_13*tmp_qloop_24*tmp_qloop_6 - tmp_qloop_14*tmp_qloop_24 - tmp_qloop_15*tmp_qloop_26 + tmp_qloop_2*tmp_qloop_26*tmp_qloop_3 - tmp_qloop_23*tmp_qloop_7); + const real_t tmp_qloop_28 = tmp_qloop_20 - tmp_qloop_27; + const real_t tmp_qloop_29 = (tmp_qloop_25*tmp_qloop_25); + const real_t tmp_qloop_30 = (tmp_qloop_22*tmp_qloop_22); + const real_t tmp_qloop_31 = tmp_qloop_19 + tmp_qloop_29 + tmp_qloop_30; + const real_t tmp_qloop_34 = tmp_qloop_18*tmp_qloop_33; + const real_t tmp_qloop_35 = tmp_qloop_28 + tmp_qloop_34; + const real_t tmp_qloop_36 = tmp_qloop_17*(-tmp_qloop_19*tmp_qloop_28 + tmp_qloop_31*tmp_qloop_35); + const real_t tmp_qloop_37 = pow(tmp_qloop_31, -1.5000000000000000); + const real_t tmp_qloop_38 = tmp_qloop_37*1.0; + const real_t tmp_qloop_41 = tmp_qloop_31*tmp_qloop_40; + const real_t tmp_qloop_42 = -tmp_qloop_25*tmp_qloop_28 - tmp_qloop_41; + const real_t tmp_qloop_43 = tmp_qloop_17*tmp_qloop_38; + const real_t tmp_qloop_44 = tmp_qloop_42*tmp_qloop_43; + const real_t tmp_qloop_47 = -tmp_qloop_28; + const real_t tmp_qloop_48 = tmp_qloop_22*tmp_qloop_47; + const real_t tmp_qloop_49 = tmp_qloop_31*tmp_qloop_46 + tmp_qloop_48; + const real_t tmp_qloop_50 = tmp_qloop_43*tmp_qloop_49; + const real_t tmp_qloop_51 = tmp_qloop_18*tmp_qloop_47 + tmp_qloop_31*tmp_qloop_33; + const real_t tmp_qloop_52 = tmp_qloop_43*tmp_qloop_51; + const real_t tmp_qloop_54 = tmp_qloop_25*tmp_qloop_53 + tmp_qloop_28; + const real_t tmp_qloop_55 = tmp_qloop_29*tmp_qloop_47 + tmp_qloop_31*tmp_qloop_54; + const real_t tmp_qloop_56 = tmp_qloop_22*tmp_qloop_46; + const real_t tmp_qloop_57 = tmp_qloop_28 + tmp_qloop_56; + const real_t tmp_qloop_58 = tmp_qloop_30*tmp_qloop_47 + tmp_qloop_31*tmp_qloop_57; + const real_t tmp_qloop_66 = pow(tmp_qloop_31, -2.5000000000000000)*3.0; + const real_t tmp_qloop_67 = tmp_qloop_36*tmp_qloop_66; + const real_t tmp_qloop_69 = tmp_qloop_18*2.0; + const real_t tmp_qloop_70 = -tmp_qloop_41; + const real_t tmp_qloop_71 = tmp_qloop_35*2.0; + const real_t tmp_qloop_73 = -tmp_qloop_31*tmp_qloop_72; + const real_t tmp_qloop_74 = -tmp_qloop_20 + tmp_qloop_27; + const real_t tmp_qloop_75 = tmp_qloop_18*tmp_qloop_68 + tmp_qloop_34*2.0 + tmp_qloop_74; + const real_t tmp_qloop_76 = tmp_qloop_17*tmp_qloop_66; + const real_t tmp_qloop_77 = tmp_qloop_51*tmp_qloop_76; + const real_t tmp_qloop_78 = tmp_qloop_25*tmp_qloop_77; + const real_t tmp_qloop_79 = tmp_qloop_25*2.0; + const real_t tmp_qloop_80 = tmp_qloop_18*tmp_qloop_40 + tmp_qloop_33*tmp_qloop_79; + const real_t tmp_qloop_81 = tmp_qloop_25*tmp_qloop_43; + const real_t tmp_qloop_82 = tmp_qloop_22*2.0; + const real_t tmp_qloop_83 = tmp_qloop_18*tmp_qloop_72 + tmp_qloop_33*tmp_qloop_82; + const real_t tmp_qloop_84 = -tmp_qloop_22*tmp_qloop_78; + const real_t tmp_qloop_85 = tmp_qloop_18*tmp_qloop_22; + const real_t tmp_qloop_86 = tmp_qloop_22*tmp_qloop_43; + const real_t tmp_qloop_87 = tmp_qloop_21*tmp_qloop_25*tmp_qloop_32 - tmp_qloop_40*tmp_qloop_69; + const real_t tmp_qloop_88 = tmp_qloop_18*tmp_qloop_43; + const real_t tmp_qloop_89 = tmp_qloop_42*tmp_qloop_76; + const real_t tmp_qloop_90 = -tmp_qloop_25*tmp_qloop_40 - tmp_qloop_28; + const real_t tmp_qloop_91 = tmp_qloop_25*tmp_qloop_89; + const real_t tmp_qloop_92 = tmp_qloop_25*tmp_qloop_72 - tmp_qloop_40*tmp_qloop_82; + const real_t tmp_qloop_93 = tmp_qloop_85*tmp_qloop_89; + const real_t tmp_qloop_94 = -tmp_qloop_31*tmp_qloop_68; + const real_t tmp_qloop_95 = tmp_qloop_55*tmp_qloop_76; + const real_t tmp_qloop_96 = tmp_qloop_22*tmp_qloop_68 + tmp_qloop_46*tmp_qloop_69; + const real_t tmp_qloop_97 = tmp_qloop_49*tmp_qloop_76; + const real_t tmp_qloop_98 = tmp_qloop_22*tmp_qloop_40 + tmp_qloop_46*tmp_qloop_79; + const real_t tmp_qloop_99 = tmp_qloop_25*tmp_qloop_97; + const real_t tmp_qloop_100 = tmp_qloop_18*tmp_qloop_99; + const real_t tmp_qloop_101 = tmp_qloop_22*tmp_qloop_72 + tmp_qloop_56*2.0 + tmp_qloop_74; + const real_t tmp_qloop_102 = tmp_qloop_58*tmp_qloop_76; + const real_t tmp_qloop_103 = (_data_q_p_0[q]*_data_q_p_0[q]); + const real_t tmp_qloop_104 = tmp_qloop_103*2.0; + const real_t tmp_qloop_105 = (_data_q_p_1[q]*_data_q_p_1[q]); + const real_t tmp_qloop_106 = tmp_qloop_105*2.0; + const real_t tmp_qloop_107 = (_data_q_p_2[q]*_data_q_p_2[q]); + const real_t tmp_qloop_108 = tmp_qloop_107*2.0; + const real_t tmp_qloop_109 = 4.0*_data_q_p_1[q]; + const real_t tmp_qloop_110 = tmp_qloop_109*_data_q_p_2[q]; + const real_t tmp_qloop_111 = 4.0*_data_q_p_0[q]; + const real_t tmp_qloop_112 = tmp_qloop_111*_data_q_p_1[q]; + const real_t tmp_qloop_113 = tmp_qloop_111*_data_q_p_2[q]; + const real_t tmp_qloop_114 = tmp_qloop_112 + tmp_qloop_113; + const real_t tmp_qloop_115 = tmp_qloop_104 + tmp_qloop_106 + tmp_qloop_108 + tmp_qloop_110 + tmp_qloop_114 - 3.0*_data_q_p_0[q] - 3.0*_data_q_p_1[q] - 3.0*_data_q_p_2[q] + 1.0; + const real_t tmp_qloop_116 = 4.0*_data_q_p_2[q]; + const real_t tmp_qloop_117 = tmp_qloop_109 + tmp_qloop_116; + const real_t tmp_qloop_118 = -rho_dof_8*tmp_qloop_109; + const real_t tmp_qloop_119 = rho_dof_0*(tmp_qloop_111 + tmp_qloop_117 - 3.0); + const real_t tmp_qloop_120 = -rho_dof_7*tmp_qloop_116 + tmp_qloop_119; + const real_t tmp_qloop_121 = rho_dof_1*(tmp_qloop_111 - 1.0) + rho_dof_5*tmp_qloop_116 + rho_dof_6*tmp_qloop_109 + rho_dof_9*(-tmp_qloop_117 - 8.0*_data_q_p_0[q] + 4.0) + tmp_qloop_118 + tmp_qloop_120; + const real_t tmp_qloop_122 = tmp_qloop_111 - 4.0; + const real_t tmp_qloop_123 = -rho_dof_9*tmp_qloop_111; + const real_t tmp_qloop_124 = rho_dof_2*(tmp_qloop_109 - 1.0) + rho_dof_4*tmp_qloop_116 + rho_dof_6*tmp_qloop_111 + rho_dof_8*(-tmp_qloop_116 - tmp_qloop_122 - 8.0*_data_q_p_1[q]) + tmp_qloop_120 + tmp_qloop_123; + const real_t tmp_qloop_125 = rho_dof_3*(tmp_qloop_116 - 1.0) + rho_dof_4*tmp_qloop_109 + rho_dof_5*tmp_qloop_111 + rho_dof_7*(-tmp_qloop_109 - tmp_qloop_122 - 8.0*_data_q_p_2[q]) + tmp_qloop_118 + tmp_qloop_119 + tmp_qloop_123; + const real_t tmp_qloop_127 = tmp_qloop_104 - _data_q_p_0[q]; + const real_t tmp_qloop_128 = tmp_qloop_106 - _data_q_p_1[q]; + const real_t tmp_qloop_129 = tmp_qloop_108 - _data_q_p_2[q]; + const real_t tmp_qloop_130 = tmp_qloop_107*-4.0 - tmp_qloop_110 - tmp_qloop_113 + tmp_qloop_116; + const real_t tmp_qloop_131 = tmp_qloop_105*4.0; + const real_t tmp_qloop_132 = tmp_qloop_109 - tmp_qloop_110 - tmp_qloop_112 - tmp_qloop_131; + const real_t tmp_qloop_133 = tmp_qloop_103*4.0; + const real_t tmp_qloop_134 = tmp_qloop_111 - tmp_qloop_114 - tmp_qloop_133; + const real_t tmp_qloop_158 = tmp_qloop_105*tmp_qloop_111; + const real_t jac_blending_0_0 = tmp_qloop_36*tmp_qloop_38; + const real_t jac_blending_0_1 = tmp_qloop_18*tmp_qloop_44; + const real_t jac_blending_0_2 = tmp_qloop_18*tmp_qloop_50; + const real_t jac_blending_1_0 = tmp_qloop_25*tmp_qloop_52; + const real_t jac_blending_1_1 = tmp_qloop_43*tmp_qloop_55; + const real_t tmp_qloop_63 = jac_blending_0_2*jac_blending_1_1; + const real_t jac_blending_1_2 = tmp_qloop_25*tmp_qloop_50; + const real_t tmp_qloop_60 = jac_blending_0_1*jac_blending_1_2; + const real_t jac_blending_2_0 = tmp_qloop_22*tmp_qloop_52; + const real_t jac_blending_2_1 = tmp_qloop_22*tmp_qloop_44; + const real_t tmp_qloop_61 = jac_blending_1_2*jac_blending_2_1; + const real_t jac_blending_2_2 = tmp_qloop_43*tmp_qloop_58; + const real_t tmp_qloop_59 = jac_blending_1_1*jac_blending_2_2; + const real_t tmp_qloop_62 = jac_blending_0_1*jac_blending_2_2; + const real_t tmp_qloop_64 = jac_blending_0_0*tmp_qloop_59 - jac_blending_0_0*tmp_qloop_61 + jac_blending_0_2*jac_blending_1_0*jac_blending_2_1 - jac_blending_1_0*tmp_qloop_62 + jac_blending_2_0*tmp_qloop_60 - jac_blending_2_0*tmp_qloop_63; + const real_t tmp_qloop_65 = 1.0 / (tmp_qloop_64); + const real_t abs_det_jac_blending = tmp_qloop_64; + const real_t tmp_qloop_135 = abs_det_jac_affine_BLUE_DOWN*abs_det_jac_blending*1.0 / (rho_dof_0*tmp_qloop_115 + rho_dof_1*tmp_qloop_127 + rho_dof_2*tmp_qloop_128 + rho_dof_3*tmp_qloop_129 + rho_dof_4*tmp_qloop_110 + rho_dof_5*tmp_qloop_113 + rho_dof_6*tmp_qloop_112 + rho_dof_7*tmp_qloop_130 + rho_dof_8*tmp_qloop_132 + rho_dof_9*tmp_qloop_134)*_data_q_w[q]; + const real_t tmp_qloop_136 = tmp_qloop_135*(1.0 - _data_q_p_0[q] - _data_q_p_1[q] - _data_q_p_2[q]); + const real_t tmp_qloop_142 = tmp_qloop_135*_data_q_p_0[q]; + const real_t tmp_qloop_144 = tmp_qloop_135*_data_q_p_2[q]; + const real_t tmp_qloop_147 = tmp_qloop_135*_data_q_p_1[q]; + const real_t tmp_qloop_159 = tmp_qloop_135*tmp_qloop_158; + const real_t tmp_qloop_161 = tmp_qloop_107*tmp_qloop_135; + const real_t jac_blending_inv_0_0 = tmp_qloop_65*(tmp_qloop_59 - tmp_qloop_61); + const real_t jac_blending_inv_0_1 = tmp_qloop_65*(jac_blending_0_2*jac_blending_2_1 - tmp_qloop_62); + const real_t jac_blending_inv_0_2 = tmp_qloop_65*(tmp_qloop_60 - tmp_qloop_63); + const real_t jac_blending_inv_1_0 = tmp_qloop_65*(-jac_blending_1_0*jac_blending_2_2 + jac_blending_1_2*jac_blending_2_0); + const real_t jac_blending_inv_1_1 = tmp_qloop_65*(jac_blending_0_0*jac_blending_2_2 - jac_blending_0_2*jac_blending_2_0); + const real_t jac_blending_inv_1_2 = tmp_qloop_65*(-jac_blending_0_0*jac_blending_1_2 + jac_blending_0_2*jac_blending_1_0); + const real_t jac_blending_inv_2_0 = tmp_qloop_65*(jac_blending_1_0*jac_blending_2_1 - jac_blending_1_1*jac_blending_2_0); + const real_t tmp_qloop_126 = tmp_qloop_121*(jac_affine_inv_0_0_BLUE_DOWN*jac_blending_inv_0_0 + jac_affine_inv_0_1_BLUE_DOWN*jac_blending_inv_1_0 + jac_affine_inv_0_2_BLUE_DOWN*jac_blending_inv_2_0) + tmp_qloop_124*(jac_affine_inv_1_0_BLUE_DOWN*jac_blending_inv_0_0 + jac_affine_inv_1_1_BLUE_DOWN*jac_blending_inv_1_0 + jac_affine_inv_1_2_BLUE_DOWN*jac_blending_inv_2_0) + tmp_qloop_125*(jac_affine_inv_2_0_BLUE_DOWN*jac_blending_inv_0_0 + jac_affine_inv_2_1_BLUE_DOWN*jac_blending_inv_1_0 + jac_affine_inv_2_2_BLUE_DOWN*jac_blending_inv_2_0); + const real_t tmp_qloop_137 = tmp_qloop_126*tmp_qloop_136; + const real_t tmp_qloop_143 = tmp_qloop_126*tmp_qloop_142; + const real_t tmp_qloop_145 = tmp_qloop_126*tmp_qloop_144; + const real_t tmp_qloop_146 = tmp_qloop_112*tmp_qloop_145; + const real_t tmp_qloop_148 = tmp_qloop_126*tmp_qloop_147; + const real_t tmp_qloop_157 = tmp_qloop_126*tmp_qloop_135; + const real_t tmp_qloop_160 = tmp_qloop_107*tmp_qloop_157; + const real_t jac_blending_inv_2_1 = tmp_qloop_65*(-jac_blending_0_0*jac_blending_2_1 + jac_blending_0_1*jac_blending_2_0); + const real_t tmp_qloop_138 = tmp_qloop_121*(jac_affine_inv_0_0_BLUE_DOWN*jac_blending_inv_0_1 + jac_affine_inv_0_1_BLUE_DOWN*jac_blending_inv_1_1 + jac_affine_inv_0_2_BLUE_DOWN*jac_blending_inv_2_1) + tmp_qloop_124*(jac_affine_inv_1_0_BLUE_DOWN*jac_blending_inv_0_1 + jac_affine_inv_1_1_BLUE_DOWN*jac_blending_inv_1_1 + jac_affine_inv_1_2_BLUE_DOWN*jac_blending_inv_2_1) + tmp_qloop_125*(jac_affine_inv_2_0_BLUE_DOWN*jac_blending_inv_0_1 + jac_affine_inv_2_1_BLUE_DOWN*jac_blending_inv_1_1 + jac_affine_inv_2_2_BLUE_DOWN*jac_blending_inv_2_1); + const real_t tmp_qloop_139 = tmp_qloop_136*tmp_qloop_138; + const real_t tmp_qloop_149 = tmp_qloop_138*tmp_qloop_142; + const real_t tmp_qloop_150 = tmp_qloop_138*tmp_qloop_144; + const real_t tmp_qloop_151 = tmp_qloop_112*tmp_qloop_150; + const real_t tmp_qloop_152 = tmp_qloop_138*tmp_qloop_147; + const real_t tmp_qloop_162 = tmp_qloop_138*tmp_qloop_161; + const real_t jac_blending_inv_2_2 = tmp_qloop_65*(jac_blending_0_0*jac_blending_1_1 - jac_blending_0_1*jac_blending_1_0); + const real_t tmp_qloop_140 = tmp_qloop_121*(jac_affine_inv_0_0_BLUE_DOWN*jac_blending_inv_0_2 + jac_affine_inv_0_1_BLUE_DOWN*jac_blending_inv_1_2 + jac_affine_inv_0_2_BLUE_DOWN*jac_blending_inv_2_2) + tmp_qloop_124*(jac_affine_inv_1_0_BLUE_DOWN*jac_blending_inv_0_2 + jac_affine_inv_1_1_BLUE_DOWN*jac_blending_inv_1_2 + jac_affine_inv_1_2_BLUE_DOWN*jac_blending_inv_2_2) + tmp_qloop_125*(jac_affine_inv_2_0_BLUE_DOWN*jac_blending_inv_0_2 + jac_affine_inv_2_1_BLUE_DOWN*jac_blending_inv_1_2 + jac_affine_inv_2_2_BLUE_DOWN*jac_blending_inv_2_2); + const real_t tmp_qloop_141 = tmp_qloop_136*tmp_qloop_140; + const real_t tmp_qloop_153 = tmp_qloop_140*tmp_qloop_142; + const real_t tmp_qloop_154 = tmp_qloop_140*tmp_qloop_144; + const real_t tmp_qloop_155 = tmp_qloop_112*tmp_qloop_154; + const real_t tmp_qloop_156 = tmp_qloop_140*tmp_qloop_147; + const real_t tmp_qloop_163 = tmp_qloop_140*tmp_qloop_161; + const real_t hessian_blending_0_0_0 = tmp_qloop_17*tmp_qloop_37*(tmp_qloop_19*tmp_qloop_68 - tmp_qloop_28*tmp_qloop_69 + tmp_qloop_31*(tmp_qloop_33 - tmp_qloop_68) + tmp_qloop_35*tmp_qloop_69)*1.0 - tmp_qloop_18*tmp_qloop_67; + const real_t hessian_blending_1_0_0 = tmp_qloop_17*tmp_qloop_37*(tmp_qloop_19*tmp_qloop_40 + tmp_qloop_25*tmp_qloop_71 + tmp_qloop_70)*1.0 - tmp_qloop_25*tmp_qloop_67; + const real_t hessian_blending_2_0_0 = tmp_qloop_17*tmp_qloop_37*(tmp_qloop_19*tmp_qloop_72 + tmp_qloop_22*tmp_qloop_71 + tmp_qloop_73)*1.0 - tmp_qloop_22*tmp_qloop_67; + const real_t hessian_blending_0_0_1 = tmp_qloop_17*tmp_qloop_25*tmp_qloop_37*tmp_qloop_75*1.0 - tmp_qloop_18*tmp_qloop_78; + const real_t hessian_blending_1_0_1 = -tmp_qloop_29*tmp_qloop_77 + tmp_qloop_52 + tmp_qloop_80*tmp_qloop_81; + const real_t hessian_blending_2_0_1 = tmp_qloop_81*tmp_qloop_83 + tmp_qloop_84; + const real_t hessian_blending_0_0_2 = tmp_qloop_17*tmp_qloop_22*tmp_qloop_37*tmp_qloop_75*1.0 - tmp_qloop_77*tmp_qloop_85; + const real_t hessian_blending_1_0_2 = tmp_qloop_80*tmp_qloop_86 + tmp_qloop_84; + const real_t hessian_blending_2_0_2 = -tmp_qloop_30*tmp_qloop_77 + tmp_qloop_52 + tmp_qloop_83*tmp_qloop_86; + const real_t hessian_blending_0_1_0 = -tmp_qloop_19*tmp_qloop_89 + tmp_qloop_44 + tmp_qloop_87*tmp_qloop_88; + const real_t hessian_blending_1_1_0 = tmp_qloop_17*tmp_qloop_18*tmp_qloop_37*tmp_qloop_90*1.0 - tmp_qloop_18*tmp_qloop_91; + const real_t hessian_blending_2_1_0 = tmp_qloop_88*tmp_qloop_92 - tmp_qloop_93; + const real_t hessian_blending_0_1_1 = tmp_qloop_17*tmp_qloop_37*(tmp_qloop_29*tmp_qloop_68 + tmp_qloop_54*tmp_qloop_69 + tmp_qloop_94)*1.0 - tmp_qloop_18*tmp_qloop_95; + const real_t hessian_blending_1_1_1 = tmp_qloop_17*tmp_qloop_37*(tmp_qloop_29*tmp_qloop_40 + tmp_qloop_31*(-tmp_qloop_40 + tmp_qloop_53) + tmp_qloop_47*tmp_qloop_79 + tmp_qloop_54*tmp_qloop_79)*1.0 - tmp_qloop_25*tmp_qloop_95; + const real_t hessian_blending_2_1_1 = tmp_qloop_17*tmp_qloop_37*(tmp_qloop_29*tmp_qloop_72 + tmp_qloop_54*tmp_qloop_82 + tmp_qloop_73)*1.0 - tmp_qloop_22*tmp_qloop_95; + const real_t hessian_blending_0_1_2 = tmp_qloop_17*tmp_qloop_22*tmp_qloop_37*tmp_qloop_87*1.0 - tmp_qloop_93; + const real_t hessian_blending_1_1_2 = tmp_qloop_17*tmp_qloop_22*tmp_qloop_37*tmp_qloop_90*1.0 - tmp_qloop_22*tmp_qloop_91; + const real_t hessian_blending_2_1_2 = -tmp_qloop_30*tmp_qloop_89 + tmp_qloop_44 + tmp_qloop_86*tmp_qloop_92; + const real_t hessian_blending_0_2_0 = -tmp_qloop_19*tmp_qloop_97 + tmp_qloop_50 + tmp_qloop_88*tmp_qloop_96; + const real_t hessian_blending_1_2_0 = -tmp_qloop_100 + tmp_qloop_17*tmp_qloop_18*tmp_qloop_37*tmp_qloop_98*1.0; + const real_t hessian_blending_2_2_0 = tmp_qloop_101*tmp_qloop_17*tmp_qloop_18*tmp_qloop_37*1.0 - tmp_qloop_85*tmp_qloop_97; + const real_t hessian_blending_0_2_1 = -tmp_qloop_100 + tmp_qloop_17*tmp_qloop_25*tmp_qloop_37*tmp_qloop_96*1.0; + const real_t hessian_blending_1_2_1 = -tmp_qloop_29*tmp_qloop_97 + tmp_qloop_50 + tmp_qloop_81*tmp_qloop_98; + const real_t hessian_blending_2_2_1 = tmp_qloop_101*tmp_qloop_17*tmp_qloop_25*tmp_qloop_37*1.0 - tmp_qloop_22*tmp_qloop_99; + const real_t hessian_blending_0_2_2 = -tmp_qloop_102*tmp_qloop_18 + tmp_qloop_17*tmp_qloop_37*(tmp_qloop_30*tmp_qloop_68 + tmp_qloop_57*tmp_qloop_69 + tmp_qloop_94)*1.0; + const real_t hessian_blending_1_2_2 = -tmp_qloop_102*tmp_qloop_25 + tmp_qloop_17*tmp_qloop_37*(tmp_qloop_30*tmp_qloop_40 + tmp_qloop_57*tmp_qloop_79 + tmp_qloop_70)*1.0; + const real_t hessian_blending_2_2_2 = -tmp_qloop_102*tmp_qloop_22 + tmp_qloop_17*tmp_qloop_37*(tmp_qloop_30*tmp_qloop_72 + tmp_qloop_31*(tmp_qloop_46 - tmp_qloop_72) + tmp_qloop_48*2.0 + tmp_qloop_57*tmp_qloop_82)*1.0; + const real_t q_tmp_0_0 = tmp_qloop_115*tmp_qloop_137; + const real_t q_tmp_0_1 = tmp_qloop_127*tmp_qloop_137; + const real_t q_tmp_0_2 = tmp_qloop_128*tmp_qloop_137; + const real_t q_tmp_0_3 = tmp_qloop_129*tmp_qloop_137; + const real_t q_tmp_0_4 = tmp_qloop_110*tmp_qloop_137; + const real_t q_tmp_0_5 = tmp_qloop_113*tmp_qloop_137; + const real_t q_tmp_0_6 = tmp_qloop_112*tmp_qloop_137; + const real_t q_tmp_0_7 = tmp_qloop_130*tmp_qloop_137; + const real_t q_tmp_0_8 = tmp_qloop_132*tmp_qloop_137; + const real_t q_tmp_0_9 = tmp_qloop_134*tmp_qloop_137; + const real_t q_tmp_0_10 = tmp_qloop_115*tmp_qloop_139; + const real_t q_tmp_0_11 = tmp_qloop_127*tmp_qloop_139; + const real_t q_tmp_0_12 = tmp_qloop_128*tmp_qloop_139; + const real_t q_tmp_0_13 = tmp_qloop_129*tmp_qloop_139; + const real_t q_tmp_0_14 = tmp_qloop_110*tmp_qloop_139; + const real_t q_tmp_0_15 = tmp_qloop_113*tmp_qloop_139; + const real_t q_tmp_0_16 = tmp_qloop_112*tmp_qloop_139; + const real_t q_tmp_0_17 = tmp_qloop_130*tmp_qloop_139; + const real_t q_tmp_0_18 = tmp_qloop_132*tmp_qloop_139; + const real_t q_tmp_0_19 = tmp_qloop_134*tmp_qloop_139; + const real_t q_tmp_0_20 = tmp_qloop_115*tmp_qloop_141; + const real_t q_tmp_0_21 = tmp_qloop_127*tmp_qloop_141; + const real_t q_tmp_0_22 = tmp_qloop_128*tmp_qloop_141; + const real_t q_tmp_0_23 = tmp_qloop_129*tmp_qloop_141; + const real_t q_tmp_0_24 = tmp_qloop_110*tmp_qloop_141; + const real_t q_tmp_0_25 = tmp_qloop_113*tmp_qloop_141; + const real_t q_tmp_0_26 = tmp_qloop_112*tmp_qloop_141; + const real_t q_tmp_0_27 = tmp_qloop_130*tmp_qloop_141; + const real_t q_tmp_0_28 = tmp_qloop_132*tmp_qloop_141; + const real_t q_tmp_0_29 = tmp_qloop_134*tmp_qloop_141; + const real_t q_tmp_1_0 = tmp_qloop_115*tmp_qloop_143; + const real_t q_tmp_1_1 = tmp_qloop_127*tmp_qloop_143; + const real_t q_tmp_1_2 = tmp_qloop_128*tmp_qloop_143; + const real_t q_tmp_1_3 = tmp_qloop_129*tmp_qloop_143; + const real_t q_tmp_1_4 = tmp_qloop_146; + const real_t q_tmp_1_5 = tmp_qloop_133*tmp_qloop_145; + const real_t q_tmp_1_6 = tmp_qloop_133*tmp_qloop_148; + const real_t q_tmp_1_7 = tmp_qloop_130*tmp_qloop_143; + const real_t q_tmp_1_8 = tmp_qloop_132*tmp_qloop_143; + const real_t q_tmp_1_9 = tmp_qloop_134*tmp_qloop_143; + const real_t q_tmp_1_10 = tmp_qloop_115*tmp_qloop_149; + const real_t q_tmp_1_11 = tmp_qloop_127*tmp_qloop_149; + const real_t q_tmp_1_12 = tmp_qloop_128*tmp_qloop_149; + const real_t q_tmp_1_13 = tmp_qloop_129*tmp_qloop_149; + const real_t q_tmp_1_14 = tmp_qloop_151; + const real_t q_tmp_1_15 = tmp_qloop_133*tmp_qloop_150; + const real_t q_tmp_1_16 = tmp_qloop_133*tmp_qloop_152; + const real_t q_tmp_1_17 = tmp_qloop_130*tmp_qloop_149; + const real_t q_tmp_1_18 = tmp_qloop_132*tmp_qloop_149; + const real_t q_tmp_1_19 = tmp_qloop_134*tmp_qloop_149; + const real_t q_tmp_1_20 = tmp_qloop_115*tmp_qloop_153; + const real_t q_tmp_1_21 = tmp_qloop_127*tmp_qloop_153; + const real_t q_tmp_1_22 = tmp_qloop_128*tmp_qloop_153; + const real_t q_tmp_1_23 = tmp_qloop_129*tmp_qloop_153; + const real_t q_tmp_1_24 = tmp_qloop_155; + const real_t q_tmp_1_25 = tmp_qloop_133*tmp_qloop_154; + const real_t q_tmp_1_26 = tmp_qloop_133*tmp_qloop_156; + const real_t q_tmp_1_27 = tmp_qloop_130*tmp_qloop_153; + const real_t q_tmp_1_28 = tmp_qloop_132*tmp_qloop_153; + const real_t q_tmp_1_29 = tmp_qloop_134*tmp_qloop_153; + const real_t q_tmp_2_0 = tmp_qloop_115*tmp_qloop_148; + const real_t q_tmp_2_1 = tmp_qloop_127*tmp_qloop_148; + const real_t q_tmp_2_2 = tmp_qloop_128*tmp_qloop_148; + const real_t q_tmp_2_3 = tmp_qloop_129*tmp_qloop_148; + const real_t q_tmp_2_4 = tmp_qloop_131*tmp_qloop_145; + const real_t q_tmp_2_5 = tmp_qloop_146; + const real_t q_tmp_2_6 = tmp_qloop_157*tmp_qloop_158; + const real_t q_tmp_2_7 = tmp_qloop_130*tmp_qloop_148; + const real_t q_tmp_2_8 = tmp_qloop_132*tmp_qloop_148; + const real_t q_tmp_2_9 = tmp_qloop_134*tmp_qloop_148; + const real_t q_tmp_2_10 = tmp_qloop_115*tmp_qloop_152; + const real_t q_tmp_2_11 = tmp_qloop_127*tmp_qloop_152; + const real_t q_tmp_2_12 = tmp_qloop_128*tmp_qloop_152; + const real_t q_tmp_2_13 = tmp_qloop_129*tmp_qloop_152; + const real_t q_tmp_2_14 = tmp_qloop_131*tmp_qloop_150; + const real_t q_tmp_2_15 = tmp_qloop_151; + const real_t q_tmp_2_16 = tmp_qloop_138*tmp_qloop_159; + const real_t q_tmp_2_17 = tmp_qloop_130*tmp_qloop_152; + const real_t q_tmp_2_18 = tmp_qloop_132*tmp_qloop_152; + const real_t q_tmp_2_19 = tmp_qloop_134*tmp_qloop_152; + const real_t q_tmp_2_20 = tmp_qloop_115*tmp_qloop_156; + const real_t q_tmp_2_21 = tmp_qloop_127*tmp_qloop_156; + const real_t q_tmp_2_22 = tmp_qloop_128*tmp_qloop_156; + const real_t q_tmp_2_23 = tmp_qloop_129*tmp_qloop_156; + const real_t q_tmp_2_24 = tmp_qloop_131*tmp_qloop_154; + const real_t q_tmp_2_25 = tmp_qloop_155; + const real_t q_tmp_2_26 = tmp_qloop_140*tmp_qloop_159; + const real_t q_tmp_2_27 = tmp_qloop_130*tmp_qloop_156; + const real_t q_tmp_2_28 = tmp_qloop_132*tmp_qloop_156; + const real_t q_tmp_2_29 = tmp_qloop_134*tmp_qloop_156; + const real_t q_tmp_3_0 = tmp_qloop_115*tmp_qloop_145; + const real_t q_tmp_3_1 = tmp_qloop_127*tmp_qloop_145; + const real_t q_tmp_3_2 = tmp_qloop_128*tmp_qloop_145; + const real_t q_tmp_3_3 = tmp_qloop_129*tmp_qloop_145; + const real_t q_tmp_3_4 = tmp_qloop_109*tmp_qloop_160; + const real_t q_tmp_3_5 = tmp_qloop_111*tmp_qloop_160; + const real_t q_tmp_3_6 = tmp_qloop_146; + const real_t q_tmp_3_7 = tmp_qloop_130*tmp_qloop_145; + const real_t q_tmp_3_8 = tmp_qloop_132*tmp_qloop_145; + const real_t q_tmp_3_9 = tmp_qloop_134*tmp_qloop_145; + const real_t q_tmp_3_10 = tmp_qloop_115*tmp_qloop_150; + const real_t q_tmp_3_11 = tmp_qloop_127*tmp_qloop_150; + const real_t q_tmp_3_12 = tmp_qloop_128*tmp_qloop_150; + const real_t q_tmp_3_13 = tmp_qloop_129*tmp_qloop_150; + const real_t q_tmp_3_14 = tmp_qloop_109*tmp_qloop_162; + const real_t q_tmp_3_15 = tmp_qloop_111*tmp_qloop_162; + const real_t q_tmp_3_16 = tmp_qloop_151; + const real_t q_tmp_3_17 = tmp_qloop_130*tmp_qloop_150; + const real_t q_tmp_3_18 = tmp_qloop_132*tmp_qloop_150; + const real_t q_tmp_3_19 = tmp_qloop_134*tmp_qloop_150; + const real_t q_tmp_3_20 = tmp_qloop_115*tmp_qloop_154; + const real_t q_tmp_3_21 = tmp_qloop_127*tmp_qloop_154; + const real_t q_tmp_3_22 = tmp_qloop_128*tmp_qloop_154; + const real_t q_tmp_3_23 = tmp_qloop_129*tmp_qloop_154; + const real_t q_tmp_3_24 = tmp_qloop_109*tmp_qloop_163; + const real_t q_tmp_3_25 = tmp_qloop_111*tmp_qloop_163; + const real_t q_tmp_3_26 = tmp_qloop_155; + const real_t q_tmp_3_27 = tmp_qloop_130*tmp_qloop_154; + const real_t q_tmp_3_28 = tmp_qloop_132*tmp_qloop_154; + const real_t q_tmp_3_29 = tmp_qloop_134*tmp_qloop_154; + q_acc_0_0 = q_acc_0_0 + q_tmp_0_0; + q_acc_0_1 = q_acc_0_1 + q_tmp_0_1; + q_acc_0_2 = q_acc_0_2 + q_tmp_0_2; + q_acc_0_3 = q_acc_0_3 + q_tmp_0_3; + q_acc_0_4 = q_acc_0_4 + q_tmp_0_4; + q_acc_0_5 = q_acc_0_5 + q_tmp_0_5; + q_acc_0_6 = q_acc_0_6 + q_tmp_0_6; + q_acc_0_7 = q_acc_0_7 + q_tmp_0_7; + q_acc_0_8 = q_acc_0_8 + q_tmp_0_8; + q_acc_0_9 = q_acc_0_9 + q_tmp_0_9; + q_acc_0_10 = q_acc_0_10 + q_tmp_0_10; + q_acc_0_11 = q_acc_0_11 + q_tmp_0_11; + q_acc_0_12 = q_acc_0_12 + q_tmp_0_12; + q_acc_0_13 = q_acc_0_13 + q_tmp_0_13; + q_acc_0_14 = q_acc_0_14 + q_tmp_0_14; + q_acc_0_15 = q_acc_0_15 + q_tmp_0_15; + q_acc_0_16 = q_acc_0_16 + q_tmp_0_16; + q_acc_0_17 = q_acc_0_17 + q_tmp_0_17; + q_acc_0_18 = q_acc_0_18 + q_tmp_0_18; + q_acc_0_19 = q_acc_0_19 + q_tmp_0_19; + q_acc_0_20 = q_acc_0_20 + q_tmp_0_20; + q_acc_0_21 = q_acc_0_21 + q_tmp_0_21; + q_acc_0_22 = q_acc_0_22 + q_tmp_0_22; + q_acc_0_23 = q_acc_0_23 + q_tmp_0_23; + q_acc_0_24 = q_acc_0_24 + q_tmp_0_24; + q_acc_0_25 = q_acc_0_25 + q_tmp_0_25; + q_acc_0_26 = q_acc_0_26 + q_tmp_0_26; + q_acc_0_27 = q_acc_0_27 + q_tmp_0_27; + q_acc_0_28 = q_acc_0_28 + q_tmp_0_28; + q_acc_0_29 = q_acc_0_29 + q_tmp_0_29; + q_acc_1_0 = q_acc_1_0 + q_tmp_1_0; + q_acc_1_1 = q_acc_1_1 + q_tmp_1_1; + q_acc_1_2 = q_acc_1_2 + q_tmp_1_2; + q_acc_1_3 = q_acc_1_3 + q_tmp_1_3; + q_acc_1_4 = q_acc_1_4 + q_tmp_1_4; + q_acc_1_5 = q_acc_1_5 + q_tmp_1_5; + q_acc_1_6 = q_acc_1_6 + q_tmp_1_6; + q_acc_1_7 = q_acc_1_7 + q_tmp_1_7; + q_acc_1_8 = q_acc_1_8 + q_tmp_1_8; + q_acc_1_9 = q_acc_1_9 + q_tmp_1_9; + q_acc_1_10 = q_acc_1_10 + q_tmp_1_10; + q_acc_1_11 = q_acc_1_11 + q_tmp_1_11; + q_acc_1_12 = q_acc_1_12 + q_tmp_1_12; + q_acc_1_13 = q_acc_1_13 + q_tmp_1_13; + q_acc_1_14 = q_acc_1_14 + q_tmp_1_14; + q_acc_1_15 = q_acc_1_15 + q_tmp_1_15; + q_acc_1_16 = q_acc_1_16 + q_tmp_1_16; + q_acc_1_17 = q_acc_1_17 + q_tmp_1_17; + q_acc_1_18 = q_acc_1_18 + q_tmp_1_18; + q_acc_1_19 = q_acc_1_19 + q_tmp_1_19; + q_acc_1_20 = q_acc_1_20 + q_tmp_1_20; + q_acc_1_21 = q_acc_1_21 + q_tmp_1_21; + q_acc_1_22 = q_acc_1_22 + q_tmp_1_22; + q_acc_1_23 = q_acc_1_23 + q_tmp_1_23; + q_acc_1_24 = q_acc_1_24 + q_tmp_1_24; + q_acc_1_25 = q_acc_1_25 + q_tmp_1_25; + q_acc_1_26 = q_acc_1_26 + q_tmp_1_26; + q_acc_1_27 = q_acc_1_27 + q_tmp_1_27; + q_acc_1_28 = q_acc_1_28 + q_tmp_1_28; + q_acc_1_29 = q_acc_1_29 + q_tmp_1_29; + q_acc_2_0 = q_acc_2_0 + q_tmp_2_0; + q_acc_2_1 = q_acc_2_1 + q_tmp_2_1; + q_acc_2_2 = q_acc_2_2 + q_tmp_2_2; + q_acc_2_3 = q_acc_2_3 + q_tmp_2_3; + q_acc_2_4 = q_acc_2_4 + q_tmp_2_4; + q_acc_2_5 = q_acc_2_5 + q_tmp_2_5; + q_acc_2_6 = q_acc_2_6 + q_tmp_2_6; + q_acc_2_7 = q_acc_2_7 + q_tmp_2_7; + q_acc_2_8 = q_acc_2_8 + q_tmp_2_8; + q_acc_2_9 = q_acc_2_9 + q_tmp_2_9; + q_acc_2_10 = q_acc_2_10 + q_tmp_2_10; + q_acc_2_11 = q_acc_2_11 + q_tmp_2_11; + q_acc_2_12 = q_acc_2_12 + q_tmp_2_12; + q_acc_2_13 = q_acc_2_13 + q_tmp_2_13; + q_acc_2_14 = q_acc_2_14 + q_tmp_2_14; + q_acc_2_15 = q_acc_2_15 + q_tmp_2_15; + q_acc_2_16 = q_acc_2_16 + q_tmp_2_16; + q_acc_2_17 = q_acc_2_17 + q_tmp_2_17; + q_acc_2_18 = q_acc_2_18 + q_tmp_2_18; + q_acc_2_19 = q_acc_2_19 + q_tmp_2_19; + q_acc_2_20 = q_acc_2_20 + q_tmp_2_20; + q_acc_2_21 = q_acc_2_21 + q_tmp_2_21; + q_acc_2_22 = q_acc_2_22 + q_tmp_2_22; + q_acc_2_23 = q_acc_2_23 + q_tmp_2_23; + q_acc_2_24 = q_acc_2_24 + q_tmp_2_24; + q_acc_2_25 = q_acc_2_25 + q_tmp_2_25; + q_acc_2_26 = q_acc_2_26 + q_tmp_2_26; + q_acc_2_27 = q_acc_2_27 + q_tmp_2_27; + q_acc_2_28 = q_acc_2_28 + q_tmp_2_28; + q_acc_2_29 = q_acc_2_29 + q_tmp_2_29; + q_acc_3_0 = q_acc_3_0 + q_tmp_3_0; + q_acc_3_1 = q_acc_3_1 + q_tmp_3_1; + q_acc_3_2 = q_acc_3_2 + q_tmp_3_2; + q_acc_3_3 = q_acc_3_3 + q_tmp_3_3; + q_acc_3_4 = q_acc_3_4 + q_tmp_3_4; + q_acc_3_5 = q_acc_3_5 + q_tmp_3_5; + q_acc_3_6 = q_acc_3_6 + q_tmp_3_6; + q_acc_3_7 = q_acc_3_7 + q_tmp_3_7; + q_acc_3_8 = q_acc_3_8 + q_tmp_3_8; + q_acc_3_9 = q_acc_3_9 + q_tmp_3_9; + q_acc_3_10 = q_acc_3_10 + q_tmp_3_10; + q_acc_3_11 = q_acc_3_11 + q_tmp_3_11; + q_acc_3_12 = q_acc_3_12 + q_tmp_3_12; + q_acc_3_13 = q_acc_3_13 + q_tmp_3_13; + q_acc_3_14 = q_acc_3_14 + q_tmp_3_14; + q_acc_3_15 = q_acc_3_15 + q_tmp_3_15; + q_acc_3_16 = q_acc_3_16 + q_tmp_3_16; + q_acc_3_17 = q_acc_3_17 + q_tmp_3_17; + q_acc_3_18 = q_acc_3_18 + q_tmp_3_18; + q_acc_3_19 = q_acc_3_19 + q_tmp_3_19; + q_acc_3_20 = q_acc_3_20 + q_tmp_3_20; + q_acc_3_21 = q_acc_3_21 + q_tmp_3_21; + q_acc_3_22 = q_acc_3_22 + q_tmp_3_22; + q_acc_3_23 = q_acc_3_23 + q_tmp_3_23; + q_acc_3_24 = q_acc_3_24 + q_tmp_3_24; + q_acc_3_25 = q_acc_3_25 + q_tmp_3_25; + q_acc_3_26 = q_acc_3_26 + q_tmp_3_26; + q_acc_3_27 = q_acc_3_27 + q_tmp_3_27; + q_acc_3_28 = q_acc_3_28 + q_tmp_3_28; + q_acc_3_29 = q_acc_3_29 + q_tmp_3_29; + } + const real_t elMatVec_0 = q_acc_0_0*src_dof_0 + q_acc_0_1*src_dof_1 + q_acc_0_10*src_dof_10 + q_acc_0_11*src_dof_11 + q_acc_0_12*src_dof_12 + q_acc_0_13*src_dof_13 + q_acc_0_14*src_dof_14 + q_acc_0_15*src_dof_15 + q_acc_0_16*src_dof_16 + q_acc_0_17*src_dof_17 + q_acc_0_18*src_dof_18 + q_acc_0_19*src_dof_19 + q_acc_0_2*src_dof_2 + q_acc_0_20*src_dof_20 + q_acc_0_21*src_dof_21 + q_acc_0_22*src_dof_22 + q_acc_0_23*src_dof_23 + q_acc_0_24*src_dof_24 + q_acc_0_25*src_dof_25 + q_acc_0_26*src_dof_26 + q_acc_0_27*src_dof_27 + q_acc_0_28*src_dof_28 + q_acc_0_29*src_dof_29 + q_acc_0_3*src_dof_3 + q_acc_0_4*src_dof_4 + q_acc_0_5*src_dof_5 + q_acc_0_6*src_dof_6 + q_acc_0_7*src_dof_7 + q_acc_0_8*src_dof_8 + q_acc_0_9*src_dof_9; + const real_t elMatVec_1 = q_acc_1_0*src_dof_0 + q_acc_1_1*src_dof_1 + q_acc_1_10*src_dof_10 + q_acc_1_11*src_dof_11 + q_acc_1_12*src_dof_12 + q_acc_1_13*src_dof_13 + q_acc_1_14*src_dof_14 + q_acc_1_15*src_dof_15 + q_acc_1_16*src_dof_16 + q_acc_1_17*src_dof_17 + q_acc_1_18*src_dof_18 + q_acc_1_19*src_dof_19 + q_acc_1_2*src_dof_2 + q_acc_1_20*src_dof_20 + q_acc_1_21*src_dof_21 + q_acc_1_22*src_dof_22 + q_acc_1_23*src_dof_23 + q_acc_1_24*src_dof_24 + q_acc_1_25*src_dof_25 + q_acc_1_26*src_dof_26 + q_acc_1_27*src_dof_27 + q_acc_1_28*src_dof_28 + q_acc_1_29*src_dof_29 + q_acc_1_3*src_dof_3 + q_acc_1_4*src_dof_4 + q_acc_1_5*src_dof_5 + q_acc_1_6*src_dof_6 + q_acc_1_7*src_dof_7 + q_acc_1_8*src_dof_8 + q_acc_1_9*src_dof_9; + const real_t elMatVec_2 = q_acc_2_0*src_dof_0 + q_acc_2_1*src_dof_1 + q_acc_2_10*src_dof_10 + q_acc_2_11*src_dof_11 + q_acc_2_12*src_dof_12 + q_acc_2_13*src_dof_13 + q_acc_2_14*src_dof_14 + q_acc_2_15*src_dof_15 + q_acc_2_16*src_dof_16 + q_acc_2_17*src_dof_17 + q_acc_2_18*src_dof_18 + q_acc_2_19*src_dof_19 + q_acc_2_2*src_dof_2 + q_acc_2_20*src_dof_20 + q_acc_2_21*src_dof_21 + q_acc_2_22*src_dof_22 + q_acc_2_23*src_dof_23 + q_acc_2_24*src_dof_24 + q_acc_2_25*src_dof_25 + q_acc_2_26*src_dof_26 + q_acc_2_27*src_dof_27 + q_acc_2_28*src_dof_28 + q_acc_2_29*src_dof_29 + q_acc_2_3*src_dof_3 + q_acc_2_4*src_dof_4 + q_acc_2_5*src_dof_5 + q_acc_2_6*src_dof_6 + q_acc_2_7*src_dof_7 + q_acc_2_8*src_dof_8 + q_acc_2_9*src_dof_9; + const real_t elMatVec_3 = q_acc_3_0*src_dof_0 + q_acc_3_1*src_dof_1 + q_acc_3_10*src_dof_10 + q_acc_3_11*src_dof_11 + q_acc_3_12*src_dof_12 + q_acc_3_13*src_dof_13 + q_acc_3_14*src_dof_14 + q_acc_3_15*src_dof_15 + q_acc_3_16*src_dof_16 + q_acc_3_17*src_dof_17 + q_acc_3_18*src_dof_18 + q_acc_3_19*src_dof_19 + q_acc_3_2*src_dof_2 + q_acc_3_20*src_dof_20 + q_acc_3_21*src_dof_21 + q_acc_3_22*src_dof_22 + q_acc_3_23*src_dof_23 + q_acc_3_24*src_dof_24 + q_acc_3_25*src_dof_25 + q_acc_3_26*src_dof_26 + q_acc_3_27*src_dof_27 + q_acc_3_28*src_dof_28 + q_acc_3_29*src_dof_29 + q_acc_3_3*src_dof_3 + q_acc_3_4*src_dof_4 + q_acc_3_5*src_dof_5 + q_acc_3_6*src_dof_6 + q_acc_3_7*src_dof_7 + q_acc_3_8*src_dof_8 + q_acc_3_9*src_dof_9; + _data_dst[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))] = elMatVec_0 + _data_dst[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))]; + _data_dst[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))] = elMatVec_1 + _data_dst[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))]; + _data_dst[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1] = elMatVec_2 + _data_dst[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1]; + _data_dst[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))] = elMatVec_3 + _data_dst[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))]; + } + } + } + const real_t tmp_coords_jac_0_GREEN_UP = 1.0 / (micro_edges_per_macro_edge_float)*1.0; + const real_t tmp_coords_jac_1_GREEN_UP = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_GREEN_UP*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0); + const real_t tmp_coords_jac_2_GREEN_UP = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_GREEN_UP*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1); + const real_t tmp_coords_jac_3_GREEN_UP = macro_vertex_coord_id_0comp2 + tmp_coords_jac_0_GREEN_UP*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2); + const real_t tmp_coords_jac_4_GREEN_UP = tmp_coords_jac_0_GREEN_UP*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0); + const real_t tmp_coords_jac_5_GREEN_UP = tmp_coords_jac_0_GREEN_UP*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1); + const real_t tmp_coords_jac_6_GREEN_UP = tmp_coords_jac_0_GREEN_UP*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2); + const real_t p_affine_const_0_0_GREEN_UP = tmp_coords_jac_1_GREEN_UP; + const real_t p_affine_const_0_1_GREEN_UP = tmp_coords_jac_2_GREEN_UP; + const real_t p_affine_const_0_2_GREEN_UP = tmp_coords_jac_3_GREEN_UP; + const real_t p_affine_const_1_0_GREEN_UP = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_GREEN_UP*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0); + const real_t p_affine_const_1_1_GREEN_UP = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_GREEN_UP*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1); + const real_t p_affine_const_1_2_GREEN_UP = macro_vertex_coord_id_0comp2 + tmp_coords_jac_0_GREEN_UP*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2); + const real_t p_affine_const_2_0_GREEN_UP = macro_vertex_coord_id_0comp0 + tmp_coords_jac_4_GREEN_UP; + const real_t p_affine_const_2_1_GREEN_UP = macro_vertex_coord_id_0comp1 + tmp_coords_jac_5_GREEN_UP; + const real_t p_affine_const_2_2_GREEN_UP = macro_vertex_coord_id_0comp2 + tmp_coords_jac_6_GREEN_UP; + const real_t p_affine_const_3_0_GREEN_UP = tmp_coords_jac_1_GREEN_UP + tmp_coords_jac_4_GREEN_UP; + const real_t p_affine_const_3_1_GREEN_UP = tmp_coords_jac_2_GREEN_UP + tmp_coords_jac_5_GREEN_UP; + const real_t p_affine_const_3_2_GREEN_UP = tmp_coords_jac_3_GREEN_UP + tmp_coords_jac_6_GREEN_UP; + const real_t jac_affine_0_0_GREEN_UP = -p_affine_const_0_0_GREEN_UP + p_affine_const_1_0_GREEN_UP; + const real_t jac_affine_0_1_GREEN_UP = -p_affine_const_0_0_GREEN_UP + p_affine_const_2_0_GREEN_UP; + const real_t jac_affine_0_2_GREEN_UP = -p_affine_const_0_0_GREEN_UP + p_affine_const_3_0_GREEN_UP; + const real_t jac_affine_1_0_GREEN_UP = -p_affine_const_0_1_GREEN_UP + p_affine_const_1_1_GREEN_UP; + const real_t jac_affine_1_1_GREEN_UP = -p_affine_const_0_1_GREEN_UP + p_affine_const_2_1_GREEN_UP; + const real_t tmp_coords_jac_11_GREEN_UP = jac_affine_0_2_GREEN_UP*jac_affine_1_1_GREEN_UP; + const real_t jac_affine_1_2_GREEN_UP = -p_affine_const_0_1_GREEN_UP + p_affine_const_3_1_GREEN_UP; + const real_t tmp_coords_jac_9_GREEN_UP = jac_affine_0_1_GREEN_UP*jac_affine_1_2_GREEN_UP; + const real_t jac_affine_2_0_GREEN_UP = -p_affine_const_0_2_GREEN_UP + p_affine_const_1_2_GREEN_UP; + const real_t jac_affine_2_1_GREEN_UP = -p_affine_const_0_2_GREEN_UP + p_affine_const_2_2_GREEN_UP; + const real_t tmp_coords_jac_8_GREEN_UP = jac_affine_1_2_GREEN_UP*jac_affine_2_1_GREEN_UP; + const real_t jac_affine_2_2_GREEN_UP = -p_affine_const_0_2_GREEN_UP + p_affine_const_3_2_GREEN_UP; + const real_t tmp_coords_jac_7_GREEN_UP = jac_affine_1_1_GREEN_UP*jac_affine_2_2_GREEN_UP; + const real_t tmp_coords_jac_10_GREEN_UP = jac_affine_0_1_GREEN_UP*jac_affine_2_2_GREEN_UP; + const real_t tmp_coords_jac_12_GREEN_UP = jac_affine_0_0_GREEN_UP*tmp_coords_jac_7_GREEN_UP - jac_affine_0_0_GREEN_UP*tmp_coords_jac_8_GREEN_UP + jac_affine_0_2_GREEN_UP*jac_affine_1_0_GREEN_UP*jac_affine_2_1_GREEN_UP - jac_affine_1_0_GREEN_UP*tmp_coords_jac_10_GREEN_UP - jac_affine_2_0_GREEN_UP*tmp_coords_jac_11_GREEN_UP + jac_affine_2_0_GREEN_UP*tmp_coords_jac_9_GREEN_UP; + const real_t tmp_coords_jac_13_GREEN_UP = 1.0 / (tmp_coords_jac_12_GREEN_UP); + const real_t jac_affine_inv_0_0_GREEN_UP = tmp_coords_jac_13_GREEN_UP*(tmp_coords_jac_7_GREEN_UP - tmp_coords_jac_8_GREEN_UP); + const real_t jac_affine_inv_0_1_GREEN_UP = tmp_coords_jac_13_GREEN_UP*(jac_affine_0_2_GREEN_UP*jac_affine_2_1_GREEN_UP - tmp_coords_jac_10_GREEN_UP); + const real_t jac_affine_inv_0_2_GREEN_UP = tmp_coords_jac_13_GREEN_UP*(-tmp_coords_jac_11_GREEN_UP + tmp_coords_jac_9_GREEN_UP); + const real_t jac_affine_inv_1_0_GREEN_UP = tmp_coords_jac_13_GREEN_UP*(-jac_affine_1_0_GREEN_UP*jac_affine_2_2_GREEN_UP + jac_affine_1_2_GREEN_UP*jac_affine_2_0_GREEN_UP); + const real_t jac_affine_inv_1_1_GREEN_UP = tmp_coords_jac_13_GREEN_UP*(jac_affine_0_0_GREEN_UP*jac_affine_2_2_GREEN_UP - jac_affine_0_2_GREEN_UP*jac_affine_2_0_GREEN_UP); + const real_t jac_affine_inv_1_2_GREEN_UP = tmp_coords_jac_13_GREEN_UP*(-jac_affine_0_0_GREEN_UP*jac_affine_1_2_GREEN_UP + jac_affine_0_2_GREEN_UP*jac_affine_1_0_GREEN_UP); + const real_t jac_affine_inv_2_0_GREEN_UP = tmp_coords_jac_13_GREEN_UP*(jac_affine_1_0_GREEN_UP*jac_affine_2_1_GREEN_UP - jac_affine_1_1_GREEN_UP*jac_affine_2_0_GREEN_UP); + const real_t jac_affine_inv_2_1_GREEN_UP = tmp_coords_jac_13_GREEN_UP*(-jac_affine_0_0_GREEN_UP*jac_affine_2_1_GREEN_UP + jac_affine_0_1_GREEN_UP*jac_affine_2_0_GREEN_UP); + const real_t jac_affine_inv_2_2_GREEN_UP = tmp_coords_jac_13_GREEN_UP*(jac_affine_0_0_GREEN_UP*jac_affine_1_1_GREEN_UP - jac_affine_0_1_GREEN_UP*jac_affine_1_0_GREEN_UP); + const real_t abs_det_jac_affine_GREEN_UP = abs(tmp_coords_jac_12_GREEN_UP); + { + /* CellType.GREEN_UP */ + for (int64_t ctr_2 = 0; ctr_2 < micro_edges_per_macro_edge; ctr_2 += 1) + for (int64_t ctr_1 = 0; ctr_1 < -ctr_2 + micro_edges_per_macro_edge; ctr_1 += 1) + { + for (int64_t ctr_0 = 0; ctr_0 < (int64_t)((-ctr_1 - ctr_2 + micro_edges_per_macro_edge - 1) / (4)) * (4); ctr_0 += 4) + { + + const int64_t phantom_ctr_0 = ctr_0; + real_t _data_float_loop_ctr_array_dim_0[4]; + _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0; + _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1; + _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2; + _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3; + real_t _data_float_loop_ctr_array_dim_1[4]; + _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1; + real_t _data_float_loop_ctr_array_dim_2[4]; + _data_float_loop_ctr_array_dim_2[0] = (real_t) ctr_2; + _data_float_loop_ctr_array_dim_2[1] = (real_t) ctr_2; + _data_float_loop_ctr_array_dim_2[2] = (real_t) ctr_2; + _data_float_loop_ctr_array_dim_2[3] = (real_t) ctr_2; + + const __m256d p_affine_0_0 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_3comp0,macro_vertex_coord_id_3comp0,macro_vertex_coord_id_3comp0,macro_vertex_coord_id_3comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)); + const __m256d p_affine_0_1 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_3comp1,macro_vertex_coord_id_3comp1,macro_vertex_coord_id_3comp1,macro_vertex_coord_id_3comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)); + const __m256d p_affine_0_2 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)),_mm256_set_pd(macro_vertex_coord_id_1comp2,macro_vertex_coord_id_1comp2,macro_vertex_coord_id_1comp2,macro_vertex_coord_id_1comp2)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)),_mm256_set_pd(macro_vertex_coord_id_2comp2,macro_vertex_coord_id_2comp2,macro_vertex_coord_id_2comp2,macro_vertex_coord_id_2comp2)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)),_mm256_set_pd(macro_vertex_coord_id_3comp2,macro_vertex_coord_id_3comp2,macro_vertex_coord_id_3comp2,macro_vertex_coord_id_3comp2)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)); + const __m256d p_affine_1_0 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_3comp0,macro_vertex_coord_id_3comp0,macro_vertex_coord_id_3comp0,macro_vertex_coord_id_3comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)); + const __m256d p_affine_1_1 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_3comp1,macro_vertex_coord_id_3comp1,macro_vertex_coord_id_3comp1,macro_vertex_coord_id_3comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)); + const __m256d p_affine_1_2 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)),_mm256_set_pd(macro_vertex_coord_id_1comp2,macro_vertex_coord_id_1comp2,macro_vertex_coord_id_1comp2,macro_vertex_coord_id_1comp2)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)),_mm256_set_pd(macro_vertex_coord_id_2comp2,macro_vertex_coord_id_2comp2,macro_vertex_coord_id_2comp2,macro_vertex_coord_id_2comp2)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)),_mm256_set_pd(macro_vertex_coord_id_3comp2,macro_vertex_coord_id_3comp2,macro_vertex_coord_id_3comp2,macro_vertex_coord_id_3comp2)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)); + const __m256d p_affine_2_0 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_3comp0,macro_vertex_coord_id_3comp0,macro_vertex_coord_id_3comp0,macro_vertex_coord_id_3comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)); + const __m256d p_affine_2_1 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_3comp1,macro_vertex_coord_id_3comp1,macro_vertex_coord_id_3comp1,macro_vertex_coord_id_3comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)); + const __m256d p_affine_2_2 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)),_mm256_set_pd(macro_vertex_coord_id_1comp2,macro_vertex_coord_id_1comp2,macro_vertex_coord_id_1comp2,macro_vertex_coord_id_1comp2)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)),_mm256_set_pd(macro_vertex_coord_id_2comp2,macro_vertex_coord_id_2comp2,macro_vertex_coord_id_2comp2,macro_vertex_coord_id_2comp2)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)),_mm256_set_pd(macro_vertex_coord_id_3comp2,macro_vertex_coord_id_3comp2,macro_vertex_coord_id_3comp2,macro_vertex_coord_id_3comp2)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)); + const __m256d p_affine_3_0 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_3comp0,macro_vertex_coord_id_3comp0,macro_vertex_coord_id_3comp0,macro_vertex_coord_id_3comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)); + const __m256d p_affine_3_1 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_3comp1,macro_vertex_coord_id_3comp1,macro_vertex_coord_id_3comp1,macro_vertex_coord_id_3comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)); + const __m256d p_affine_3_2 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)),_mm256_set_pd(macro_vertex_coord_id_1comp2,macro_vertex_coord_id_1comp2,macro_vertex_coord_id_1comp2,macro_vertex_coord_id_1comp2)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)),_mm256_set_pd(macro_vertex_coord_id_2comp2,macro_vertex_coord_id_2comp2,macro_vertex_coord_id_2comp2,macro_vertex_coord_id_2comp2)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)),_mm256_set_pd(macro_vertex_coord_id_3comp2,macro_vertex_coord_id_3comp2,macro_vertex_coord_id_3comp2,macro_vertex_coord_id_3comp2)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)); + const __m256d src_dof_0 = _mm256_loadu_pd(& _data_src_vertex_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1]); + const __m256d src_dof_1 = _mm256_loadu_pd(& _data_src_vertex_0[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))]); + const __m256d src_dof_2 = _mm256_loadu_pd(& _data_src_vertex_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))]); + const __m256d src_dof_3 = _mm256_loadu_pd(& _data_src_vertex_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1]); + const __m256d src_dof_4 = _mm256_loadu_pd(& _data_src_edge_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))]); + const __m256d src_dof_5 = _mm256_loadu_pd(& _data_src_edge_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge - 1)*(micro_edges_per_macro_edge + 1)) / (6)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))]); + const __m256d src_dof_6 = _mm256_loadu_pd(& _data_src_edge_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]); + const __m256d src_dof_7 = _mm256_loadu_pd(& _data_src_edge_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 3*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + 1]); + const __m256d src_dof_8 = _mm256_loadu_pd(& _data_src_edge_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 5*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]); + const __m256d src_dof_9 = _mm256_loadu_pd(& _data_src_edge_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 4*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]); + const __m256d src_dof_10 = _mm256_loadu_pd(& _data_src_vertex_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1]); + const __m256d src_dof_11 = _mm256_loadu_pd(& _data_src_vertex_1[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))]); + const __m256d src_dof_12 = _mm256_loadu_pd(& _data_src_vertex_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))]); + const __m256d src_dof_13 = _mm256_loadu_pd(& _data_src_vertex_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1]); + const __m256d src_dof_14 = _mm256_loadu_pd(& _data_src_edge_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))]); + const __m256d src_dof_15 = _mm256_loadu_pd(& _data_src_edge_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge - 1)*(micro_edges_per_macro_edge + 1)) / (6)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))]); + const __m256d src_dof_16 = _mm256_loadu_pd(& _data_src_edge_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]); + const __m256d src_dof_17 = _mm256_loadu_pd(& _data_src_edge_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 3*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + 1]); + const __m256d src_dof_18 = _mm256_loadu_pd(& _data_src_edge_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 5*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]); + const __m256d src_dof_19 = _mm256_loadu_pd(& _data_src_edge_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 4*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]); + const __m256d src_dof_20 = _mm256_loadu_pd(& _data_src_vertex_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1]); + const __m256d src_dof_21 = _mm256_loadu_pd(& _data_src_vertex_2[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))]); + const __m256d src_dof_22 = _mm256_loadu_pd(& _data_src_vertex_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))]); + const __m256d src_dof_23 = _mm256_loadu_pd(& _data_src_vertex_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1]); + const __m256d src_dof_24 = _mm256_loadu_pd(& _data_src_edge_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))]); + const __m256d src_dof_25 = _mm256_loadu_pd(& _data_src_edge_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge - 1)*(micro_edges_per_macro_edge + 1)) / (6)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))]); + const __m256d src_dof_26 = _mm256_loadu_pd(& _data_src_edge_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]); + const __m256d src_dof_27 = _mm256_loadu_pd(& _data_src_edge_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 3*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + 1]); + const __m256d src_dof_28 = _mm256_loadu_pd(& _data_src_edge_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 5*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]); + const __m256d src_dof_29 = _mm256_loadu_pd(& _data_src_edge_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 4*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]); + const __m256d rho_dof_0 = _mm256_loadu_pd(& _data_rhoVertex[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1]); + const __m256d rho_dof_1 = _mm256_loadu_pd(& _data_rhoVertex[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))]); + const __m256d rho_dof_2 = _mm256_loadu_pd(& _data_rhoVertex[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))]); + const __m256d rho_dof_3 = _mm256_loadu_pd(& _data_rhoVertex[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1]); + const __m256d rho_dof_4 = _mm256_loadu_pd(& _data_rhoEdge[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))]); + const __m256d rho_dof_5 = _mm256_loadu_pd(& _data_rhoEdge[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge - 1)*(micro_edges_per_macro_edge + 1)) / (6)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))]); + const __m256d rho_dof_6 = _mm256_loadu_pd(& _data_rhoEdge[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]); + const __m256d rho_dof_7 = _mm256_loadu_pd(& _data_rhoEdge[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 3*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + 1]); + const __m256d rho_dof_8 = _mm256_loadu_pd(& _data_rhoEdge[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 5*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]); + const __m256d rho_dof_9 = _mm256_loadu_pd(& _data_rhoEdge[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 4*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]); + __m256d q_acc_0_0 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_1 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_2 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_3 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_4 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_5 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_6 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_7 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_8 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_9 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_10 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_11 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_12 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_13 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_14 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_15 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_16 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_17 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_18 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_19 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_20 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_21 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_22 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_23 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_24 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_25 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_26 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_27 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_28 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_29 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_0 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_1 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_2 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_3 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_4 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_5 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_6 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_7 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_8 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_9 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_10 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_11 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_12 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_13 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_14 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_15 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_16 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_17 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_18 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_19 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_20 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_21 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_22 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_23 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_24 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_25 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_26 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_27 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_28 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_29 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_0 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_1 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_2 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_3 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_4 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_5 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_6 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_7 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_8 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_9 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_10 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_11 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_12 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_13 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_14 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_15 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_16 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_17 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_18 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_19 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_20 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_21 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_22 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_23 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_24 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_25 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_26 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_27 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_28 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_29 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_0 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_1 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_2 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_3 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_4 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_5 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_6 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_7 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_8 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_9 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_10 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_11 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_12 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_13 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_14 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_15 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_16 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_17 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_18 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_19 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_20 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_21 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_22 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_23 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_24 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_25 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_26 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_27 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_28 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_29 = _mm256_set_pd(0.0,0.0,0.0,0.0); + for (int64_t q = 0; q < 5; q += 1) + { + const __m256d tmp_qloop_18 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(p_affine_0_0,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),p_affine_1_0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(p_affine_0_0,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),p_affine_2_0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]))),_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(p_affine_0_0,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),p_affine_3_0),_mm256_set_pd(_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q]))),p_affine_0_0); + const __m256d tmp_qloop_19 = _mm256_mul_pd(tmp_qloop_18,tmp_qloop_18); + const __m256d tmp_qloop_22 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(p_affine_0_2,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),p_affine_1_2),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(p_affine_0_2,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),p_affine_2_2),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]))),_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(p_affine_0_2,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),p_affine_3_2),_mm256_set_pd(_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q]))),p_affine_0_2); + const __m256d tmp_qloop_23 = _mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(tmp_qloop_12,tmp_qloop_12,tmp_qloop_12,tmp_qloop_12)),_mm256_mul_pd(tmp_qloop_22,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))); + const __m256d tmp_qloop_24 = _mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(tmp_qloop_1,tmp_qloop_1,tmp_qloop_1,tmp_qloop_1)),_mm256_mul_pd(tmp_qloop_18,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))); + const __m256d tmp_qloop_25 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(p_affine_0_1,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),p_affine_1_1),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(p_affine_0_1,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),p_affine_2_1),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]))),_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(p_affine_0_1,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),p_affine_3_1),_mm256_set_pd(_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q]))),p_affine_0_1); + const __m256d tmp_qloop_26 = _mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(tmp_qloop_8,tmp_qloop_8,tmp_qloop_8,tmp_qloop_8)),_mm256_mul_pd(tmp_qloop_25,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))); + const __m256d tmp_qloop_27 = _mm256_mul_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_24,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_qloop_14,tmp_qloop_14,tmp_qloop_14,tmp_qloop_14)),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_26,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_qloop_15,tmp_qloop_15,tmp_qloop_15,tmp_qloop_15))),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_23,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_qloop_7,tmp_qloop_7,tmp_qloop_7,tmp_qloop_7))),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_23,_mm256_set_pd(tmp_qloop_10,tmp_qloop_10,tmp_qloop_10,tmp_qloop_10)),_mm256_set_pd(tmp_qloop_9,tmp_qloop_9,tmp_qloop_9,tmp_qloop_9))),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_24,_mm256_set_pd(tmp_qloop_13,tmp_qloop_13,tmp_qloop_13,tmp_qloop_13)),_mm256_set_pd(tmp_qloop_6,tmp_qloop_6,tmp_qloop_6,tmp_qloop_6))),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_26,_mm256_set_pd(tmp_qloop_2,tmp_qloop_2,tmp_qloop_2,tmp_qloop_2)),_mm256_set_pd(tmp_qloop_3,tmp_qloop_3,tmp_qloop_3,tmp_qloop_3))),_mm256_set_pd(tmp_qloop_21,tmp_qloop_21,tmp_qloop_21,tmp_qloop_21)); + const __m256d tmp_qloop_28 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_27,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_qloop_20,tmp_qloop_20,tmp_qloop_20,tmp_qloop_20)); + const __m256d tmp_qloop_29 = _mm256_mul_pd(tmp_qloop_25,tmp_qloop_25); + const __m256d tmp_qloop_30 = _mm256_mul_pd(tmp_qloop_22,tmp_qloop_22); + const __m256d tmp_qloop_31 = _mm256_add_pd(_mm256_add_pd(tmp_qloop_19,tmp_qloop_29),tmp_qloop_30); + const __m256d tmp_qloop_34 = _mm256_mul_pd(tmp_qloop_18,_mm256_set_pd(tmp_qloop_33,tmp_qloop_33,tmp_qloop_33,tmp_qloop_33)); + const __m256d tmp_qloop_35 = _mm256_add_pd(tmp_qloop_28,tmp_qloop_34); + const __m256d tmp_qloop_36 = _mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_31,tmp_qloop_35),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_19,tmp_qloop_28),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_set_pd(tmp_qloop_17,tmp_qloop_17,tmp_qloop_17,tmp_qloop_17)); + const __m256d tmp_qloop_37 = _mm256_div_pd(_mm256_sqrt_pd(tmp_qloop_31),_mm256_mul_pd(tmp_qloop_31,tmp_qloop_31)); + const __m256d tmp_qloop_38 = _mm256_mul_pd(tmp_qloop_37,_mm256_set_pd(1.0,1.0,1.0,1.0)); + const __m256d tmp_qloop_41 = _mm256_mul_pd(tmp_qloop_31,_mm256_set_pd(tmp_qloop_40,tmp_qloop_40,tmp_qloop_40,tmp_qloop_40)); + const __m256d tmp_qloop_42 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_41,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_25,tmp_qloop_28),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))); + const __m256d tmp_qloop_43 = _mm256_mul_pd(tmp_qloop_38,_mm256_set_pd(tmp_qloop_17,tmp_qloop_17,tmp_qloop_17,tmp_qloop_17)); + const __m256d tmp_qloop_44 = _mm256_mul_pd(tmp_qloop_42,tmp_qloop_43); + const __m256d tmp_qloop_47 = _mm256_mul_pd(tmp_qloop_28,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)); + const __m256d tmp_qloop_48 = _mm256_mul_pd(tmp_qloop_22,tmp_qloop_47); + const __m256d tmp_qloop_49 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_31,_mm256_set_pd(tmp_qloop_46,tmp_qloop_46,tmp_qloop_46,tmp_qloop_46)),tmp_qloop_48); + const __m256d tmp_qloop_50 = _mm256_mul_pd(tmp_qloop_43,tmp_qloop_49); + const __m256d tmp_qloop_51 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_31,_mm256_set_pd(tmp_qloop_33,tmp_qloop_33,tmp_qloop_33,tmp_qloop_33)),_mm256_mul_pd(tmp_qloop_18,tmp_qloop_47)); + const __m256d tmp_qloop_52 = _mm256_mul_pd(tmp_qloop_43,tmp_qloop_51); + const __m256d tmp_qloop_54 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_25,_mm256_set_pd(tmp_qloop_53,tmp_qloop_53,tmp_qloop_53,tmp_qloop_53)),tmp_qloop_28); + const __m256d tmp_qloop_55 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_29,tmp_qloop_47),_mm256_mul_pd(tmp_qloop_31,tmp_qloop_54)); + const __m256d tmp_qloop_56 = _mm256_mul_pd(tmp_qloop_22,_mm256_set_pd(tmp_qloop_46,tmp_qloop_46,tmp_qloop_46,tmp_qloop_46)); + const __m256d tmp_qloop_57 = _mm256_add_pd(tmp_qloop_28,tmp_qloop_56); + const __m256d tmp_qloop_58 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_30,tmp_qloop_47),_mm256_mul_pd(tmp_qloop_31,tmp_qloop_57)); + const __m256d tmp_qloop_66 = _mm256_mul_pd(_mm256_div_pd(_mm256_sqrt_pd(tmp_qloop_31),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_31,tmp_qloop_31),tmp_qloop_31)),_mm256_set_pd(3.0,3.0,3.0,3.0)); + const __m256d tmp_qloop_67 = _mm256_mul_pd(tmp_qloop_36,tmp_qloop_66); + const __m256d tmp_qloop_69 = _mm256_mul_pd(tmp_qloop_18,_mm256_set_pd(2.0,2.0,2.0,2.0)); + const __m256d tmp_qloop_70 = _mm256_mul_pd(tmp_qloop_41,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)); + const __m256d tmp_qloop_71 = _mm256_mul_pd(tmp_qloop_35,_mm256_set_pd(2.0,2.0,2.0,2.0)); + const __m256d tmp_qloop_73 = _mm256_mul_pd(_mm256_mul_pd(tmp_qloop_31,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_qloop_72,tmp_qloop_72,tmp_qloop_72,tmp_qloop_72)); + const __m256d tmp_qloop_74 = _mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(tmp_qloop_20,tmp_qloop_20,tmp_qloop_20,tmp_qloop_20)),tmp_qloop_27); + const __m256d tmp_qloop_75 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_34,_mm256_set_pd(2.0,2.0,2.0,2.0)),_mm256_mul_pd(tmp_qloop_18,_mm256_set_pd(tmp_qloop_68,tmp_qloop_68,tmp_qloop_68,tmp_qloop_68))),tmp_qloop_74); + const __m256d tmp_qloop_76 = _mm256_mul_pd(tmp_qloop_66,_mm256_set_pd(tmp_qloop_17,tmp_qloop_17,tmp_qloop_17,tmp_qloop_17)); + const __m256d tmp_qloop_77 = _mm256_mul_pd(tmp_qloop_51,tmp_qloop_76); + const __m256d tmp_qloop_78 = _mm256_mul_pd(tmp_qloop_25,tmp_qloop_77); + const __m256d tmp_qloop_79 = _mm256_mul_pd(tmp_qloop_25,_mm256_set_pd(2.0,2.0,2.0,2.0)); + const __m256d tmp_qloop_80 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_79,_mm256_set_pd(tmp_qloop_33,tmp_qloop_33,tmp_qloop_33,tmp_qloop_33)),_mm256_mul_pd(tmp_qloop_18,_mm256_set_pd(tmp_qloop_40,tmp_qloop_40,tmp_qloop_40,tmp_qloop_40))); + const __m256d tmp_qloop_81 = _mm256_mul_pd(tmp_qloop_25,tmp_qloop_43); + const __m256d tmp_qloop_82 = _mm256_mul_pd(tmp_qloop_22,_mm256_set_pd(2.0,2.0,2.0,2.0)); + const __m256d tmp_qloop_83 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_82,_mm256_set_pd(tmp_qloop_33,tmp_qloop_33,tmp_qloop_33,tmp_qloop_33)),_mm256_mul_pd(tmp_qloop_18,_mm256_set_pd(tmp_qloop_72,tmp_qloop_72,tmp_qloop_72,tmp_qloop_72))); + const __m256d tmp_qloop_84 = _mm256_mul_pd(_mm256_mul_pd(tmp_qloop_22,tmp_qloop_78),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)); + const __m256d tmp_qloop_85 = _mm256_mul_pd(tmp_qloop_18,tmp_qloop_22); + const __m256d tmp_qloop_86 = _mm256_mul_pd(tmp_qloop_22,tmp_qloop_43); + const __m256d tmp_qloop_87 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_69,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_qloop_40,tmp_qloop_40,tmp_qloop_40,tmp_qloop_40)),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_25,_mm256_set_pd(tmp_qloop_21,tmp_qloop_21,tmp_qloop_21,tmp_qloop_21)),_mm256_set_pd(tmp_qloop_32,tmp_qloop_32,tmp_qloop_32,tmp_qloop_32))); + const __m256d tmp_qloop_88 = _mm256_mul_pd(tmp_qloop_18,tmp_qloop_43); + const __m256d tmp_qloop_89 = _mm256_mul_pd(tmp_qloop_42,tmp_qloop_76); + const __m256d tmp_qloop_90 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_28,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_25,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_qloop_40,tmp_qloop_40,tmp_qloop_40,tmp_qloop_40))); + const __m256d tmp_qloop_91 = _mm256_mul_pd(tmp_qloop_25,tmp_qloop_89); + const __m256d tmp_qloop_92 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_25,_mm256_set_pd(tmp_qloop_72,tmp_qloop_72,tmp_qloop_72,tmp_qloop_72)),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_82,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_qloop_40,tmp_qloop_40,tmp_qloop_40,tmp_qloop_40))); + const __m256d tmp_qloop_93 = _mm256_mul_pd(tmp_qloop_85,tmp_qloop_89); + const __m256d tmp_qloop_94 = _mm256_mul_pd(_mm256_mul_pd(tmp_qloop_31,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_qloop_68,tmp_qloop_68,tmp_qloop_68,tmp_qloop_68)); + const __m256d tmp_qloop_95 = _mm256_mul_pd(tmp_qloop_55,tmp_qloop_76); + const __m256d tmp_qloop_96 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_69,_mm256_set_pd(tmp_qloop_46,tmp_qloop_46,tmp_qloop_46,tmp_qloop_46)),_mm256_mul_pd(tmp_qloop_22,_mm256_set_pd(tmp_qloop_68,tmp_qloop_68,tmp_qloop_68,tmp_qloop_68))); + const __m256d tmp_qloop_97 = _mm256_mul_pd(tmp_qloop_49,tmp_qloop_76); + const __m256d tmp_qloop_98 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_22,_mm256_set_pd(tmp_qloop_40,tmp_qloop_40,tmp_qloop_40,tmp_qloop_40)),_mm256_mul_pd(tmp_qloop_79,_mm256_set_pd(tmp_qloop_46,tmp_qloop_46,tmp_qloop_46,tmp_qloop_46))); + const __m256d tmp_qloop_99 = _mm256_mul_pd(tmp_qloop_25,tmp_qloop_97); + const __m256d tmp_qloop_100 = _mm256_mul_pd(tmp_qloop_18,tmp_qloop_99); + const __m256d tmp_qloop_101 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_56,_mm256_set_pd(2.0,2.0,2.0,2.0)),_mm256_mul_pd(tmp_qloop_22,_mm256_set_pd(tmp_qloop_72,tmp_qloop_72,tmp_qloop_72,tmp_qloop_72))),tmp_qloop_74); + const __m256d tmp_qloop_102 = _mm256_mul_pd(tmp_qloop_58,tmp_qloop_76); + const __m256d tmp_qloop_103 = _mm256_mul_pd(_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q]),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])); + const __m256d tmp_qloop_104 = _mm256_mul_pd(tmp_qloop_103,_mm256_set_pd(2.0,2.0,2.0,2.0)); + const __m256d tmp_qloop_105 = _mm256_mul_pd(_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q])); + const __m256d tmp_qloop_106 = _mm256_mul_pd(tmp_qloop_105,_mm256_set_pd(2.0,2.0,2.0,2.0)); + const __m256d tmp_qloop_107 = _mm256_mul_pd(_mm256_set_pd(_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q]),_mm256_set_pd(_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q])); + const __m256d tmp_qloop_108 = _mm256_mul_pd(tmp_qloop_107,_mm256_set_pd(2.0,2.0,2.0,2.0)); + const __m256d tmp_qloop_109 = _mm256_mul_pd(_mm256_set_pd(4.0,4.0,4.0,4.0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q])); + const __m256d tmp_qloop_110 = _mm256_mul_pd(tmp_qloop_109,_mm256_set_pd(_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q])); + const __m256d tmp_qloop_111 = _mm256_mul_pd(_mm256_set_pd(4.0,4.0,4.0,4.0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])); + const __m256d tmp_qloop_112 = _mm256_mul_pd(tmp_qloop_111,_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q])); + const __m256d tmp_qloop_113 = _mm256_mul_pd(tmp_qloop_111,_mm256_set_pd(_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q])); + const __m256d tmp_qloop_114 = _mm256_add_pd(tmp_qloop_112,tmp_qloop_113); + const __m256d tmp_qloop_115 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-3.0,-3.0,-3.0,-3.0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),_mm256_mul_pd(_mm256_set_pd(-3.0,-3.0,-3.0,-3.0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]))),_mm256_mul_pd(_mm256_set_pd(-3.0,-3.0,-3.0,-3.0),_mm256_set_pd(_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),tmp_qloop_104),tmp_qloop_106),tmp_qloop_108),tmp_qloop_110),tmp_qloop_114); + const __m256d tmp_qloop_116 = _mm256_mul_pd(_mm256_set_pd(4.0,4.0,4.0,4.0),_mm256_set_pd(_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q])); + const __m256d tmp_qloop_117 = _mm256_add_pd(tmp_qloop_109,tmp_qloop_116); + const __m256d tmp_qloop_118 = _mm256_mul_pd(_mm256_mul_pd(rho_dof_8,tmp_qloop_109),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)); + const __m256d tmp_qloop_119 = _mm256_mul_pd(rho_dof_0,_mm256_add_pd(_mm256_add_pd(_mm256_set_pd(-3.0,-3.0,-3.0,-3.0),tmp_qloop_111),tmp_qloop_117)); + const __m256d tmp_qloop_120 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(rho_dof_7,tmp_qloop_116),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),tmp_qloop_119); + const __m256d tmp_qloop_121 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(rho_dof_1,_mm256_add_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),tmp_qloop_111)),_mm256_mul_pd(rho_dof_9,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_117,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_set_pd(-8.0,-8.0,-8.0,-8.0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q]))),_mm256_set_pd(4.0,4.0,4.0,4.0)))),_mm256_mul_pd(rho_dof_5,tmp_qloop_116)),_mm256_mul_pd(rho_dof_6,tmp_qloop_109)),tmp_qloop_118),tmp_qloop_120); + const __m256d tmp_qloop_122 = _mm256_add_pd(_mm256_set_pd(-4.0,-4.0,-4.0,-4.0),tmp_qloop_111); + const __m256d tmp_qloop_123 = _mm256_mul_pd(_mm256_mul_pd(rho_dof_9,tmp_qloop_111),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)); + const __m256d tmp_qloop_124 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(rho_dof_2,_mm256_add_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),tmp_qloop_109)),_mm256_mul_pd(rho_dof_8,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_116,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_122,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_mul_pd(_mm256_set_pd(-8.0,-8.0,-8.0,-8.0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]))))),_mm256_mul_pd(rho_dof_4,tmp_qloop_116)),_mm256_mul_pd(rho_dof_6,tmp_qloop_111)),tmp_qloop_120),tmp_qloop_123); + const __m256d tmp_qloop_125 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(rho_dof_3,_mm256_add_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),tmp_qloop_116)),_mm256_mul_pd(rho_dof_7,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_109,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_122,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_mul_pd(_mm256_set_pd(-8.0,-8.0,-8.0,-8.0),_mm256_set_pd(_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q]))))),_mm256_mul_pd(rho_dof_4,tmp_qloop_109)),_mm256_mul_pd(rho_dof_5,tmp_qloop_111)),tmp_qloop_118),tmp_qloop_119),tmp_qloop_123); + const __m256d tmp_qloop_127 = _mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),tmp_qloop_104); + const __m256d tmp_qloop_128 = _mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q])),tmp_qloop_106); + const __m256d tmp_qloop_129 = _mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q])),tmp_qloop_108); + const __m256d tmp_qloop_130 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_110,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_113,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_mul_pd(tmp_qloop_107,_mm256_set_pd(-4.0,-4.0,-4.0,-4.0))),tmp_qloop_116); + const __m256d tmp_qloop_131 = _mm256_mul_pd(tmp_qloop_105,_mm256_set_pd(4.0,4.0,4.0,4.0)); + const __m256d tmp_qloop_132 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_110,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_112,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_mul_pd(tmp_qloop_131,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_109); + const __m256d tmp_qloop_133 = _mm256_mul_pd(tmp_qloop_103,_mm256_set_pd(4.0,4.0,4.0,4.0)); + const __m256d tmp_qloop_134 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_114,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_133,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_111); + const __m256d tmp_qloop_158 = _mm256_mul_pd(tmp_qloop_105,tmp_qloop_111); + const __m256d jac_blending_0_0 = _mm256_mul_pd(tmp_qloop_36,tmp_qloop_38); + const __m256d jac_blending_0_1 = _mm256_mul_pd(tmp_qloop_18,tmp_qloop_44); + const __m256d jac_blending_0_2 = _mm256_mul_pd(tmp_qloop_18,tmp_qloop_50); + const __m256d jac_blending_1_0 = _mm256_mul_pd(tmp_qloop_25,tmp_qloop_52); + const __m256d jac_blending_1_1 = _mm256_mul_pd(tmp_qloop_43,tmp_qloop_55); + const __m256d tmp_qloop_63 = _mm256_mul_pd(jac_blending_0_2,jac_blending_1_1); + const __m256d jac_blending_1_2 = _mm256_mul_pd(tmp_qloop_25,tmp_qloop_50); + const __m256d tmp_qloop_60 = _mm256_mul_pd(jac_blending_0_1,jac_blending_1_2); + const __m256d jac_blending_2_0 = _mm256_mul_pd(tmp_qloop_22,tmp_qloop_52); + const __m256d jac_blending_2_1 = _mm256_mul_pd(tmp_qloop_22,tmp_qloop_44); + const __m256d tmp_qloop_61 = _mm256_mul_pd(jac_blending_1_2,jac_blending_2_1); + const __m256d jac_blending_2_2 = _mm256_mul_pd(tmp_qloop_43,tmp_qloop_58); + const __m256d tmp_qloop_59 = _mm256_mul_pd(jac_blending_1_1,jac_blending_2_2); + const __m256d tmp_qloop_62 = _mm256_mul_pd(jac_blending_0_1,jac_blending_2_2); + const __m256d tmp_qloop_64 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(jac_blending_0_0,tmp_qloop_59),_mm256_mul_pd(jac_blending_2_0,tmp_qloop_60)),_mm256_mul_pd(_mm256_mul_pd(jac_blending_0_0,tmp_qloop_61),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_mul_pd(_mm256_mul_pd(jac_blending_1_0,tmp_qloop_62),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_mul_pd(_mm256_mul_pd(jac_blending_2_0,tmp_qloop_63),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_mul_pd(_mm256_mul_pd(jac_blending_0_2,jac_blending_1_0),jac_blending_2_1)); + const __m256d tmp_qloop_65 = _mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),tmp_qloop_64); + const __m256d abs_det_jac_blending = tmp_qloop_64; + const __m256d tmp_qloop_135 = _mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(abs_det_jac_blending,_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(rho_dof_0,tmp_qloop_115),_mm256_mul_pd(rho_dof_1,tmp_qloop_127)),_mm256_mul_pd(rho_dof_2,tmp_qloop_128)),_mm256_mul_pd(rho_dof_3,tmp_qloop_129)),_mm256_mul_pd(rho_dof_4,tmp_qloop_110)),_mm256_mul_pd(rho_dof_5,tmp_qloop_113)),_mm256_mul_pd(rho_dof_6,tmp_qloop_112)),_mm256_mul_pd(rho_dof_7,tmp_qloop_130)),_mm256_mul_pd(rho_dof_8,tmp_qloop_132)),_mm256_mul_pd(rho_dof_9,tmp_qloop_134)))),_mm256_set_pd(_data_q_w[q],_data_q_w[q],_data_q_w[q],_data_q_w[q])),_mm256_set_pd(abs_det_jac_affine_GREEN_UP,abs_det_jac_affine_GREEN_UP,abs_det_jac_affine_GREEN_UP,abs_det_jac_affine_GREEN_UP)); + const __m256d tmp_qloop_136 = _mm256_mul_pd(tmp_qloop_135,_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]))),_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q]))),_mm256_set_pd(1.0,1.0,1.0,1.0))); + const __m256d tmp_qloop_142 = _mm256_mul_pd(tmp_qloop_135,_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])); + const __m256d tmp_qloop_144 = _mm256_mul_pd(tmp_qloop_135,_mm256_set_pd(_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q])); + const __m256d tmp_qloop_147 = _mm256_mul_pd(tmp_qloop_135,_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q])); + const __m256d tmp_qloop_159 = _mm256_mul_pd(tmp_qloop_135,tmp_qloop_158); + const __m256d tmp_qloop_161 = _mm256_mul_pd(tmp_qloop_107,tmp_qloop_135); + const __m256d jac_blending_inv_0_0 = _mm256_mul_pd(tmp_qloop_65,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_61,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),tmp_qloop_59)); + const __m256d jac_blending_inv_0_1 = _mm256_mul_pd(tmp_qloop_65,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_62,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(jac_blending_0_2,jac_blending_2_1))); + const __m256d jac_blending_inv_0_2 = _mm256_mul_pd(tmp_qloop_65,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_63,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),tmp_qloop_60)); + const __m256d jac_blending_inv_1_0 = _mm256_mul_pd(tmp_qloop_65,_mm256_add_pd(_mm256_mul_pd(jac_blending_1_2,jac_blending_2_0),_mm256_mul_pd(_mm256_mul_pd(jac_blending_1_0,jac_blending_2_2),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)))); + const __m256d jac_blending_inv_1_1 = _mm256_mul_pd(tmp_qloop_65,_mm256_add_pd(_mm256_mul_pd(jac_blending_0_0,jac_blending_2_2),_mm256_mul_pd(_mm256_mul_pd(jac_blending_0_2,jac_blending_2_0),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)))); + const __m256d jac_blending_inv_1_2 = _mm256_mul_pd(tmp_qloop_65,_mm256_add_pd(_mm256_mul_pd(jac_blending_0_2,jac_blending_1_0),_mm256_mul_pd(_mm256_mul_pd(jac_blending_0_0,jac_blending_1_2),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)))); + const __m256d jac_blending_inv_2_0 = _mm256_mul_pd(tmp_qloop_65,_mm256_add_pd(_mm256_mul_pd(jac_blending_1_0,jac_blending_2_1),_mm256_mul_pd(_mm256_mul_pd(jac_blending_1_1,jac_blending_2_0),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)))); + const __m256d tmp_qloop_126 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_121,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_0,_mm256_set_pd(jac_affine_inv_0_0_GREEN_UP,jac_affine_inv_0_0_GREEN_UP,jac_affine_inv_0_0_GREEN_UP,jac_affine_inv_0_0_GREEN_UP)),_mm256_mul_pd(jac_blending_inv_1_0,_mm256_set_pd(jac_affine_inv_0_1_GREEN_UP,jac_affine_inv_0_1_GREEN_UP,jac_affine_inv_0_1_GREEN_UP,jac_affine_inv_0_1_GREEN_UP))),_mm256_mul_pd(jac_blending_inv_2_0,_mm256_set_pd(jac_affine_inv_0_2_GREEN_UP,jac_affine_inv_0_2_GREEN_UP,jac_affine_inv_0_2_GREEN_UP,jac_affine_inv_0_2_GREEN_UP)))),_mm256_mul_pd(tmp_qloop_124,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_0,_mm256_set_pd(jac_affine_inv_1_0_GREEN_UP,jac_affine_inv_1_0_GREEN_UP,jac_affine_inv_1_0_GREEN_UP,jac_affine_inv_1_0_GREEN_UP)),_mm256_mul_pd(jac_blending_inv_1_0,_mm256_set_pd(jac_affine_inv_1_1_GREEN_UP,jac_affine_inv_1_1_GREEN_UP,jac_affine_inv_1_1_GREEN_UP,jac_affine_inv_1_1_GREEN_UP))),_mm256_mul_pd(jac_blending_inv_2_0,_mm256_set_pd(jac_affine_inv_1_2_GREEN_UP,jac_affine_inv_1_2_GREEN_UP,jac_affine_inv_1_2_GREEN_UP,jac_affine_inv_1_2_GREEN_UP))))),_mm256_mul_pd(tmp_qloop_125,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_0,_mm256_set_pd(jac_affine_inv_2_0_GREEN_UP,jac_affine_inv_2_0_GREEN_UP,jac_affine_inv_2_0_GREEN_UP,jac_affine_inv_2_0_GREEN_UP)),_mm256_mul_pd(jac_blending_inv_1_0,_mm256_set_pd(jac_affine_inv_2_1_GREEN_UP,jac_affine_inv_2_1_GREEN_UP,jac_affine_inv_2_1_GREEN_UP,jac_affine_inv_2_1_GREEN_UP))),_mm256_mul_pd(jac_blending_inv_2_0,_mm256_set_pd(jac_affine_inv_2_2_GREEN_UP,jac_affine_inv_2_2_GREEN_UP,jac_affine_inv_2_2_GREEN_UP,jac_affine_inv_2_2_GREEN_UP))))); + const __m256d tmp_qloop_137 = _mm256_mul_pd(tmp_qloop_126,tmp_qloop_136); + const __m256d tmp_qloop_143 = _mm256_mul_pd(tmp_qloop_126,tmp_qloop_142); + const __m256d tmp_qloop_145 = _mm256_mul_pd(tmp_qloop_126,tmp_qloop_144); + const __m256d tmp_qloop_146 = _mm256_mul_pd(tmp_qloop_112,tmp_qloop_145); + const __m256d tmp_qloop_148 = _mm256_mul_pd(tmp_qloop_126,tmp_qloop_147); + const __m256d tmp_qloop_157 = _mm256_mul_pd(tmp_qloop_126,tmp_qloop_135); + const __m256d tmp_qloop_160 = _mm256_mul_pd(tmp_qloop_107,tmp_qloop_157); + const __m256d jac_blending_inv_2_1 = _mm256_mul_pd(tmp_qloop_65,_mm256_add_pd(_mm256_mul_pd(jac_blending_0_1,jac_blending_2_0),_mm256_mul_pd(_mm256_mul_pd(jac_blending_0_0,jac_blending_2_1),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)))); + const __m256d tmp_qloop_138 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_121,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_1,_mm256_set_pd(jac_affine_inv_0_0_GREEN_UP,jac_affine_inv_0_0_GREEN_UP,jac_affine_inv_0_0_GREEN_UP,jac_affine_inv_0_0_GREEN_UP)),_mm256_mul_pd(jac_blending_inv_1_1,_mm256_set_pd(jac_affine_inv_0_1_GREEN_UP,jac_affine_inv_0_1_GREEN_UP,jac_affine_inv_0_1_GREEN_UP,jac_affine_inv_0_1_GREEN_UP))),_mm256_mul_pd(jac_blending_inv_2_1,_mm256_set_pd(jac_affine_inv_0_2_GREEN_UP,jac_affine_inv_0_2_GREEN_UP,jac_affine_inv_0_2_GREEN_UP,jac_affine_inv_0_2_GREEN_UP)))),_mm256_mul_pd(tmp_qloop_124,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_1,_mm256_set_pd(jac_affine_inv_1_0_GREEN_UP,jac_affine_inv_1_0_GREEN_UP,jac_affine_inv_1_0_GREEN_UP,jac_affine_inv_1_0_GREEN_UP)),_mm256_mul_pd(jac_blending_inv_1_1,_mm256_set_pd(jac_affine_inv_1_1_GREEN_UP,jac_affine_inv_1_1_GREEN_UP,jac_affine_inv_1_1_GREEN_UP,jac_affine_inv_1_1_GREEN_UP))),_mm256_mul_pd(jac_blending_inv_2_1,_mm256_set_pd(jac_affine_inv_1_2_GREEN_UP,jac_affine_inv_1_2_GREEN_UP,jac_affine_inv_1_2_GREEN_UP,jac_affine_inv_1_2_GREEN_UP))))),_mm256_mul_pd(tmp_qloop_125,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_1,_mm256_set_pd(jac_affine_inv_2_0_GREEN_UP,jac_affine_inv_2_0_GREEN_UP,jac_affine_inv_2_0_GREEN_UP,jac_affine_inv_2_0_GREEN_UP)),_mm256_mul_pd(jac_blending_inv_1_1,_mm256_set_pd(jac_affine_inv_2_1_GREEN_UP,jac_affine_inv_2_1_GREEN_UP,jac_affine_inv_2_1_GREEN_UP,jac_affine_inv_2_1_GREEN_UP))),_mm256_mul_pd(jac_blending_inv_2_1,_mm256_set_pd(jac_affine_inv_2_2_GREEN_UP,jac_affine_inv_2_2_GREEN_UP,jac_affine_inv_2_2_GREEN_UP,jac_affine_inv_2_2_GREEN_UP))))); + const __m256d tmp_qloop_139 = _mm256_mul_pd(tmp_qloop_136,tmp_qloop_138); + const __m256d tmp_qloop_149 = _mm256_mul_pd(tmp_qloop_138,tmp_qloop_142); + const __m256d tmp_qloop_150 = _mm256_mul_pd(tmp_qloop_138,tmp_qloop_144); + const __m256d tmp_qloop_151 = _mm256_mul_pd(tmp_qloop_112,tmp_qloop_150); + const __m256d tmp_qloop_152 = _mm256_mul_pd(tmp_qloop_138,tmp_qloop_147); + const __m256d tmp_qloop_162 = _mm256_mul_pd(tmp_qloop_138,tmp_qloop_161); + const __m256d jac_blending_inv_2_2 = _mm256_mul_pd(tmp_qloop_65,_mm256_add_pd(_mm256_mul_pd(jac_blending_0_0,jac_blending_1_1),_mm256_mul_pd(_mm256_mul_pd(jac_blending_0_1,jac_blending_1_0),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)))); + const __m256d tmp_qloop_140 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_121,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_2,_mm256_set_pd(jac_affine_inv_0_0_GREEN_UP,jac_affine_inv_0_0_GREEN_UP,jac_affine_inv_0_0_GREEN_UP,jac_affine_inv_0_0_GREEN_UP)),_mm256_mul_pd(jac_blending_inv_1_2,_mm256_set_pd(jac_affine_inv_0_1_GREEN_UP,jac_affine_inv_0_1_GREEN_UP,jac_affine_inv_0_1_GREEN_UP,jac_affine_inv_0_1_GREEN_UP))),_mm256_mul_pd(jac_blending_inv_2_2,_mm256_set_pd(jac_affine_inv_0_2_GREEN_UP,jac_affine_inv_0_2_GREEN_UP,jac_affine_inv_0_2_GREEN_UP,jac_affine_inv_0_2_GREEN_UP)))),_mm256_mul_pd(tmp_qloop_124,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_2,_mm256_set_pd(jac_affine_inv_1_0_GREEN_UP,jac_affine_inv_1_0_GREEN_UP,jac_affine_inv_1_0_GREEN_UP,jac_affine_inv_1_0_GREEN_UP)),_mm256_mul_pd(jac_blending_inv_1_2,_mm256_set_pd(jac_affine_inv_1_1_GREEN_UP,jac_affine_inv_1_1_GREEN_UP,jac_affine_inv_1_1_GREEN_UP,jac_affine_inv_1_1_GREEN_UP))),_mm256_mul_pd(jac_blending_inv_2_2,_mm256_set_pd(jac_affine_inv_1_2_GREEN_UP,jac_affine_inv_1_2_GREEN_UP,jac_affine_inv_1_2_GREEN_UP,jac_affine_inv_1_2_GREEN_UP))))),_mm256_mul_pd(tmp_qloop_125,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_2,_mm256_set_pd(jac_affine_inv_2_0_GREEN_UP,jac_affine_inv_2_0_GREEN_UP,jac_affine_inv_2_0_GREEN_UP,jac_affine_inv_2_0_GREEN_UP)),_mm256_mul_pd(jac_blending_inv_1_2,_mm256_set_pd(jac_affine_inv_2_1_GREEN_UP,jac_affine_inv_2_1_GREEN_UP,jac_affine_inv_2_1_GREEN_UP,jac_affine_inv_2_1_GREEN_UP))),_mm256_mul_pd(jac_blending_inv_2_2,_mm256_set_pd(jac_affine_inv_2_2_GREEN_UP,jac_affine_inv_2_2_GREEN_UP,jac_affine_inv_2_2_GREEN_UP,jac_affine_inv_2_2_GREEN_UP))))); + const __m256d tmp_qloop_141 = _mm256_mul_pd(tmp_qloop_136,tmp_qloop_140); + const __m256d tmp_qloop_153 = _mm256_mul_pd(tmp_qloop_140,tmp_qloop_142); + const __m256d tmp_qloop_154 = _mm256_mul_pd(tmp_qloop_140,tmp_qloop_144); + const __m256d tmp_qloop_155 = _mm256_mul_pd(tmp_qloop_112,tmp_qloop_154); + const __m256d tmp_qloop_156 = _mm256_mul_pd(tmp_qloop_140,tmp_qloop_147); + const __m256d tmp_qloop_163 = _mm256_mul_pd(tmp_qloop_140,tmp_qloop_161); + const __m256d hessian_blending_0_0_0 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_18,tmp_qloop_67),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_37,_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_31,_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(tmp_qloop_68,tmp_qloop_68,tmp_qloop_68,tmp_qloop_68)),_mm256_set_pd(tmp_qloop_33,tmp_qloop_33,tmp_qloop_33,tmp_qloop_33))),_mm256_mul_pd(tmp_qloop_19,_mm256_set_pd(tmp_qloop_68,tmp_qloop_68,tmp_qloop_68,tmp_qloop_68))),_mm256_mul_pd(tmp_qloop_35,tmp_qloop_69)),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_28,tmp_qloop_69),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_17,tmp_qloop_17,tmp_qloop_17,tmp_qloop_17))); + const __m256d hessian_blending_1_0_0 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_25,tmp_qloop_67),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_37,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_19,_mm256_set_pd(tmp_qloop_40,tmp_qloop_40,tmp_qloop_40,tmp_qloop_40)),_mm256_mul_pd(tmp_qloop_25,tmp_qloop_71)),tmp_qloop_70)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_17,tmp_qloop_17,tmp_qloop_17,tmp_qloop_17))); + const __m256d hessian_blending_2_0_0 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_22,tmp_qloop_67),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_37,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_19,_mm256_set_pd(tmp_qloop_72,tmp_qloop_72,tmp_qloop_72,tmp_qloop_72)),_mm256_mul_pd(tmp_qloop_22,tmp_qloop_71)),tmp_qloop_73)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_17,tmp_qloop_17,tmp_qloop_17,tmp_qloop_17))); + const __m256d hessian_blending_0_0_1 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_18,tmp_qloop_78),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_25,tmp_qloop_37),tmp_qloop_75),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_17,tmp_qloop_17,tmp_qloop_17,tmp_qloop_17))); + const __m256d hessian_blending_1_0_1 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_80,tmp_qloop_81),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_29,tmp_qloop_77),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_52); + const __m256d hessian_blending_2_0_1 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_81,tmp_qloop_83),tmp_qloop_84); + const __m256d hessian_blending_0_0_2 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_77,tmp_qloop_85),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_22,tmp_qloop_37),tmp_qloop_75),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_17,tmp_qloop_17,tmp_qloop_17,tmp_qloop_17))); + const __m256d hessian_blending_1_0_2 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_80,tmp_qloop_86),tmp_qloop_84); + const __m256d hessian_blending_2_0_2 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_83,tmp_qloop_86),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_30,tmp_qloop_77),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_52); + const __m256d hessian_blending_0_1_0 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_87,tmp_qloop_88),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_19,tmp_qloop_89),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_44); + const __m256d hessian_blending_1_1_0 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_18,tmp_qloop_91),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_18,tmp_qloop_37),tmp_qloop_90),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_17,tmp_qloop_17,tmp_qloop_17,tmp_qloop_17))); + const __m256d hessian_blending_2_1_0 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_93,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_88,tmp_qloop_92)); + const __m256d hessian_blending_0_1_1 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_18,tmp_qloop_95),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_37,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_29,_mm256_set_pd(tmp_qloop_68,tmp_qloop_68,tmp_qloop_68,tmp_qloop_68)),_mm256_mul_pd(tmp_qloop_54,tmp_qloop_69)),tmp_qloop_94)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_17,tmp_qloop_17,tmp_qloop_17,tmp_qloop_17))); + const __m256d hessian_blending_1_1_1 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_25,tmp_qloop_95),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_37,_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_31,_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(tmp_qloop_40,tmp_qloop_40,tmp_qloop_40,tmp_qloop_40)),_mm256_set_pd(tmp_qloop_53,tmp_qloop_53,tmp_qloop_53,tmp_qloop_53))),_mm256_mul_pd(tmp_qloop_29,_mm256_set_pd(tmp_qloop_40,tmp_qloop_40,tmp_qloop_40,tmp_qloop_40))),_mm256_mul_pd(tmp_qloop_47,tmp_qloop_79)),_mm256_mul_pd(tmp_qloop_54,tmp_qloop_79))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_17,tmp_qloop_17,tmp_qloop_17,tmp_qloop_17))); + const __m256d hessian_blending_2_1_1 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_22,tmp_qloop_95),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_37,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_29,_mm256_set_pd(tmp_qloop_72,tmp_qloop_72,tmp_qloop_72,tmp_qloop_72)),_mm256_mul_pd(tmp_qloop_54,tmp_qloop_82)),tmp_qloop_73)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_17,tmp_qloop_17,tmp_qloop_17,tmp_qloop_17))); + const __m256d hessian_blending_0_1_2 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_93,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_22,tmp_qloop_37),tmp_qloop_87),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_17,tmp_qloop_17,tmp_qloop_17,tmp_qloop_17))); + const __m256d hessian_blending_1_1_2 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_22,tmp_qloop_91),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_22,tmp_qloop_37),tmp_qloop_90),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_17,tmp_qloop_17,tmp_qloop_17,tmp_qloop_17))); + const __m256d hessian_blending_2_1_2 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_86,tmp_qloop_92),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_30,tmp_qloop_89),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_44); + const __m256d hessian_blending_0_2_0 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_88,tmp_qloop_96),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_19,tmp_qloop_97),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_50); + const __m256d hessian_blending_1_2_0 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_100,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_18,tmp_qloop_37),tmp_qloop_98),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_17,tmp_qloop_17,tmp_qloop_17,tmp_qloop_17))); + const __m256d hessian_blending_2_2_0 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_85,tmp_qloop_97),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_101,tmp_qloop_18),tmp_qloop_37),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_17,tmp_qloop_17,tmp_qloop_17,tmp_qloop_17))); + const __m256d hessian_blending_0_2_1 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_100,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_25,tmp_qloop_37),tmp_qloop_96),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_17,tmp_qloop_17,tmp_qloop_17,tmp_qloop_17))); + const __m256d hessian_blending_1_2_1 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_81,tmp_qloop_98),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_29,tmp_qloop_97),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_50); + const __m256d hessian_blending_2_2_1 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_22,tmp_qloop_99),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_101,tmp_qloop_25),tmp_qloop_37),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_17,tmp_qloop_17,tmp_qloop_17,tmp_qloop_17))); + const __m256d hessian_blending_0_2_2 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_102,tmp_qloop_18),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_37,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_30,_mm256_set_pd(tmp_qloop_68,tmp_qloop_68,tmp_qloop_68,tmp_qloop_68)),_mm256_mul_pd(tmp_qloop_57,tmp_qloop_69)),tmp_qloop_94)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_17,tmp_qloop_17,tmp_qloop_17,tmp_qloop_17))); + const __m256d hessian_blending_1_2_2 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_102,tmp_qloop_25),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_37,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_30,_mm256_set_pd(tmp_qloop_40,tmp_qloop_40,tmp_qloop_40,tmp_qloop_40)),_mm256_mul_pd(tmp_qloop_57,tmp_qloop_79)),tmp_qloop_70)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_17,tmp_qloop_17,tmp_qloop_17,tmp_qloop_17))); + const __m256d hessian_blending_2_2_2 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_102,tmp_qloop_22),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_37,_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_31,_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(tmp_qloop_72,tmp_qloop_72,tmp_qloop_72,tmp_qloop_72)),_mm256_set_pd(tmp_qloop_46,tmp_qloop_46,tmp_qloop_46,tmp_qloop_46))),_mm256_mul_pd(tmp_qloop_48,_mm256_set_pd(2.0,2.0,2.0,2.0))),_mm256_mul_pd(tmp_qloop_30,_mm256_set_pd(tmp_qloop_72,tmp_qloop_72,tmp_qloop_72,tmp_qloop_72))),_mm256_mul_pd(tmp_qloop_57,tmp_qloop_82))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_17,tmp_qloop_17,tmp_qloop_17,tmp_qloop_17))); + const __m256d q_tmp_0_0 = _mm256_mul_pd(tmp_qloop_115,tmp_qloop_137); + const __m256d q_tmp_0_1 = _mm256_mul_pd(tmp_qloop_127,tmp_qloop_137); + const __m256d q_tmp_0_2 = _mm256_mul_pd(tmp_qloop_128,tmp_qloop_137); + const __m256d q_tmp_0_3 = _mm256_mul_pd(tmp_qloop_129,tmp_qloop_137); + const __m256d q_tmp_0_4 = _mm256_mul_pd(tmp_qloop_110,tmp_qloop_137); + const __m256d q_tmp_0_5 = _mm256_mul_pd(tmp_qloop_113,tmp_qloop_137); + const __m256d q_tmp_0_6 = _mm256_mul_pd(tmp_qloop_112,tmp_qloop_137); + const __m256d q_tmp_0_7 = _mm256_mul_pd(tmp_qloop_130,tmp_qloop_137); + const __m256d q_tmp_0_8 = _mm256_mul_pd(tmp_qloop_132,tmp_qloop_137); + const __m256d q_tmp_0_9 = _mm256_mul_pd(tmp_qloop_134,tmp_qloop_137); + const __m256d q_tmp_0_10 = _mm256_mul_pd(tmp_qloop_115,tmp_qloop_139); + const __m256d q_tmp_0_11 = _mm256_mul_pd(tmp_qloop_127,tmp_qloop_139); + const __m256d q_tmp_0_12 = _mm256_mul_pd(tmp_qloop_128,tmp_qloop_139); + const __m256d q_tmp_0_13 = _mm256_mul_pd(tmp_qloop_129,tmp_qloop_139); + const __m256d q_tmp_0_14 = _mm256_mul_pd(tmp_qloop_110,tmp_qloop_139); + const __m256d q_tmp_0_15 = _mm256_mul_pd(tmp_qloop_113,tmp_qloop_139); + const __m256d q_tmp_0_16 = _mm256_mul_pd(tmp_qloop_112,tmp_qloop_139); + const __m256d q_tmp_0_17 = _mm256_mul_pd(tmp_qloop_130,tmp_qloop_139); + const __m256d q_tmp_0_18 = _mm256_mul_pd(tmp_qloop_132,tmp_qloop_139); + const __m256d q_tmp_0_19 = _mm256_mul_pd(tmp_qloop_134,tmp_qloop_139); + const __m256d q_tmp_0_20 = _mm256_mul_pd(tmp_qloop_115,tmp_qloop_141); + const __m256d q_tmp_0_21 = _mm256_mul_pd(tmp_qloop_127,tmp_qloop_141); + const __m256d q_tmp_0_22 = _mm256_mul_pd(tmp_qloop_128,tmp_qloop_141); + const __m256d q_tmp_0_23 = _mm256_mul_pd(tmp_qloop_129,tmp_qloop_141); + const __m256d q_tmp_0_24 = _mm256_mul_pd(tmp_qloop_110,tmp_qloop_141); + const __m256d q_tmp_0_25 = _mm256_mul_pd(tmp_qloop_113,tmp_qloop_141); + const __m256d q_tmp_0_26 = _mm256_mul_pd(tmp_qloop_112,tmp_qloop_141); + const __m256d q_tmp_0_27 = _mm256_mul_pd(tmp_qloop_130,tmp_qloop_141); + const __m256d q_tmp_0_28 = _mm256_mul_pd(tmp_qloop_132,tmp_qloop_141); + const __m256d q_tmp_0_29 = _mm256_mul_pd(tmp_qloop_134,tmp_qloop_141); + const __m256d q_tmp_1_0 = _mm256_mul_pd(tmp_qloop_115,tmp_qloop_143); + const __m256d q_tmp_1_1 = _mm256_mul_pd(tmp_qloop_127,tmp_qloop_143); + const __m256d q_tmp_1_2 = _mm256_mul_pd(tmp_qloop_128,tmp_qloop_143); + const __m256d q_tmp_1_3 = _mm256_mul_pd(tmp_qloop_129,tmp_qloop_143); + const __m256d q_tmp_1_4 = tmp_qloop_146; + const __m256d q_tmp_1_5 = _mm256_mul_pd(tmp_qloop_133,tmp_qloop_145); + const __m256d q_tmp_1_6 = _mm256_mul_pd(tmp_qloop_133,tmp_qloop_148); + const __m256d q_tmp_1_7 = _mm256_mul_pd(tmp_qloop_130,tmp_qloop_143); + const __m256d q_tmp_1_8 = _mm256_mul_pd(tmp_qloop_132,tmp_qloop_143); + const __m256d q_tmp_1_9 = _mm256_mul_pd(tmp_qloop_134,tmp_qloop_143); + const __m256d q_tmp_1_10 = _mm256_mul_pd(tmp_qloop_115,tmp_qloop_149); + const __m256d q_tmp_1_11 = _mm256_mul_pd(tmp_qloop_127,tmp_qloop_149); + const __m256d q_tmp_1_12 = _mm256_mul_pd(tmp_qloop_128,tmp_qloop_149); + const __m256d q_tmp_1_13 = _mm256_mul_pd(tmp_qloop_129,tmp_qloop_149); + const __m256d q_tmp_1_14 = tmp_qloop_151; + const __m256d q_tmp_1_15 = _mm256_mul_pd(tmp_qloop_133,tmp_qloop_150); + const __m256d q_tmp_1_16 = _mm256_mul_pd(tmp_qloop_133,tmp_qloop_152); + const __m256d q_tmp_1_17 = _mm256_mul_pd(tmp_qloop_130,tmp_qloop_149); + const __m256d q_tmp_1_18 = _mm256_mul_pd(tmp_qloop_132,tmp_qloop_149); + const __m256d q_tmp_1_19 = _mm256_mul_pd(tmp_qloop_134,tmp_qloop_149); + const __m256d q_tmp_1_20 = _mm256_mul_pd(tmp_qloop_115,tmp_qloop_153); + const __m256d q_tmp_1_21 = _mm256_mul_pd(tmp_qloop_127,tmp_qloop_153); + const __m256d q_tmp_1_22 = _mm256_mul_pd(tmp_qloop_128,tmp_qloop_153); + const __m256d q_tmp_1_23 = _mm256_mul_pd(tmp_qloop_129,tmp_qloop_153); + const __m256d q_tmp_1_24 = tmp_qloop_155; + const __m256d q_tmp_1_25 = _mm256_mul_pd(tmp_qloop_133,tmp_qloop_154); + const __m256d q_tmp_1_26 = _mm256_mul_pd(tmp_qloop_133,tmp_qloop_156); + const __m256d q_tmp_1_27 = _mm256_mul_pd(tmp_qloop_130,tmp_qloop_153); + const __m256d q_tmp_1_28 = _mm256_mul_pd(tmp_qloop_132,tmp_qloop_153); + const __m256d q_tmp_1_29 = _mm256_mul_pd(tmp_qloop_134,tmp_qloop_153); + const __m256d q_tmp_2_0 = _mm256_mul_pd(tmp_qloop_115,tmp_qloop_148); + const __m256d q_tmp_2_1 = _mm256_mul_pd(tmp_qloop_127,tmp_qloop_148); + const __m256d q_tmp_2_2 = _mm256_mul_pd(tmp_qloop_128,tmp_qloop_148); + const __m256d q_tmp_2_3 = _mm256_mul_pd(tmp_qloop_129,tmp_qloop_148); + const __m256d q_tmp_2_4 = _mm256_mul_pd(tmp_qloop_131,tmp_qloop_145); + const __m256d q_tmp_2_5 = tmp_qloop_146; + const __m256d q_tmp_2_6 = _mm256_mul_pd(tmp_qloop_157,tmp_qloop_158); + const __m256d q_tmp_2_7 = _mm256_mul_pd(tmp_qloop_130,tmp_qloop_148); + const __m256d q_tmp_2_8 = _mm256_mul_pd(tmp_qloop_132,tmp_qloop_148); + const __m256d q_tmp_2_9 = _mm256_mul_pd(tmp_qloop_134,tmp_qloop_148); + const __m256d q_tmp_2_10 = _mm256_mul_pd(tmp_qloop_115,tmp_qloop_152); + const __m256d q_tmp_2_11 = _mm256_mul_pd(tmp_qloop_127,tmp_qloop_152); + const __m256d q_tmp_2_12 = _mm256_mul_pd(tmp_qloop_128,tmp_qloop_152); + const __m256d q_tmp_2_13 = _mm256_mul_pd(tmp_qloop_129,tmp_qloop_152); + const __m256d q_tmp_2_14 = _mm256_mul_pd(tmp_qloop_131,tmp_qloop_150); + const __m256d q_tmp_2_15 = tmp_qloop_151; + const __m256d q_tmp_2_16 = _mm256_mul_pd(tmp_qloop_138,tmp_qloop_159); + const __m256d q_tmp_2_17 = _mm256_mul_pd(tmp_qloop_130,tmp_qloop_152); + const __m256d q_tmp_2_18 = _mm256_mul_pd(tmp_qloop_132,tmp_qloop_152); + const __m256d q_tmp_2_19 = _mm256_mul_pd(tmp_qloop_134,tmp_qloop_152); + const __m256d q_tmp_2_20 = _mm256_mul_pd(tmp_qloop_115,tmp_qloop_156); + const __m256d q_tmp_2_21 = _mm256_mul_pd(tmp_qloop_127,tmp_qloop_156); + const __m256d q_tmp_2_22 = _mm256_mul_pd(tmp_qloop_128,tmp_qloop_156); + const __m256d q_tmp_2_23 = _mm256_mul_pd(tmp_qloop_129,tmp_qloop_156); + const __m256d q_tmp_2_24 = _mm256_mul_pd(tmp_qloop_131,tmp_qloop_154); + const __m256d q_tmp_2_25 = tmp_qloop_155; + const __m256d q_tmp_2_26 = _mm256_mul_pd(tmp_qloop_140,tmp_qloop_159); + const __m256d q_tmp_2_27 = _mm256_mul_pd(tmp_qloop_130,tmp_qloop_156); + const __m256d q_tmp_2_28 = _mm256_mul_pd(tmp_qloop_132,tmp_qloop_156); + const __m256d q_tmp_2_29 = _mm256_mul_pd(tmp_qloop_134,tmp_qloop_156); + const __m256d q_tmp_3_0 = _mm256_mul_pd(tmp_qloop_115,tmp_qloop_145); + const __m256d q_tmp_3_1 = _mm256_mul_pd(tmp_qloop_127,tmp_qloop_145); + const __m256d q_tmp_3_2 = _mm256_mul_pd(tmp_qloop_128,tmp_qloop_145); + const __m256d q_tmp_3_3 = _mm256_mul_pd(tmp_qloop_129,tmp_qloop_145); + const __m256d q_tmp_3_4 = _mm256_mul_pd(tmp_qloop_109,tmp_qloop_160); + const __m256d q_tmp_3_5 = _mm256_mul_pd(tmp_qloop_111,tmp_qloop_160); + const __m256d q_tmp_3_6 = tmp_qloop_146; + const __m256d q_tmp_3_7 = _mm256_mul_pd(tmp_qloop_130,tmp_qloop_145); + const __m256d q_tmp_3_8 = _mm256_mul_pd(tmp_qloop_132,tmp_qloop_145); + const __m256d q_tmp_3_9 = _mm256_mul_pd(tmp_qloop_134,tmp_qloop_145); + const __m256d q_tmp_3_10 = _mm256_mul_pd(tmp_qloop_115,tmp_qloop_150); + const __m256d q_tmp_3_11 = _mm256_mul_pd(tmp_qloop_127,tmp_qloop_150); + const __m256d q_tmp_3_12 = _mm256_mul_pd(tmp_qloop_128,tmp_qloop_150); + const __m256d q_tmp_3_13 = _mm256_mul_pd(tmp_qloop_129,tmp_qloop_150); + const __m256d q_tmp_3_14 = _mm256_mul_pd(tmp_qloop_109,tmp_qloop_162); + const __m256d q_tmp_3_15 = _mm256_mul_pd(tmp_qloop_111,tmp_qloop_162); + const __m256d q_tmp_3_16 = tmp_qloop_151; + const __m256d q_tmp_3_17 = _mm256_mul_pd(tmp_qloop_130,tmp_qloop_150); + const __m256d q_tmp_3_18 = _mm256_mul_pd(tmp_qloop_132,tmp_qloop_150); + const __m256d q_tmp_3_19 = _mm256_mul_pd(tmp_qloop_134,tmp_qloop_150); + const __m256d q_tmp_3_20 = _mm256_mul_pd(tmp_qloop_115,tmp_qloop_154); + const __m256d q_tmp_3_21 = _mm256_mul_pd(tmp_qloop_127,tmp_qloop_154); + const __m256d q_tmp_3_22 = _mm256_mul_pd(tmp_qloop_128,tmp_qloop_154); + const __m256d q_tmp_3_23 = _mm256_mul_pd(tmp_qloop_129,tmp_qloop_154); + const __m256d q_tmp_3_24 = _mm256_mul_pd(tmp_qloop_109,tmp_qloop_163); + const __m256d q_tmp_3_25 = _mm256_mul_pd(tmp_qloop_111,tmp_qloop_163); + const __m256d q_tmp_3_26 = tmp_qloop_155; + const __m256d q_tmp_3_27 = _mm256_mul_pd(tmp_qloop_130,tmp_qloop_154); + const __m256d q_tmp_3_28 = _mm256_mul_pd(tmp_qloop_132,tmp_qloop_154); + const __m256d q_tmp_3_29 = _mm256_mul_pd(tmp_qloop_134,tmp_qloop_154); + q_acc_0_0 = _mm256_add_pd(q_acc_0_0,q_tmp_0_0); + q_acc_0_1 = _mm256_add_pd(q_acc_0_1,q_tmp_0_1); + q_acc_0_2 = _mm256_add_pd(q_acc_0_2,q_tmp_0_2); + q_acc_0_3 = _mm256_add_pd(q_acc_0_3,q_tmp_0_3); + q_acc_0_4 = _mm256_add_pd(q_acc_0_4,q_tmp_0_4); + q_acc_0_5 = _mm256_add_pd(q_acc_0_5,q_tmp_0_5); + q_acc_0_6 = _mm256_add_pd(q_acc_0_6,q_tmp_0_6); + q_acc_0_7 = _mm256_add_pd(q_acc_0_7,q_tmp_0_7); + q_acc_0_8 = _mm256_add_pd(q_acc_0_8,q_tmp_0_8); + q_acc_0_9 = _mm256_add_pd(q_acc_0_9,q_tmp_0_9); + q_acc_0_10 = _mm256_add_pd(q_acc_0_10,q_tmp_0_10); + q_acc_0_11 = _mm256_add_pd(q_acc_0_11,q_tmp_0_11); + q_acc_0_12 = _mm256_add_pd(q_acc_0_12,q_tmp_0_12); + q_acc_0_13 = _mm256_add_pd(q_acc_0_13,q_tmp_0_13); + q_acc_0_14 = _mm256_add_pd(q_acc_0_14,q_tmp_0_14); + q_acc_0_15 = _mm256_add_pd(q_acc_0_15,q_tmp_0_15); + q_acc_0_16 = _mm256_add_pd(q_acc_0_16,q_tmp_0_16); + q_acc_0_17 = _mm256_add_pd(q_acc_0_17,q_tmp_0_17); + q_acc_0_18 = _mm256_add_pd(q_acc_0_18,q_tmp_0_18); + q_acc_0_19 = _mm256_add_pd(q_acc_0_19,q_tmp_0_19); + q_acc_0_20 = _mm256_add_pd(q_acc_0_20,q_tmp_0_20); + q_acc_0_21 = _mm256_add_pd(q_acc_0_21,q_tmp_0_21); + q_acc_0_22 = _mm256_add_pd(q_acc_0_22,q_tmp_0_22); + q_acc_0_23 = _mm256_add_pd(q_acc_0_23,q_tmp_0_23); + q_acc_0_24 = _mm256_add_pd(q_acc_0_24,q_tmp_0_24); + q_acc_0_25 = _mm256_add_pd(q_acc_0_25,q_tmp_0_25); + q_acc_0_26 = _mm256_add_pd(q_acc_0_26,q_tmp_0_26); + q_acc_0_27 = _mm256_add_pd(q_acc_0_27,q_tmp_0_27); + q_acc_0_28 = _mm256_add_pd(q_acc_0_28,q_tmp_0_28); + q_acc_0_29 = _mm256_add_pd(q_acc_0_29,q_tmp_0_29); + q_acc_1_0 = _mm256_add_pd(q_acc_1_0,q_tmp_1_0); + q_acc_1_1 = _mm256_add_pd(q_acc_1_1,q_tmp_1_1); + q_acc_1_2 = _mm256_add_pd(q_acc_1_2,q_tmp_1_2); + q_acc_1_3 = _mm256_add_pd(q_acc_1_3,q_tmp_1_3); + q_acc_1_4 = _mm256_add_pd(q_acc_1_4,q_tmp_1_4); + q_acc_1_5 = _mm256_add_pd(q_acc_1_5,q_tmp_1_5); + q_acc_1_6 = _mm256_add_pd(q_acc_1_6,q_tmp_1_6); + q_acc_1_7 = _mm256_add_pd(q_acc_1_7,q_tmp_1_7); + q_acc_1_8 = _mm256_add_pd(q_acc_1_8,q_tmp_1_8); + q_acc_1_9 = _mm256_add_pd(q_acc_1_9,q_tmp_1_9); + q_acc_1_10 = _mm256_add_pd(q_acc_1_10,q_tmp_1_10); + q_acc_1_11 = _mm256_add_pd(q_acc_1_11,q_tmp_1_11); + q_acc_1_12 = _mm256_add_pd(q_acc_1_12,q_tmp_1_12); + q_acc_1_13 = _mm256_add_pd(q_acc_1_13,q_tmp_1_13); + q_acc_1_14 = _mm256_add_pd(q_acc_1_14,q_tmp_1_14); + q_acc_1_15 = _mm256_add_pd(q_acc_1_15,q_tmp_1_15); + q_acc_1_16 = _mm256_add_pd(q_acc_1_16,q_tmp_1_16); + q_acc_1_17 = _mm256_add_pd(q_acc_1_17,q_tmp_1_17); + q_acc_1_18 = _mm256_add_pd(q_acc_1_18,q_tmp_1_18); + q_acc_1_19 = _mm256_add_pd(q_acc_1_19,q_tmp_1_19); + q_acc_1_20 = _mm256_add_pd(q_acc_1_20,q_tmp_1_20); + q_acc_1_21 = _mm256_add_pd(q_acc_1_21,q_tmp_1_21); + q_acc_1_22 = _mm256_add_pd(q_acc_1_22,q_tmp_1_22); + q_acc_1_23 = _mm256_add_pd(q_acc_1_23,q_tmp_1_23); + q_acc_1_24 = _mm256_add_pd(q_acc_1_24,q_tmp_1_24); + q_acc_1_25 = _mm256_add_pd(q_acc_1_25,q_tmp_1_25); + q_acc_1_26 = _mm256_add_pd(q_acc_1_26,q_tmp_1_26); + q_acc_1_27 = _mm256_add_pd(q_acc_1_27,q_tmp_1_27); + q_acc_1_28 = _mm256_add_pd(q_acc_1_28,q_tmp_1_28); + q_acc_1_29 = _mm256_add_pd(q_acc_1_29,q_tmp_1_29); + q_acc_2_0 = _mm256_add_pd(q_acc_2_0,q_tmp_2_0); + q_acc_2_1 = _mm256_add_pd(q_acc_2_1,q_tmp_2_1); + q_acc_2_2 = _mm256_add_pd(q_acc_2_2,q_tmp_2_2); + q_acc_2_3 = _mm256_add_pd(q_acc_2_3,q_tmp_2_3); + q_acc_2_4 = _mm256_add_pd(q_acc_2_4,q_tmp_2_4); + q_acc_2_5 = _mm256_add_pd(q_acc_2_5,q_tmp_2_5); + q_acc_2_6 = _mm256_add_pd(q_acc_2_6,q_tmp_2_6); + q_acc_2_7 = _mm256_add_pd(q_acc_2_7,q_tmp_2_7); + q_acc_2_8 = _mm256_add_pd(q_acc_2_8,q_tmp_2_8); + q_acc_2_9 = _mm256_add_pd(q_acc_2_9,q_tmp_2_9); + q_acc_2_10 = _mm256_add_pd(q_acc_2_10,q_tmp_2_10); + q_acc_2_11 = _mm256_add_pd(q_acc_2_11,q_tmp_2_11); + q_acc_2_12 = _mm256_add_pd(q_acc_2_12,q_tmp_2_12); + q_acc_2_13 = _mm256_add_pd(q_acc_2_13,q_tmp_2_13); + q_acc_2_14 = _mm256_add_pd(q_acc_2_14,q_tmp_2_14); + q_acc_2_15 = _mm256_add_pd(q_acc_2_15,q_tmp_2_15); + q_acc_2_16 = _mm256_add_pd(q_acc_2_16,q_tmp_2_16); + q_acc_2_17 = _mm256_add_pd(q_acc_2_17,q_tmp_2_17); + q_acc_2_18 = _mm256_add_pd(q_acc_2_18,q_tmp_2_18); + q_acc_2_19 = _mm256_add_pd(q_acc_2_19,q_tmp_2_19); + q_acc_2_20 = _mm256_add_pd(q_acc_2_20,q_tmp_2_20); + q_acc_2_21 = _mm256_add_pd(q_acc_2_21,q_tmp_2_21); + q_acc_2_22 = _mm256_add_pd(q_acc_2_22,q_tmp_2_22); + q_acc_2_23 = _mm256_add_pd(q_acc_2_23,q_tmp_2_23); + q_acc_2_24 = _mm256_add_pd(q_acc_2_24,q_tmp_2_24); + q_acc_2_25 = _mm256_add_pd(q_acc_2_25,q_tmp_2_25); + q_acc_2_26 = _mm256_add_pd(q_acc_2_26,q_tmp_2_26); + q_acc_2_27 = _mm256_add_pd(q_acc_2_27,q_tmp_2_27); + q_acc_2_28 = _mm256_add_pd(q_acc_2_28,q_tmp_2_28); + q_acc_2_29 = _mm256_add_pd(q_acc_2_29,q_tmp_2_29); + q_acc_3_0 = _mm256_add_pd(q_acc_3_0,q_tmp_3_0); + q_acc_3_1 = _mm256_add_pd(q_acc_3_1,q_tmp_3_1); + q_acc_3_2 = _mm256_add_pd(q_acc_3_2,q_tmp_3_2); + q_acc_3_3 = _mm256_add_pd(q_acc_3_3,q_tmp_3_3); + q_acc_3_4 = _mm256_add_pd(q_acc_3_4,q_tmp_3_4); + q_acc_3_5 = _mm256_add_pd(q_acc_3_5,q_tmp_3_5); + q_acc_3_6 = _mm256_add_pd(q_acc_3_6,q_tmp_3_6); + q_acc_3_7 = _mm256_add_pd(q_acc_3_7,q_tmp_3_7); + q_acc_3_8 = _mm256_add_pd(q_acc_3_8,q_tmp_3_8); + q_acc_3_9 = _mm256_add_pd(q_acc_3_9,q_tmp_3_9); + q_acc_3_10 = _mm256_add_pd(q_acc_3_10,q_tmp_3_10); + q_acc_3_11 = _mm256_add_pd(q_acc_3_11,q_tmp_3_11); + q_acc_3_12 = _mm256_add_pd(q_acc_3_12,q_tmp_3_12); + q_acc_3_13 = _mm256_add_pd(q_acc_3_13,q_tmp_3_13); + q_acc_3_14 = _mm256_add_pd(q_acc_3_14,q_tmp_3_14); + q_acc_3_15 = _mm256_add_pd(q_acc_3_15,q_tmp_3_15); + q_acc_3_16 = _mm256_add_pd(q_acc_3_16,q_tmp_3_16); + q_acc_3_17 = _mm256_add_pd(q_acc_3_17,q_tmp_3_17); + q_acc_3_18 = _mm256_add_pd(q_acc_3_18,q_tmp_3_18); + q_acc_3_19 = _mm256_add_pd(q_acc_3_19,q_tmp_3_19); + q_acc_3_20 = _mm256_add_pd(q_acc_3_20,q_tmp_3_20); + q_acc_3_21 = _mm256_add_pd(q_acc_3_21,q_tmp_3_21); + q_acc_3_22 = _mm256_add_pd(q_acc_3_22,q_tmp_3_22); + q_acc_3_23 = _mm256_add_pd(q_acc_3_23,q_tmp_3_23); + q_acc_3_24 = _mm256_add_pd(q_acc_3_24,q_tmp_3_24); + q_acc_3_25 = _mm256_add_pd(q_acc_3_25,q_tmp_3_25); + q_acc_3_26 = _mm256_add_pd(q_acc_3_26,q_tmp_3_26); + q_acc_3_27 = _mm256_add_pd(q_acc_3_27,q_tmp_3_27); + q_acc_3_28 = _mm256_add_pd(q_acc_3_28,q_tmp_3_28); + q_acc_3_29 = _mm256_add_pd(q_acc_3_29,q_tmp_3_29); + } + const __m256d elMatVec_0 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(q_acc_0_0,src_dof_0),_mm256_mul_pd(q_acc_0_1,src_dof_1)),_mm256_mul_pd(q_acc_0_10,src_dof_10)),_mm256_mul_pd(q_acc_0_11,src_dof_11)),_mm256_mul_pd(q_acc_0_12,src_dof_12)),_mm256_mul_pd(q_acc_0_13,src_dof_13)),_mm256_mul_pd(q_acc_0_14,src_dof_14)),_mm256_mul_pd(q_acc_0_15,src_dof_15)),_mm256_mul_pd(q_acc_0_16,src_dof_16)),_mm256_mul_pd(q_acc_0_17,src_dof_17)),_mm256_mul_pd(q_acc_0_18,src_dof_18)),_mm256_mul_pd(q_acc_0_19,src_dof_19)),_mm256_mul_pd(q_acc_0_2,src_dof_2)),_mm256_mul_pd(q_acc_0_20,src_dof_20)),_mm256_mul_pd(q_acc_0_21,src_dof_21)),_mm256_mul_pd(q_acc_0_22,src_dof_22)),_mm256_mul_pd(q_acc_0_23,src_dof_23)),_mm256_mul_pd(q_acc_0_24,src_dof_24)),_mm256_mul_pd(q_acc_0_25,src_dof_25)),_mm256_mul_pd(q_acc_0_26,src_dof_26)),_mm256_mul_pd(q_acc_0_27,src_dof_27)),_mm256_mul_pd(q_acc_0_28,src_dof_28)),_mm256_mul_pd(q_acc_0_29,src_dof_29)),_mm256_mul_pd(q_acc_0_3,src_dof_3)),_mm256_mul_pd(q_acc_0_4,src_dof_4)),_mm256_mul_pd(q_acc_0_5,src_dof_5)),_mm256_mul_pd(q_acc_0_6,src_dof_6)),_mm256_mul_pd(q_acc_0_7,src_dof_7)),_mm256_mul_pd(q_acc_0_8,src_dof_8)),_mm256_mul_pd(q_acc_0_9,src_dof_9)); + const __m256d elMatVec_1 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(q_acc_1_0,src_dof_0),_mm256_mul_pd(q_acc_1_1,src_dof_1)),_mm256_mul_pd(q_acc_1_10,src_dof_10)),_mm256_mul_pd(q_acc_1_11,src_dof_11)),_mm256_mul_pd(q_acc_1_12,src_dof_12)),_mm256_mul_pd(q_acc_1_13,src_dof_13)),_mm256_mul_pd(q_acc_1_14,src_dof_14)),_mm256_mul_pd(q_acc_1_15,src_dof_15)),_mm256_mul_pd(q_acc_1_16,src_dof_16)),_mm256_mul_pd(q_acc_1_17,src_dof_17)),_mm256_mul_pd(q_acc_1_18,src_dof_18)),_mm256_mul_pd(q_acc_1_19,src_dof_19)),_mm256_mul_pd(q_acc_1_2,src_dof_2)),_mm256_mul_pd(q_acc_1_20,src_dof_20)),_mm256_mul_pd(q_acc_1_21,src_dof_21)),_mm256_mul_pd(q_acc_1_22,src_dof_22)),_mm256_mul_pd(q_acc_1_23,src_dof_23)),_mm256_mul_pd(q_acc_1_24,src_dof_24)),_mm256_mul_pd(q_acc_1_25,src_dof_25)),_mm256_mul_pd(q_acc_1_26,src_dof_26)),_mm256_mul_pd(q_acc_1_27,src_dof_27)),_mm256_mul_pd(q_acc_1_28,src_dof_28)),_mm256_mul_pd(q_acc_1_29,src_dof_29)),_mm256_mul_pd(q_acc_1_3,src_dof_3)),_mm256_mul_pd(q_acc_1_4,src_dof_4)),_mm256_mul_pd(q_acc_1_5,src_dof_5)),_mm256_mul_pd(q_acc_1_6,src_dof_6)),_mm256_mul_pd(q_acc_1_7,src_dof_7)),_mm256_mul_pd(q_acc_1_8,src_dof_8)),_mm256_mul_pd(q_acc_1_9,src_dof_9)); + const __m256d elMatVec_2 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(q_acc_2_0,src_dof_0),_mm256_mul_pd(q_acc_2_1,src_dof_1)),_mm256_mul_pd(q_acc_2_10,src_dof_10)),_mm256_mul_pd(q_acc_2_11,src_dof_11)),_mm256_mul_pd(q_acc_2_12,src_dof_12)),_mm256_mul_pd(q_acc_2_13,src_dof_13)),_mm256_mul_pd(q_acc_2_14,src_dof_14)),_mm256_mul_pd(q_acc_2_15,src_dof_15)),_mm256_mul_pd(q_acc_2_16,src_dof_16)),_mm256_mul_pd(q_acc_2_17,src_dof_17)),_mm256_mul_pd(q_acc_2_18,src_dof_18)),_mm256_mul_pd(q_acc_2_19,src_dof_19)),_mm256_mul_pd(q_acc_2_2,src_dof_2)),_mm256_mul_pd(q_acc_2_20,src_dof_20)),_mm256_mul_pd(q_acc_2_21,src_dof_21)),_mm256_mul_pd(q_acc_2_22,src_dof_22)),_mm256_mul_pd(q_acc_2_23,src_dof_23)),_mm256_mul_pd(q_acc_2_24,src_dof_24)),_mm256_mul_pd(q_acc_2_25,src_dof_25)),_mm256_mul_pd(q_acc_2_26,src_dof_26)),_mm256_mul_pd(q_acc_2_27,src_dof_27)),_mm256_mul_pd(q_acc_2_28,src_dof_28)),_mm256_mul_pd(q_acc_2_29,src_dof_29)),_mm256_mul_pd(q_acc_2_3,src_dof_3)),_mm256_mul_pd(q_acc_2_4,src_dof_4)),_mm256_mul_pd(q_acc_2_5,src_dof_5)),_mm256_mul_pd(q_acc_2_6,src_dof_6)),_mm256_mul_pd(q_acc_2_7,src_dof_7)),_mm256_mul_pd(q_acc_2_8,src_dof_8)),_mm256_mul_pd(q_acc_2_9,src_dof_9)); + const __m256d elMatVec_3 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(q_acc_3_0,src_dof_0),_mm256_mul_pd(q_acc_3_1,src_dof_1)),_mm256_mul_pd(q_acc_3_10,src_dof_10)),_mm256_mul_pd(q_acc_3_11,src_dof_11)),_mm256_mul_pd(q_acc_3_12,src_dof_12)),_mm256_mul_pd(q_acc_3_13,src_dof_13)),_mm256_mul_pd(q_acc_3_14,src_dof_14)),_mm256_mul_pd(q_acc_3_15,src_dof_15)),_mm256_mul_pd(q_acc_3_16,src_dof_16)),_mm256_mul_pd(q_acc_3_17,src_dof_17)),_mm256_mul_pd(q_acc_3_18,src_dof_18)),_mm256_mul_pd(q_acc_3_19,src_dof_19)),_mm256_mul_pd(q_acc_3_2,src_dof_2)),_mm256_mul_pd(q_acc_3_20,src_dof_20)),_mm256_mul_pd(q_acc_3_21,src_dof_21)),_mm256_mul_pd(q_acc_3_22,src_dof_22)),_mm256_mul_pd(q_acc_3_23,src_dof_23)),_mm256_mul_pd(q_acc_3_24,src_dof_24)),_mm256_mul_pd(q_acc_3_25,src_dof_25)),_mm256_mul_pd(q_acc_3_26,src_dof_26)),_mm256_mul_pd(q_acc_3_27,src_dof_27)),_mm256_mul_pd(q_acc_3_28,src_dof_28)),_mm256_mul_pd(q_acc_3_29,src_dof_29)),_mm256_mul_pd(q_acc_3_3,src_dof_3)),_mm256_mul_pd(q_acc_3_4,src_dof_4)),_mm256_mul_pd(q_acc_3_5,src_dof_5)),_mm256_mul_pd(q_acc_3_6,src_dof_6)),_mm256_mul_pd(q_acc_3_7,src_dof_7)),_mm256_mul_pd(q_acc_3_8,src_dof_8)),_mm256_mul_pd(q_acc_3_9,src_dof_9)); + _mm256_storeu_pd(&_data_dst[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1],_mm256_add_pd(elMatVec_0,_mm256_loadu_pd(& _data_dst[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1]))); + _mm256_storeu_pd(&_data_dst[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))],_mm256_add_pd(elMatVec_1,_mm256_loadu_pd(& _data_dst[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))]))); + _mm256_storeu_pd(&_data_dst[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))],_mm256_add_pd(elMatVec_2,_mm256_loadu_pd(& _data_dst[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))]))); + _mm256_storeu_pd(&_data_dst[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1],_mm256_add_pd(elMatVec_3,_mm256_loadu_pd(& _data_dst[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1]))); + } + for (int64_t ctr_0 = (int64_t)((-ctr_1 - ctr_2 + micro_edges_per_macro_edge - 1) / (4)) * (4); ctr_0 < -ctr_1 - ctr_2 + micro_edges_per_macro_edge - 1; ctr_0 += 1) + { + + const int64_t phantom_ctr_0 = ctr_0; + real_t _data_float_loop_ctr_array_dim_0[4]; + _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0; + _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1; + _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2; + _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3; + real_t _data_float_loop_ctr_array_dim_1[4]; + _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1; + real_t _data_float_loop_ctr_array_dim_2[4]; + _data_float_loop_ctr_array_dim_2[0] = (real_t) ctr_2; + _data_float_loop_ctr_array_dim_2[1] = (real_t) ctr_2; + _data_float_loop_ctr_array_dim_2[2] = (real_t) ctr_2; + _data_float_loop_ctr_array_dim_2[3] = (real_t) ctr_2; + + const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]; + const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]; + const real_t p_affine_0_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]; + const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]; + const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]; + const real_t p_affine_1_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]; + const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_2_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_3_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_3_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_3_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0; + const real_t src_dof_0 = _data_src_vertex_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1]; + const real_t src_dof_1 = _data_src_vertex_0[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))]; + const real_t src_dof_2 = _data_src_vertex_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))]; + const real_t src_dof_3 = _data_src_vertex_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1]; + const real_t src_dof_4 = _data_src_edge_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))]; + const real_t src_dof_5 = _data_src_edge_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge - 1)*(micro_edges_per_macro_edge + 1)) / (6)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))]; + const real_t src_dof_6 = _data_src_edge_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]; + const real_t src_dof_7 = _data_src_edge_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 3*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + 1]; + const real_t src_dof_8 = _data_src_edge_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 5*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]; + const real_t src_dof_9 = _data_src_edge_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 4*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]; + const real_t src_dof_10 = _data_src_vertex_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1]; + const real_t src_dof_11 = _data_src_vertex_1[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))]; + const real_t src_dof_12 = _data_src_vertex_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))]; + const real_t src_dof_13 = _data_src_vertex_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1]; + const real_t src_dof_14 = _data_src_edge_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))]; + const real_t src_dof_15 = _data_src_edge_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge - 1)*(micro_edges_per_macro_edge + 1)) / (6)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))]; + const real_t src_dof_16 = _data_src_edge_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]; + const real_t src_dof_17 = _data_src_edge_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 3*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + 1]; + const real_t src_dof_18 = _data_src_edge_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 5*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]; + const real_t src_dof_19 = _data_src_edge_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 4*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]; + const real_t src_dof_20 = _data_src_vertex_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1]; + const real_t src_dof_21 = _data_src_vertex_2[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))]; + const real_t src_dof_22 = _data_src_vertex_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))]; + const real_t src_dof_23 = _data_src_vertex_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1]; + const real_t src_dof_24 = _data_src_edge_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))]; + const real_t src_dof_25 = _data_src_edge_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge - 1)*(micro_edges_per_macro_edge + 1)) / (6)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))]; + const real_t src_dof_26 = _data_src_edge_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]; + const real_t src_dof_27 = _data_src_edge_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 3*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + 1]; + const real_t src_dof_28 = _data_src_edge_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 5*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]; + const real_t src_dof_29 = _data_src_edge_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 4*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]; + const real_t rho_dof_0 = _data_rhoVertex[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1]; + const real_t rho_dof_1 = _data_rhoVertex[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))]; + const real_t rho_dof_2 = _data_rhoVertex[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))]; + const real_t rho_dof_3 = _data_rhoVertex[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1]; + const real_t rho_dof_4 = _data_rhoEdge[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))]; + const real_t rho_dof_5 = _data_rhoEdge[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge - 1)*(micro_edges_per_macro_edge + 1)) / (6)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))]; + const real_t rho_dof_6 = _data_rhoEdge[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]; + const real_t rho_dof_7 = _data_rhoEdge[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 3*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + 1]; + const real_t rho_dof_8 = _data_rhoEdge[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 5*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]; + const real_t rho_dof_9 = _data_rhoEdge[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 4*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]; + real_t q_acc_0_0 = 0.0; + real_t q_acc_0_1 = 0.0; + real_t q_acc_0_2 = 0.0; + real_t q_acc_0_3 = 0.0; + real_t q_acc_0_4 = 0.0; + real_t q_acc_0_5 = 0.0; + real_t q_acc_0_6 = 0.0; + real_t q_acc_0_7 = 0.0; + real_t q_acc_0_8 = 0.0; + real_t q_acc_0_9 = 0.0; + real_t q_acc_0_10 = 0.0; + real_t q_acc_0_11 = 0.0; + real_t q_acc_0_12 = 0.0; + real_t q_acc_0_13 = 0.0; + real_t q_acc_0_14 = 0.0; + real_t q_acc_0_15 = 0.0; + real_t q_acc_0_16 = 0.0; + real_t q_acc_0_17 = 0.0; + real_t q_acc_0_18 = 0.0; + real_t q_acc_0_19 = 0.0; + real_t q_acc_0_20 = 0.0; + real_t q_acc_0_21 = 0.0; + real_t q_acc_0_22 = 0.0; + real_t q_acc_0_23 = 0.0; + real_t q_acc_0_24 = 0.0; + real_t q_acc_0_25 = 0.0; + real_t q_acc_0_26 = 0.0; + real_t q_acc_0_27 = 0.0; + real_t q_acc_0_28 = 0.0; + real_t q_acc_0_29 = 0.0; + real_t q_acc_1_0 = 0.0; + real_t q_acc_1_1 = 0.0; + real_t q_acc_1_2 = 0.0; + real_t q_acc_1_3 = 0.0; + real_t q_acc_1_4 = 0.0; + real_t q_acc_1_5 = 0.0; + real_t q_acc_1_6 = 0.0; + real_t q_acc_1_7 = 0.0; + real_t q_acc_1_8 = 0.0; + real_t q_acc_1_9 = 0.0; + real_t q_acc_1_10 = 0.0; + real_t q_acc_1_11 = 0.0; + real_t q_acc_1_12 = 0.0; + real_t q_acc_1_13 = 0.0; + real_t q_acc_1_14 = 0.0; + real_t q_acc_1_15 = 0.0; + real_t q_acc_1_16 = 0.0; + real_t q_acc_1_17 = 0.0; + real_t q_acc_1_18 = 0.0; + real_t q_acc_1_19 = 0.0; + real_t q_acc_1_20 = 0.0; + real_t q_acc_1_21 = 0.0; + real_t q_acc_1_22 = 0.0; + real_t q_acc_1_23 = 0.0; + real_t q_acc_1_24 = 0.0; + real_t q_acc_1_25 = 0.0; + real_t q_acc_1_26 = 0.0; + real_t q_acc_1_27 = 0.0; + real_t q_acc_1_28 = 0.0; + real_t q_acc_1_29 = 0.0; + real_t q_acc_2_0 = 0.0; + real_t q_acc_2_1 = 0.0; + real_t q_acc_2_2 = 0.0; + real_t q_acc_2_3 = 0.0; + real_t q_acc_2_4 = 0.0; + real_t q_acc_2_5 = 0.0; + real_t q_acc_2_6 = 0.0; + real_t q_acc_2_7 = 0.0; + real_t q_acc_2_8 = 0.0; + real_t q_acc_2_9 = 0.0; + real_t q_acc_2_10 = 0.0; + real_t q_acc_2_11 = 0.0; + real_t q_acc_2_12 = 0.0; + real_t q_acc_2_13 = 0.0; + real_t q_acc_2_14 = 0.0; + real_t q_acc_2_15 = 0.0; + real_t q_acc_2_16 = 0.0; + real_t q_acc_2_17 = 0.0; + real_t q_acc_2_18 = 0.0; + real_t q_acc_2_19 = 0.0; + real_t q_acc_2_20 = 0.0; + real_t q_acc_2_21 = 0.0; + real_t q_acc_2_22 = 0.0; + real_t q_acc_2_23 = 0.0; + real_t q_acc_2_24 = 0.0; + real_t q_acc_2_25 = 0.0; + real_t q_acc_2_26 = 0.0; + real_t q_acc_2_27 = 0.0; + real_t q_acc_2_28 = 0.0; + real_t q_acc_2_29 = 0.0; + real_t q_acc_3_0 = 0.0; + real_t q_acc_3_1 = 0.0; + real_t q_acc_3_2 = 0.0; + real_t q_acc_3_3 = 0.0; + real_t q_acc_3_4 = 0.0; + real_t q_acc_3_5 = 0.0; + real_t q_acc_3_6 = 0.0; + real_t q_acc_3_7 = 0.0; + real_t q_acc_3_8 = 0.0; + real_t q_acc_3_9 = 0.0; + real_t q_acc_3_10 = 0.0; + real_t q_acc_3_11 = 0.0; + real_t q_acc_3_12 = 0.0; + real_t q_acc_3_13 = 0.0; + real_t q_acc_3_14 = 0.0; + real_t q_acc_3_15 = 0.0; + real_t q_acc_3_16 = 0.0; + real_t q_acc_3_17 = 0.0; + real_t q_acc_3_18 = 0.0; + real_t q_acc_3_19 = 0.0; + real_t q_acc_3_20 = 0.0; + real_t q_acc_3_21 = 0.0; + real_t q_acc_3_22 = 0.0; + real_t q_acc_3_23 = 0.0; + real_t q_acc_3_24 = 0.0; + real_t q_acc_3_25 = 0.0; + real_t q_acc_3_26 = 0.0; + real_t q_acc_3_27 = 0.0; + real_t q_acc_3_28 = 0.0; + real_t q_acc_3_29 = 0.0; + for (int64_t q = 0; q < 5; q += 1) + { + const real_t tmp_qloop_18 = p_affine_0_0 + (-p_affine_0_0 + p_affine_1_0)*_data_q_p_0[q] + (-p_affine_0_0 + p_affine_2_0)*_data_q_p_1[q] + (-p_affine_0_0 + p_affine_3_0)*_data_q_p_2[q]; + const real_t tmp_qloop_19 = (tmp_qloop_18*tmp_qloop_18); + const real_t tmp_qloop_22 = p_affine_0_2 + (-p_affine_0_2 + p_affine_1_2)*_data_q_p_0[q] + (-p_affine_0_2 + p_affine_2_2)*_data_q_p_1[q] + (-p_affine_0_2 + p_affine_3_2)*_data_q_p_2[q]; + const real_t tmp_qloop_23 = -tmp_qloop_12 - tmp_qloop_22; + const real_t tmp_qloop_24 = -tmp_qloop_1 - tmp_qloop_18; + const real_t tmp_qloop_25 = p_affine_0_1 + (-p_affine_0_1 + p_affine_1_1)*_data_q_p_0[q] + (-p_affine_0_1 + p_affine_2_1)*_data_q_p_1[q] + (-p_affine_0_1 + p_affine_3_1)*_data_q_p_2[q]; + const real_t tmp_qloop_26 = -tmp_qloop_25 - tmp_qloop_8; + const real_t tmp_qloop_27 = tmp_qloop_21*(tmp_qloop_10*tmp_qloop_23*tmp_qloop_9 + tmp_qloop_13*tmp_qloop_24*tmp_qloop_6 - tmp_qloop_14*tmp_qloop_24 - tmp_qloop_15*tmp_qloop_26 + tmp_qloop_2*tmp_qloop_26*tmp_qloop_3 - tmp_qloop_23*tmp_qloop_7); + const real_t tmp_qloop_28 = tmp_qloop_20 - tmp_qloop_27; + const real_t tmp_qloop_29 = (tmp_qloop_25*tmp_qloop_25); + const real_t tmp_qloop_30 = (tmp_qloop_22*tmp_qloop_22); + const real_t tmp_qloop_31 = tmp_qloop_19 + tmp_qloop_29 + tmp_qloop_30; + const real_t tmp_qloop_34 = tmp_qloop_18*tmp_qloop_33; + const real_t tmp_qloop_35 = tmp_qloop_28 + tmp_qloop_34; + const real_t tmp_qloop_36 = tmp_qloop_17*(-tmp_qloop_19*tmp_qloop_28 + tmp_qloop_31*tmp_qloop_35); + const real_t tmp_qloop_37 = pow(tmp_qloop_31, -1.5000000000000000); + const real_t tmp_qloop_38 = tmp_qloop_37*1.0; + const real_t tmp_qloop_41 = tmp_qloop_31*tmp_qloop_40; + const real_t tmp_qloop_42 = -tmp_qloop_25*tmp_qloop_28 - tmp_qloop_41; + const real_t tmp_qloop_43 = tmp_qloop_17*tmp_qloop_38; + const real_t tmp_qloop_44 = tmp_qloop_42*tmp_qloop_43; + const real_t tmp_qloop_47 = -tmp_qloop_28; + const real_t tmp_qloop_48 = tmp_qloop_22*tmp_qloop_47; + const real_t tmp_qloop_49 = tmp_qloop_31*tmp_qloop_46 + tmp_qloop_48; + const real_t tmp_qloop_50 = tmp_qloop_43*tmp_qloop_49; + const real_t tmp_qloop_51 = tmp_qloop_18*tmp_qloop_47 + tmp_qloop_31*tmp_qloop_33; + const real_t tmp_qloop_52 = tmp_qloop_43*tmp_qloop_51; + const real_t tmp_qloop_54 = tmp_qloop_25*tmp_qloop_53 + tmp_qloop_28; + const real_t tmp_qloop_55 = tmp_qloop_29*tmp_qloop_47 + tmp_qloop_31*tmp_qloop_54; + const real_t tmp_qloop_56 = tmp_qloop_22*tmp_qloop_46; + const real_t tmp_qloop_57 = tmp_qloop_28 + tmp_qloop_56; + const real_t tmp_qloop_58 = tmp_qloop_30*tmp_qloop_47 + tmp_qloop_31*tmp_qloop_57; + const real_t tmp_qloop_66 = pow(tmp_qloop_31, -2.5000000000000000)*3.0; + const real_t tmp_qloop_67 = tmp_qloop_36*tmp_qloop_66; + const real_t tmp_qloop_69 = tmp_qloop_18*2.0; + const real_t tmp_qloop_70 = -tmp_qloop_41; + const real_t tmp_qloop_71 = tmp_qloop_35*2.0; + const real_t tmp_qloop_73 = -tmp_qloop_31*tmp_qloop_72; + const real_t tmp_qloop_74 = -tmp_qloop_20 + tmp_qloop_27; + const real_t tmp_qloop_75 = tmp_qloop_18*tmp_qloop_68 + tmp_qloop_34*2.0 + tmp_qloop_74; + const real_t tmp_qloop_76 = tmp_qloop_17*tmp_qloop_66; + const real_t tmp_qloop_77 = tmp_qloop_51*tmp_qloop_76; + const real_t tmp_qloop_78 = tmp_qloop_25*tmp_qloop_77; + const real_t tmp_qloop_79 = tmp_qloop_25*2.0; + const real_t tmp_qloop_80 = tmp_qloop_18*tmp_qloop_40 + tmp_qloop_33*tmp_qloop_79; + const real_t tmp_qloop_81 = tmp_qloop_25*tmp_qloop_43; + const real_t tmp_qloop_82 = tmp_qloop_22*2.0; + const real_t tmp_qloop_83 = tmp_qloop_18*tmp_qloop_72 + tmp_qloop_33*tmp_qloop_82; + const real_t tmp_qloop_84 = -tmp_qloop_22*tmp_qloop_78; + const real_t tmp_qloop_85 = tmp_qloop_18*tmp_qloop_22; + const real_t tmp_qloop_86 = tmp_qloop_22*tmp_qloop_43; + const real_t tmp_qloop_87 = tmp_qloop_21*tmp_qloop_25*tmp_qloop_32 - tmp_qloop_40*tmp_qloop_69; + const real_t tmp_qloop_88 = tmp_qloop_18*tmp_qloop_43; + const real_t tmp_qloop_89 = tmp_qloop_42*tmp_qloop_76; + const real_t tmp_qloop_90 = -tmp_qloop_25*tmp_qloop_40 - tmp_qloop_28; + const real_t tmp_qloop_91 = tmp_qloop_25*tmp_qloop_89; + const real_t tmp_qloop_92 = tmp_qloop_25*tmp_qloop_72 - tmp_qloop_40*tmp_qloop_82; + const real_t tmp_qloop_93 = tmp_qloop_85*tmp_qloop_89; + const real_t tmp_qloop_94 = -tmp_qloop_31*tmp_qloop_68; + const real_t tmp_qloop_95 = tmp_qloop_55*tmp_qloop_76; + const real_t tmp_qloop_96 = tmp_qloop_22*tmp_qloop_68 + tmp_qloop_46*tmp_qloop_69; + const real_t tmp_qloop_97 = tmp_qloop_49*tmp_qloop_76; + const real_t tmp_qloop_98 = tmp_qloop_22*tmp_qloop_40 + tmp_qloop_46*tmp_qloop_79; + const real_t tmp_qloop_99 = tmp_qloop_25*tmp_qloop_97; + const real_t tmp_qloop_100 = tmp_qloop_18*tmp_qloop_99; + const real_t tmp_qloop_101 = tmp_qloop_22*tmp_qloop_72 + tmp_qloop_56*2.0 + tmp_qloop_74; + const real_t tmp_qloop_102 = tmp_qloop_58*tmp_qloop_76; + const real_t tmp_qloop_103 = (_data_q_p_0[q]*_data_q_p_0[q]); + const real_t tmp_qloop_104 = tmp_qloop_103*2.0; + const real_t tmp_qloop_105 = (_data_q_p_1[q]*_data_q_p_1[q]); + const real_t tmp_qloop_106 = tmp_qloop_105*2.0; + const real_t tmp_qloop_107 = (_data_q_p_2[q]*_data_q_p_2[q]); + const real_t tmp_qloop_108 = tmp_qloop_107*2.0; + const real_t tmp_qloop_109 = 4.0*_data_q_p_1[q]; + const real_t tmp_qloop_110 = tmp_qloop_109*_data_q_p_2[q]; + const real_t tmp_qloop_111 = 4.0*_data_q_p_0[q]; + const real_t tmp_qloop_112 = tmp_qloop_111*_data_q_p_1[q]; + const real_t tmp_qloop_113 = tmp_qloop_111*_data_q_p_2[q]; + const real_t tmp_qloop_114 = tmp_qloop_112 + tmp_qloop_113; + const real_t tmp_qloop_115 = tmp_qloop_104 + tmp_qloop_106 + tmp_qloop_108 + tmp_qloop_110 + tmp_qloop_114 - 3.0*_data_q_p_0[q] - 3.0*_data_q_p_1[q] - 3.0*_data_q_p_2[q] + 1.0; + const real_t tmp_qloop_116 = 4.0*_data_q_p_2[q]; + const real_t tmp_qloop_117 = tmp_qloop_109 + tmp_qloop_116; + const real_t tmp_qloop_118 = -rho_dof_8*tmp_qloop_109; + const real_t tmp_qloop_119 = rho_dof_0*(tmp_qloop_111 + tmp_qloop_117 - 3.0); + const real_t tmp_qloop_120 = -rho_dof_7*tmp_qloop_116 + tmp_qloop_119; + const real_t tmp_qloop_121 = rho_dof_1*(tmp_qloop_111 - 1.0) + rho_dof_5*tmp_qloop_116 + rho_dof_6*tmp_qloop_109 + rho_dof_9*(-tmp_qloop_117 - 8.0*_data_q_p_0[q] + 4.0) + tmp_qloop_118 + tmp_qloop_120; + const real_t tmp_qloop_122 = tmp_qloop_111 - 4.0; + const real_t tmp_qloop_123 = -rho_dof_9*tmp_qloop_111; + const real_t tmp_qloop_124 = rho_dof_2*(tmp_qloop_109 - 1.0) + rho_dof_4*tmp_qloop_116 + rho_dof_6*tmp_qloop_111 + rho_dof_8*(-tmp_qloop_116 - tmp_qloop_122 - 8.0*_data_q_p_1[q]) + tmp_qloop_120 + tmp_qloop_123; + const real_t tmp_qloop_125 = rho_dof_3*(tmp_qloop_116 - 1.0) + rho_dof_4*tmp_qloop_109 + rho_dof_5*tmp_qloop_111 + rho_dof_7*(-tmp_qloop_109 - tmp_qloop_122 - 8.0*_data_q_p_2[q]) + tmp_qloop_118 + tmp_qloop_119 + tmp_qloop_123; + const real_t tmp_qloop_127 = tmp_qloop_104 - _data_q_p_0[q]; + const real_t tmp_qloop_128 = tmp_qloop_106 - _data_q_p_1[q]; + const real_t tmp_qloop_129 = tmp_qloop_108 - _data_q_p_2[q]; + const real_t tmp_qloop_130 = tmp_qloop_107*-4.0 - tmp_qloop_110 - tmp_qloop_113 + tmp_qloop_116; + const real_t tmp_qloop_131 = tmp_qloop_105*4.0; + const real_t tmp_qloop_132 = tmp_qloop_109 - tmp_qloop_110 - tmp_qloop_112 - tmp_qloop_131; + const real_t tmp_qloop_133 = tmp_qloop_103*4.0; + const real_t tmp_qloop_134 = tmp_qloop_111 - tmp_qloop_114 - tmp_qloop_133; + const real_t tmp_qloop_158 = tmp_qloop_105*tmp_qloop_111; + const real_t jac_blending_0_0 = tmp_qloop_36*tmp_qloop_38; + const real_t jac_blending_0_1 = tmp_qloop_18*tmp_qloop_44; + const real_t jac_blending_0_2 = tmp_qloop_18*tmp_qloop_50; + const real_t jac_blending_1_0 = tmp_qloop_25*tmp_qloop_52; + const real_t jac_blending_1_1 = tmp_qloop_43*tmp_qloop_55; + const real_t tmp_qloop_63 = jac_blending_0_2*jac_blending_1_1; + const real_t jac_blending_1_2 = tmp_qloop_25*tmp_qloop_50; + const real_t tmp_qloop_60 = jac_blending_0_1*jac_blending_1_2; + const real_t jac_blending_2_0 = tmp_qloop_22*tmp_qloop_52; + const real_t jac_blending_2_1 = tmp_qloop_22*tmp_qloop_44; + const real_t tmp_qloop_61 = jac_blending_1_2*jac_blending_2_1; + const real_t jac_blending_2_2 = tmp_qloop_43*tmp_qloop_58; + const real_t tmp_qloop_59 = jac_blending_1_1*jac_blending_2_2; + const real_t tmp_qloop_62 = jac_blending_0_1*jac_blending_2_2; + const real_t tmp_qloop_64 = jac_blending_0_0*tmp_qloop_59 - jac_blending_0_0*tmp_qloop_61 + jac_blending_0_2*jac_blending_1_0*jac_blending_2_1 - jac_blending_1_0*tmp_qloop_62 + jac_blending_2_0*tmp_qloop_60 - jac_blending_2_0*tmp_qloop_63; + const real_t tmp_qloop_65 = 1.0 / (tmp_qloop_64); + const real_t abs_det_jac_blending = tmp_qloop_64; + const real_t tmp_qloop_135 = abs_det_jac_affine_GREEN_UP*abs_det_jac_blending*1.0 / (rho_dof_0*tmp_qloop_115 + rho_dof_1*tmp_qloop_127 + rho_dof_2*tmp_qloop_128 + rho_dof_3*tmp_qloop_129 + rho_dof_4*tmp_qloop_110 + rho_dof_5*tmp_qloop_113 + rho_dof_6*tmp_qloop_112 + rho_dof_7*tmp_qloop_130 + rho_dof_8*tmp_qloop_132 + rho_dof_9*tmp_qloop_134)*_data_q_w[q]; + const real_t tmp_qloop_136 = tmp_qloop_135*(1.0 - _data_q_p_0[q] - _data_q_p_1[q] - _data_q_p_2[q]); + const real_t tmp_qloop_142 = tmp_qloop_135*_data_q_p_0[q]; + const real_t tmp_qloop_144 = tmp_qloop_135*_data_q_p_2[q]; + const real_t tmp_qloop_147 = tmp_qloop_135*_data_q_p_1[q]; + const real_t tmp_qloop_159 = tmp_qloop_135*tmp_qloop_158; + const real_t tmp_qloop_161 = tmp_qloop_107*tmp_qloop_135; + const real_t jac_blending_inv_0_0 = tmp_qloop_65*(tmp_qloop_59 - tmp_qloop_61); + const real_t jac_blending_inv_0_1 = tmp_qloop_65*(jac_blending_0_2*jac_blending_2_1 - tmp_qloop_62); + const real_t jac_blending_inv_0_2 = tmp_qloop_65*(tmp_qloop_60 - tmp_qloop_63); + const real_t jac_blending_inv_1_0 = tmp_qloop_65*(-jac_blending_1_0*jac_blending_2_2 + jac_blending_1_2*jac_blending_2_0); + const real_t jac_blending_inv_1_1 = tmp_qloop_65*(jac_blending_0_0*jac_blending_2_2 - jac_blending_0_2*jac_blending_2_0); + const real_t jac_blending_inv_1_2 = tmp_qloop_65*(-jac_blending_0_0*jac_blending_1_2 + jac_blending_0_2*jac_blending_1_0); + const real_t jac_blending_inv_2_0 = tmp_qloop_65*(jac_blending_1_0*jac_blending_2_1 - jac_blending_1_1*jac_blending_2_0); + const real_t tmp_qloop_126 = tmp_qloop_121*(jac_affine_inv_0_0_GREEN_UP*jac_blending_inv_0_0 + jac_affine_inv_0_1_GREEN_UP*jac_blending_inv_1_0 + jac_affine_inv_0_2_GREEN_UP*jac_blending_inv_2_0) + tmp_qloop_124*(jac_affine_inv_1_0_GREEN_UP*jac_blending_inv_0_0 + jac_affine_inv_1_1_GREEN_UP*jac_blending_inv_1_0 + jac_affine_inv_1_2_GREEN_UP*jac_blending_inv_2_0) + tmp_qloop_125*(jac_affine_inv_2_0_GREEN_UP*jac_blending_inv_0_0 + jac_affine_inv_2_1_GREEN_UP*jac_blending_inv_1_0 + jac_affine_inv_2_2_GREEN_UP*jac_blending_inv_2_0); + const real_t tmp_qloop_137 = tmp_qloop_126*tmp_qloop_136; + const real_t tmp_qloop_143 = tmp_qloop_126*tmp_qloop_142; + const real_t tmp_qloop_145 = tmp_qloop_126*tmp_qloop_144; + const real_t tmp_qloop_146 = tmp_qloop_112*tmp_qloop_145; + const real_t tmp_qloop_148 = tmp_qloop_126*tmp_qloop_147; + const real_t tmp_qloop_157 = tmp_qloop_126*tmp_qloop_135; + const real_t tmp_qloop_160 = tmp_qloop_107*tmp_qloop_157; + const real_t jac_blending_inv_2_1 = tmp_qloop_65*(-jac_blending_0_0*jac_blending_2_1 + jac_blending_0_1*jac_blending_2_0); + const real_t tmp_qloop_138 = tmp_qloop_121*(jac_affine_inv_0_0_GREEN_UP*jac_blending_inv_0_1 + jac_affine_inv_0_1_GREEN_UP*jac_blending_inv_1_1 + jac_affine_inv_0_2_GREEN_UP*jac_blending_inv_2_1) + tmp_qloop_124*(jac_affine_inv_1_0_GREEN_UP*jac_blending_inv_0_1 + jac_affine_inv_1_1_GREEN_UP*jac_blending_inv_1_1 + jac_affine_inv_1_2_GREEN_UP*jac_blending_inv_2_1) + tmp_qloop_125*(jac_affine_inv_2_0_GREEN_UP*jac_blending_inv_0_1 + jac_affine_inv_2_1_GREEN_UP*jac_blending_inv_1_1 + jac_affine_inv_2_2_GREEN_UP*jac_blending_inv_2_1); + const real_t tmp_qloop_139 = tmp_qloop_136*tmp_qloop_138; + const real_t tmp_qloop_149 = tmp_qloop_138*tmp_qloop_142; + const real_t tmp_qloop_150 = tmp_qloop_138*tmp_qloop_144; + const real_t tmp_qloop_151 = tmp_qloop_112*tmp_qloop_150; + const real_t tmp_qloop_152 = tmp_qloop_138*tmp_qloop_147; + const real_t tmp_qloop_162 = tmp_qloop_138*tmp_qloop_161; + const real_t jac_blending_inv_2_2 = tmp_qloop_65*(jac_blending_0_0*jac_blending_1_1 - jac_blending_0_1*jac_blending_1_0); + const real_t tmp_qloop_140 = tmp_qloop_121*(jac_affine_inv_0_0_GREEN_UP*jac_blending_inv_0_2 + jac_affine_inv_0_1_GREEN_UP*jac_blending_inv_1_2 + jac_affine_inv_0_2_GREEN_UP*jac_blending_inv_2_2) + tmp_qloop_124*(jac_affine_inv_1_0_GREEN_UP*jac_blending_inv_0_2 + jac_affine_inv_1_1_GREEN_UP*jac_blending_inv_1_2 + jac_affine_inv_1_2_GREEN_UP*jac_blending_inv_2_2) + tmp_qloop_125*(jac_affine_inv_2_0_GREEN_UP*jac_blending_inv_0_2 + jac_affine_inv_2_1_GREEN_UP*jac_blending_inv_1_2 + jac_affine_inv_2_2_GREEN_UP*jac_blending_inv_2_2); + const real_t tmp_qloop_141 = tmp_qloop_136*tmp_qloop_140; + const real_t tmp_qloop_153 = tmp_qloop_140*tmp_qloop_142; + const real_t tmp_qloop_154 = tmp_qloop_140*tmp_qloop_144; + const real_t tmp_qloop_155 = tmp_qloop_112*tmp_qloop_154; + const real_t tmp_qloop_156 = tmp_qloop_140*tmp_qloop_147; + const real_t tmp_qloop_163 = tmp_qloop_140*tmp_qloop_161; + const real_t hessian_blending_0_0_0 = tmp_qloop_17*tmp_qloop_37*(tmp_qloop_19*tmp_qloop_68 - tmp_qloop_28*tmp_qloop_69 + tmp_qloop_31*(tmp_qloop_33 - tmp_qloop_68) + tmp_qloop_35*tmp_qloop_69)*1.0 - tmp_qloop_18*tmp_qloop_67; + const real_t hessian_blending_1_0_0 = tmp_qloop_17*tmp_qloop_37*(tmp_qloop_19*tmp_qloop_40 + tmp_qloop_25*tmp_qloop_71 + tmp_qloop_70)*1.0 - tmp_qloop_25*tmp_qloop_67; + const real_t hessian_blending_2_0_0 = tmp_qloop_17*tmp_qloop_37*(tmp_qloop_19*tmp_qloop_72 + tmp_qloop_22*tmp_qloop_71 + tmp_qloop_73)*1.0 - tmp_qloop_22*tmp_qloop_67; + const real_t hessian_blending_0_0_1 = tmp_qloop_17*tmp_qloop_25*tmp_qloop_37*tmp_qloop_75*1.0 - tmp_qloop_18*tmp_qloop_78; + const real_t hessian_blending_1_0_1 = -tmp_qloop_29*tmp_qloop_77 + tmp_qloop_52 + tmp_qloop_80*tmp_qloop_81; + const real_t hessian_blending_2_0_1 = tmp_qloop_81*tmp_qloop_83 + tmp_qloop_84; + const real_t hessian_blending_0_0_2 = tmp_qloop_17*tmp_qloop_22*tmp_qloop_37*tmp_qloop_75*1.0 - tmp_qloop_77*tmp_qloop_85; + const real_t hessian_blending_1_0_2 = tmp_qloop_80*tmp_qloop_86 + tmp_qloop_84; + const real_t hessian_blending_2_0_2 = -tmp_qloop_30*tmp_qloop_77 + tmp_qloop_52 + tmp_qloop_83*tmp_qloop_86; + const real_t hessian_blending_0_1_0 = -tmp_qloop_19*tmp_qloop_89 + tmp_qloop_44 + tmp_qloop_87*tmp_qloop_88; + const real_t hessian_blending_1_1_0 = tmp_qloop_17*tmp_qloop_18*tmp_qloop_37*tmp_qloop_90*1.0 - tmp_qloop_18*tmp_qloop_91; + const real_t hessian_blending_2_1_0 = tmp_qloop_88*tmp_qloop_92 - tmp_qloop_93; + const real_t hessian_blending_0_1_1 = tmp_qloop_17*tmp_qloop_37*(tmp_qloop_29*tmp_qloop_68 + tmp_qloop_54*tmp_qloop_69 + tmp_qloop_94)*1.0 - tmp_qloop_18*tmp_qloop_95; + const real_t hessian_blending_1_1_1 = tmp_qloop_17*tmp_qloop_37*(tmp_qloop_29*tmp_qloop_40 + tmp_qloop_31*(-tmp_qloop_40 + tmp_qloop_53) + tmp_qloop_47*tmp_qloop_79 + tmp_qloop_54*tmp_qloop_79)*1.0 - tmp_qloop_25*tmp_qloop_95; + const real_t hessian_blending_2_1_1 = tmp_qloop_17*tmp_qloop_37*(tmp_qloop_29*tmp_qloop_72 + tmp_qloop_54*tmp_qloop_82 + tmp_qloop_73)*1.0 - tmp_qloop_22*tmp_qloop_95; + const real_t hessian_blending_0_1_2 = tmp_qloop_17*tmp_qloop_22*tmp_qloop_37*tmp_qloop_87*1.0 - tmp_qloop_93; + const real_t hessian_blending_1_1_2 = tmp_qloop_17*tmp_qloop_22*tmp_qloop_37*tmp_qloop_90*1.0 - tmp_qloop_22*tmp_qloop_91; + const real_t hessian_blending_2_1_2 = -tmp_qloop_30*tmp_qloop_89 + tmp_qloop_44 + tmp_qloop_86*tmp_qloop_92; + const real_t hessian_blending_0_2_0 = -tmp_qloop_19*tmp_qloop_97 + tmp_qloop_50 + tmp_qloop_88*tmp_qloop_96; + const real_t hessian_blending_1_2_0 = -tmp_qloop_100 + tmp_qloop_17*tmp_qloop_18*tmp_qloop_37*tmp_qloop_98*1.0; + const real_t hessian_blending_2_2_0 = tmp_qloop_101*tmp_qloop_17*tmp_qloop_18*tmp_qloop_37*1.0 - tmp_qloop_85*tmp_qloop_97; + const real_t hessian_blending_0_2_1 = -tmp_qloop_100 + tmp_qloop_17*tmp_qloop_25*tmp_qloop_37*tmp_qloop_96*1.0; + const real_t hessian_blending_1_2_1 = -tmp_qloop_29*tmp_qloop_97 + tmp_qloop_50 + tmp_qloop_81*tmp_qloop_98; + const real_t hessian_blending_2_2_1 = tmp_qloop_101*tmp_qloop_17*tmp_qloop_25*tmp_qloop_37*1.0 - tmp_qloop_22*tmp_qloop_99; + const real_t hessian_blending_0_2_2 = -tmp_qloop_102*tmp_qloop_18 + tmp_qloop_17*tmp_qloop_37*(tmp_qloop_30*tmp_qloop_68 + tmp_qloop_57*tmp_qloop_69 + tmp_qloop_94)*1.0; + const real_t hessian_blending_1_2_2 = -tmp_qloop_102*tmp_qloop_25 + tmp_qloop_17*tmp_qloop_37*(tmp_qloop_30*tmp_qloop_40 + tmp_qloop_57*tmp_qloop_79 + tmp_qloop_70)*1.0; + const real_t hessian_blending_2_2_2 = -tmp_qloop_102*tmp_qloop_22 + tmp_qloop_17*tmp_qloop_37*(tmp_qloop_30*tmp_qloop_72 + tmp_qloop_31*(tmp_qloop_46 - tmp_qloop_72) + tmp_qloop_48*2.0 + tmp_qloop_57*tmp_qloop_82)*1.0; + const real_t q_tmp_0_0 = tmp_qloop_115*tmp_qloop_137; + const real_t q_tmp_0_1 = tmp_qloop_127*tmp_qloop_137; + const real_t q_tmp_0_2 = tmp_qloop_128*tmp_qloop_137; + const real_t q_tmp_0_3 = tmp_qloop_129*tmp_qloop_137; + const real_t q_tmp_0_4 = tmp_qloop_110*tmp_qloop_137; + const real_t q_tmp_0_5 = tmp_qloop_113*tmp_qloop_137; + const real_t q_tmp_0_6 = tmp_qloop_112*tmp_qloop_137; + const real_t q_tmp_0_7 = tmp_qloop_130*tmp_qloop_137; + const real_t q_tmp_0_8 = tmp_qloop_132*tmp_qloop_137; + const real_t q_tmp_0_9 = tmp_qloop_134*tmp_qloop_137; + const real_t q_tmp_0_10 = tmp_qloop_115*tmp_qloop_139; + const real_t q_tmp_0_11 = tmp_qloop_127*tmp_qloop_139; + const real_t q_tmp_0_12 = tmp_qloop_128*tmp_qloop_139; + const real_t q_tmp_0_13 = tmp_qloop_129*tmp_qloop_139; + const real_t q_tmp_0_14 = tmp_qloop_110*tmp_qloop_139; + const real_t q_tmp_0_15 = tmp_qloop_113*tmp_qloop_139; + const real_t q_tmp_0_16 = tmp_qloop_112*tmp_qloop_139; + const real_t q_tmp_0_17 = tmp_qloop_130*tmp_qloop_139; + const real_t q_tmp_0_18 = tmp_qloop_132*tmp_qloop_139; + const real_t q_tmp_0_19 = tmp_qloop_134*tmp_qloop_139; + const real_t q_tmp_0_20 = tmp_qloop_115*tmp_qloop_141; + const real_t q_tmp_0_21 = tmp_qloop_127*tmp_qloop_141; + const real_t q_tmp_0_22 = tmp_qloop_128*tmp_qloop_141; + const real_t q_tmp_0_23 = tmp_qloop_129*tmp_qloop_141; + const real_t q_tmp_0_24 = tmp_qloop_110*tmp_qloop_141; + const real_t q_tmp_0_25 = tmp_qloop_113*tmp_qloop_141; + const real_t q_tmp_0_26 = tmp_qloop_112*tmp_qloop_141; + const real_t q_tmp_0_27 = tmp_qloop_130*tmp_qloop_141; + const real_t q_tmp_0_28 = tmp_qloop_132*tmp_qloop_141; + const real_t q_tmp_0_29 = tmp_qloop_134*tmp_qloop_141; + const real_t q_tmp_1_0 = tmp_qloop_115*tmp_qloop_143; + const real_t q_tmp_1_1 = tmp_qloop_127*tmp_qloop_143; + const real_t q_tmp_1_2 = tmp_qloop_128*tmp_qloop_143; + const real_t q_tmp_1_3 = tmp_qloop_129*tmp_qloop_143; + const real_t q_tmp_1_4 = tmp_qloop_146; + const real_t q_tmp_1_5 = tmp_qloop_133*tmp_qloop_145; + const real_t q_tmp_1_6 = tmp_qloop_133*tmp_qloop_148; + const real_t q_tmp_1_7 = tmp_qloop_130*tmp_qloop_143; + const real_t q_tmp_1_8 = tmp_qloop_132*tmp_qloop_143; + const real_t q_tmp_1_9 = tmp_qloop_134*tmp_qloop_143; + const real_t q_tmp_1_10 = tmp_qloop_115*tmp_qloop_149; + const real_t q_tmp_1_11 = tmp_qloop_127*tmp_qloop_149; + const real_t q_tmp_1_12 = tmp_qloop_128*tmp_qloop_149; + const real_t q_tmp_1_13 = tmp_qloop_129*tmp_qloop_149; + const real_t q_tmp_1_14 = tmp_qloop_151; + const real_t q_tmp_1_15 = tmp_qloop_133*tmp_qloop_150; + const real_t q_tmp_1_16 = tmp_qloop_133*tmp_qloop_152; + const real_t q_tmp_1_17 = tmp_qloop_130*tmp_qloop_149; + const real_t q_tmp_1_18 = tmp_qloop_132*tmp_qloop_149; + const real_t q_tmp_1_19 = tmp_qloop_134*tmp_qloop_149; + const real_t q_tmp_1_20 = tmp_qloop_115*tmp_qloop_153; + const real_t q_tmp_1_21 = tmp_qloop_127*tmp_qloop_153; + const real_t q_tmp_1_22 = tmp_qloop_128*tmp_qloop_153; + const real_t q_tmp_1_23 = tmp_qloop_129*tmp_qloop_153; + const real_t q_tmp_1_24 = tmp_qloop_155; + const real_t q_tmp_1_25 = tmp_qloop_133*tmp_qloop_154; + const real_t q_tmp_1_26 = tmp_qloop_133*tmp_qloop_156; + const real_t q_tmp_1_27 = tmp_qloop_130*tmp_qloop_153; + const real_t q_tmp_1_28 = tmp_qloop_132*tmp_qloop_153; + const real_t q_tmp_1_29 = tmp_qloop_134*tmp_qloop_153; + const real_t q_tmp_2_0 = tmp_qloop_115*tmp_qloop_148; + const real_t q_tmp_2_1 = tmp_qloop_127*tmp_qloop_148; + const real_t q_tmp_2_2 = tmp_qloop_128*tmp_qloop_148; + const real_t q_tmp_2_3 = tmp_qloop_129*tmp_qloop_148; + const real_t q_tmp_2_4 = tmp_qloop_131*tmp_qloop_145; + const real_t q_tmp_2_5 = tmp_qloop_146; + const real_t q_tmp_2_6 = tmp_qloop_157*tmp_qloop_158; + const real_t q_tmp_2_7 = tmp_qloop_130*tmp_qloop_148; + const real_t q_tmp_2_8 = tmp_qloop_132*tmp_qloop_148; + const real_t q_tmp_2_9 = tmp_qloop_134*tmp_qloop_148; + const real_t q_tmp_2_10 = tmp_qloop_115*tmp_qloop_152; + const real_t q_tmp_2_11 = tmp_qloop_127*tmp_qloop_152; + const real_t q_tmp_2_12 = tmp_qloop_128*tmp_qloop_152; + const real_t q_tmp_2_13 = tmp_qloop_129*tmp_qloop_152; + const real_t q_tmp_2_14 = tmp_qloop_131*tmp_qloop_150; + const real_t q_tmp_2_15 = tmp_qloop_151; + const real_t q_tmp_2_16 = tmp_qloop_138*tmp_qloop_159; + const real_t q_tmp_2_17 = tmp_qloop_130*tmp_qloop_152; + const real_t q_tmp_2_18 = tmp_qloop_132*tmp_qloop_152; + const real_t q_tmp_2_19 = tmp_qloop_134*tmp_qloop_152; + const real_t q_tmp_2_20 = tmp_qloop_115*tmp_qloop_156; + const real_t q_tmp_2_21 = tmp_qloop_127*tmp_qloop_156; + const real_t q_tmp_2_22 = tmp_qloop_128*tmp_qloop_156; + const real_t q_tmp_2_23 = tmp_qloop_129*tmp_qloop_156; + const real_t q_tmp_2_24 = tmp_qloop_131*tmp_qloop_154; + const real_t q_tmp_2_25 = tmp_qloop_155; + const real_t q_tmp_2_26 = tmp_qloop_140*tmp_qloop_159; + const real_t q_tmp_2_27 = tmp_qloop_130*tmp_qloop_156; + const real_t q_tmp_2_28 = tmp_qloop_132*tmp_qloop_156; + const real_t q_tmp_2_29 = tmp_qloop_134*tmp_qloop_156; + const real_t q_tmp_3_0 = tmp_qloop_115*tmp_qloop_145; + const real_t q_tmp_3_1 = tmp_qloop_127*tmp_qloop_145; + const real_t q_tmp_3_2 = tmp_qloop_128*tmp_qloop_145; + const real_t q_tmp_3_3 = tmp_qloop_129*tmp_qloop_145; + const real_t q_tmp_3_4 = tmp_qloop_109*tmp_qloop_160; + const real_t q_tmp_3_5 = tmp_qloop_111*tmp_qloop_160; + const real_t q_tmp_3_6 = tmp_qloop_146; + const real_t q_tmp_3_7 = tmp_qloop_130*tmp_qloop_145; + const real_t q_tmp_3_8 = tmp_qloop_132*tmp_qloop_145; + const real_t q_tmp_3_9 = tmp_qloop_134*tmp_qloop_145; + const real_t q_tmp_3_10 = tmp_qloop_115*tmp_qloop_150; + const real_t q_tmp_3_11 = tmp_qloop_127*tmp_qloop_150; + const real_t q_tmp_3_12 = tmp_qloop_128*tmp_qloop_150; + const real_t q_tmp_3_13 = tmp_qloop_129*tmp_qloop_150; + const real_t q_tmp_3_14 = tmp_qloop_109*tmp_qloop_162; + const real_t q_tmp_3_15 = tmp_qloop_111*tmp_qloop_162; + const real_t q_tmp_3_16 = tmp_qloop_151; + const real_t q_tmp_3_17 = tmp_qloop_130*tmp_qloop_150; + const real_t q_tmp_3_18 = tmp_qloop_132*tmp_qloop_150; + const real_t q_tmp_3_19 = tmp_qloop_134*tmp_qloop_150; + const real_t q_tmp_3_20 = tmp_qloop_115*tmp_qloop_154; + const real_t q_tmp_3_21 = tmp_qloop_127*tmp_qloop_154; + const real_t q_tmp_3_22 = tmp_qloop_128*tmp_qloop_154; + const real_t q_tmp_3_23 = tmp_qloop_129*tmp_qloop_154; + const real_t q_tmp_3_24 = tmp_qloop_109*tmp_qloop_163; + const real_t q_tmp_3_25 = tmp_qloop_111*tmp_qloop_163; + const real_t q_tmp_3_26 = tmp_qloop_155; + const real_t q_tmp_3_27 = tmp_qloop_130*tmp_qloop_154; + const real_t q_tmp_3_28 = tmp_qloop_132*tmp_qloop_154; + const real_t q_tmp_3_29 = tmp_qloop_134*tmp_qloop_154; + q_acc_0_0 = q_acc_0_0 + q_tmp_0_0; + q_acc_0_1 = q_acc_0_1 + q_tmp_0_1; + q_acc_0_2 = q_acc_0_2 + q_tmp_0_2; + q_acc_0_3 = q_acc_0_3 + q_tmp_0_3; + q_acc_0_4 = q_acc_0_4 + q_tmp_0_4; + q_acc_0_5 = q_acc_0_5 + q_tmp_0_5; + q_acc_0_6 = q_acc_0_6 + q_tmp_0_6; + q_acc_0_7 = q_acc_0_7 + q_tmp_0_7; + q_acc_0_8 = q_acc_0_8 + q_tmp_0_8; + q_acc_0_9 = q_acc_0_9 + q_tmp_0_9; + q_acc_0_10 = q_acc_0_10 + q_tmp_0_10; + q_acc_0_11 = q_acc_0_11 + q_tmp_0_11; + q_acc_0_12 = q_acc_0_12 + q_tmp_0_12; + q_acc_0_13 = q_acc_0_13 + q_tmp_0_13; + q_acc_0_14 = q_acc_0_14 + q_tmp_0_14; + q_acc_0_15 = q_acc_0_15 + q_tmp_0_15; + q_acc_0_16 = q_acc_0_16 + q_tmp_0_16; + q_acc_0_17 = q_acc_0_17 + q_tmp_0_17; + q_acc_0_18 = q_acc_0_18 + q_tmp_0_18; + q_acc_0_19 = q_acc_0_19 + q_tmp_0_19; + q_acc_0_20 = q_acc_0_20 + q_tmp_0_20; + q_acc_0_21 = q_acc_0_21 + q_tmp_0_21; + q_acc_0_22 = q_acc_0_22 + q_tmp_0_22; + q_acc_0_23 = q_acc_0_23 + q_tmp_0_23; + q_acc_0_24 = q_acc_0_24 + q_tmp_0_24; + q_acc_0_25 = q_acc_0_25 + q_tmp_0_25; + q_acc_0_26 = q_acc_0_26 + q_tmp_0_26; + q_acc_0_27 = q_acc_0_27 + q_tmp_0_27; + q_acc_0_28 = q_acc_0_28 + q_tmp_0_28; + q_acc_0_29 = q_acc_0_29 + q_tmp_0_29; + q_acc_1_0 = q_acc_1_0 + q_tmp_1_0; + q_acc_1_1 = q_acc_1_1 + q_tmp_1_1; + q_acc_1_2 = q_acc_1_2 + q_tmp_1_2; + q_acc_1_3 = q_acc_1_3 + q_tmp_1_3; + q_acc_1_4 = q_acc_1_4 + q_tmp_1_4; + q_acc_1_5 = q_acc_1_5 + q_tmp_1_5; + q_acc_1_6 = q_acc_1_6 + q_tmp_1_6; + q_acc_1_7 = q_acc_1_7 + q_tmp_1_7; + q_acc_1_8 = q_acc_1_8 + q_tmp_1_8; + q_acc_1_9 = q_acc_1_9 + q_tmp_1_9; + q_acc_1_10 = q_acc_1_10 + q_tmp_1_10; + q_acc_1_11 = q_acc_1_11 + q_tmp_1_11; + q_acc_1_12 = q_acc_1_12 + q_tmp_1_12; + q_acc_1_13 = q_acc_1_13 + q_tmp_1_13; + q_acc_1_14 = q_acc_1_14 + q_tmp_1_14; + q_acc_1_15 = q_acc_1_15 + q_tmp_1_15; + q_acc_1_16 = q_acc_1_16 + q_tmp_1_16; + q_acc_1_17 = q_acc_1_17 + q_tmp_1_17; + q_acc_1_18 = q_acc_1_18 + q_tmp_1_18; + q_acc_1_19 = q_acc_1_19 + q_tmp_1_19; + q_acc_1_20 = q_acc_1_20 + q_tmp_1_20; + q_acc_1_21 = q_acc_1_21 + q_tmp_1_21; + q_acc_1_22 = q_acc_1_22 + q_tmp_1_22; + q_acc_1_23 = q_acc_1_23 + q_tmp_1_23; + q_acc_1_24 = q_acc_1_24 + q_tmp_1_24; + q_acc_1_25 = q_acc_1_25 + q_tmp_1_25; + q_acc_1_26 = q_acc_1_26 + q_tmp_1_26; + q_acc_1_27 = q_acc_1_27 + q_tmp_1_27; + q_acc_1_28 = q_acc_1_28 + q_tmp_1_28; + q_acc_1_29 = q_acc_1_29 + q_tmp_1_29; + q_acc_2_0 = q_acc_2_0 + q_tmp_2_0; + q_acc_2_1 = q_acc_2_1 + q_tmp_2_1; + q_acc_2_2 = q_acc_2_2 + q_tmp_2_2; + q_acc_2_3 = q_acc_2_3 + q_tmp_2_3; + q_acc_2_4 = q_acc_2_4 + q_tmp_2_4; + q_acc_2_5 = q_acc_2_5 + q_tmp_2_5; + q_acc_2_6 = q_acc_2_6 + q_tmp_2_6; + q_acc_2_7 = q_acc_2_7 + q_tmp_2_7; + q_acc_2_8 = q_acc_2_8 + q_tmp_2_8; + q_acc_2_9 = q_acc_2_9 + q_tmp_2_9; + q_acc_2_10 = q_acc_2_10 + q_tmp_2_10; + q_acc_2_11 = q_acc_2_11 + q_tmp_2_11; + q_acc_2_12 = q_acc_2_12 + q_tmp_2_12; + q_acc_2_13 = q_acc_2_13 + q_tmp_2_13; + q_acc_2_14 = q_acc_2_14 + q_tmp_2_14; + q_acc_2_15 = q_acc_2_15 + q_tmp_2_15; + q_acc_2_16 = q_acc_2_16 + q_tmp_2_16; + q_acc_2_17 = q_acc_2_17 + q_tmp_2_17; + q_acc_2_18 = q_acc_2_18 + q_tmp_2_18; + q_acc_2_19 = q_acc_2_19 + q_tmp_2_19; + q_acc_2_20 = q_acc_2_20 + q_tmp_2_20; + q_acc_2_21 = q_acc_2_21 + q_tmp_2_21; + q_acc_2_22 = q_acc_2_22 + q_tmp_2_22; + q_acc_2_23 = q_acc_2_23 + q_tmp_2_23; + q_acc_2_24 = q_acc_2_24 + q_tmp_2_24; + q_acc_2_25 = q_acc_2_25 + q_tmp_2_25; + q_acc_2_26 = q_acc_2_26 + q_tmp_2_26; + q_acc_2_27 = q_acc_2_27 + q_tmp_2_27; + q_acc_2_28 = q_acc_2_28 + q_tmp_2_28; + q_acc_2_29 = q_acc_2_29 + q_tmp_2_29; + q_acc_3_0 = q_acc_3_0 + q_tmp_3_0; + q_acc_3_1 = q_acc_3_1 + q_tmp_3_1; + q_acc_3_2 = q_acc_3_2 + q_tmp_3_2; + q_acc_3_3 = q_acc_3_3 + q_tmp_3_3; + q_acc_3_4 = q_acc_3_4 + q_tmp_3_4; + q_acc_3_5 = q_acc_3_5 + q_tmp_3_5; + q_acc_3_6 = q_acc_3_6 + q_tmp_3_6; + q_acc_3_7 = q_acc_3_7 + q_tmp_3_7; + q_acc_3_8 = q_acc_3_8 + q_tmp_3_8; + q_acc_3_9 = q_acc_3_9 + q_tmp_3_9; + q_acc_3_10 = q_acc_3_10 + q_tmp_3_10; + q_acc_3_11 = q_acc_3_11 + q_tmp_3_11; + q_acc_3_12 = q_acc_3_12 + q_tmp_3_12; + q_acc_3_13 = q_acc_3_13 + q_tmp_3_13; + q_acc_3_14 = q_acc_3_14 + q_tmp_3_14; + q_acc_3_15 = q_acc_3_15 + q_tmp_3_15; + q_acc_3_16 = q_acc_3_16 + q_tmp_3_16; + q_acc_3_17 = q_acc_3_17 + q_tmp_3_17; + q_acc_3_18 = q_acc_3_18 + q_tmp_3_18; + q_acc_3_19 = q_acc_3_19 + q_tmp_3_19; + q_acc_3_20 = q_acc_3_20 + q_tmp_3_20; + q_acc_3_21 = q_acc_3_21 + q_tmp_3_21; + q_acc_3_22 = q_acc_3_22 + q_tmp_3_22; + q_acc_3_23 = q_acc_3_23 + q_tmp_3_23; + q_acc_3_24 = q_acc_3_24 + q_tmp_3_24; + q_acc_3_25 = q_acc_3_25 + q_tmp_3_25; + q_acc_3_26 = q_acc_3_26 + q_tmp_3_26; + q_acc_3_27 = q_acc_3_27 + q_tmp_3_27; + q_acc_3_28 = q_acc_3_28 + q_tmp_3_28; + q_acc_3_29 = q_acc_3_29 + q_tmp_3_29; + } + const real_t elMatVec_0 = q_acc_0_0*src_dof_0 + q_acc_0_1*src_dof_1 + q_acc_0_10*src_dof_10 + q_acc_0_11*src_dof_11 + q_acc_0_12*src_dof_12 + q_acc_0_13*src_dof_13 + q_acc_0_14*src_dof_14 + q_acc_0_15*src_dof_15 + q_acc_0_16*src_dof_16 + q_acc_0_17*src_dof_17 + q_acc_0_18*src_dof_18 + q_acc_0_19*src_dof_19 + q_acc_0_2*src_dof_2 + q_acc_0_20*src_dof_20 + q_acc_0_21*src_dof_21 + q_acc_0_22*src_dof_22 + q_acc_0_23*src_dof_23 + q_acc_0_24*src_dof_24 + q_acc_0_25*src_dof_25 + q_acc_0_26*src_dof_26 + q_acc_0_27*src_dof_27 + q_acc_0_28*src_dof_28 + q_acc_0_29*src_dof_29 + q_acc_0_3*src_dof_3 + q_acc_0_4*src_dof_4 + q_acc_0_5*src_dof_5 + q_acc_0_6*src_dof_6 + q_acc_0_7*src_dof_7 + q_acc_0_8*src_dof_8 + q_acc_0_9*src_dof_9; + const real_t elMatVec_1 = q_acc_1_0*src_dof_0 + q_acc_1_1*src_dof_1 + q_acc_1_10*src_dof_10 + q_acc_1_11*src_dof_11 + q_acc_1_12*src_dof_12 + q_acc_1_13*src_dof_13 + q_acc_1_14*src_dof_14 + q_acc_1_15*src_dof_15 + q_acc_1_16*src_dof_16 + q_acc_1_17*src_dof_17 + q_acc_1_18*src_dof_18 + q_acc_1_19*src_dof_19 + q_acc_1_2*src_dof_2 + q_acc_1_20*src_dof_20 + q_acc_1_21*src_dof_21 + q_acc_1_22*src_dof_22 + q_acc_1_23*src_dof_23 + q_acc_1_24*src_dof_24 + q_acc_1_25*src_dof_25 + q_acc_1_26*src_dof_26 + q_acc_1_27*src_dof_27 + q_acc_1_28*src_dof_28 + q_acc_1_29*src_dof_29 + q_acc_1_3*src_dof_3 + q_acc_1_4*src_dof_4 + q_acc_1_5*src_dof_5 + q_acc_1_6*src_dof_6 + q_acc_1_7*src_dof_7 + q_acc_1_8*src_dof_8 + q_acc_1_9*src_dof_9; + const real_t elMatVec_2 = q_acc_2_0*src_dof_0 + q_acc_2_1*src_dof_1 + q_acc_2_10*src_dof_10 + q_acc_2_11*src_dof_11 + q_acc_2_12*src_dof_12 + q_acc_2_13*src_dof_13 + q_acc_2_14*src_dof_14 + q_acc_2_15*src_dof_15 + q_acc_2_16*src_dof_16 + q_acc_2_17*src_dof_17 + q_acc_2_18*src_dof_18 + q_acc_2_19*src_dof_19 + q_acc_2_2*src_dof_2 + q_acc_2_20*src_dof_20 + q_acc_2_21*src_dof_21 + q_acc_2_22*src_dof_22 + q_acc_2_23*src_dof_23 + q_acc_2_24*src_dof_24 + q_acc_2_25*src_dof_25 + q_acc_2_26*src_dof_26 + q_acc_2_27*src_dof_27 + q_acc_2_28*src_dof_28 + q_acc_2_29*src_dof_29 + q_acc_2_3*src_dof_3 + q_acc_2_4*src_dof_4 + q_acc_2_5*src_dof_5 + q_acc_2_6*src_dof_6 + q_acc_2_7*src_dof_7 + q_acc_2_8*src_dof_8 + q_acc_2_9*src_dof_9; + const real_t elMatVec_3 = q_acc_3_0*src_dof_0 + q_acc_3_1*src_dof_1 + q_acc_3_10*src_dof_10 + q_acc_3_11*src_dof_11 + q_acc_3_12*src_dof_12 + q_acc_3_13*src_dof_13 + q_acc_3_14*src_dof_14 + q_acc_3_15*src_dof_15 + q_acc_3_16*src_dof_16 + q_acc_3_17*src_dof_17 + q_acc_3_18*src_dof_18 + q_acc_3_19*src_dof_19 + q_acc_3_2*src_dof_2 + q_acc_3_20*src_dof_20 + q_acc_3_21*src_dof_21 + q_acc_3_22*src_dof_22 + q_acc_3_23*src_dof_23 + q_acc_3_24*src_dof_24 + q_acc_3_25*src_dof_25 + q_acc_3_26*src_dof_26 + q_acc_3_27*src_dof_27 + q_acc_3_28*src_dof_28 + q_acc_3_29*src_dof_29 + q_acc_3_3*src_dof_3 + q_acc_3_4*src_dof_4 + q_acc_3_5*src_dof_5 + q_acc_3_6*src_dof_6 + q_acc_3_7*src_dof_7 + q_acc_3_8*src_dof_8 + q_acc_3_9*src_dof_9; + _data_dst[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1] = elMatVec_0 + _data_dst[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1]; + _data_dst[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))] = elMatVec_1 + _data_dst[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))]; + _data_dst[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))] = elMatVec_2 + _data_dst[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))]; + _data_dst[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1] = elMatVec_3 + _data_dst[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1]; + } + } + } + const real_t tmp_coords_jac_0_GREEN_DOWN = 1.0 / (micro_edges_per_macro_edge_float)*1.0; + const real_t tmp_coords_jac_1_GREEN_DOWN = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_GREEN_DOWN*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0); + const real_t tmp_coords_jac_2_GREEN_DOWN = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_GREEN_DOWN*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1); + const real_t tmp_coords_jac_3_GREEN_DOWN = macro_vertex_coord_id_0comp2 + tmp_coords_jac_0_GREEN_DOWN*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2); + const real_t tmp_coords_jac_4_GREEN_DOWN = tmp_coords_jac_0_GREEN_DOWN*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0); + const real_t tmp_coords_jac_5_GREEN_DOWN = tmp_coords_jac_0_GREEN_DOWN*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1); + const real_t tmp_coords_jac_6_GREEN_DOWN = tmp_coords_jac_0_GREEN_DOWN*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2); + const real_t tmp_coords_jac_7_GREEN_DOWN = tmp_coords_jac_0_GREEN_DOWN*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0); + const real_t tmp_coords_jac_8_GREEN_DOWN = tmp_coords_jac_0_GREEN_DOWN*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1); + const real_t tmp_coords_jac_9_GREEN_DOWN = tmp_coords_jac_0_GREEN_DOWN*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2); + const real_t p_affine_const_0_0_GREEN_DOWN = tmp_coords_jac_1_GREEN_DOWN; + const real_t p_affine_const_0_1_GREEN_DOWN = tmp_coords_jac_2_GREEN_DOWN; + const real_t p_affine_const_0_2_GREEN_DOWN = tmp_coords_jac_3_GREEN_DOWN; + const real_t p_affine_const_1_0_GREEN_DOWN = tmp_coords_jac_1_GREEN_DOWN + tmp_coords_jac_4_GREEN_DOWN; + const real_t p_affine_const_1_1_GREEN_DOWN = tmp_coords_jac_2_GREEN_DOWN + tmp_coords_jac_5_GREEN_DOWN; + const real_t p_affine_const_1_2_GREEN_DOWN = tmp_coords_jac_3_GREEN_DOWN + tmp_coords_jac_6_GREEN_DOWN; + const real_t p_affine_const_2_0_GREEN_DOWN = macro_vertex_coord_id_0comp0 + tmp_coords_jac_4_GREEN_DOWN + tmp_coords_jac_7_GREEN_DOWN; + const real_t p_affine_const_2_1_GREEN_DOWN = macro_vertex_coord_id_0comp1 + tmp_coords_jac_5_GREEN_DOWN + tmp_coords_jac_8_GREEN_DOWN; + const real_t p_affine_const_2_2_GREEN_DOWN = macro_vertex_coord_id_0comp2 + tmp_coords_jac_6_GREEN_DOWN + tmp_coords_jac_9_GREEN_DOWN; + const real_t p_affine_const_3_0_GREEN_DOWN = tmp_coords_jac_1_GREEN_DOWN + tmp_coords_jac_7_GREEN_DOWN; + const real_t p_affine_const_3_1_GREEN_DOWN = tmp_coords_jac_2_GREEN_DOWN + tmp_coords_jac_8_GREEN_DOWN; + const real_t p_affine_const_3_2_GREEN_DOWN = tmp_coords_jac_3_GREEN_DOWN + tmp_coords_jac_9_GREEN_DOWN; + const real_t jac_affine_0_0_GREEN_DOWN = -p_affine_const_0_0_GREEN_DOWN + p_affine_const_1_0_GREEN_DOWN; + const real_t jac_affine_0_1_GREEN_DOWN = -p_affine_const_0_0_GREEN_DOWN + p_affine_const_2_0_GREEN_DOWN; + const real_t jac_affine_0_2_GREEN_DOWN = -p_affine_const_0_0_GREEN_DOWN + p_affine_const_3_0_GREEN_DOWN; + const real_t jac_affine_1_0_GREEN_DOWN = -p_affine_const_0_1_GREEN_DOWN + p_affine_const_1_1_GREEN_DOWN; + const real_t jac_affine_1_1_GREEN_DOWN = -p_affine_const_0_1_GREEN_DOWN + p_affine_const_2_1_GREEN_DOWN; + const real_t tmp_coords_jac_14_GREEN_DOWN = jac_affine_0_2_GREEN_DOWN*jac_affine_1_1_GREEN_DOWN; + const real_t jac_affine_1_2_GREEN_DOWN = -p_affine_const_0_1_GREEN_DOWN + p_affine_const_3_1_GREEN_DOWN; + const real_t tmp_coords_jac_12_GREEN_DOWN = jac_affine_0_1_GREEN_DOWN*jac_affine_1_2_GREEN_DOWN; + const real_t jac_affine_2_0_GREEN_DOWN = -p_affine_const_0_2_GREEN_DOWN + p_affine_const_1_2_GREEN_DOWN; + const real_t jac_affine_2_1_GREEN_DOWN = -p_affine_const_0_2_GREEN_DOWN + p_affine_const_2_2_GREEN_DOWN; + const real_t tmp_coords_jac_11_GREEN_DOWN = jac_affine_1_2_GREEN_DOWN*jac_affine_2_1_GREEN_DOWN; + const real_t jac_affine_2_2_GREEN_DOWN = -p_affine_const_0_2_GREEN_DOWN + p_affine_const_3_2_GREEN_DOWN; + const real_t tmp_coords_jac_10_GREEN_DOWN = jac_affine_1_1_GREEN_DOWN*jac_affine_2_2_GREEN_DOWN; + const real_t tmp_coords_jac_13_GREEN_DOWN = jac_affine_0_1_GREEN_DOWN*jac_affine_2_2_GREEN_DOWN; + const real_t tmp_coords_jac_15_GREEN_DOWN = jac_affine_0_0_GREEN_DOWN*tmp_coords_jac_10_GREEN_DOWN - jac_affine_0_0_GREEN_DOWN*tmp_coords_jac_11_GREEN_DOWN + jac_affine_0_2_GREEN_DOWN*jac_affine_1_0_GREEN_DOWN*jac_affine_2_1_GREEN_DOWN - jac_affine_1_0_GREEN_DOWN*tmp_coords_jac_13_GREEN_DOWN + jac_affine_2_0_GREEN_DOWN*tmp_coords_jac_12_GREEN_DOWN - jac_affine_2_0_GREEN_DOWN*tmp_coords_jac_14_GREEN_DOWN; + const real_t tmp_coords_jac_16_GREEN_DOWN = 1.0 / (tmp_coords_jac_15_GREEN_DOWN); + const real_t jac_affine_inv_0_0_GREEN_DOWN = tmp_coords_jac_16_GREEN_DOWN*(tmp_coords_jac_10_GREEN_DOWN - tmp_coords_jac_11_GREEN_DOWN); + const real_t jac_affine_inv_0_1_GREEN_DOWN = tmp_coords_jac_16_GREEN_DOWN*(jac_affine_0_2_GREEN_DOWN*jac_affine_2_1_GREEN_DOWN - tmp_coords_jac_13_GREEN_DOWN); + const real_t jac_affine_inv_0_2_GREEN_DOWN = tmp_coords_jac_16_GREEN_DOWN*(tmp_coords_jac_12_GREEN_DOWN - tmp_coords_jac_14_GREEN_DOWN); + const real_t jac_affine_inv_1_0_GREEN_DOWN = tmp_coords_jac_16_GREEN_DOWN*(-jac_affine_1_0_GREEN_DOWN*jac_affine_2_2_GREEN_DOWN + jac_affine_1_2_GREEN_DOWN*jac_affine_2_0_GREEN_DOWN); + const real_t jac_affine_inv_1_1_GREEN_DOWN = tmp_coords_jac_16_GREEN_DOWN*(jac_affine_0_0_GREEN_DOWN*jac_affine_2_2_GREEN_DOWN - jac_affine_0_2_GREEN_DOWN*jac_affine_2_0_GREEN_DOWN); + const real_t jac_affine_inv_1_2_GREEN_DOWN = tmp_coords_jac_16_GREEN_DOWN*(-jac_affine_0_0_GREEN_DOWN*jac_affine_1_2_GREEN_DOWN + jac_affine_0_2_GREEN_DOWN*jac_affine_1_0_GREEN_DOWN); + const real_t jac_affine_inv_2_0_GREEN_DOWN = tmp_coords_jac_16_GREEN_DOWN*(jac_affine_1_0_GREEN_DOWN*jac_affine_2_1_GREEN_DOWN - jac_affine_1_1_GREEN_DOWN*jac_affine_2_0_GREEN_DOWN); + const real_t jac_affine_inv_2_1_GREEN_DOWN = tmp_coords_jac_16_GREEN_DOWN*(-jac_affine_0_0_GREEN_DOWN*jac_affine_2_1_GREEN_DOWN + jac_affine_0_1_GREEN_DOWN*jac_affine_2_0_GREEN_DOWN); + const real_t jac_affine_inv_2_2_GREEN_DOWN = tmp_coords_jac_16_GREEN_DOWN*(jac_affine_0_0_GREEN_DOWN*jac_affine_1_1_GREEN_DOWN - jac_affine_0_1_GREEN_DOWN*jac_affine_1_0_GREEN_DOWN); + const real_t abs_det_jac_affine_GREEN_DOWN = abs(tmp_coords_jac_15_GREEN_DOWN); + { + /* CellType.GREEN_DOWN */ + for (int64_t ctr_2 = 0; ctr_2 < micro_edges_per_macro_edge; ctr_2 += 1) + for (int64_t ctr_1 = 0; ctr_1 < -ctr_2 + micro_edges_per_macro_edge; ctr_1 += 1) + { + for (int64_t ctr_0 = 0; ctr_0 < (int64_t)((-ctr_1 - ctr_2 + micro_edges_per_macro_edge - 1) / (4)) * (4); ctr_0 += 4) + { + + const int64_t phantom_ctr_0 = ctr_0; + real_t _data_float_loop_ctr_array_dim_0[4]; + _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0; + _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1; + _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2; + _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3; + real_t _data_float_loop_ctr_array_dim_1[4]; + _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1; + real_t _data_float_loop_ctr_array_dim_2[4]; + _data_float_loop_ctr_array_dim_2[0] = (real_t) ctr_2; + _data_float_loop_ctr_array_dim_2[1] = (real_t) ctr_2; + _data_float_loop_ctr_array_dim_2[2] = (real_t) ctr_2; + _data_float_loop_ctr_array_dim_2[3] = (real_t) ctr_2; + + const __m256d p_affine_0_0 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_3comp0,macro_vertex_coord_id_3comp0,macro_vertex_coord_id_3comp0,macro_vertex_coord_id_3comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)); + const __m256d p_affine_0_1 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_3comp1,macro_vertex_coord_id_3comp1,macro_vertex_coord_id_3comp1,macro_vertex_coord_id_3comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)); + const __m256d p_affine_0_2 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)),_mm256_set_pd(macro_vertex_coord_id_1comp2,macro_vertex_coord_id_1comp2,macro_vertex_coord_id_1comp2,macro_vertex_coord_id_1comp2)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)),_mm256_set_pd(macro_vertex_coord_id_2comp2,macro_vertex_coord_id_2comp2,macro_vertex_coord_id_2comp2,macro_vertex_coord_id_2comp2)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)),_mm256_set_pd(macro_vertex_coord_id_3comp2,macro_vertex_coord_id_3comp2,macro_vertex_coord_id_3comp2,macro_vertex_coord_id_3comp2)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)); + const __m256d p_affine_1_0 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_3comp0,macro_vertex_coord_id_3comp0,macro_vertex_coord_id_3comp0,macro_vertex_coord_id_3comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)); + const __m256d p_affine_1_1 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_3comp1,macro_vertex_coord_id_3comp1,macro_vertex_coord_id_3comp1,macro_vertex_coord_id_3comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)); + const __m256d p_affine_1_2 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)),_mm256_set_pd(macro_vertex_coord_id_1comp2,macro_vertex_coord_id_1comp2,macro_vertex_coord_id_1comp2,macro_vertex_coord_id_1comp2)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)),_mm256_set_pd(macro_vertex_coord_id_2comp2,macro_vertex_coord_id_2comp2,macro_vertex_coord_id_2comp2,macro_vertex_coord_id_2comp2)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)),_mm256_set_pd(macro_vertex_coord_id_3comp2,macro_vertex_coord_id_3comp2,macro_vertex_coord_id_3comp2,macro_vertex_coord_id_3comp2)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)); + const __m256d p_affine_2_0 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_3comp0,macro_vertex_coord_id_3comp0,macro_vertex_coord_id_3comp0,macro_vertex_coord_id_3comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)); + const __m256d p_affine_2_1 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_3comp1,macro_vertex_coord_id_3comp1,macro_vertex_coord_id_3comp1,macro_vertex_coord_id_3comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)); + const __m256d p_affine_2_2 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)),_mm256_set_pd(macro_vertex_coord_id_1comp2,macro_vertex_coord_id_1comp2,macro_vertex_coord_id_1comp2,macro_vertex_coord_id_1comp2)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)),_mm256_set_pd(macro_vertex_coord_id_2comp2,macro_vertex_coord_id_2comp2,macro_vertex_coord_id_2comp2,macro_vertex_coord_id_2comp2)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)),_mm256_set_pd(macro_vertex_coord_id_3comp2,macro_vertex_coord_id_3comp2,macro_vertex_coord_id_3comp2,macro_vertex_coord_id_3comp2)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)); + const __m256d p_affine_3_0 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_3comp0,macro_vertex_coord_id_3comp0,macro_vertex_coord_id_3comp0,macro_vertex_coord_id_3comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)); + const __m256d p_affine_3_1 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_3comp1,macro_vertex_coord_id_3comp1,macro_vertex_coord_id_3comp1,macro_vertex_coord_id_3comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)); + const __m256d p_affine_3_2 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)),_mm256_set_pd(macro_vertex_coord_id_1comp2,macro_vertex_coord_id_1comp2,macro_vertex_coord_id_1comp2,macro_vertex_coord_id_1comp2)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)),_mm256_set_pd(macro_vertex_coord_id_2comp2,macro_vertex_coord_id_2comp2,macro_vertex_coord_id_2comp2,macro_vertex_coord_id_2comp2)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)),_mm256_set_pd(macro_vertex_coord_id_3comp2,macro_vertex_coord_id_3comp2,macro_vertex_coord_id_3comp2,macro_vertex_coord_id_3comp2)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)); + const __m256d src_dof_0 = _mm256_loadu_pd(& _data_src_vertex_0[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))]); + const __m256d src_dof_1 = _mm256_loadu_pd(& _data_src_vertex_0[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1]); + const __m256d src_dof_2 = _mm256_loadu_pd(& _data_src_vertex_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1]); + const __m256d src_dof_3 = _mm256_loadu_pd(& _data_src_vertex_0[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))]); + const __m256d src_dof_4 = _mm256_loadu_pd(& _data_src_edge_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + 4*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))]); + const __m256d src_dof_5 = _mm256_loadu_pd(& _data_src_edge_0[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 5*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]); + const __m256d src_dof_6 = _mm256_loadu_pd(& _data_src_edge_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + 1]); + const __m256d src_dof_7 = _mm256_loadu_pd(& _data_src_edge_0[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 3*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]); + const __m256d src_dof_8 = _mm256_loadu_pd(& _data_src_edge_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge - 1)*(micro_edges_per_macro_edge + 1)) / (6)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))]); + const __m256d src_dof_9 = _mm256_loadu_pd(& _data_src_edge_0[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]); + const __m256d src_dof_10 = _mm256_loadu_pd(& _data_src_vertex_1[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))]); + const __m256d src_dof_11 = _mm256_loadu_pd(& _data_src_vertex_1[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1]); + const __m256d src_dof_12 = _mm256_loadu_pd(& _data_src_vertex_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1]); + const __m256d src_dof_13 = _mm256_loadu_pd(& _data_src_vertex_1[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))]); + const __m256d src_dof_14 = _mm256_loadu_pd(& _data_src_edge_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + 4*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))]); + const __m256d src_dof_15 = _mm256_loadu_pd(& _data_src_edge_1[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 5*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]); + const __m256d src_dof_16 = _mm256_loadu_pd(& _data_src_edge_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + 1]); + const __m256d src_dof_17 = _mm256_loadu_pd(& _data_src_edge_1[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 3*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]); + const __m256d src_dof_18 = _mm256_loadu_pd(& _data_src_edge_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge - 1)*(micro_edges_per_macro_edge + 1)) / (6)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))]); + const __m256d src_dof_19 = _mm256_loadu_pd(& _data_src_edge_1[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]); + const __m256d src_dof_20 = _mm256_loadu_pd(& _data_src_vertex_2[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))]); + const __m256d src_dof_21 = _mm256_loadu_pd(& _data_src_vertex_2[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1]); + const __m256d src_dof_22 = _mm256_loadu_pd(& _data_src_vertex_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1]); + const __m256d src_dof_23 = _mm256_loadu_pd(& _data_src_vertex_2[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))]); + const __m256d src_dof_24 = _mm256_loadu_pd(& _data_src_edge_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + 4*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))]); + const __m256d src_dof_25 = _mm256_loadu_pd(& _data_src_edge_2[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 5*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]); + const __m256d src_dof_26 = _mm256_loadu_pd(& _data_src_edge_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + 1]); + const __m256d src_dof_27 = _mm256_loadu_pd(& _data_src_edge_2[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 3*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]); + const __m256d src_dof_28 = _mm256_loadu_pd(& _data_src_edge_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge - 1)*(micro_edges_per_macro_edge + 1)) / (6)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))]); + const __m256d src_dof_29 = _mm256_loadu_pd(& _data_src_edge_2[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]); + const __m256d rho_dof_0 = _mm256_loadu_pd(& _data_rhoVertex[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))]); + const __m256d rho_dof_1 = _mm256_loadu_pd(& _data_rhoVertex[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1]); + const __m256d rho_dof_2 = _mm256_loadu_pd(& _data_rhoVertex[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1]); + const __m256d rho_dof_3 = _mm256_loadu_pd(& _data_rhoVertex[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))]); + const __m256d rho_dof_4 = _mm256_loadu_pd(& _data_rhoEdge[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + 4*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))]); + const __m256d rho_dof_5 = _mm256_loadu_pd(& _data_rhoEdge[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 5*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]); + const __m256d rho_dof_6 = _mm256_loadu_pd(& _data_rhoEdge[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + 1]); + const __m256d rho_dof_7 = _mm256_loadu_pd(& _data_rhoEdge[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 3*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]); + const __m256d rho_dof_8 = _mm256_loadu_pd(& _data_rhoEdge[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge - 1)*(micro_edges_per_macro_edge + 1)) / (6)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))]); + const __m256d rho_dof_9 = _mm256_loadu_pd(& _data_rhoEdge[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]); + __m256d q_acc_0_0 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_1 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_2 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_3 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_4 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_5 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_6 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_7 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_8 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_9 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_10 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_11 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_12 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_13 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_14 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_15 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_16 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_17 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_18 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_19 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_20 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_21 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_22 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_23 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_24 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_25 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_26 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_27 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_28 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_29 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_0 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_1 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_2 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_3 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_4 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_5 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_6 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_7 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_8 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_9 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_10 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_11 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_12 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_13 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_14 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_15 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_16 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_17 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_18 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_19 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_20 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_21 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_22 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_23 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_24 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_25 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_26 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_27 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_28 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_29 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_0 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_1 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_2 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_3 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_4 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_5 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_6 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_7 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_8 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_9 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_10 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_11 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_12 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_13 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_14 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_15 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_16 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_17 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_18 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_19 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_20 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_21 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_22 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_23 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_24 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_25 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_26 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_27 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_28 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_29 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_0 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_1 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_2 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_3 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_4 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_5 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_6 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_7 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_8 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_9 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_10 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_11 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_12 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_13 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_14 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_15 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_16 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_17 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_18 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_19 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_20 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_21 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_22 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_23 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_24 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_25 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_26 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_27 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_28 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_29 = _mm256_set_pd(0.0,0.0,0.0,0.0); + for (int64_t q = 0; q < 5; q += 1) + { + const __m256d tmp_qloop_18 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(p_affine_0_0,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),p_affine_1_0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(p_affine_0_0,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),p_affine_2_0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]))),_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(p_affine_0_0,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),p_affine_3_0),_mm256_set_pd(_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q]))),p_affine_0_0); + const __m256d tmp_qloop_19 = _mm256_mul_pd(tmp_qloop_18,tmp_qloop_18); + const __m256d tmp_qloop_22 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(p_affine_0_2,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),p_affine_1_2),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(p_affine_0_2,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),p_affine_2_2),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]))),_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(p_affine_0_2,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),p_affine_3_2),_mm256_set_pd(_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q]))),p_affine_0_2); + const __m256d tmp_qloop_23 = _mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(tmp_qloop_12,tmp_qloop_12,tmp_qloop_12,tmp_qloop_12)),_mm256_mul_pd(tmp_qloop_22,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))); + const __m256d tmp_qloop_24 = _mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(tmp_qloop_1,tmp_qloop_1,tmp_qloop_1,tmp_qloop_1)),_mm256_mul_pd(tmp_qloop_18,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))); + const __m256d tmp_qloop_25 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(p_affine_0_1,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),p_affine_1_1),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(p_affine_0_1,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),p_affine_2_1),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]))),_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(p_affine_0_1,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),p_affine_3_1),_mm256_set_pd(_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q]))),p_affine_0_1); + const __m256d tmp_qloop_26 = _mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(tmp_qloop_8,tmp_qloop_8,tmp_qloop_8,tmp_qloop_8)),_mm256_mul_pd(tmp_qloop_25,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))); + const __m256d tmp_qloop_27 = _mm256_mul_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_24,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_qloop_14,tmp_qloop_14,tmp_qloop_14,tmp_qloop_14)),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_26,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_qloop_15,tmp_qloop_15,tmp_qloop_15,tmp_qloop_15))),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_23,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_qloop_7,tmp_qloop_7,tmp_qloop_7,tmp_qloop_7))),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_23,_mm256_set_pd(tmp_qloop_10,tmp_qloop_10,tmp_qloop_10,tmp_qloop_10)),_mm256_set_pd(tmp_qloop_9,tmp_qloop_9,tmp_qloop_9,tmp_qloop_9))),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_24,_mm256_set_pd(tmp_qloop_13,tmp_qloop_13,tmp_qloop_13,tmp_qloop_13)),_mm256_set_pd(tmp_qloop_6,tmp_qloop_6,tmp_qloop_6,tmp_qloop_6))),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_26,_mm256_set_pd(tmp_qloop_2,tmp_qloop_2,tmp_qloop_2,tmp_qloop_2)),_mm256_set_pd(tmp_qloop_3,tmp_qloop_3,tmp_qloop_3,tmp_qloop_3))),_mm256_set_pd(tmp_qloop_21,tmp_qloop_21,tmp_qloop_21,tmp_qloop_21)); + const __m256d tmp_qloop_28 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_27,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_qloop_20,tmp_qloop_20,tmp_qloop_20,tmp_qloop_20)); + const __m256d tmp_qloop_29 = _mm256_mul_pd(tmp_qloop_25,tmp_qloop_25); + const __m256d tmp_qloop_30 = _mm256_mul_pd(tmp_qloop_22,tmp_qloop_22); + const __m256d tmp_qloop_31 = _mm256_add_pd(_mm256_add_pd(tmp_qloop_19,tmp_qloop_29),tmp_qloop_30); + const __m256d tmp_qloop_34 = _mm256_mul_pd(tmp_qloop_18,_mm256_set_pd(tmp_qloop_33,tmp_qloop_33,tmp_qloop_33,tmp_qloop_33)); + const __m256d tmp_qloop_35 = _mm256_add_pd(tmp_qloop_28,tmp_qloop_34); + const __m256d tmp_qloop_36 = _mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_31,tmp_qloop_35),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_19,tmp_qloop_28),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_set_pd(tmp_qloop_17,tmp_qloop_17,tmp_qloop_17,tmp_qloop_17)); + const __m256d tmp_qloop_37 = _mm256_div_pd(_mm256_sqrt_pd(tmp_qloop_31),_mm256_mul_pd(tmp_qloop_31,tmp_qloop_31)); + const __m256d tmp_qloop_38 = _mm256_mul_pd(tmp_qloop_37,_mm256_set_pd(1.0,1.0,1.0,1.0)); + const __m256d tmp_qloop_41 = _mm256_mul_pd(tmp_qloop_31,_mm256_set_pd(tmp_qloop_40,tmp_qloop_40,tmp_qloop_40,tmp_qloop_40)); + const __m256d tmp_qloop_42 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_41,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_25,tmp_qloop_28),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))); + const __m256d tmp_qloop_43 = _mm256_mul_pd(tmp_qloop_38,_mm256_set_pd(tmp_qloop_17,tmp_qloop_17,tmp_qloop_17,tmp_qloop_17)); + const __m256d tmp_qloop_44 = _mm256_mul_pd(tmp_qloop_42,tmp_qloop_43); + const __m256d tmp_qloop_47 = _mm256_mul_pd(tmp_qloop_28,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)); + const __m256d tmp_qloop_48 = _mm256_mul_pd(tmp_qloop_22,tmp_qloop_47); + const __m256d tmp_qloop_49 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_31,_mm256_set_pd(tmp_qloop_46,tmp_qloop_46,tmp_qloop_46,tmp_qloop_46)),tmp_qloop_48); + const __m256d tmp_qloop_50 = _mm256_mul_pd(tmp_qloop_43,tmp_qloop_49); + const __m256d tmp_qloop_51 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_31,_mm256_set_pd(tmp_qloop_33,tmp_qloop_33,tmp_qloop_33,tmp_qloop_33)),_mm256_mul_pd(tmp_qloop_18,tmp_qloop_47)); + const __m256d tmp_qloop_52 = _mm256_mul_pd(tmp_qloop_43,tmp_qloop_51); + const __m256d tmp_qloop_54 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_25,_mm256_set_pd(tmp_qloop_53,tmp_qloop_53,tmp_qloop_53,tmp_qloop_53)),tmp_qloop_28); + const __m256d tmp_qloop_55 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_29,tmp_qloop_47),_mm256_mul_pd(tmp_qloop_31,tmp_qloop_54)); + const __m256d tmp_qloop_56 = _mm256_mul_pd(tmp_qloop_22,_mm256_set_pd(tmp_qloop_46,tmp_qloop_46,tmp_qloop_46,tmp_qloop_46)); + const __m256d tmp_qloop_57 = _mm256_add_pd(tmp_qloop_28,tmp_qloop_56); + const __m256d tmp_qloop_58 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_30,tmp_qloop_47),_mm256_mul_pd(tmp_qloop_31,tmp_qloop_57)); + const __m256d tmp_qloop_66 = _mm256_mul_pd(_mm256_div_pd(_mm256_sqrt_pd(tmp_qloop_31),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_31,tmp_qloop_31),tmp_qloop_31)),_mm256_set_pd(3.0,3.0,3.0,3.0)); + const __m256d tmp_qloop_67 = _mm256_mul_pd(tmp_qloop_36,tmp_qloop_66); + const __m256d tmp_qloop_69 = _mm256_mul_pd(tmp_qloop_18,_mm256_set_pd(2.0,2.0,2.0,2.0)); + const __m256d tmp_qloop_70 = _mm256_mul_pd(tmp_qloop_41,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)); + const __m256d tmp_qloop_71 = _mm256_mul_pd(tmp_qloop_35,_mm256_set_pd(2.0,2.0,2.0,2.0)); + const __m256d tmp_qloop_73 = _mm256_mul_pd(_mm256_mul_pd(tmp_qloop_31,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_qloop_72,tmp_qloop_72,tmp_qloop_72,tmp_qloop_72)); + const __m256d tmp_qloop_74 = _mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(tmp_qloop_20,tmp_qloop_20,tmp_qloop_20,tmp_qloop_20)),tmp_qloop_27); + const __m256d tmp_qloop_75 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_34,_mm256_set_pd(2.0,2.0,2.0,2.0)),_mm256_mul_pd(tmp_qloop_18,_mm256_set_pd(tmp_qloop_68,tmp_qloop_68,tmp_qloop_68,tmp_qloop_68))),tmp_qloop_74); + const __m256d tmp_qloop_76 = _mm256_mul_pd(tmp_qloop_66,_mm256_set_pd(tmp_qloop_17,tmp_qloop_17,tmp_qloop_17,tmp_qloop_17)); + const __m256d tmp_qloop_77 = _mm256_mul_pd(tmp_qloop_51,tmp_qloop_76); + const __m256d tmp_qloop_78 = _mm256_mul_pd(tmp_qloop_25,tmp_qloop_77); + const __m256d tmp_qloop_79 = _mm256_mul_pd(tmp_qloop_25,_mm256_set_pd(2.0,2.0,2.0,2.0)); + const __m256d tmp_qloop_80 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_79,_mm256_set_pd(tmp_qloop_33,tmp_qloop_33,tmp_qloop_33,tmp_qloop_33)),_mm256_mul_pd(tmp_qloop_18,_mm256_set_pd(tmp_qloop_40,tmp_qloop_40,tmp_qloop_40,tmp_qloop_40))); + const __m256d tmp_qloop_81 = _mm256_mul_pd(tmp_qloop_25,tmp_qloop_43); + const __m256d tmp_qloop_82 = _mm256_mul_pd(tmp_qloop_22,_mm256_set_pd(2.0,2.0,2.0,2.0)); + const __m256d tmp_qloop_83 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_82,_mm256_set_pd(tmp_qloop_33,tmp_qloop_33,tmp_qloop_33,tmp_qloop_33)),_mm256_mul_pd(tmp_qloop_18,_mm256_set_pd(tmp_qloop_72,tmp_qloop_72,tmp_qloop_72,tmp_qloop_72))); + const __m256d tmp_qloop_84 = _mm256_mul_pd(_mm256_mul_pd(tmp_qloop_22,tmp_qloop_78),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)); + const __m256d tmp_qloop_85 = _mm256_mul_pd(tmp_qloop_18,tmp_qloop_22); + const __m256d tmp_qloop_86 = _mm256_mul_pd(tmp_qloop_22,tmp_qloop_43); + const __m256d tmp_qloop_87 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_69,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_qloop_40,tmp_qloop_40,tmp_qloop_40,tmp_qloop_40)),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_25,_mm256_set_pd(tmp_qloop_21,tmp_qloop_21,tmp_qloop_21,tmp_qloop_21)),_mm256_set_pd(tmp_qloop_32,tmp_qloop_32,tmp_qloop_32,tmp_qloop_32))); + const __m256d tmp_qloop_88 = _mm256_mul_pd(tmp_qloop_18,tmp_qloop_43); + const __m256d tmp_qloop_89 = _mm256_mul_pd(tmp_qloop_42,tmp_qloop_76); + const __m256d tmp_qloop_90 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_28,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_25,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_qloop_40,tmp_qloop_40,tmp_qloop_40,tmp_qloop_40))); + const __m256d tmp_qloop_91 = _mm256_mul_pd(tmp_qloop_25,tmp_qloop_89); + const __m256d tmp_qloop_92 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_25,_mm256_set_pd(tmp_qloop_72,tmp_qloop_72,tmp_qloop_72,tmp_qloop_72)),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_82,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_qloop_40,tmp_qloop_40,tmp_qloop_40,tmp_qloop_40))); + const __m256d tmp_qloop_93 = _mm256_mul_pd(tmp_qloop_85,tmp_qloop_89); + const __m256d tmp_qloop_94 = _mm256_mul_pd(_mm256_mul_pd(tmp_qloop_31,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_qloop_68,tmp_qloop_68,tmp_qloop_68,tmp_qloop_68)); + const __m256d tmp_qloop_95 = _mm256_mul_pd(tmp_qloop_55,tmp_qloop_76); + const __m256d tmp_qloop_96 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_69,_mm256_set_pd(tmp_qloop_46,tmp_qloop_46,tmp_qloop_46,tmp_qloop_46)),_mm256_mul_pd(tmp_qloop_22,_mm256_set_pd(tmp_qloop_68,tmp_qloop_68,tmp_qloop_68,tmp_qloop_68))); + const __m256d tmp_qloop_97 = _mm256_mul_pd(tmp_qloop_49,tmp_qloop_76); + const __m256d tmp_qloop_98 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_22,_mm256_set_pd(tmp_qloop_40,tmp_qloop_40,tmp_qloop_40,tmp_qloop_40)),_mm256_mul_pd(tmp_qloop_79,_mm256_set_pd(tmp_qloop_46,tmp_qloop_46,tmp_qloop_46,tmp_qloop_46))); + const __m256d tmp_qloop_99 = _mm256_mul_pd(tmp_qloop_25,tmp_qloop_97); + const __m256d tmp_qloop_100 = _mm256_mul_pd(tmp_qloop_18,tmp_qloop_99); + const __m256d tmp_qloop_101 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_56,_mm256_set_pd(2.0,2.0,2.0,2.0)),_mm256_mul_pd(tmp_qloop_22,_mm256_set_pd(tmp_qloop_72,tmp_qloop_72,tmp_qloop_72,tmp_qloop_72))),tmp_qloop_74); + const __m256d tmp_qloop_102 = _mm256_mul_pd(tmp_qloop_58,tmp_qloop_76); + const __m256d tmp_qloop_103 = _mm256_mul_pd(_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q]),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])); + const __m256d tmp_qloop_104 = _mm256_mul_pd(tmp_qloop_103,_mm256_set_pd(2.0,2.0,2.0,2.0)); + const __m256d tmp_qloop_105 = _mm256_mul_pd(_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q])); + const __m256d tmp_qloop_106 = _mm256_mul_pd(tmp_qloop_105,_mm256_set_pd(2.0,2.0,2.0,2.0)); + const __m256d tmp_qloop_107 = _mm256_mul_pd(_mm256_set_pd(_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q]),_mm256_set_pd(_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q])); + const __m256d tmp_qloop_108 = _mm256_mul_pd(tmp_qloop_107,_mm256_set_pd(2.0,2.0,2.0,2.0)); + const __m256d tmp_qloop_109 = _mm256_mul_pd(_mm256_set_pd(4.0,4.0,4.0,4.0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q])); + const __m256d tmp_qloop_110 = _mm256_mul_pd(tmp_qloop_109,_mm256_set_pd(_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q])); + const __m256d tmp_qloop_111 = _mm256_mul_pd(_mm256_set_pd(4.0,4.0,4.0,4.0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])); + const __m256d tmp_qloop_112 = _mm256_mul_pd(tmp_qloop_111,_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q])); + const __m256d tmp_qloop_113 = _mm256_mul_pd(tmp_qloop_111,_mm256_set_pd(_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q])); + const __m256d tmp_qloop_114 = _mm256_add_pd(tmp_qloop_112,tmp_qloop_113); + const __m256d tmp_qloop_115 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-3.0,-3.0,-3.0,-3.0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),_mm256_mul_pd(_mm256_set_pd(-3.0,-3.0,-3.0,-3.0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]))),_mm256_mul_pd(_mm256_set_pd(-3.0,-3.0,-3.0,-3.0),_mm256_set_pd(_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),tmp_qloop_104),tmp_qloop_106),tmp_qloop_108),tmp_qloop_110),tmp_qloop_114); + const __m256d tmp_qloop_116 = _mm256_mul_pd(_mm256_set_pd(4.0,4.0,4.0,4.0),_mm256_set_pd(_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q])); + const __m256d tmp_qloop_117 = _mm256_add_pd(tmp_qloop_109,tmp_qloop_116); + const __m256d tmp_qloop_118 = _mm256_mul_pd(_mm256_mul_pd(rho_dof_8,tmp_qloop_109),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)); + const __m256d tmp_qloop_119 = _mm256_mul_pd(rho_dof_0,_mm256_add_pd(_mm256_add_pd(_mm256_set_pd(-3.0,-3.0,-3.0,-3.0),tmp_qloop_111),tmp_qloop_117)); + const __m256d tmp_qloop_120 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(rho_dof_7,tmp_qloop_116),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),tmp_qloop_119); + const __m256d tmp_qloop_121 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(rho_dof_1,_mm256_add_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),tmp_qloop_111)),_mm256_mul_pd(rho_dof_9,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_117,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_set_pd(-8.0,-8.0,-8.0,-8.0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q]))),_mm256_set_pd(4.0,4.0,4.0,4.0)))),_mm256_mul_pd(rho_dof_5,tmp_qloop_116)),_mm256_mul_pd(rho_dof_6,tmp_qloop_109)),tmp_qloop_118),tmp_qloop_120); + const __m256d tmp_qloop_122 = _mm256_add_pd(_mm256_set_pd(-4.0,-4.0,-4.0,-4.0),tmp_qloop_111); + const __m256d tmp_qloop_123 = _mm256_mul_pd(_mm256_mul_pd(rho_dof_9,tmp_qloop_111),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)); + const __m256d tmp_qloop_124 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(rho_dof_2,_mm256_add_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),tmp_qloop_109)),_mm256_mul_pd(rho_dof_8,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_116,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_122,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_mul_pd(_mm256_set_pd(-8.0,-8.0,-8.0,-8.0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]))))),_mm256_mul_pd(rho_dof_4,tmp_qloop_116)),_mm256_mul_pd(rho_dof_6,tmp_qloop_111)),tmp_qloop_120),tmp_qloop_123); + const __m256d tmp_qloop_125 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(rho_dof_3,_mm256_add_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),tmp_qloop_116)),_mm256_mul_pd(rho_dof_7,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_109,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_122,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_mul_pd(_mm256_set_pd(-8.0,-8.0,-8.0,-8.0),_mm256_set_pd(_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q]))))),_mm256_mul_pd(rho_dof_4,tmp_qloop_109)),_mm256_mul_pd(rho_dof_5,tmp_qloop_111)),tmp_qloop_118),tmp_qloop_119),tmp_qloop_123); + const __m256d tmp_qloop_127 = _mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),tmp_qloop_104); + const __m256d tmp_qloop_128 = _mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q])),tmp_qloop_106); + const __m256d tmp_qloop_129 = _mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q])),tmp_qloop_108); + const __m256d tmp_qloop_130 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_110,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_113,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_mul_pd(tmp_qloop_107,_mm256_set_pd(-4.0,-4.0,-4.0,-4.0))),tmp_qloop_116); + const __m256d tmp_qloop_131 = _mm256_mul_pd(tmp_qloop_105,_mm256_set_pd(4.0,4.0,4.0,4.0)); + const __m256d tmp_qloop_132 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_110,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_112,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_mul_pd(tmp_qloop_131,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_109); + const __m256d tmp_qloop_133 = _mm256_mul_pd(tmp_qloop_103,_mm256_set_pd(4.0,4.0,4.0,4.0)); + const __m256d tmp_qloop_134 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_114,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_133,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_111); + const __m256d tmp_qloop_158 = _mm256_mul_pd(tmp_qloop_105,tmp_qloop_111); + const __m256d jac_blending_0_0 = _mm256_mul_pd(tmp_qloop_36,tmp_qloop_38); + const __m256d jac_blending_0_1 = _mm256_mul_pd(tmp_qloop_18,tmp_qloop_44); + const __m256d jac_blending_0_2 = _mm256_mul_pd(tmp_qloop_18,tmp_qloop_50); + const __m256d jac_blending_1_0 = _mm256_mul_pd(tmp_qloop_25,tmp_qloop_52); + const __m256d jac_blending_1_1 = _mm256_mul_pd(tmp_qloop_43,tmp_qloop_55); + const __m256d tmp_qloop_63 = _mm256_mul_pd(jac_blending_0_2,jac_blending_1_1); + const __m256d jac_blending_1_2 = _mm256_mul_pd(tmp_qloop_25,tmp_qloop_50); + const __m256d tmp_qloop_60 = _mm256_mul_pd(jac_blending_0_1,jac_blending_1_2); + const __m256d jac_blending_2_0 = _mm256_mul_pd(tmp_qloop_22,tmp_qloop_52); + const __m256d jac_blending_2_1 = _mm256_mul_pd(tmp_qloop_22,tmp_qloop_44); + const __m256d tmp_qloop_61 = _mm256_mul_pd(jac_blending_1_2,jac_blending_2_1); + const __m256d jac_blending_2_2 = _mm256_mul_pd(tmp_qloop_43,tmp_qloop_58); + const __m256d tmp_qloop_59 = _mm256_mul_pd(jac_blending_1_1,jac_blending_2_2); + const __m256d tmp_qloop_62 = _mm256_mul_pd(jac_blending_0_1,jac_blending_2_2); + const __m256d tmp_qloop_64 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(jac_blending_0_0,tmp_qloop_59),_mm256_mul_pd(jac_blending_2_0,tmp_qloop_60)),_mm256_mul_pd(_mm256_mul_pd(jac_blending_0_0,tmp_qloop_61),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_mul_pd(_mm256_mul_pd(jac_blending_1_0,tmp_qloop_62),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_mul_pd(_mm256_mul_pd(jac_blending_2_0,tmp_qloop_63),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_mul_pd(_mm256_mul_pd(jac_blending_0_2,jac_blending_1_0),jac_blending_2_1)); + const __m256d tmp_qloop_65 = _mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),tmp_qloop_64); + const __m256d abs_det_jac_blending = tmp_qloop_64; + const __m256d tmp_qloop_135 = _mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(abs_det_jac_blending,_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(rho_dof_0,tmp_qloop_115),_mm256_mul_pd(rho_dof_1,tmp_qloop_127)),_mm256_mul_pd(rho_dof_2,tmp_qloop_128)),_mm256_mul_pd(rho_dof_3,tmp_qloop_129)),_mm256_mul_pd(rho_dof_4,tmp_qloop_110)),_mm256_mul_pd(rho_dof_5,tmp_qloop_113)),_mm256_mul_pd(rho_dof_6,tmp_qloop_112)),_mm256_mul_pd(rho_dof_7,tmp_qloop_130)),_mm256_mul_pd(rho_dof_8,tmp_qloop_132)),_mm256_mul_pd(rho_dof_9,tmp_qloop_134)))),_mm256_set_pd(_data_q_w[q],_data_q_w[q],_data_q_w[q],_data_q_w[q])),_mm256_set_pd(abs_det_jac_affine_GREEN_DOWN,abs_det_jac_affine_GREEN_DOWN,abs_det_jac_affine_GREEN_DOWN,abs_det_jac_affine_GREEN_DOWN)); + const __m256d tmp_qloop_136 = _mm256_mul_pd(tmp_qloop_135,_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]))),_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q]))),_mm256_set_pd(1.0,1.0,1.0,1.0))); + const __m256d tmp_qloop_142 = _mm256_mul_pd(tmp_qloop_135,_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])); + const __m256d tmp_qloop_144 = _mm256_mul_pd(tmp_qloop_135,_mm256_set_pd(_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q])); + const __m256d tmp_qloop_147 = _mm256_mul_pd(tmp_qloop_135,_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q])); + const __m256d tmp_qloop_159 = _mm256_mul_pd(tmp_qloop_135,tmp_qloop_158); + const __m256d tmp_qloop_161 = _mm256_mul_pd(tmp_qloop_107,tmp_qloop_135); + const __m256d jac_blending_inv_0_0 = _mm256_mul_pd(tmp_qloop_65,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_61,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),tmp_qloop_59)); + const __m256d jac_blending_inv_0_1 = _mm256_mul_pd(tmp_qloop_65,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_62,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(jac_blending_0_2,jac_blending_2_1))); + const __m256d jac_blending_inv_0_2 = _mm256_mul_pd(tmp_qloop_65,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_63,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),tmp_qloop_60)); + const __m256d jac_blending_inv_1_0 = _mm256_mul_pd(tmp_qloop_65,_mm256_add_pd(_mm256_mul_pd(jac_blending_1_2,jac_blending_2_0),_mm256_mul_pd(_mm256_mul_pd(jac_blending_1_0,jac_blending_2_2),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)))); + const __m256d jac_blending_inv_1_1 = _mm256_mul_pd(tmp_qloop_65,_mm256_add_pd(_mm256_mul_pd(jac_blending_0_0,jac_blending_2_2),_mm256_mul_pd(_mm256_mul_pd(jac_blending_0_2,jac_blending_2_0),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)))); + const __m256d jac_blending_inv_1_2 = _mm256_mul_pd(tmp_qloop_65,_mm256_add_pd(_mm256_mul_pd(jac_blending_0_2,jac_blending_1_0),_mm256_mul_pd(_mm256_mul_pd(jac_blending_0_0,jac_blending_1_2),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)))); + const __m256d jac_blending_inv_2_0 = _mm256_mul_pd(tmp_qloop_65,_mm256_add_pd(_mm256_mul_pd(jac_blending_1_0,jac_blending_2_1),_mm256_mul_pd(_mm256_mul_pd(jac_blending_1_1,jac_blending_2_0),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)))); + const __m256d tmp_qloop_126 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_121,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_0,_mm256_set_pd(jac_affine_inv_0_0_GREEN_DOWN,jac_affine_inv_0_0_GREEN_DOWN,jac_affine_inv_0_0_GREEN_DOWN,jac_affine_inv_0_0_GREEN_DOWN)),_mm256_mul_pd(jac_blending_inv_1_0,_mm256_set_pd(jac_affine_inv_0_1_GREEN_DOWN,jac_affine_inv_0_1_GREEN_DOWN,jac_affine_inv_0_1_GREEN_DOWN,jac_affine_inv_0_1_GREEN_DOWN))),_mm256_mul_pd(jac_blending_inv_2_0,_mm256_set_pd(jac_affine_inv_0_2_GREEN_DOWN,jac_affine_inv_0_2_GREEN_DOWN,jac_affine_inv_0_2_GREEN_DOWN,jac_affine_inv_0_2_GREEN_DOWN)))),_mm256_mul_pd(tmp_qloop_124,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_0,_mm256_set_pd(jac_affine_inv_1_0_GREEN_DOWN,jac_affine_inv_1_0_GREEN_DOWN,jac_affine_inv_1_0_GREEN_DOWN,jac_affine_inv_1_0_GREEN_DOWN)),_mm256_mul_pd(jac_blending_inv_1_0,_mm256_set_pd(jac_affine_inv_1_1_GREEN_DOWN,jac_affine_inv_1_1_GREEN_DOWN,jac_affine_inv_1_1_GREEN_DOWN,jac_affine_inv_1_1_GREEN_DOWN))),_mm256_mul_pd(jac_blending_inv_2_0,_mm256_set_pd(jac_affine_inv_1_2_GREEN_DOWN,jac_affine_inv_1_2_GREEN_DOWN,jac_affine_inv_1_2_GREEN_DOWN,jac_affine_inv_1_2_GREEN_DOWN))))),_mm256_mul_pd(tmp_qloop_125,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_0,_mm256_set_pd(jac_affine_inv_2_0_GREEN_DOWN,jac_affine_inv_2_0_GREEN_DOWN,jac_affine_inv_2_0_GREEN_DOWN,jac_affine_inv_2_0_GREEN_DOWN)),_mm256_mul_pd(jac_blending_inv_1_0,_mm256_set_pd(jac_affine_inv_2_1_GREEN_DOWN,jac_affine_inv_2_1_GREEN_DOWN,jac_affine_inv_2_1_GREEN_DOWN,jac_affine_inv_2_1_GREEN_DOWN))),_mm256_mul_pd(jac_blending_inv_2_0,_mm256_set_pd(jac_affine_inv_2_2_GREEN_DOWN,jac_affine_inv_2_2_GREEN_DOWN,jac_affine_inv_2_2_GREEN_DOWN,jac_affine_inv_2_2_GREEN_DOWN))))); + const __m256d tmp_qloop_137 = _mm256_mul_pd(tmp_qloop_126,tmp_qloop_136); + const __m256d tmp_qloop_143 = _mm256_mul_pd(tmp_qloop_126,tmp_qloop_142); + const __m256d tmp_qloop_145 = _mm256_mul_pd(tmp_qloop_126,tmp_qloop_144); + const __m256d tmp_qloop_146 = _mm256_mul_pd(tmp_qloop_112,tmp_qloop_145); + const __m256d tmp_qloop_148 = _mm256_mul_pd(tmp_qloop_126,tmp_qloop_147); + const __m256d tmp_qloop_157 = _mm256_mul_pd(tmp_qloop_126,tmp_qloop_135); + const __m256d tmp_qloop_160 = _mm256_mul_pd(tmp_qloop_107,tmp_qloop_157); + const __m256d jac_blending_inv_2_1 = _mm256_mul_pd(tmp_qloop_65,_mm256_add_pd(_mm256_mul_pd(jac_blending_0_1,jac_blending_2_0),_mm256_mul_pd(_mm256_mul_pd(jac_blending_0_0,jac_blending_2_1),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)))); + const __m256d tmp_qloop_138 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_121,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_1,_mm256_set_pd(jac_affine_inv_0_0_GREEN_DOWN,jac_affine_inv_0_0_GREEN_DOWN,jac_affine_inv_0_0_GREEN_DOWN,jac_affine_inv_0_0_GREEN_DOWN)),_mm256_mul_pd(jac_blending_inv_1_1,_mm256_set_pd(jac_affine_inv_0_1_GREEN_DOWN,jac_affine_inv_0_1_GREEN_DOWN,jac_affine_inv_0_1_GREEN_DOWN,jac_affine_inv_0_1_GREEN_DOWN))),_mm256_mul_pd(jac_blending_inv_2_1,_mm256_set_pd(jac_affine_inv_0_2_GREEN_DOWN,jac_affine_inv_0_2_GREEN_DOWN,jac_affine_inv_0_2_GREEN_DOWN,jac_affine_inv_0_2_GREEN_DOWN)))),_mm256_mul_pd(tmp_qloop_124,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_1,_mm256_set_pd(jac_affine_inv_1_0_GREEN_DOWN,jac_affine_inv_1_0_GREEN_DOWN,jac_affine_inv_1_0_GREEN_DOWN,jac_affine_inv_1_0_GREEN_DOWN)),_mm256_mul_pd(jac_blending_inv_1_1,_mm256_set_pd(jac_affine_inv_1_1_GREEN_DOWN,jac_affine_inv_1_1_GREEN_DOWN,jac_affine_inv_1_1_GREEN_DOWN,jac_affine_inv_1_1_GREEN_DOWN))),_mm256_mul_pd(jac_blending_inv_2_1,_mm256_set_pd(jac_affine_inv_1_2_GREEN_DOWN,jac_affine_inv_1_2_GREEN_DOWN,jac_affine_inv_1_2_GREEN_DOWN,jac_affine_inv_1_2_GREEN_DOWN))))),_mm256_mul_pd(tmp_qloop_125,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_1,_mm256_set_pd(jac_affine_inv_2_0_GREEN_DOWN,jac_affine_inv_2_0_GREEN_DOWN,jac_affine_inv_2_0_GREEN_DOWN,jac_affine_inv_2_0_GREEN_DOWN)),_mm256_mul_pd(jac_blending_inv_1_1,_mm256_set_pd(jac_affine_inv_2_1_GREEN_DOWN,jac_affine_inv_2_1_GREEN_DOWN,jac_affine_inv_2_1_GREEN_DOWN,jac_affine_inv_2_1_GREEN_DOWN))),_mm256_mul_pd(jac_blending_inv_2_1,_mm256_set_pd(jac_affine_inv_2_2_GREEN_DOWN,jac_affine_inv_2_2_GREEN_DOWN,jac_affine_inv_2_2_GREEN_DOWN,jac_affine_inv_2_2_GREEN_DOWN))))); + const __m256d tmp_qloop_139 = _mm256_mul_pd(tmp_qloop_136,tmp_qloop_138); + const __m256d tmp_qloop_149 = _mm256_mul_pd(tmp_qloop_138,tmp_qloop_142); + const __m256d tmp_qloop_150 = _mm256_mul_pd(tmp_qloop_138,tmp_qloop_144); + const __m256d tmp_qloop_151 = _mm256_mul_pd(tmp_qloop_112,tmp_qloop_150); + const __m256d tmp_qloop_152 = _mm256_mul_pd(tmp_qloop_138,tmp_qloop_147); + const __m256d tmp_qloop_162 = _mm256_mul_pd(tmp_qloop_138,tmp_qloop_161); + const __m256d jac_blending_inv_2_2 = _mm256_mul_pd(tmp_qloop_65,_mm256_add_pd(_mm256_mul_pd(jac_blending_0_0,jac_blending_1_1),_mm256_mul_pd(_mm256_mul_pd(jac_blending_0_1,jac_blending_1_0),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)))); + const __m256d tmp_qloop_140 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_121,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_2,_mm256_set_pd(jac_affine_inv_0_0_GREEN_DOWN,jac_affine_inv_0_0_GREEN_DOWN,jac_affine_inv_0_0_GREEN_DOWN,jac_affine_inv_0_0_GREEN_DOWN)),_mm256_mul_pd(jac_blending_inv_1_2,_mm256_set_pd(jac_affine_inv_0_1_GREEN_DOWN,jac_affine_inv_0_1_GREEN_DOWN,jac_affine_inv_0_1_GREEN_DOWN,jac_affine_inv_0_1_GREEN_DOWN))),_mm256_mul_pd(jac_blending_inv_2_2,_mm256_set_pd(jac_affine_inv_0_2_GREEN_DOWN,jac_affine_inv_0_2_GREEN_DOWN,jac_affine_inv_0_2_GREEN_DOWN,jac_affine_inv_0_2_GREEN_DOWN)))),_mm256_mul_pd(tmp_qloop_124,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_2,_mm256_set_pd(jac_affine_inv_1_0_GREEN_DOWN,jac_affine_inv_1_0_GREEN_DOWN,jac_affine_inv_1_0_GREEN_DOWN,jac_affine_inv_1_0_GREEN_DOWN)),_mm256_mul_pd(jac_blending_inv_1_2,_mm256_set_pd(jac_affine_inv_1_1_GREEN_DOWN,jac_affine_inv_1_1_GREEN_DOWN,jac_affine_inv_1_1_GREEN_DOWN,jac_affine_inv_1_1_GREEN_DOWN))),_mm256_mul_pd(jac_blending_inv_2_2,_mm256_set_pd(jac_affine_inv_1_2_GREEN_DOWN,jac_affine_inv_1_2_GREEN_DOWN,jac_affine_inv_1_2_GREEN_DOWN,jac_affine_inv_1_2_GREEN_DOWN))))),_mm256_mul_pd(tmp_qloop_125,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_2,_mm256_set_pd(jac_affine_inv_2_0_GREEN_DOWN,jac_affine_inv_2_0_GREEN_DOWN,jac_affine_inv_2_0_GREEN_DOWN,jac_affine_inv_2_0_GREEN_DOWN)),_mm256_mul_pd(jac_blending_inv_1_2,_mm256_set_pd(jac_affine_inv_2_1_GREEN_DOWN,jac_affine_inv_2_1_GREEN_DOWN,jac_affine_inv_2_1_GREEN_DOWN,jac_affine_inv_2_1_GREEN_DOWN))),_mm256_mul_pd(jac_blending_inv_2_2,_mm256_set_pd(jac_affine_inv_2_2_GREEN_DOWN,jac_affine_inv_2_2_GREEN_DOWN,jac_affine_inv_2_2_GREEN_DOWN,jac_affine_inv_2_2_GREEN_DOWN))))); + const __m256d tmp_qloop_141 = _mm256_mul_pd(tmp_qloop_136,tmp_qloop_140); + const __m256d tmp_qloop_153 = _mm256_mul_pd(tmp_qloop_140,tmp_qloop_142); + const __m256d tmp_qloop_154 = _mm256_mul_pd(tmp_qloop_140,tmp_qloop_144); + const __m256d tmp_qloop_155 = _mm256_mul_pd(tmp_qloop_112,tmp_qloop_154); + const __m256d tmp_qloop_156 = _mm256_mul_pd(tmp_qloop_140,tmp_qloop_147); + const __m256d tmp_qloop_163 = _mm256_mul_pd(tmp_qloop_140,tmp_qloop_161); + const __m256d hessian_blending_0_0_0 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_18,tmp_qloop_67),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_37,_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_31,_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(tmp_qloop_68,tmp_qloop_68,tmp_qloop_68,tmp_qloop_68)),_mm256_set_pd(tmp_qloop_33,tmp_qloop_33,tmp_qloop_33,tmp_qloop_33))),_mm256_mul_pd(tmp_qloop_19,_mm256_set_pd(tmp_qloop_68,tmp_qloop_68,tmp_qloop_68,tmp_qloop_68))),_mm256_mul_pd(tmp_qloop_35,tmp_qloop_69)),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_28,tmp_qloop_69),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_17,tmp_qloop_17,tmp_qloop_17,tmp_qloop_17))); + const __m256d hessian_blending_1_0_0 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_25,tmp_qloop_67),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_37,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_19,_mm256_set_pd(tmp_qloop_40,tmp_qloop_40,tmp_qloop_40,tmp_qloop_40)),_mm256_mul_pd(tmp_qloop_25,tmp_qloop_71)),tmp_qloop_70)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_17,tmp_qloop_17,tmp_qloop_17,tmp_qloop_17))); + const __m256d hessian_blending_2_0_0 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_22,tmp_qloop_67),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_37,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_19,_mm256_set_pd(tmp_qloop_72,tmp_qloop_72,tmp_qloop_72,tmp_qloop_72)),_mm256_mul_pd(tmp_qloop_22,tmp_qloop_71)),tmp_qloop_73)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_17,tmp_qloop_17,tmp_qloop_17,tmp_qloop_17))); + const __m256d hessian_blending_0_0_1 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_18,tmp_qloop_78),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_25,tmp_qloop_37),tmp_qloop_75),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_17,tmp_qloop_17,tmp_qloop_17,tmp_qloop_17))); + const __m256d hessian_blending_1_0_1 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_80,tmp_qloop_81),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_29,tmp_qloop_77),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_52); + const __m256d hessian_blending_2_0_1 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_81,tmp_qloop_83),tmp_qloop_84); + const __m256d hessian_blending_0_0_2 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_77,tmp_qloop_85),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_22,tmp_qloop_37),tmp_qloop_75),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_17,tmp_qloop_17,tmp_qloop_17,tmp_qloop_17))); + const __m256d hessian_blending_1_0_2 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_80,tmp_qloop_86),tmp_qloop_84); + const __m256d hessian_blending_2_0_2 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_83,tmp_qloop_86),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_30,tmp_qloop_77),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_52); + const __m256d hessian_blending_0_1_0 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_87,tmp_qloop_88),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_19,tmp_qloop_89),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_44); + const __m256d hessian_blending_1_1_0 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_18,tmp_qloop_91),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_18,tmp_qloop_37),tmp_qloop_90),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_17,tmp_qloop_17,tmp_qloop_17,tmp_qloop_17))); + const __m256d hessian_blending_2_1_0 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_93,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_88,tmp_qloop_92)); + const __m256d hessian_blending_0_1_1 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_18,tmp_qloop_95),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_37,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_29,_mm256_set_pd(tmp_qloop_68,tmp_qloop_68,tmp_qloop_68,tmp_qloop_68)),_mm256_mul_pd(tmp_qloop_54,tmp_qloop_69)),tmp_qloop_94)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_17,tmp_qloop_17,tmp_qloop_17,tmp_qloop_17))); + const __m256d hessian_blending_1_1_1 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_25,tmp_qloop_95),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_37,_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_31,_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(tmp_qloop_40,tmp_qloop_40,tmp_qloop_40,tmp_qloop_40)),_mm256_set_pd(tmp_qloop_53,tmp_qloop_53,tmp_qloop_53,tmp_qloop_53))),_mm256_mul_pd(tmp_qloop_29,_mm256_set_pd(tmp_qloop_40,tmp_qloop_40,tmp_qloop_40,tmp_qloop_40))),_mm256_mul_pd(tmp_qloop_47,tmp_qloop_79)),_mm256_mul_pd(tmp_qloop_54,tmp_qloop_79))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_17,tmp_qloop_17,tmp_qloop_17,tmp_qloop_17))); + const __m256d hessian_blending_2_1_1 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_22,tmp_qloop_95),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_37,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_29,_mm256_set_pd(tmp_qloop_72,tmp_qloop_72,tmp_qloop_72,tmp_qloop_72)),_mm256_mul_pd(tmp_qloop_54,tmp_qloop_82)),tmp_qloop_73)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_17,tmp_qloop_17,tmp_qloop_17,tmp_qloop_17))); + const __m256d hessian_blending_0_1_2 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_93,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_22,tmp_qloop_37),tmp_qloop_87),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_17,tmp_qloop_17,tmp_qloop_17,tmp_qloop_17))); + const __m256d hessian_blending_1_1_2 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_22,tmp_qloop_91),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_22,tmp_qloop_37),tmp_qloop_90),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_17,tmp_qloop_17,tmp_qloop_17,tmp_qloop_17))); + const __m256d hessian_blending_2_1_2 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_86,tmp_qloop_92),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_30,tmp_qloop_89),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_44); + const __m256d hessian_blending_0_2_0 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_88,tmp_qloop_96),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_19,tmp_qloop_97),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_50); + const __m256d hessian_blending_1_2_0 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_100,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_18,tmp_qloop_37),tmp_qloop_98),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_17,tmp_qloop_17,tmp_qloop_17,tmp_qloop_17))); + const __m256d hessian_blending_2_2_0 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_85,tmp_qloop_97),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_101,tmp_qloop_18),tmp_qloop_37),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_17,tmp_qloop_17,tmp_qloop_17,tmp_qloop_17))); + const __m256d hessian_blending_0_2_1 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_100,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_25,tmp_qloop_37),tmp_qloop_96),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_17,tmp_qloop_17,tmp_qloop_17,tmp_qloop_17))); + const __m256d hessian_blending_1_2_1 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_81,tmp_qloop_98),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_29,tmp_qloop_97),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_50); + const __m256d hessian_blending_2_2_1 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_22,tmp_qloop_99),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_101,tmp_qloop_25),tmp_qloop_37),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_17,tmp_qloop_17,tmp_qloop_17,tmp_qloop_17))); + const __m256d hessian_blending_0_2_2 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_102,tmp_qloop_18),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_37,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_30,_mm256_set_pd(tmp_qloop_68,tmp_qloop_68,tmp_qloop_68,tmp_qloop_68)),_mm256_mul_pd(tmp_qloop_57,tmp_qloop_69)),tmp_qloop_94)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_17,tmp_qloop_17,tmp_qloop_17,tmp_qloop_17))); + const __m256d hessian_blending_1_2_2 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_102,tmp_qloop_25),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_37,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_30,_mm256_set_pd(tmp_qloop_40,tmp_qloop_40,tmp_qloop_40,tmp_qloop_40)),_mm256_mul_pd(tmp_qloop_57,tmp_qloop_79)),tmp_qloop_70)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_17,tmp_qloop_17,tmp_qloop_17,tmp_qloop_17))); + const __m256d hessian_blending_2_2_2 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_102,tmp_qloop_22),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_37,_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_31,_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(tmp_qloop_72,tmp_qloop_72,tmp_qloop_72,tmp_qloop_72)),_mm256_set_pd(tmp_qloop_46,tmp_qloop_46,tmp_qloop_46,tmp_qloop_46))),_mm256_mul_pd(tmp_qloop_48,_mm256_set_pd(2.0,2.0,2.0,2.0))),_mm256_mul_pd(tmp_qloop_30,_mm256_set_pd(tmp_qloop_72,tmp_qloop_72,tmp_qloop_72,tmp_qloop_72))),_mm256_mul_pd(tmp_qloop_57,tmp_qloop_82))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_17,tmp_qloop_17,tmp_qloop_17,tmp_qloop_17))); + const __m256d q_tmp_0_0 = _mm256_mul_pd(tmp_qloop_115,tmp_qloop_137); + const __m256d q_tmp_0_1 = _mm256_mul_pd(tmp_qloop_127,tmp_qloop_137); + const __m256d q_tmp_0_2 = _mm256_mul_pd(tmp_qloop_128,tmp_qloop_137); + const __m256d q_tmp_0_3 = _mm256_mul_pd(tmp_qloop_129,tmp_qloop_137); + const __m256d q_tmp_0_4 = _mm256_mul_pd(tmp_qloop_110,tmp_qloop_137); + const __m256d q_tmp_0_5 = _mm256_mul_pd(tmp_qloop_113,tmp_qloop_137); + const __m256d q_tmp_0_6 = _mm256_mul_pd(tmp_qloop_112,tmp_qloop_137); + const __m256d q_tmp_0_7 = _mm256_mul_pd(tmp_qloop_130,tmp_qloop_137); + const __m256d q_tmp_0_8 = _mm256_mul_pd(tmp_qloop_132,tmp_qloop_137); + const __m256d q_tmp_0_9 = _mm256_mul_pd(tmp_qloop_134,tmp_qloop_137); + const __m256d q_tmp_0_10 = _mm256_mul_pd(tmp_qloop_115,tmp_qloop_139); + const __m256d q_tmp_0_11 = _mm256_mul_pd(tmp_qloop_127,tmp_qloop_139); + const __m256d q_tmp_0_12 = _mm256_mul_pd(tmp_qloop_128,tmp_qloop_139); + const __m256d q_tmp_0_13 = _mm256_mul_pd(tmp_qloop_129,tmp_qloop_139); + const __m256d q_tmp_0_14 = _mm256_mul_pd(tmp_qloop_110,tmp_qloop_139); + const __m256d q_tmp_0_15 = _mm256_mul_pd(tmp_qloop_113,tmp_qloop_139); + const __m256d q_tmp_0_16 = _mm256_mul_pd(tmp_qloop_112,tmp_qloop_139); + const __m256d q_tmp_0_17 = _mm256_mul_pd(tmp_qloop_130,tmp_qloop_139); + const __m256d q_tmp_0_18 = _mm256_mul_pd(tmp_qloop_132,tmp_qloop_139); + const __m256d q_tmp_0_19 = _mm256_mul_pd(tmp_qloop_134,tmp_qloop_139); + const __m256d q_tmp_0_20 = _mm256_mul_pd(tmp_qloop_115,tmp_qloop_141); + const __m256d q_tmp_0_21 = _mm256_mul_pd(tmp_qloop_127,tmp_qloop_141); + const __m256d q_tmp_0_22 = _mm256_mul_pd(tmp_qloop_128,tmp_qloop_141); + const __m256d q_tmp_0_23 = _mm256_mul_pd(tmp_qloop_129,tmp_qloop_141); + const __m256d q_tmp_0_24 = _mm256_mul_pd(tmp_qloop_110,tmp_qloop_141); + const __m256d q_tmp_0_25 = _mm256_mul_pd(tmp_qloop_113,tmp_qloop_141); + const __m256d q_tmp_0_26 = _mm256_mul_pd(tmp_qloop_112,tmp_qloop_141); + const __m256d q_tmp_0_27 = _mm256_mul_pd(tmp_qloop_130,tmp_qloop_141); + const __m256d q_tmp_0_28 = _mm256_mul_pd(tmp_qloop_132,tmp_qloop_141); + const __m256d q_tmp_0_29 = _mm256_mul_pd(tmp_qloop_134,tmp_qloop_141); + const __m256d q_tmp_1_0 = _mm256_mul_pd(tmp_qloop_115,tmp_qloop_143); + const __m256d q_tmp_1_1 = _mm256_mul_pd(tmp_qloop_127,tmp_qloop_143); + const __m256d q_tmp_1_2 = _mm256_mul_pd(tmp_qloop_128,tmp_qloop_143); + const __m256d q_tmp_1_3 = _mm256_mul_pd(tmp_qloop_129,tmp_qloop_143); + const __m256d q_tmp_1_4 = tmp_qloop_146; + const __m256d q_tmp_1_5 = _mm256_mul_pd(tmp_qloop_133,tmp_qloop_145); + const __m256d q_tmp_1_6 = _mm256_mul_pd(tmp_qloop_133,tmp_qloop_148); + const __m256d q_tmp_1_7 = _mm256_mul_pd(tmp_qloop_130,tmp_qloop_143); + const __m256d q_tmp_1_8 = _mm256_mul_pd(tmp_qloop_132,tmp_qloop_143); + const __m256d q_tmp_1_9 = _mm256_mul_pd(tmp_qloop_134,tmp_qloop_143); + const __m256d q_tmp_1_10 = _mm256_mul_pd(tmp_qloop_115,tmp_qloop_149); + const __m256d q_tmp_1_11 = _mm256_mul_pd(tmp_qloop_127,tmp_qloop_149); + const __m256d q_tmp_1_12 = _mm256_mul_pd(tmp_qloop_128,tmp_qloop_149); + const __m256d q_tmp_1_13 = _mm256_mul_pd(tmp_qloop_129,tmp_qloop_149); + const __m256d q_tmp_1_14 = tmp_qloop_151; + const __m256d q_tmp_1_15 = _mm256_mul_pd(tmp_qloop_133,tmp_qloop_150); + const __m256d q_tmp_1_16 = _mm256_mul_pd(tmp_qloop_133,tmp_qloop_152); + const __m256d q_tmp_1_17 = _mm256_mul_pd(tmp_qloop_130,tmp_qloop_149); + const __m256d q_tmp_1_18 = _mm256_mul_pd(tmp_qloop_132,tmp_qloop_149); + const __m256d q_tmp_1_19 = _mm256_mul_pd(tmp_qloop_134,tmp_qloop_149); + const __m256d q_tmp_1_20 = _mm256_mul_pd(tmp_qloop_115,tmp_qloop_153); + const __m256d q_tmp_1_21 = _mm256_mul_pd(tmp_qloop_127,tmp_qloop_153); + const __m256d q_tmp_1_22 = _mm256_mul_pd(tmp_qloop_128,tmp_qloop_153); + const __m256d q_tmp_1_23 = _mm256_mul_pd(tmp_qloop_129,tmp_qloop_153); + const __m256d q_tmp_1_24 = tmp_qloop_155; + const __m256d q_tmp_1_25 = _mm256_mul_pd(tmp_qloop_133,tmp_qloop_154); + const __m256d q_tmp_1_26 = _mm256_mul_pd(tmp_qloop_133,tmp_qloop_156); + const __m256d q_tmp_1_27 = _mm256_mul_pd(tmp_qloop_130,tmp_qloop_153); + const __m256d q_tmp_1_28 = _mm256_mul_pd(tmp_qloop_132,tmp_qloop_153); + const __m256d q_tmp_1_29 = _mm256_mul_pd(tmp_qloop_134,tmp_qloop_153); + const __m256d q_tmp_2_0 = _mm256_mul_pd(tmp_qloop_115,tmp_qloop_148); + const __m256d q_tmp_2_1 = _mm256_mul_pd(tmp_qloop_127,tmp_qloop_148); + const __m256d q_tmp_2_2 = _mm256_mul_pd(tmp_qloop_128,tmp_qloop_148); + const __m256d q_tmp_2_3 = _mm256_mul_pd(tmp_qloop_129,tmp_qloop_148); + const __m256d q_tmp_2_4 = _mm256_mul_pd(tmp_qloop_131,tmp_qloop_145); + const __m256d q_tmp_2_5 = tmp_qloop_146; + const __m256d q_tmp_2_6 = _mm256_mul_pd(tmp_qloop_157,tmp_qloop_158); + const __m256d q_tmp_2_7 = _mm256_mul_pd(tmp_qloop_130,tmp_qloop_148); + const __m256d q_tmp_2_8 = _mm256_mul_pd(tmp_qloop_132,tmp_qloop_148); + const __m256d q_tmp_2_9 = _mm256_mul_pd(tmp_qloop_134,tmp_qloop_148); + const __m256d q_tmp_2_10 = _mm256_mul_pd(tmp_qloop_115,tmp_qloop_152); + const __m256d q_tmp_2_11 = _mm256_mul_pd(tmp_qloop_127,tmp_qloop_152); + const __m256d q_tmp_2_12 = _mm256_mul_pd(tmp_qloop_128,tmp_qloop_152); + const __m256d q_tmp_2_13 = _mm256_mul_pd(tmp_qloop_129,tmp_qloop_152); + const __m256d q_tmp_2_14 = _mm256_mul_pd(tmp_qloop_131,tmp_qloop_150); + const __m256d q_tmp_2_15 = tmp_qloop_151; + const __m256d q_tmp_2_16 = _mm256_mul_pd(tmp_qloop_138,tmp_qloop_159); + const __m256d q_tmp_2_17 = _mm256_mul_pd(tmp_qloop_130,tmp_qloop_152); + const __m256d q_tmp_2_18 = _mm256_mul_pd(tmp_qloop_132,tmp_qloop_152); + const __m256d q_tmp_2_19 = _mm256_mul_pd(tmp_qloop_134,tmp_qloop_152); + const __m256d q_tmp_2_20 = _mm256_mul_pd(tmp_qloop_115,tmp_qloop_156); + const __m256d q_tmp_2_21 = _mm256_mul_pd(tmp_qloop_127,tmp_qloop_156); + const __m256d q_tmp_2_22 = _mm256_mul_pd(tmp_qloop_128,tmp_qloop_156); + const __m256d q_tmp_2_23 = _mm256_mul_pd(tmp_qloop_129,tmp_qloop_156); + const __m256d q_tmp_2_24 = _mm256_mul_pd(tmp_qloop_131,tmp_qloop_154); + const __m256d q_tmp_2_25 = tmp_qloop_155; + const __m256d q_tmp_2_26 = _mm256_mul_pd(tmp_qloop_140,tmp_qloop_159); + const __m256d q_tmp_2_27 = _mm256_mul_pd(tmp_qloop_130,tmp_qloop_156); + const __m256d q_tmp_2_28 = _mm256_mul_pd(tmp_qloop_132,tmp_qloop_156); + const __m256d q_tmp_2_29 = _mm256_mul_pd(tmp_qloop_134,tmp_qloop_156); + const __m256d q_tmp_3_0 = _mm256_mul_pd(tmp_qloop_115,tmp_qloop_145); + const __m256d q_tmp_3_1 = _mm256_mul_pd(tmp_qloop_127,tmp_qloop_145); + const __m256d q_tmp_3_2 = _mm256_mul_pd(tmp_qloop_128,tmp_qloop_145); + const __m256d q_tmp_3_3 = _mm256_mul_pd(tmp_qloop_129,tmp_qloop_145); + const __m256d q_tmp_3_4 = _mm256_mul_pd(tmp_qloop_109,tmp_qloop_160); + const __m256d q_tmp_3_5 = _mm256_mul_pd(tmp_qloop_111,tmp_qloop_160); + const __m256d q_tmp_3_6 = tmp_qloop_146; + const __m256d q_tmp_3_7 = _mm256_mul_pd(tmp_qloop_130,tmp_qloop_145); + const __m256d q_tmp_3_8 = _mm256_mul_pd(tmp_qloop_132,tmp_qloop_145); + const __m256d q_tmp_3_9 = _mm256_mul_pd(tmp_qloop_134,tmp_qloop_145); + const __m256d q_tmp_3_10 = _mm256_mul_pd(tmp_qloop_115,tmp_qloop_150); + const __m256d q_tmp_3_11 = _mm256_mul_pd(tmp_qloop_127,tmp_qloop_150); + const __m256d q_tmp_3_12 = _mm256_mul_pd(tmp_qloop_128,tmp_qloop_150); + const __m256d q_tmp_3_13 = _mm256_mul_pd(tmp_qloop_129,tmp_qloop_150); + const __m256d q_tmp_3_14 = _mm256_mul_pd(tmp_qloop_109,tmp_qloop_162); + const __m256d q_tmp_3_15 = _mm256_mul_pd(tmp_qloop_111,tmp_qloop_162); + const __m256d q_tmp_3_16 = tmp_qloop_151; + const __m256d q_tmp_3_17 = _mm256_mul_pd(tmp_qloop_130,tmp_qloop_150); + const __m256d q_tmp_3_18 = _mm256_mul_pd(tmp_qloop_132,tmp_qloop_150); + const __m256d q_tmp_3_19 = _mm256_mul_pd(tmp_qloop_134,tmp_qloop_150); + const __m256d q_tmp_3_20 = _mm256_mul_pd(tmp_qloop_115,tmp_qloop_154); + const __m256d q_tmp_3_21 = _mm256_mul_pd(tmp_qloop_127,tmp_qloop_154); + const __m256d q_tmp_3_22 = _mm256_mul_pd(tmp_qloop_128,tmp_qloop_154); + const __m256d q_tmp_3_23 = _mm256_mul_pd(tmp_qloop_129,tmp_qloop_154); + const __m256d q_tmp_3_24 = _mm256_mul_pd(tmp_qloop_109,tmp_qloop_163); + const __m256d q_tmp_3_25 = _mm256_mul_pd(tmp_qloop_111,tmp_qloop_163); + const __m256d q_tmp_3_26 = tmp_qloop_155; + const __m256d q_tmp_3_27 = _mm256_mul_pd(tmp_qloop_130,tmp_qloop_154); + const __m256d q_tmp_3_28 = _mm256_mul_pd(tmp_qloop_132,tmp_qloop_154); + const __m256d q_tmp_3_29 = _mm256_mul_pd(tmp_qloop_134,tmp_qloop_154); + q_acc_0_0 = _mm256_add_pd(q_acc_0_0,q_tmp_0_0); + q_acc_0_1 = _mm256_add_pd(q_acc_0_1,q_tmp_0_1); + q_acc_0_2 = _mm256_add_pd(q_acc_0_2,q_tmp_0_2); + q_acc_0_3 = _mm256_add_pd(q_acc_0_3,q_tmp_0_3); + q_acc_0_4 = _mm256_add_pd(q_acc_0_4,q_tmp_0_4); + q_acc_0_5 = _mm256_add_pd(q_acc_0_5,q_tmp_0_5); + q_acc_0_6 = _mm256_add_pd(q_acc_0_6,q_tmp_0_6); + q_acc_0_7 = _mm256_add_pd(q_acc_0_7,q_tmp_0_7); + q_acc_0_8 = _mm256_add_pd(q_acc_0_8,q_tmp_0_8); + q_acc_0_9 = _mm256_add_pd(q_acc_0_9,q_tmp_0_9); + q_acc_0_10 = _mm256_add_pd(q_acc_0_10,q_tmp_0_10); + q_acc_0_11 = _mm256_add_pd(q_acc_0_11,q_tmp_0_11); + q_acc_0_12 = _mm256_add_pd(q_acc_0_12,q_tmp_0_12); + q_acc_0_13 = _mm256_add_pd(q_acc_0_13,q_tmp_0_13); + q_acc_0_14 = _mm256_add_pd(q_acc_0_14,q_tmp_0_14); + q_acc_0_15 = _mm256_add_pd(q_acc_0_15,q_tmp_0_15); + q_acc_0_16 = _mm256_add_pd(q_acc_0_16,q_tmp_0_16); + q_acc_0_17 = _mm256_add_pd(q_acc_0_17,q_tmp_0_17); + q_acc_0_18 = _mm256_add_pd(q_acc_0_18,q_tmp_0_18); + q_acc_0_19 = _mm256_add_pd(q_acc_0_19,q_tmp_0_19); + q_acc_0_20 = _mm256_add_pd(q_acc_0_20,q_tmp_0_20); + q_acc_0_21 = _mm256_add_pd(q_acc_0_21,q_tmp_0_21); + q_acc_0_22 = _mm256_add_pd(q_acc_0_22,q_tmp_0_22); + q_acc_0_23 = _mm256_add_pd(q_acc_0_23,q_tmp_0_23); + q_acc_0_24 = _mm256_add_pd(q_acc_0_24,q_tmp_0_24); + q_acc_0_25 = _mm256_add_pd(q_acc_0_25,q_tmp_0_25); + q_acc_0_26 = _mm256_add_pd(q_acc_0_26,q_tmp_0_26); + q_acc_0_27 = _mm256_add_pd(q_acc_0_27,q_tmp_0_27); + q_acc_0_28 = _mm256_add_pd(q_acc_0_28,q_tmp_0_28); + q_acc_0_29 = _mm256_add_pd(q_acc_0_29,q_tmp_0_29); + q_acc_1_0 = _mm256_add_pd(q_acc_1_0,q_tmp_1_0); + q_acc_1_1 = _mm256_add_pd(q_acc_1_1,q_tmp_1_1); + q_acc_1_2 = _mm256_add_pd(q_acc_1_2,q_tmp_1_2); + q_acc_1_3 = _mm256_add_pd(q_acc_1_3,q_tmp_1_3); + q_acc_1_4 = _mm256_add_pd(q_acc_1_4,q_tmp_1_4); + q_acc_1_5 = _mm256_add_pd(q_acc_1_5,q_tmp_1_5); + q_acc_1_6 = _mm256_add_pd(q_acc_1_6,q_tmp_1_6); + q_acc_1_7 = _mm256_add_pd(q_acc_1_7,q_tmp_1_7); + q_acc_1_8 = _mm256_add_pd(q_acc_1_8,q_tmp_1_8); + q_acc_1_9 = _mm256_add_pd(q_acc_1_9,q_tmp_1_9); + q_acc_1_10 = _mm256_add_pd(q_acc_1_10,q_tmp_1_10); + q_acc_1_11 = _mm256_add_pd(q_acc_1_11,q_tmp_1_11); + q_acc_1_12 = _mm256_add_pd(q_acc_1_12,q_tmp_1_12); + q_acc_1_13 = _mm256_add_pd(q_acc_1_13,q_tmp_1_13); + q_acc_1_14 = _mm256_add_pd(q_acc_1_14,q_tmp_1_14); + q_acc_1_15 = _mm256_add_pd(q_acc_1_15,q_tmp_1_15); + q_acc_1_16 = _mm256_add_pd(q_acc_1_16,q_tmp_1_16); + q_acc_1_17 = _mm256_add_pd(q_acc_1_17,q_tmp_1_17); + q_acc_1_18 = _mm256_add_pd(q_acc_1_18,q_tmp_1_18); + q_acc_1_19 = _mm256_add_pd(q_acc_1_19,q_tmp_1_19); + q_acc_1_20 = _mm256_add_pd(q_acc_1_20,q_tmp_1_20); + q_acc_1_21 = _mm256_add_pd(q_acc_1_21,q_tmp_1_21); + q_acc_1_22 = _mm256_add_pd(q_acc_1_22,q_tmp_1_22); + q_acc_1_23 = _mm256_add_pd(q_acc_1_23,q_tmp_1_23); + q_acc_1_24 = _mm256_add_pd(q_acc_1_24,q_tmp_1_24); + q_acc_1_25 = _mm256_add_pd(q_acc_1_25,q_tmp_1_25); + q_acc_1_26 = _mm256_add_pd(q_acc_1_26,q_tmp_1_26); + q_acc_1_27 = _mm256_add_pd(q_acc_1_27,q_tmp_1_27); + q_acc_1_28 = _mm256_add_pd(q_acc_1_28,q_tmp_1_28); + q_acc_1_29 = _mm256_add_pd(q_acc_1_29,q_tmp_1_29); + q_acc_2_0 = _mm256_add_pd(q_acc_2_0,q_tmp_2_0); + q_acc_2_1 = _mm256_add_pd(q_acc_2_1,q_tmp_2_1); + q_acc_2_2 = _mm256_add_pd(q_acc_2_2,q_tmp_2_2); + q_acc_2_3 = _mm256_add_pd(q_acc_2_3,q_tmp_2_3); + q_acc_2_4 = _mm256_add_pd(q_acc_2_4,q_tmp_2_4); + q_acc_2_5 = _mm256_add_pd(q_acc_2_5,q_tmp_2_5); + q_acc_2_6 = _mm256_add_pd(q_acc_2_6,q_tmp_2_6); + q_acc_2_7 = _mm256_add_pd(q_acc_2_7,q_tmp_2_7); + q_acc_2_8 = _mm256_add_pd(q_acc_2_8,q_tmp_2_8); + q_acc_2_9 = _mm256_add_pd(q_acc_2_9,q_tmp_2_9); + q_acc_2_10 = _mm256_add_pd(q_acc_2_10,q_tmp_2_10); + q_acc_2_11 = _mm256_add_pd(q_acc_2_11,q_tmp_2_11); + q_acc_2_12 = _mm256_add_pd(q_acc_2_12,q_tmp_2_12); + q_acc_2_13 = _mm256_add_pd(q_acc_2_13,q_tmp_2_13); + q_acc_2_14 = _mm256_add_pd(q_acc_2_14,q_tmp_2_14); + q_acc_2_15 = _mm256_add_pd(q_acc_2_15,q_tmp_2_15); + q_acc_2_16 = _mm256_add_pd(q_acc_2_16,q_tmp_2_16); + q_acc_2_17 = _mm256_add_pd(q_acc_2_17,q_tmp_2_17); + q_acc_2_18 = _mm256_add_pd(q_acc_2_18,q_tmp_2_18); + q_acc_2_19 = _mm256_add_pd(q_acc_2_19,q_tmp_2_19); + q_acc_2_20 = _mm256_add_pd(q_acc_2_20,q_tmp_2_20); + q_acc_2_21 = _mm256_add_pd(q_acc_2_21,q_tmp_2_21); + q_acc_2_22 = _mm256_add_pd(q_acc_2_22,q_tmp_2_22); + q_acc_2_23 = _mm256_add_pd(q_acc_2_23,q_tmp_2_23); + q_acc_2_24 = _mm256_add_pd(q_acc_2_24,q_tmp_2_24); + q_acc_2_25 = _mm256_add_pd(q_acc_2_25,q_tmp_2_25); + q_acc_2_26 = _mm256_add_pd(q_acc_2_26,q_tmp_2_26); + q_acc_2_27 = _mm256_add_pd(q_acc_2_27,q_tmp_2_27); + q_acc_2_28 = _mm256_add_pd(q_acc_2_28,q_tmp_2_28); + q_acc_2_29 = _mm256_add_pd(q_acc_2_29,q_tmp_2_29); + q_acc_3_0 = _mm256_add_pd(q_acc_3_0,q_tmp_3_0); + q_acc_3_1 = _mm256_add_pd(q_acc_3_1,q_tmp_3_1); + q_acc_3_2 = _mm256_add_pd(q_acc_3_2,q_tmp_3_2); + q_acc_3_3 = _mm256_add_pd(q_acc_3_3,q_tmp_3_3); + q_acc_3_4 = _mm256_add_pd(q_acc_3_4,q_tmp_3_4); + q_acc_3_5 = _mm256_add_pd(q_acc_3_5,q_tmp_3_5); + q_acc_3_6 = _mm256_add_pd(q_acc_3_6,q_tmp_3_6); + q_acc_3_7 = _mm256_add_pd(q_acc_3_7,q_tmp_3_7); + q_acc_3_8 = _mm256_add_pd(q_acc_3_8,q_tmp_3_8); + q_acc_3_9 = _mm256_add_pd(q_acc_3_9,q_tmp_3_9); + q_acc_3_10 = _mm256_add_pd(q_acc_3_10,q_tmp_3_10); + q_acc_3_11 = _mm256_add_pd(q_acc_3_11,q_tmp_3_11); + q_acc_3_12 = _mm256_add_pd(q_acc_3_12,q_tmp_3_12); + q_acc_3_13 = _mm256_add_pd(q_acc_3_13,q_tmp_3_13); + q_acc_3_14 = _mm256_add_pd(q_acc_3_14,q_tmp_3_14); + q_acc_3_15 = _mm256_add_pd(q_acc_3_15,q_tmp_3_15); + q_acc_3_16 = _mm256_add_pd(q_acc_3_16,q_tmp_3_16); + q_acc_3_17 = _mm256_add_pd(q_acc_3_17,q_tmp_3_17); + q_acc_3_18 = _mm256_add_pd(q_acc_3_18,q_tmp_3_18); + q_acc_3_19 = _mm256_add_pd(q_acc_3_19,q_tmp_3_19); + q_acc_3_20 = _mm256_add_pd(q_acc_3_20,q_tmp_3_20); + q_acc_3_21 = _mm256_add_pd(q_acc_3_21,q_tmp_3_21); + q_acc_3_22 = _mm256_add_pd(q_acc_3_22,q_tmp_3_22); + q_acc_3_23 = _mm256_add_pd(q_acc_3_23,q_tmp_3_23); + q_acc_3_24 = _mm256_add_pd(q_acc_3_24,q_tmp_3_24); + q_acc_3_25 = _mm256_add_pd(q_acc_3_25,q_tmp_3_25); + q_acc_3_26 = _mm256_add_pd(q_acc_3_26,q_tmp_3_26); + q_acc_3_27 = _mm256_add_pd(q_acc_3_27,q_tmp_3_27); + q_acc_3_28 = _mm256_add_pd(q_acc_3_28,q_tmp_3_28); + q_acc_3_29 = _mm256_add_pd(q_acc_3_29,q_tmp_3_29); + } + const __m256d elMatVec_0 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(q_acc_0_0,src_dof_0),_mm256_mul_pd(q_acc_0_1,src_dof_1)),_mm256_mul_pd(q_acc_0_10,src_dof_10)),_mm256_mul_pd(q_acc_0_11,src_dof_11)),_mm256_mul_pd(q_acc_0_12,src_dof_12)),_mm256_mul_pd(q_acc_0_13,src_dof_13)),_mm256_mul_pd(q_acc_0_14,src_dof_14)),_mm256_mul_pd(q_acc_0_15,src_dof_15)),_mm256_mul_pd(q_acc_0_16,src_dof_16)),_mm256_mul_pd(q_acc_0_17,src_dof_17)),_mm256_mul_pd(q_acc_0_18,src_dof_18)),_mm256_mul_pd(q_acc_0_19,src_dof_19)),_mm256_mul_pd(q_acc_0_2,src_dof_2)),_mm256_mul_pd(q_acc_0_20,src_dof_20)),_mm256_mul_pd(q_acc_0_21,src_dof_21)),_mm256_mul_pd(q_acc_0_22,src_dof_22)),_mm256_mul_pd(q_acc_0_23,src_dof_23)),_mm256_mul_pd(q_acc_0_24,src_dof_24)),_mm256_mul_pd(q_acc_0_25,src_dof_25)),_mm256_mul_pd(q_acc_0_26,src_dof_26)),_mm256_mul_pd(q_acc_0_27,src_dof_27)),_mm256_mul_pd(q_acc_0_28,src_dof_28)),_mm256_mul_pd(q_acc_0_29,src_dof_29)),_mm256_mul_pd(q_acc_0_3,src_dof_3)),_mm256_mul_pd(q_acc_0_4,src_dof_4)),_mm256_mul_pd(q_acc_0_5,src_dof_5)),_mm256_mul_pd(q_acc_0_6,src_dof_6)),_mm256_mul_pd(q_acc_0_7,src_dof_7)),_mm256_mul_pd(q_acc_0_8,src_dof_8)),_mm256_mul_pd(q_acc_0_9,src_dof_9)); + const __m256d elMatVec_1 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(q_acc_1_0,src_dof_0),_mm256_mul_pd(q_acc_1_1,src_dof_1)),_mm256_mul_pd(q_acc_1_10,src_dof_10)),_mm256_mul_pd(q_acc_1_11,src_dof_11)),_mm256_mul_pd(q_acc_1_12,src_dof_12)),_mm256_mul_pd(q_acc_1_13,src_dof_13)),_mm256_mul_pd(q_acc_1_14,src_dof_14)),_mm256_mul_pd(q_acc_1_15,src_dof_15)),_mm256_mul_pd(q_acc_1_16,src_dof_16)),_mm256_mul_pd(q_acc_1_17,src_dof_17)),_mm256_mul_pd(q_acc_1_18,src_dof_18)),_mm256_mul_pd(q_acc_1_19,src_dof_19)),_mm256_mul_pd(q_acc_1_2,src_dof_2)),_mm256_mul_pd(q_acc_1_20,src_dof_20)),_mm256_mul_pd(q_acc_1_21,src_dof_21)),_mm256_mul_pd(q_acc_1_22,src_dof_22)),_mm256_mul_pd(q_acc_1_23,src_dof_23)),_mm256_mul_pd(q_acc_1_24,src_dof_24)),_mm256_mul_pd(q_acc_1_25,src_dof_25)),_mm256_mul_pd(q_acc_1_26,src_dof_26)),_mm256_mul_pd(q_acc_1_27,src_dof_27)),_mm256_mul_pd(q_acc_1_28,src_dof_28)),_mm256_mul_pd(q_acc_1_29,src_dof_29)),_mm256_mul_pd(q_acc_1_3,src_dof_3)),_mm256_mul_pd(q_acc_1_4,src_dof_4)),_mm256_mul_pd(q_acc_1_5,src_dof_5)),_mm256_mul_pd(q_acc_1_6,src_dof_6)),_mm256_mul_pd(q_acc_1_7,src_dof_7)),_mm256_mul_pd(q_acc_1_8,src_dof_8)),_mm256_mul_pd(q_acc_1_9,src_dof_9)); + const __m256d elMatVec_2 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(q_acc_2_0,src_dof_0),_mm256_mul_pd(q_acc_2_1,src_dof_1)),_mm256_mul_pd(q_acc_2_10,src_dof_10)),_mm256_mul_pd(q_acc_2_11,src_dof_11)),_mm256_mul_pd(q_acc_2_12,src_dof_12)),_mm256_mul_pd(q_acc_2_13,src_dof_13)),_mm256_mul_pd(q_acc_2_14,src_dof_14)),_mm256_mul_pd(q_acc_2_15,src_dof_15)),_mm256_mul_pd(q_acc_2_16,src_dof_16)),_mm256_mul_pd(q_acc_2_17,src_dof_17)),_mm256_mul_pd(q_acc_2_18,src_dof_18)),_mm256_mul_pd(q_acc_2_19,src_dof_19)),_mm256_mul_pd(q_acc_2_2,src_dof_2)),_mm256_mul_pd(q_acc_2_20,src_dof_20)),_mm256_mul_pd(q_acc_2_21,src_dof_21)),_mm256_mul_pd(q_acc_2_22,src_dof_22)),_mm256_mul_pd(q_acc_2_23,src_dof_23)),_mm256_mul_pd(q_acc_2_24,src_dof_24)),_mm256_mul_pd(q_acc_2_25,src_dof_25)),_mm256_mul_pd(q_acc_2_26,src_dof_26)),_mm256_mul_pd(q_acc_2_27,src_dof_27)),_mm256_mul_pd(q_acc_2_28,src_dof_28)),_mm256_mul_pd(q_acc_2_29,src_dof_29)),_mm256_mul_pd(q_acc_2_3,src_dof_3)),_mm256_mul_pd(q_acc_2_4,src_dof_4)),_mm256_mul_pd(q_acc_2_5,src_dof_5)),_mm256_mul_pd(q_acc_2_6,src_dof_6)),_mm256_mul_pd(q_acc_2_7,src_dof_7)),_mm256_mul_pd(q_acc_2_8,src_dof_8)),_mm256_mul_pd(q_acc_2_9,src_dof_9)); + const __m256d elMatVec_3 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(q_acc_3_0,src_dof_0),_mm256_mul_pd(q_acc_3_1,src_dof_1)),_mm256_mul_pd(q_acc_3_10,src_dof_10)),_mm256_mul_pd(q_acc_3_11,src_dof_11)),_mm256_mul_pd(q_acc_3_12,src_dof_12)),_mm256_mul_pd(q_acc_3_13,src_dof_13)),_mm256_mul_pd(q_acc_3_14,src_dof_14)),_mm256_mul_pd(q_acc_3_15,src_dof_15)),_mm256_mul_pd(q_acc_3_16,src_dof_16)),_mm256_mul_pd(q_acc_3_17,src_dof_17)),_mm256_mul_pd(q_acc_3_18,src_dof_18)),_mm256_mul_pd(q_acc_3_19,src_dof_19)),_mm256_mul_pd(q_acc_3_2,src_dof_2)),_mm256_mul_pd(q_acc_3_20,src_dof_20)),_mm256_mul_pd(q_acc_3_21,src_dof_21)),_mm256_mul_pd(q_acc_3_22,src_dof_22)),_mm256_mul_pd(q_acc_3_23,src_dof_23)),_mm256_mul_pd(q_acc_3_24,src_dof_24)),_mm256_mul_pd(q_acc_3_25,src_dof_25)),_mm256_mul_pd(q_acc_3_26,src_dof_26)),_mm256_mul_pd(q_acc_3_27,src_dof_27)),_mm256_mul_pd(q_acc_3_28,src_dof_28)),_mm256_mul_pd(q_acc_3_29,src_dof_29)),_mm256_mul_pd(q_acc_3_3,src_dof_3)),_mm256_mul_pd(q_acc_3_4,src_dof_4)),_mm256_mul_pd(q_acc_3_5,src_dof_5)),_mm256_mul_pd(q_acc_3_6,src_dof_6)),_mm256_mul_pd(q_acc_3_7,src_dof_7)),_mm256_mul_pd(q_acc_3_8,src_dof_8)),_mm256_mul_pd(q_acc_3_9,src_dof_9)); + _mm256_storeu_pd(&_data_dst[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))],_mm256_add_pd(elMatVec_0,_mm256_loadu_pd(& _data_dst[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))]))); + _mm256_storeu_pd(&_data_dst[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1],_mm256_add_pd(elMatVec_1,_mm256_loadu_pd(& _data_dst[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1]))); + _mm256_storeu_pd(&_data_dst[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1],_mm256_add_pd(elMatVec_2,_mm256_loadu_pd(& _data_dst[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1]))); + _mm256_storeu_pd(&_data_dst[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))],_mm256_add_pd(elMatVec_3,_mm256_loadu_pd(& _data_dst[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))]))); + } + for (int64_t ctr_0 = (int64_t)((-ctr_1 - ctr_2 + micro_edges_per_macro_edge - 1) / (4)) * (4); ctr_0 < -ctr_1 - ctr_2 + micro_edges_per_macro_edge - 1; ctr_0 += 1) + { + + const int64_t phantom_ctr_0 = ctr_0; + real_t _data_float_loop_ctr_array_dim_0[4]; + _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0; + _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1; + _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2; + _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3; + real_t _data_float_loop_ctr_array_dim_1[4]; + _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1; + real_t _data_float_loop_ctr_array_dim_2[4]; + _data_float_loop_ctr_array_dim_2[0] = (real_t) ctr_2; + _data_float_loop_ctr_array_dim_2[1] = (real_t) ctr_2; + _data_float_loop_ctr_array_dim_2[2] = (real_t) ctr_2; + _data_float_loop_ctr_array_dim_2[3] = (real_t) ctr_2; + + const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]; + const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]; + const real_t p_affine_0_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]; + const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]; + const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]; + const real_t p_affine_1_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]; + const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_2_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_3_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_3_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_3_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0; + const real_t src_dof_0 = _data_src_vertex_0[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))]; + const real_t src_dof_1 = _data_src_vertex_0[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1]; + const real_t src_dof_2 = _data_src_vertex_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1]; + const real_t src_dof_3 = _data_src_vertex_0[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))]; + const real_t src_dof_4 = _data_src_edge_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + 4*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))]; + const real_t src_dof_5 = _data_src_edge_0[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 5*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]; + const real_t src_dof_6 = _data_src_edge_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + 1]; + const real_t src_dof_7 = _data_src_edge_0[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 3*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]; + const real_t src_dof_8 = _data_src_edge_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge - 1)*(micro_edges_per_macro_edge + 1)) / (6)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))]; + const real_t src_dof_9 = _data_src_edge_0[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]; + const real_t src_dof_10 = _data_src_vertex_1[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))]; + const real_t src_dof_11 = _data_src_vertex_1[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1]; + const real_t src_dof_12 = _data_src_vertex_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1]; + const real_t src_dof_13 = _data_src_vertex_1[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))]; + const real_t src_dof_14 = _data_src_edge_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + 4*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))]; + const real_t src_dof_15 = _data_src_edge_1[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 5*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]; + const real_t src_dof_16 = _data_src_edge_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + 1]; + const real_t src_dof_17 = _data_src_edge_1[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 3*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]; + const real_t src_dof_18 = _data_src_edge_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge - 1)*(micro_edges_per_macro_edge + 1)) / (6)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))]; + const real_t src_dof_19 = _data_src_edge_1[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]; + const real_t src_dof_20 = _data_src_vertex_2[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))]; + const real_t src_dof_21 = _data_src_vertex_2[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1]; + const real_t src_dof_22 = _data_src_vertex_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1]; + const real_t src_dof_23 = _data_src_vertex_2[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))]; + const real_t src_dof_24 = _data_src_edge_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + 4*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))]; + const real_t src_dof_25 = _data_src_edge_2[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 5*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]; + const real_t src_dof_26 = _data_src_edge_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + 1]; + const real_t src_dof_27 = _data_src_edge_2[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 3*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]; + const real_t src_dof_28 = _data_src_edge_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge - 1)*(micro_edges_per_macro_edge + 1)) / (6)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))]; + const real_t src_dof_29 = _data_src_edge_2[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]; + const real_t rho_dof_0 = _data_rhoVertex[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))]; + const real_t rho_dof_1 = _data_rhoVertex[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1]; + const real_t rho_dof_2 = _data_rhoVertex[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1]; + const real_t rho_dof_3 = _data_rhoVertex[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))]; + const real_t rho_dof_4 = _data_rhoEdge[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + 4*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))]; + const real_t rho_dof_5 = _data_rhoEdge[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 5*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]; + const real_t rho_dof_6 = _data_rhoEdge[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + 1]; + const real_t rho_dof_7 = _data_rhoEdge[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 3*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]; + const real_t rho_dof_8 = _data_rhoEdge[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge - 1)*(micro_edges_per_macro_edge + 1)) / (6)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))]; + const real_t rho_dof_9 = _data_rhoEdge[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]; + real_t q_acc_0_0 = 0.0; + real_t q_acc_0_1 = 0.0; + real_t q_acc_0_2 = 0.0; + real_t q_acc_0_3 = 0.0; + real_t q_acc_0_4 = 0.0; + real_t q_acc_0_5 = 0.0; + real_t q_acc_0_6 = 0.0; + real_t q_acc_0_7 = 0.0; + real_t q_acc_0_8 = 0.0; + real_t q_acc_0_9 = 0.0; + real_t q_acc_0_10 = 0.0; + real_t q_acc_0_11 = 0.0; + real_t q_acc_0_12 = 0.0; + real_t q_acc_0_13 = 0.0; + real_t q_acc_0_14 = 0.0; + real_t q_acc_0_15 = 0.0; + real_t q_acc_0_16 = 0.0; + real_t q_acc_0_17 = 0.0; + real_t q_acc_0_18 = 0.0; + real_t q_acc_0_19 = 0.0; + real_t q_acc_0_20 = 0.0; + real_t q_acc_0_21 = 0.0; + real_t q_acc_0_22 = 0.0; + real_t q_acc_0_23 = 0.0; + real_t q_acc_0_24 = 0.0; + real_t q_acc_0_25 = 0.0; + real_t q_acc_0_26 = 0.0; + real_t q_acc_0_27 = 0.0; + real_t q_acc_0_28 = 0.0; + real_t q_acc_0_29 = 0.0; + real_t q_acc_1_0 = 0.0; + real_t q_acc_1_1 = 0.0; + real_t q_acc_1_2 = 0.0; + real_t q_acc_1_3 = 0.0; + real_t q_acc_1_4 = 0.0; + real_t q_acc_1_5 = 0.0; + real_t q_acc_1_6 = 0.0; + real_t q_acc_1_7 = 0.0; + real_t q_acc_1_8 = 0.0; + real_t q_acc_1_9 = 0.0; + real_t q_acc_1_10 = 0.0; + real_t q_acc_1_11 = 0.0; + real_t q_acc_1_12 = 0.0; + real_t q_acc_1_13 = 0.0; + real_t q_acc_1_14 = 0.0; + real_t q_acc_1_15 = 0.0; + real_t q_acc_1_16 = 0.0; + real_t q_acc_1_17 = 0.0; + real_t q_acc_1_18 = 0.0; + real_t q_acc_1_19 = 0.0; + real_t q_acc_1_20 = 0.0; + real_t q_acc_1_21 = 0.0; + real_t q_acc_1_22 = 0.0; + real_t q_acc_1_23 = 0.0; + real_t q_acc_1_24 = 0.0; + real_t q_acc_1_25 = 0.0; + real_t q_acc_1_26 = 0.0; + real_t q_acc_1_27 = 0.0; + real_t q_acc_1_28 = 0.0; + real_t q_acc_1_29 = 0.0; + real_t q_acc_2_0 = 0.0; + real_t q_acc_2_1 = 0.0; + real_t q_acc_2_2 = 0.0; + real_t q_acc_2_3 = 0.0; + real_t q_acc_2_4 = 0.0; + real_t q_acc_2_5 = 0.0; + real_t q_acc_2_6 = 0.0; + real_t q_acc_2_7 = 0.0; + real_t q_acc_2_8 = 0.0; + real_t q_acc_2_9 = 0.0; + real_t q_acc_2_10 = 0.0; + real_t q_acc_2_11 = 0.0; + real_t q_acc_2_12 = 0.0; + real_t q_acc_2_13 = 0.0; + real_t q_acc_2_14 = 0.0; + real_t q_acc_2_15 = 0.0; + real_t q_acc_2_16 = 0.0; + real_t q_acc_2_17 = 0.0; + real_t q_acc_2_18 = 0.0; + real_t q_acc_2_19 = 0.0; + real_t q_acc_2_20 = 0.0; + real_t q_acc_2_21 = 0.0; + real_t q_acc_2_22 = 0.0; + real_t q_acc_2_23 = 0.0; + real_t q_acc_2_24 = 0.0; + real_t q_acc_2_25 = 0.0; + real_t q_acc_2_26 = 0.0; + real_t q_acc_2_27 = 0.0; + real_t q_acc_2_28 = 0.0; + real_t q_acc_2_29 = 0.0; + real_t q_acc_3_0 = 0.0; + real_t q_acc_3_1 = 0.0; + real_t q_acc_3_2 = 0.0; + real_t q_acc_3_3 = 0.0; + real_t q_acc_3_4 = 0.0; + real_t q_acc_3_5 = 0.0; + real_t q_acc_3_6 = 0.0; + real_t q_acc_3_7 = 0.0; + real_t q_acc_3_8 = 0.0; + real_t q_acc_3_9 = 0.0; + real_t q_acc_3_10 = 0.0; + real_t q_acc_3_11 = 0.0; + real_t q_acc_3_12 = 0.0; + real_t q_acc_3_13 = 0.0; + real_t q_acc_3_14 = 0.0; + real_t q_acc_3_15 = 0.0; + real_t q_acc_3_16 = 0.0; + real_t q_acc_3_17 = 0.0; + real_t q_acc_3_18 = 0.0; + real_t q_acc_3_19 = 0.0; + real_t q_acc_3_20 = 0.0; + real_t q_acc_3_21 = 0.0; + real_t q_acc_3_22 = 0.0; + real_t q_acc_3_23 = 0.0; + real_t q_acc_3_24 = 0.0; + real_t q_acc_3_25 = 0.0; + real_t q_acc_3_26 = 0.0; + real_t q_acc_3_27 = 0.0; + real_t q_acc_3_28 = 0.0; + real_t q_acc_3_29 = 0.0; + for (int64_t q = 0; q < 5; q += 1) + { + const real_t tmp_qloop_18 = p_affine_0_0 + (-p_affine_0_0 + p_affine_1_0)*_data_q_p_0[q] + (-p_affine_0_0 + p_affine_2_0)*_data_q_p_1[q] + (-p_affine_0_0 + p_affine_3_0)*_data_q_p_2[q]; + const real_t tmp_qloop_19 = (tmp_qloop_18*tmp_qloop_18); + const real_t tmp_qloop_22 = p_affine_0_2 + (-p_affine_0_2 + p_affine_1_2)*_data_q_p_0[q] + (-p_affine_0_2 + p_affine_2_2)*_data_q_p_1[q] + (-p_affine_0_2 + p_affine_3_2)*_data_q_p_2[q]; + const real_t tmp_qloop_23 = -tmp_qloop_12 - tmp_qloop_22; + const real_t tmp_qloop_24 = -tmp_qloop_1 - tmp_qloop_18; + const real_t tmp_qloop_25 = p_affine_0_1 + (-p_affine_0_1 + p_affine_1_1)*_data_q_p_0[q] + (-p_affine_0_1 + p_affine_2_1)*_data_q_p_1[q] + (-p_affine_0_1 + p_affine_3_1)*_data_q_p_2[q]; + const real_t tmp_qloop_26 = -tmp_qloop_25 - tmp_qloop_8; + const real_t tmp_qloop_27 = tmp_qloop_21*(tmp_qloop_10*tmp_qloop_23*tmp_qloop_9 + tmp_qloop_13*tmp_qloop_24*tmp_qloop_6 - tmp_qloop_14*tmp_qloop_24 - tmp_qloop_15*tmp_qloop_26 + tmp_qloop_2*tmp_qloop_26*tmp_qloop_3 - tmp_qloop_23*tmp_qloop_7); + const real_t tmp_qloop_28 = tmp_qloop_20 - tmp_qloop_27; + const real_t tmp_qloop_29 = (tmp_qloop_25*tmp_qloop_25); + const real_t tmp_qloop_30 = (tmp_qloop_22*tmp_qloop_22); + const real_t tmp_qloop_31 = tmp_qloop_19 + tmp_qloop_29 + tmp_qloop_30; + const real_t tmp_qloop_34 = tmp_qloop_18*tmp_qloop_33; + const real_t tmp_qloop_35 = tmp_qloop_28 + tmp_qloop_34; + const real_t tmp_qloop_36 = tmp_qloop_17*(-tmp_qloop_19*tmp_qloop_28 + tmp_qloop_31*tmp_qloop_35); + const real_t tmp_qloop_37 = pow(tmp_qloop_31, -1.5000000000000000); + const real_t tmp_qloop_38 = tmp_qloop_37*1.0; + const real_t tmp_qloop_41 = tmp_qloop_31*tmp_qloop_40; + const real_t tmp_qloop_42 = -tmp_qloop_25*tmp_qloop_28 - tmp_qloop_41; + const real_t tmp_qloop_43 = tmp_qloop_17*tmp_qloop_38; + const real_t tmp_qloop_44 = tmp_qloop_42*tmp_qloop_43; + const real_t tmp_qloop_47 = -tmp_qloop_28; + const real_t tmp_qloop_48 = tmp_qloop_22*tmp_qloop_47; + const real_t tmp_qloop_49 = tmp_qloop_31*tmp_qloop_46 + tmp_qloop_48; + const real_t tmp_qloop_50 = tmp_qloop_43*tmp_qloop_49; + const real_t tmp_qloop_51 = tmp_qloop_18*tmp_qloop_47 + tmp_qloop_31*tmp_qloop_33; + const real_t tmp_qloop_52 = tmp_qloop_43*tmp_qloop_51; + const real_t tmp_qloop_54 = tmp_qloop_25*tmp_qloop_53 + tmp_qloop_28; + const real_t tmp_qloop_55 = tmp_qloop_29*tmp_qloop_47 + tmp_qloop_31*tmp_qloop_54; + const real_t tmp_qloop_56 = tmp_qloop_22*tmp_qloop_46; + const real_t tmp_qloop_57 = tmp_qloop_28 + tmp_qloop_56; + const real_t tmp_qloop_58 = tmp_qloop_30*tmp_qloop_47 + tmp_qloop_31*tmp_qloop_57; + const real_t tmp_qloop_66 = pow(tmp_qloop_31, -2.5000000000000000)*3.0; + const real_t tmp_qloop_67 = tmp_qloop_36*tmp_qloop_66; + const real_t tmp_qloop_69 = tmp_qloop_18*2.0; + const real_t tmp_qloop_70 = -tmp_qloop_41; + const real_t tmp_qloop_71 = tmp_qloop_35*2.0; + const real_t tmp_qloop_73 = -tmp_qloop_31*tmp_qloop_72; + const real_t tmp_qloop_74 = -tmp_qloop_20 + tmp_qloop_27; + const real_t tmp_qloop_75 = tmp_qloop_18*tmp_qloop_68 + tmp_qloop_34*2.0 + tmp_qloop_74; + const real_t tmp_qloop_76 = tmp_qloop_17*tmp_qloop_66; + const real_t tmp_qloop_77 = tmp_qloop_51*tmp_qloop_76; + const real_t tmp_qloop_78 = tmp_qloop_25*tmp_qloop_77; + const real_t tmp_qloop_79 = tmp_qloop_25*2.0; + const real_t tmp_qloop_80 = tmp_qloop_18*tmp_qloop_40 + tmp_qloop_33*tmp_qloop_79; + const real_t tmp_qloop_81 = tmp_qloop_25*tmp_qloop_43; + const real_t tmp_qloop_82 = tmp_qloop_22*2.0; + const real_t tmp_qloop_83 = tmp_qloop_18*tmp_qloop_72 + tmp_qloop_33*tmp_qloop_82; + const real_t tmp_qloop_84 = -tmp_qloop_22*tmp_qloop_78; + const real_t tmp_qloop_85 = tmp_qloop_18*tmp_qloop_22; + const real_t tmp_qloop_86 = tmp_qloop_22*tmp_qloop_43; + const real_t tmp_qloop_87 = tmp_qloop_21*tmp_qloop_25*tmp_qloop_32 - tmp_qloop_40*tmp_qloop_69; + const real_t tmp_qloop_88 = tmp_qloop_18*tmp_qloop_43; + const real_t tmp_qloop_89 = tmp_qloop_42*tmp_qloop_76; + const real_t tmp_qloop_90 = -tmp_qloop_25*tmp_qloop_40 - tmp_qloop_28; + const real_t tmp_qloop_91 = tmp_qloop_25*tmp_qloop_89; + const real_t tmp_qloop_92 = tmp_qloop_25*tmp_qloop_72 - tmp_qloop_40*tmp_qloop_82; + const real_t tmp_qloop_93 = tmp_qloop_85*tmp_qloop_89; + const real_t tmp_qloop_94 = -tmp_qloop_31*tmp_qloop_68; + const real_t tmp_qloop_95 = tmp_qloop_55*tmp_qloop_76; + const real_t tmp_qloop_96 = tmp_qloop_22*tmp_qloop_68 + tmp_qloop_46*tmp_qloop_69; + const real_t tmp_qloop_97 = tmp_qloop_49*tmp_qloop_76; + const real_t tmp_qloop_98 = tmp_qloop_22*tmp_qloop_40 + tmp_qloop_46*tmp_qloop_79; + const real_t tmp_qloop_99 = tmp_qloop_25*tmp_qloop_97; + const real_t tmp_qloop_100 = tmp_qloop_18*tmp_qloop_99; + const real_t tmp_qloop_101 = tmp_qloop_22*tmp_qloop_72 + tmp_qloop_56*2.0 + tmp_qloop_74; + const real_t tmp_qloop_102 = tmp_qloop_58*tmp_qloop_76; + const real_t tmp_qloop_103 = (_data_q_p_0[q]*_data_q_p_0[q]); + const real_t tmp_qloop_104 = tmp_qloop_103*2.0; + const real_t tmp_qloop_105 = (_data_q_p_1[q]*_data_q_p_1[q]); + const real_t tmp_qloop_106 = tmp_qloop_105*2.0; + const real_t tmp_qloop_107 = (_data_q_p_2[q]*_data_q_p_2[q]); + const real_t tmp_qloop_108 = tmp_qloop_107*2.0; + const real_t tmp_qloop_109 = 4.0*_data_q_p_1[q]; + const real_t tmp_qloop_110 = tmp_qloop_109*_data_q_p_2[q]; + const real_t tmp_qloop_111 = 4.0*_data_q_p_0[q]; + const real_t tmp_qloop_112 = tmp_qloop_111*_data_q_p_1[q]; + const real_t tmp_qloop_113 = tmp_qloop_111*_data_q_p_2[q]; + const real_t tmp_qloop_114 = tmp_qloop_112 + tmp_qloop_113; + const real_t tmp_qloop_115 = tmp_qloop_104 + tmp_qloop_106 + tmp_qloop_108 + tmp_qloop_110 + tmp_qloop_114 - 3.0*_data_q_p_0[q] - 3.0*_data_q_p_1[q] - 3.0*_data_q_p_2[q] + 1.0; + const real_t tmp_qloop_116 = 4.0*_data_q_p_2[q]; + const real_t tmp_qloop_117 = tmp_qloop_109 + tmp_qloop_116; + const real_t tmp_qloop_118 = -rho_dof_8*tmp_qloop_109; + const real_t tmp_qloop_119 = rho_dof_0*(tmp_qloop_111 + tmp_qloop_117 - 3.0); + const real_t tmp_qloop_120 = -rho_dof_7*tmp_qloop_116 + tmp_qloop_119; + const real_t tmp_qloop_121 = rho_dof_1*(tmp_qloop_111 - 1.0) + rho_dof_5*tmp_qloop_116 + rho_dof_6*tmp_qloop_109 + rho_dof_9*(-tmp_qloop_117 - 8.0*_data_q_p_0[q] + 4.0) + tmp_qloop_118 + tmp_qloop_120; + const real_t tmp_qloop_122 = tmp_qloop_111 - 4.0; + const real_t tmp_qloop_123 = -rho_dof_9*tmp_qloop_111; + const real_t tmp_qloop_124 = rho_dof_2*(tmp_qloop_109 - 1.0) + rho_dof_4*tmp_qloop_116 + rho_dof_6*tmp_qloop_111 + rho_dof_8*(-tmp_qloop_116 - tmp_qloop_122 - 8.0*_data_q_p_1[q]) + tmp_qloop_120 + tmp_qloop_123; + const real_t tmp_qloop_125 = rho_dof_3*(tmp_qloop_116 - 1.0) + rho_dof_4*tmp_qloop_109 + rho_dof_5*tmp_qloop_111 + rho_dof_7*(-tmp_qloop_109 - tmp_qloop_122 - 8.0*_data_q_p_2[q]) + tmp_qloop_118 + tmp_qloop_119 + tmp_qloop_123; + const real_t tmp_qloop_127 = tmp_qloop_104 - _data_q_p_0[q]; + const real_t tmp_qloop_128 = tmp_qloop_106 - _data_q_p_1[q]; + const real_t tmp_qloop_129 = tmp_qloop_108 - _data_q_p_2[q]; + const real_t tmp_qloop_130 = tmp_qloop_107*-4.0 - tmp_qloop_110 - tmp_qloop_113 + tmp_qloop_116; + const real_t tmp_qloop_131 = tmp_qloop_105*4.0; + const real_t tmp_qloop_132 = tmp_qloop_109 - tmp_qloop_110 - tmp_qloop_112 - tmp_qloop_131; + const real_t tmp_qloop_133 = tmp_qloop_103*4.0; + const real_t tmp_qloop_134 = tmp_qloop_111 - tmp_qloop_114 - tmp_qloop_133; + const real_t tmp_qloop_158 = tmp_qloop_105*tmp_qloop_111; + const real_t jac_blending_0_0 = tmp_qloop_36*tmp_qloop_38; + const real_t jac_blending_0_1 = tmp_qloop_18*tmp_qloop_44; + const real_t jac_blending_0_2 = tmp_qloop_18*tmp_qloop_50; + const real_t jac_blending_1_0 = tmp_qloop_25*tmp_qloop_52; + const real_t jac_blending_1_1 = tmp_qloop_43*tmp_qloop_55; + const real_t tmp_qloop_63 = jac_blending_0_2*jac_blending_1_1; + const real_t jac_blending_1_2 = tmp_qloop_25*tmp_qloop_50; + const real_t tmp_qloop_60 = jac_blending_0_1*jac_blending_1_2; + const real_t jac_blending_2_0 = tmp_qloop_22*tmp_qloop_52; + const real_t jac_blending_2_1 = tmp_qloop_22*tmp_qloop_44; + const real_t tmp_qloop_61 = jac_blending_1_2*jac_blending_2_1; + const real_t jac_blending_2_2 = tmp_qloop_43*tmp_qloop_58; + const real_t tmp_qloop_59 = jac_blending_1_1*jac_blending_2_2; + const real_t tmp_qloop_62 = jac_blending_0_1*jac_blending_2_2; + const real_t tmp_qloop_64 = jac_blending_0_0*tmp_qloop_59 - jac_blending_0_0*tmp_qloop_61 + jac_blending_0_2*jac_blending_1_0*jac_blending_2_1 - jac_blending_1_0*tmp_qloop_62 + jac_blending_2_0*tmp_qloop_60 - jac_blending_2_0*tmp_qloop_63; + const real_t tmp_qloop_65 = 1.0 / (tmp_qloop_64); + const real_t abs_det_jac_blending = tmp_qloop_64; + const real_t tmp_qloop_135 = abs_det_jac_affine_GREEN_DOWN*abs_det_jac_blending*1.0 / (rho_dof_0*tmp_qloop_115 + rho_dof_1*tmp_qloop_127 + rho_dof_2*tmp_qloop_128 + rho_dof_3*tmp_qloop_129 + rho_dof_4*tmp_qloop_110 + rho_dof_5*tmp_qloop_113 + rho_dof_6*tmp_qloop_112 + rho_dof_7*tmp_qloop_130 + rho_dof_8*tmp_qloop_132 + rho_dof_9*tmp_qloop_134)*_data_q_w[q]; + const real_t tmp_qloop_136 = tmp_qloop_135*(1.0 - _data_q_p_0[q] - _data_q_p_1[q] - _data_q_p_2[q]); + const real_t tmp_qloop_142 = tmp_qloop_135*_data_q_p_0[q]; + const real_t tmp_qloop_144 = tmp_qloop_135*_data_q_p_2[q]; + const real_t tmp_qloop_147 = tmp_qloop_135*_data_q_p_1[q]; + const real_t tmp_qloop_159 = tmp_qloop_135*tmp_qloop_158; + const real_t tmp_qloop_161 = tmp_qloop_107*tmp_qloop_135; + const real_t jac_blending_inv_0_0 = tmp_qloop_65*(tmp_qloop_59 - tmp_qloop_61); + const real_t jac_blending_inv_0_1 = tmp_qloop_65*(jac_blending_0_2*jac_blending_2_1 - tmp_qloop_62); + const real_t jac_blending_inv_0_2 = tmp_qloop_65*(tmp_qloop_60 - tmp_qloop_63); + const real_t jac_blending_inv_1_0 = tmp_qloop_65*(-jac_blending_1_0*jac_blending_2_2 + jac_blending_1_2*jac_blending_2_0); + const real_t jac_blending_inv_1_1 = tmp_qloop_65*(jac_blending_0_0*jac_blending_2_2 - jac_blending_0_2*jac_blending_2_0); + const real_t jac_blending_inv_1_2 = tmp_qloop_65*(-jac_blending_0_0*jac_blending_1_2 + jac_blending_0_2*jac_blending_1_0); + const real_t jac_blending_inv_2_0 = tmp_qloop_65*(jac_blending_1_0*jac_blending_2_1 - jac_blending_1_1*jac_blending_2_0); + const real_t tmp_qloop_126 = tmp_qloop_121*(jac_affine_inv_0_0_GREEN_DOWN*jac_blending_inv_0_0 + jac_affine_inv_0_1_GREEN_DOWN*jac_blending_inv_1_0 + jac_affine_inv_0_2_GREEN_DOWN*jac_blending_inv_2_0) + tmp_qloop_124*(jac_affine_inv_1_0_GREEN_DOWN*jac_blending_inv_0_0 + jac_affine_inv_1_1_GREEN_DOWN*jac_blending_inv_1_0 + jac_affine_inv_1_2_GREEN_DOWN*jac_blending_inv_2_0) + tmp_qloop_125*(jac_affine_inv_2_0_GREEN_DOWN*jac_blending_inv_0_0 + jac_affine_inv_2_1_GREEN_DOWN*jac_blending_inv_1_0 + jac_affine_inv_2_2_GREEN_DOWN*jac_blending_inv_2_0); + const real_t tmp_qloop_137 = tmp_qloop_126*tmp_qloop_136; + const real_t tmp_qloop_143 = tmp_qloop_126*tmp_qloop_142; + const real_t tmp_qloop_145 = tmp_qloop_126*tmp_qloop_144; + const real_t tmp_qloop_146 = tmp_qloop_112*tmp_qloop_145; + const real_t tmp_qloop_148 = tmp_qloop_126*tmp_qloop_147; + const real_t tmp_qloop_157 = tmp_qloop_126*tmp_qloop_135; + const real_t tmp_qloop_160 = tmp_qloop_107*tmp_qloop_157; + const real_t jac_blending_inv_2_1 = tmp_qloop_65*(-jac_blending_0_0*jac_blending_2_1 + jac_blending_0_1*jac_blending_2_0); + const real_t tmp_qloop_138 = tmp_qloop_121*(jac_affine_inv_0_0_GREEN_DOWN*jac_blending_inv_0_1 + jac_affine_inv_0_1_GREEN_DOWN*jac_blending_inv_1_1 + jac_affine_inv_0_2_GREEN_DOWN*jac_blending_inv_2_1) + tmp_qloop_124*(jac_affine_inv_1_0_GREEN_DOWN*jac_blending_inv_0_1 + jac_affine_inv_1_1_GREEN_DOWN*jac_blending_inv_1_1 + jac_affine_inv_1_2_GREEN_DOWN*jac_blending_inv_2_1) + tmp_qloop_125*(jac_affine_inv_2_0_GREEN_DOWN*jac_blending_inv_0_1 + jac_affine_inv_2_1_GREEN_DOWN*jac_blending_inv_1_1 + jac_affine_inv_2_2_GREEN_DOWN*jac_blending_inv_2_1); + const real_t tmp_qloop_139 = tmp_qloop_136*tmp_qloop_138; + const real_t tmp_qloop_149 = tmp_qloop_138*tmp_qloop_142; + const real_t tmp_qloop_150 = tmp_qloop_138*tmp_qloop_144; + const real_t tmp_qloop_151 = tmp_qloop_112*tmp_qloop_150; + const real_t tmp_qloop_152 = tmp_qloop_138*tmp_qloop_147; + const real_t tmp_qloop_162 = tmp_qloop_138*tmp_qloop_161; + const real_t jac_blending_inv_2_2 = tmp_qloop_65*(jac_blending_0_0*jac_blending_1_1 - jac_blending_0_1*jac_blending_1_0); + const real_t tmp_qloop_140 = tmp_qloop_121*(jac_affine_inv_0_0_GREEN_DOWN*jac_blending_inv_0_2 + jac_affine_inv_0_1_GREEN_DOWN*jac_blending_inv_1_2 + jac_affine_inv_0_2_GREEN_DOWN*jac_blending_inv_2_2) + tmp_qloop_124*(jac_affine_inv_1_0_GREEN_DOWN*jac_blending_inv_0_2 + jac_affine_inv_1_1_GREEN_DOWN*jac_blending_inv_1_2 + jac_affine_inv_1_2_GREEN_DOWN*jac_blending_inv_2_2) + tmp_qloop_125*(jac_affine_inv_2_0_GREEN_DOWN*jac_blending_inv_0_2 + jac_affine_inv_2_1_GREEN_DOWN*jac_blending_inv_1_2 + jac_affine_inv_2_2_GREEN_DOWN*jac_blending_inv_2_2); + const real_t tmp_qloop_141 = tmp_qloop_136*tmp_qloop_140; + const real_t tmp_qloop_153 = tmp_qloop_140*tmp_qloop_142; + const real_t tmp_qloop_154 = tmp_qloop_140*tmp_qloop_144; + const real_t tmp_qloop_155 = tmp_qloop_112*tmp_qloop_154; + const real_t tmp_qloop_156 = tmp_qloop_140*tmp_qloop_147; + const real_t tmp_qloop_163 = tmp_qloop_140*tmp_qloop_161; + const real_t hessian_blending_0_0_0 = tmp_qloop_17*tmp_qloop_37*(tmp_qloop_19*tmp_qloop_68 - tmp_qloop_28*tmp_qloop_69 + tmp_qloop_31*(tmp_qloop_33 - tmp_qloop_68) + tmp_qloop_35*tmp_qloop_69)*1.0 - tmp_qloop_18*tmp_qloop_67; + const real_t hessian_blending_1_0_0 = tmp_qloop_17*tmp_qloop_37*(tmp_qloop_19*tmp_qloop_40 + tmp_qloop_25*tmp_qloop_71 + tmp_qloop_70)*1.0 - tmp_qloop_25*tmp_qloop_67; + const real_t hessian_blending_2_0_0 = tmp_qloop_17*tmp_qloop_37*(tmp_qloop_19*tmp_qloop_72 + tmp_qloop_22*tmp_qloop_71 + tmp_qloop_73)*1.0 - tmp_qloop_22*tmp_qloop_67; + const real_t hessian_blending_0_0_1 = tmp_qloop_17*tmp_qloop_25*tmp_qloop_37*tmp_qloop_75*1.0 - tmp_qloop_18*tmp_qloop_78; + const real_t hessian_blending_1_0_1 = -tmp_qloop_29*tmp_qloop_77 + tmp_qloop_52 + tmp_qloop_80*tmp_qloop_81; + const real_t hessian_blending_2_0_1 = tmp_qloop_81*tmp_qloop_83 + tmp_qloop_84; + const real_t hessian_blending_0_0_2 = tmp_qloop_17*tmp_qloop_22*tmp_qloop_37*tmp_qloop_75*1.0 - tmp_qloop_77*tmp_qloop_85; + const real_t hessian_blending_1_0_2 = tmp_qloop_80*tmp_qloop_86 + tmp_qloop_84; + const real_t hessian_blending_2_0_2 = -tmp_qloop_30*tmp_qloop_77 + tmp_qloop_52 + tmp_qloop_83*tmp_qloop_86; + const real_t hessian_blending_0_1_0 = -tmp_qloop_19*tmp_qloop_89 + tmp_qloop_44 + tmp_qloop_87*tmp_qloop_88; + const real_t hessian_blending_1_1_0 = tmp_qloop_17*tmp_qloop_18*tmp_qloop_37*tmp_qloop_90*1.0 - tmp_qloop_18*tmp_qloop_91; + const real_t hessian_blending_2_1_0 = tmp_qloop_88*tmp_qloop_92 - tmp_qloop_93; + const real_t hessian_blending_0_1_1 = tmp_qloop_17*tmp_qloop_37*(tmp_qloop_29*tmp_qloop_68 + tmp_qloop_54*tmp_qloop_69 + tmp_qloop_94)*1.0 - tmp_qloop_18*tmp_qloop_95; + const real_t hessian_blending_1_1_1 = tmp_qloop_17*tmp_qloop_37*(tmp_qloop_29*tmp_qloop_40 + tmp_qloop_31*(-tmp_qloop_40 + tmp_qloop_53) + tmp_qloop_47*tmp_qloop_79 + tmp_qloop_54*tmp_qloop_79)*1.0 - tmp_qloop_25*tmp_qloop_95; + const real_t hessian_blending_2_1_1 = tmp_qloop_17*tmp_qloop_37*(tmp_qloop_29*tmp_qloop_72 + tmp_qloop_54*tmp_qloop_82 + tmp_qloop_73)*1.0 - tmp_qloop_22*tmp_qloop_95; + const real_t hessian_blending_0_1_2 = tmp_qloop_17*tmp_qloop_22*tmp_qloop_37*tmp_qloop_87*1.0 - tmp_qloop_93; + const real_t hessian_blending_1_1_2 = tmp_qloop_17*tmp_qloop_22*tmp_qloop_37*tmp_qloop_90*1.0 - tmp_qloop_22*tmp_qloop_91; + const real_t hessian_blending_2_1_2 = -tmp_qloop_30*tmp_qloop_89 + tmp_qloop_44 + tmp_qloop_86*tmp_qloop_92; + const real_t hessian_blending_0_2_0 = -tmp_qloop_19*tmp_qloop_97 + tmp_qloop_50 + tmp_qloop_88*tmp_qloop_96; + const real_t hessian_blending_1_2_0 = -tmp_qloop_100 + tmp_qloop_17*tmp_qloop_18*tmp_qloop_37*tmp_qloop_98*1.0; + const real_t hessian_blending_2_2_0 = tmp_qloop_101*tmp_qloop_17*tmp_qloop_18*tmp_qloop_37*1.0 - tmp_qloop_85*tmp_qloop_97; + const real_t hessian_blending_0_2_1 = -tmp_qloop_100 + tmp_qloop_17*tmp_qloop_25*tmp_qloop_37*tmp_qloop_96*1.0; + const real_t hessian_blending_1_2_1 = -tmp_qloop_29*tmp_qloop_97 + tmp_qloop_50 + tmp_qloop_81*tmp_qloop_98; + const real_t hessian_blending_2_2_1 = tmp_qloop_101*tmp_qloop_17*tmp_qloop_25*tmp_qloop_37*1.0 - tmp_qloop_22*tmp_qloop_99; + const real_t hessian_blending_0_2_2 = -tmp_qloop_102*tmp_qloop_18 + tmp_qloop_17*tmp_qloop_37*(tmp_qloop_30*tmp_qloop_68 + tmp_qloop_57*tmp_qloop_69 + tmp_qloop_94)*1.0; + const real_t hessian_blending_1_2_2 = -tmp_qloop_102*tmp_qloop_25 + tmp_qloop_17*tmp_qloop_37*(tmp_qloop_30*tmp_qloop_40 + tmp_qloop_57*tmp_qloop_79 + tmp_qloop_70)*1.0; + const real_t hessian_blending_2_2_2 = -tmp_qloop_102*tmp_qloop_22 + tmp_qloop_17*tmp_qloop_37*(tmp_qloop_30*tmp_qloop_72 + tmp_qloop_31*(tmp_qloop_46 - tmp_qloop_72) + tmp_qloop_48*2.0 + tmp_qloop_57*tmp_qloop_82)*1.0; + const real_t q_tmp_0_0 = tmp_qloop_115*tmp_qloop_137; + const real_t q_tmp_0_1 = tmp_qloop_127*tmp_qloop_137; + const real_t q_tmp_0_2 = tmp_qloop_128*tmp_qloop_137; + const real_t q_tmp_0_3 = tmp_qloop_129*tmp_qloop_137; + const real_t q_tmp_0_4 = tmp_qloop_110*tmp_qloop_137; + const real_t q_tmp_0_5 = tmp_qloop_113*tmp_qloop_137; + const real_t q_tmp_0_6 = tmp_qloop_112*tmp_qloop_137; + const real_t q_tmp_0_7 = tmp_qloop_130*tmp_qloop_137; + const real_t q_tmp_0_8 = tmp_qloop_132*tmp_qloop_137; + const real_t q_tmp_0_9 = tmp_qloop_134*tmp_qloop_137; + const real_t q_tmp_0_10 = tmp_qloop_115*tmp_qloop_139; + const real_t q_tmp_0_11 = tmp_qloop_127*tmp_qloop_139; + const real_t q_tmp_0_12 = tmp_qloop_128*tmp_qloop_139; + const real_t q_tmp_0_13 = tmp_qloop_129*tmp_qloop_139; + const real_t q_tmp_0_14 = tmp_qloop_110*tmp_qloop_139; + const real_t q_tmp_0_15 = tmp_qloop_113*tmp_qloop_139; + const real_t q_tmp_0_16 = tmp_qloop_112*tmp_qloop_139; + const real_t q_tmp_0_17 = tmp_qloop_130*tmp_qloop_139; + const real_t q_tmp_0_18 = tmp_qloop_132*tmp_qloop_139; + const real_t q_tmp_0_19 = tmp_qloop_134*tmp_qloop_139; + const real_t q_tmp_0_20 = tmp_qloop_115*tmp_qloop_141; + const real_t q_tmp_0_21 = tmp_qloop_127*tmp_qloop_141; + const real_t q_tmp_0_22 = tmp_qloop_128*tmp_qloop_141; + const real_t q_tmp_0_23 = tmp_qloop_129*tmp_qloop_141; + const real_t q_tmp_0_24 = tmp_qloop_110*tmp_qloop_141; + const real_t q_tmp_0_25 = tmp_qloop_113*tmp_qloop_141; + const real_t q_tmp_0_26 = tmp_qloop_112*tmp_qloop_141; + const real_t q_tmp_0_27 = tmp_qloop_130*tmp_qloop_141; + const real_t q_tmp_0_28 = tmp_qloop_132*tmp_qloop_141; + const real_t q_tmp_0_29 = tmp_qloop_134*tmp_qloop_141; + const real_t q_tmp_1_0 = tmp_qloop_115*tmp_qloop_143; + const real_t q_tmp_1_1 = tmp_qloop_127*tmp_qloop_143; + const real_t q_tmp_1_2 = tmp_qloop_128*tmp_qloop_143; + const real_t q_tmp_1_3 = tmp_qloop_129*tmp_qloop_143; + const real_t q_tmp_1_4 = tmp_qloop_146; + const real_t q_tmp_1_5 = tmp_qloop_133*tmp_qloop_145; + const real_t q_tmp_1_6 = tmp_qloop_133*tmp_qloop_148; + const real_t q_tmp_1_7 = tmp_qloop_130*tmp_qloop_143; + const real_t q_tmp_1_8 = tmp_qloop_132*tmp_qloop_143; + const real_t q_tmp_1_9 = tmp_qloop_134*tmp_qloop_143; + const real_t q_tmp_1_10 = tmp_qloop_115*tmp_qloop_149; + const real_t q_tmp_1_11 = tmp_qloop_127*tmp_qloop_149; + const real_t q_tmp_1_12 = tmp_qloop_128*tmp_qloop_149; + const real_t q_tmp_1_13 = tmp_qloop_129*tmp_qloop_149; + const real_t q_tmp_1_14 = tmp_qloop_151; + const real_t q_tmp_1_15 = tmp_qloop_133*tmp_qloop_150; + const real_t q_tmp_1_16 = tmp_qloop_133*tmp_qloop_152; + const real_t q_tmp_1_17 = tmp_qloop_130*tmp_qloop_149; + const real_t q_tmp_1_18 = tmp_qloop_132*tmp_qloop_149; + const real_t q_tmp_1_19 = tmp_qloop_134*tmp_qloop_149; + const real_t q_tmp_1_20 = tmp_qloop_115*tmp_qloop_153; + const real_t q_tmp_1_21 = tmp_qloop_127*tmp_qloop_153; + const real_t q_tmp_1_22 = tmp_qloop_128*tmp_qloop_153; + const real_t q_tmp_1_23 = tmp_qloop_129*tmp_qloop_153; + const real_t q_tmp_1_24 = tmp_qloop_155; + const real_t q_tmp_1_25 = tmp_qloop_133*tmp_qloop_154; + const real_t q_tmp_1_26 = tmp_qloop_133*tmp_qloop_156; + const real_t q_tmp_1_27 = tmp_qloop_130*tmp_qloop_153; + const real_t q_tmp_1_28 = tmp_qloop_132*tmp_qloop_153; + const real_t q_tmp_1_29 = tmp_qloop_134*tmp_qloop_153; + const real_t q_tmp_2_0 = tmp_qloop_115*tmp_qloop_148; + const real_t q_tmp_2_1 = tmp_qloop_127*tmp_qloop_148; + const real_t q_tmp_2_2 = tmp_qloop_128*tmp_qloop_148; + const real_t q_tmp_2_3 = tmp_qloop_129*tmp_qloop_148; + const real_t q_tmp_2_4 = tmp_qloop_131*tmp_qloop_145; + const real_t q_tmp_2_5 = tmp_qloop_146; + const real_t q_tmp_2_6 = tmp_qloop_157*tmp_qloop_158; + const real_t q_tmp_2_7 = tmp_qloop_130*tmp_qloop_148; + const real_t q_tmp_2_8 = tmp_qloop_132*tmp_qloop_148; + const real_t q_tmp_2_9 = tmp_qloop_134*tmp_qloop_148; + const real_t q_tmp_2_10 = tmp_qloop_115*tmp_qloop_152; + const real_t q_tmp_2_11 = tmp_qloop_127*tmp_qloop_152; + const real_t q_tmp_2_12 = tmp_qloop_128*tmp_qloop_152; + const real_t q_tmp_2_13 = tmp_qloop_129*tmp_qloop_152; + const real_t q_tmp_2_14 = tmp_qloop_131*tmp_qloop_150; + const real_t q_tmp_2_15 = tmp_qloop_151; + const real_t q_tmp_2_16 = tmp_qloop_138*tmp_qloop_159; + const real_t q_tmp_2_17 = tmp_qloop_130*tmp_qloop_152; + const real_t q_tmp_2_18 = tmp_qloop_132*tmp_qloop_152; + const real_t q_tmp_2_19 = tmp_qloop_134*tmp_qloop_152; + const real_t q_tmp_2_20 = tmp_qloop_115*tmp_qloop_156; + const real_t q_tmp_2_21 = tmp_qloop_127*tmp_qloop_156; + const real_t q_tmp_2_22 = tmp_qloop_128*tmp_qloop_156; + const real_t q_tmp_2_23 = tmp_qloop_129*tmp_qloop_156; + const real_t q_tmp_2_24 = tmp_qloop_131*tmp_qloop_154; + const real_t q_tmp_2_25 = tmp_qloop_155; + const real_t q_tmp_2_26 = tmp_qloop_140*tmp_qloop_159; + const real_t q_tmp_2_27 = tmp_qloop_130*tmp_qloop_156; + const real_t q_tmp_2_28 = tmp_qloop_132*tmp_qloop_156; + const real_t q_tmp_2_29 = tmp_qloop_134*tmp_qloop_156; + const real_t q_tmp_3_0 = tmp_qloop_115*tmp_qloop_145; + const real_t q_tmp_3_1 = tmp_qloop_127*tmp_qloop_145; + const real_t q_tmp_3_2 = tmp_qloop_128*tmp_qloop_145; + const real_t q_tmp_3_3 = tmp_qloop_129*tmp_qloop_145; + const real_t q_tmp_3_4 = tmp_qloop_109*tmp_qloop_160; + const real_t q_tmp_3_5 = tmp_qloop_111*tmp_qloop_160; + const real_t q_tmp_3_6 = tmp_qloop_146; + const real_t q_tmp_3_7 = tmp_qloop_130*tmp_qloop_145; + const real_t q_tmp_3_8 = tmp_qloop_132*tmp_qloop_145; + const real_t q_tmp_3_9 = tmp_qloop_134*tmp_qloop_145; + const real_t q_tmp_3_10 = tmp_qloop_115*tmp_qloop_150; + const real_t q_tmp_3_11 = tmp_qloop_127*tmp_qloop_150; + const real_t q_tmp_3_12 = tmp_qloop_128*tmp_qloop_150; + const real_t q_tmp_3_13 = tmp_qloop_129*tmp_qloop_150; + const real_t q_tmp_3_14 = tmp_qloop_109*tmp_qloop_162; + const real_t q_tmp_3_15 = tmp_qloop_111*tmp_qloop_162; + const real_t q_tmp_3_16 = tmp_qloop_151; + const real_t q_tmp_3_17 = tmp_qloop_130*tmp_qloop_150; + const real_t q_tmp_3_18 = tmp_qloop_132*tmp_qloop_150; + const real_t q_tmp_3_19 = tmp_qloop_134*tmp_qloop_150; + const real_t q_tmp_3_20 = tmp_qloop_115*tmp_qloop_154; + const real_t q_tmp_3_21 = tmp_qloop_127*tmp_qloop_154; + const real_t q_tmp_3_22 = tmp_qloop_128*tmp_qloop_154; + const real_t q_tmp_3_23 = tmp_qloop_129*tmp_qloop_154; + const real_t q_tmp_3_24 = tmp_qloop_109*tmp_qloop_163; + const real_t q_tmp_3_25 = tmp_qloop_111*tmp_qloop_163; + const real_t q_tmp_3_26 = tmp_qloop_155; + const real_t q_tmp_3_27 = tmp_qloop_130*tmp_qloop_154; + const real_t q_tmp_3_28 = tmp_qloop_132*tmp_qloop_154; + const real_t q_tmp_3_29 = tmp_qloop_134*tmp_qloop_154; + q_acc_0_0 = q_acc_0_0 + q_tmp_0_0; + q_acc_0_1 = q_acc_0_1 + q_tmp_0_1; + q_acc_0_2 = q_acc_0_2 + q_tmp_0_2; + q_acc_0_3 = q_acc_0_3 + q_tmp_0_3; + q_acc_0_4 = q_acc_0_4 + q_tmp_0_4; + q_acc_0_5 = q_acc_0_5 + q_tmp_0_5; + q_acc_0_6 = q_acc_0_6 + q_tmp_0_6; + q_acc_0_7 = q_acc_0_7 + q_tmp_0_7; + q_acc_0_8 = q_acc_0_8 + q_tmp_0_8; + q_acc_0_9 = q_acc_0_9 + q_tmp_0_9; + q_acc_0_10 = q_acc_0_10 + q_tmp_0_10; + q_acc_0_11 = q_acc_0_11 + q_tmp_0_11; + q_acc_0_12 = q_acc_0_12 + q_tmp_0_12; + q_acc_0_13 = q_acc_0_13 + q_tmp_0_13; + q_acc_0_14 = q_acc_0_14 + q_tmp_0_14; + q_acc_0_15 = q_acc_0_15 + q_tmp_0_15; + q_acc_0_16 = q_acc_0_16 + q_tmp_0_16; + q_acc_0_17 = q_acc_0_17 + q_tmp_0_17; + q_acc_0_18 = q_acc_0_18 + q_tmp_0_18; + q_acc_0_19 = q_acc_0_19 + q_tmp_0_19; + q_acc_0_20 = q_acc_0_20 + q_tmp_0_20; + q_acc_0_21 = q_acc_0_21 + q_tmp_0_21; + q_acc_0_22 = q_acc_0_22 + q_tmp_0_22; + q_acc_0_23 = q_acc_0_23 + q_tmp_0_23; + q_acc_0_24 = q_acc_0_24 + q_tmp_0_24; + q_acc_0_25 = q_acc_0_25 + q_tmp_0_25; + q_acc_0_26 = q_acc_0_26 + q_tmp_0_26; + q_acc_0_27 = q_acc_0_27 + q_tmp_0_27; + q_acc_0_28 = q_acc_0_28 + q_tmp_0_28; + q_acc_0_29 = q_acc_0_29 + q_tmp_0_29; + q_acc_1_0 = q_acc_1_0 + q_tmp_1_0; + q_acc_1_1 = q_acc_1_1 + q_tmp_1_1; + q_acc_1_2 = q_acc_1_2 + q_tmp_1_2; + q_acc_1_3 = q_acc_1_3 + q_tmp_1_3; + q_acc_1_4 = q_acc_1_4 + q_tmp_1_4; + q_acc_1_5 = q_acc_1_5 + q_tmp_1_5; + q_acc_1_6 = q_acc_1_6 + q_tmp_1_6; + q_acc_1_7 = q_acc_1_7 + q_tmp_1_7; + q_acc_1_8 = q_acc_1_8 + q_tmp_1_8; + q_acc_1_9 = q_acc_1_9 + q_tmp_1_9; + q_acc_1_10 = q_acc_1_10 + q_tmp_1_10; + q_acc_1_11 = q_acc_1_11 + q_tmp_1_11; + q_acc_1_12 = q_acc_1_12 + q_tmp_1_12; + q_acc_1_13 = q_acc_1_13 + q_tmp_1_13; + q_acc_1_14 = q_acc_1_14 + q_tmp_1_14; + q_acc_1_15 = q_acc_1_15 + q_tmp_1_15; + q_acc_1_16 = q_acc_1_16 + q_tmp_1_16; + q_acc_1_17 = q_acc_1_17 + q_tmp_1_17; + q_acc_1_18 = q_acc_1_18 + q_tmp_1_18; + q_acc_1_19 = q_acc_1_19 + q_tmp_1_19; + q_acc_1_20 = q_acc_1_20 + q_tmp_1_20; + q_acc_1_21 = q_acc_1_21 + q_tmp_1_21; + q_acc_1_22 = q_acc_1_22 + q_tmp_1_22; + q_acc_1_23 = q_acc_1_23 + q_tmp_1_23; + q_acc_1_24 = q_acc_1_24 + q_tmp_1_24; + q_acc_1_25 = q_acc_1_25 + q_tmp_1_25; + q_acc_1_26 = q_acc_1_26 + q_tmp_1_26; + q_acc_1_27 = q_acc_1_27 + q_tmp_1_27; + q_acc_1_28 = q_acc_1_28 + q_tmp_1_28; + q_acc_1_29 = q_acc_1_29 + q_tmp_1_29; + q_acc_2_0 = q_acc_2_0 + q_tmp_2_0; + q_acc_2_1 = q_acc_2_1 + q_tmp_2_1; + q_acc_2_2 = q_acc_2_2 + q_tmp_2_2; + q_acc_2_3 = q_acc_2_3 + q_tmp_2_3; + q_acc_2_4 = q_acc_2_4 + q_tmp_2_4; + q_acc_2_5 = q_acc_2_5 + q_tmp_2_5; + q_acc_2_6 = q_acc_2_6 + q_tmp_2_6; + q_acc_2_7 = q_acc_2_7 + q_tmp_2_7; + q_acc_2_8 = q_acc_2_8 + q_tmp_2_8; + q_acc_2_9 = q_acc_2_9 + q_tmp_2_9; + q_acc_2_10 = q_acc_2_10 + q_tmp_2_10; + q_acc_2_11 = q_acc_2_11 + q_tmp_2_11; + q_acc_2_12 = q_acc_2_12 + q_tmp_2_12; + q_acc_2_13 = q_acc_2_13 + q_tmp_2_13; + q_acc_2_14 = q_acc_2_14 + q_tmp_2_14; + q_acc_2_15 = q_acc_2_15 + q_tmp_2_15; + q_acc_2_16 = q_acc_2_16 + q_tmp_2_16; + q_acc_2_17 = q_acc_2_17 + q_tmp_2_17; + q_acc_2_18 = q_acc_2_18 + q_tmp_2_18; + q_acc_2_19 = q_acc_2_19 + q_tmp_2_19; + q_acc_2_20 = q_acc_2_20 + q_tmp_2_20; + q_acc_2_21 = q_acc_2_21 + q_tmp_2_21; + q_acc_2_22 = q_acc_2_22 + q_tmp_2_22; + q_acc_2_23 = q_acc_2_23 + q_tmp_2_23; + q_acc_2_24 = q_acc_2_24 + q_tmp_2_24; + q_acc_2_25 = q_acc_2_25 + q_tmp_2_25; + q_acc_2_26 = q_acc_2_26 + q_tmp_2_26; + q_acc_2_27 = q_acc_2_27 + q_tmp_2_27; + q_acc_2_28 = q_acc_2_28 + q_tmp_2_28; + q_acc_2_29 = q_acc_2_29 + q_tmp_2_29; + q_acc_3_0 = q_acc_3_0 + q_tmp_3_0; + q_acc_3_1 = q_acc_3_1 + q_tmp_3_1; + q_acc_3_2 = q_acc_3_2 + q_tmp_3_2; + q_acc_3_3 = q_acc_3_3 + q_tmp_3_3; + q_acc_3_4 = q_acc_3_4 + q_tmp_3_4; + q_acc_3_5 = q_acc_3_5 + q_tmp_3_5; + q_acc_3_6 = q_acc_3_6 + q_tmp_3_6; + q_acc_3_7 = q_acc_3_7 + q_tmp_3_7; + q_acc_3_8 = q_acc_3_8 + q_tmp_3_8; + q_acc_3_9 = q_acc_3_9 + q_tmp_3_9; + q_acc_3_10 = q_acc_3_10 + q_tmp_3_10; + q_acc_3_11 = q_acc_3_11 + q_tmp_3_11; + q_acc_3_12 = q_acc_3_12 + q_tmp_3_12; + q_acc_3_13 = q_acc_3_13 + q_tmp_3_13; + q_acc_3_14 = q_acc_3_14 + q_tmp_3_14; + q_acc_3_15 = q_acc_3_15 + q_tmp_3_15; + q_acc_3_16 = q_acc_3_16 + q_tmp_3_16; + q_acc_3_17 = q_acc_3_17 + q_tmp_3_17; + q_acc_3_18 = q_acc_3_18 + q_tmp_3_18; + q_acc_3_19 = q_acc_3_19 + q_tmp_3_19; + q_acc_3_20 = q_acc_3_20 + q_tmp_3_20; + q_acc_3_21 = q_acc_3_21 + q_tmp_3_21; + q_acc_3_22 = q_acc_3_22 + q_tmp_3_22; + q_acc_3_23 = q_acc_3_23 + q_tmp_3_23; + q_acc_3_24 = q_acc_3_24 + q_tmp_3_24; + q_acc_3_25 = q_acc_3_25 + q_tmp_3_25; + q_acc_3_26 = q_acc_3_26 + q_tmp_3_26; + q_acc_3_27 = q_acc_3_27 + q_tmp_3_27; + q_acc_3_28 = q_acc_3_28 + q_tmp_3_28; + q_acc_3_29 = q_acc_3_29 + q_tmp_3_29; + } + const real_t elMatVec_0 = q_acc_0_0*src_dof_0 + q_acc_0_1*src_dof_1 + q_acc_0_10*src_dof_10 + q_acc_0_11*src_dof_11 + q_acc_0_12*src_dof_12 + q_acc_0_13*src_dof_13 + q_acc_0_14*src_dof_14 + q_acc_0_15*src_dof_15 + q_acc_0_16*src_dof_16 + q_acc_0_17*src_dof_17 + q_acc_0_18*src_dof_18 + q_acc_0_19*src_dof_19 + q_acc_0_2*src_dof_2 + q_acc_0_20*src_dof_20 + q_acc_0_21*src_dof_21 + q_acc_0_22*src_dof_22 + q_acc_0_23*src_dof_23 + q_acc_0_24*src_dof_24 + q_acc_0_25*src_dof_25 + q_acc_0_26*src_dof_26 + q_acc_0_27*src_dof_27 + q_acc_0_28*src_dof_28 + q_acc_0_29*src_dof_29 + q_acc_0_3*src_dof_3 + q_acc_0_4*src_dof_4 + q_acc_0_5*src_dof_5 + q_acc_0_6*src_dof_6 + q_acc_0_7*src_dof_7 + q_acc_0_8*src_dof_8 + q_acc_0_9*src_dof_9; + const real_t elMatVec_1 = q_acc_1_0*src_dof_0 + q_acc_1_1*src_dof_1 + q_acc_1_10*src_dof_10 + q_acc_1_11*src_dof_11 + q_acc_1_12*src_dof_12 + q_acc_1_13*src_dof_13 + q_acc_1_14*src_dof_14 + q_acc_1_15*src_dof_15 + q_acc_1_16*src_dof_16 + q_acc_1_17*src_dof_17 + q_acc_1_18*src_dof_18 + q_acc_1_19*src_dof_19 + q_acc_1_2*src_dof_2 + q_acc_1_20*src_dof_20 + q_acc_1_21*src_dof_21 + q_acc_1_22*src_dof_22 + q_acc_1_23*src_dof_23 + q_acc_1_24*src_dof_24 + q_acc_1_25*src_dof_25 + q_acc_1_26*src_dof_26 + q_acc_1_27*src_dof_27 + q_acc_1_28*src_dof_28 + q_acc_1_29*src_dof_29 + q_acc_1_3*src_dof_3 + q_acc_1_4*src_dof_4 + q_acc_1_5*src_dof_5 + q_acc_1_6*src_dof_6 + q_acc_1_7*src_dof_7 + q_acc_1_8*src_dof_8 + q_acc_1_9*src_dof_9; + const real_t elMatVec_2 = q_acc_2_0*src_dof_0 + q_acc_2_1*src_dof_1 + q_acc_2_10*src_dof_10 + q_acc_2_11*src_dof_11 + q_acc_2_12*src_dof_12 + q_acc_2_13*src_dof_13 + q_acc_2_14*src_dof_14 + q_acc_2_15*src_dof_15 + q_acc_2_16*src_dof_16 + q_acc_2_17*src_dof_17 + q_acc_2_18*src_dof_18 + q_acc_2_19*src_dof_19 + q_acc_2_2*src_dof_2 + q_acc_2_20*src_dof_20 + q_acc_2_21*src_dof_21 + q_acc_2_22*src_dof_22 + q_acc_2_23*src_dof_23 + q_acc_2_24*src_dof_24 + q_acc_2_25*src_dof_25 + q_acc_2_26*src_dof_26 + q_acc_2_27*src_dof_27 + q_acc_2_28*src_dof_28 + q_acc_2_29*src_dof_29 + q_acc_2_3*src_dof_3 + q_acc_2_4*src_dof_4 + q_acc_2_5*src_dof_5 + q_acc_2_6*src_dof_6 + q_acc_2_7*src_dof_7 + q_acc_2_8*src_dof_8 + q_acc_2_9*src_dof_9; + const real_t elMatVec_3 = q_acc_3_0*src_dof_0 + q_acc_3_1*src_dof_1 + q_acc_3_10*src_dof_10 + q_acc_3_11*src_dof_11 + q_acc_3_12*src_dof_12 + q_acc_3_13*src_dof_13 + q_acc_3_14*src_dof_14 + q_acc_3_15*src_dof_15 + q_acc_3_16*src_dof_16 + q_acc_3_17*src_dof_17 + q_acc_3_18*src_dof_18 + q_acc_3_19*src_dof_19 + q_acc_3_2*src_dof_2 + q_acc_3_20*src_dof_20 + q_acc_3_21*src_dof_21 + q_acc_3_22*src_dof_22 + q_acc_3_23*src_dof_23 + q_acc_3_24*src_dof_24 + q_acc_3_25*src_dof_25 + q_acc_3_26*src_dof_26 + q_acc_3_27*src_dof_27 + q_acc_3_28*src_dof_28 + q_acc_3_29*src_dof_29 + q_acc_3_3*src_dof_3 + q_acc_3_4*src_dof_4 + q_acc_3_5*src_dof_5 + q_acc_3_6*src_dof_6 + q_acc_3_7*src_dof_7 + q_acc_3_8*src_dof_8 + q_acc_3_9*src_dof_9; + _data_dst[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))] = elMatVec_0 + _data_dst[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))]; + _data_dst[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1] = elMatVec_1 + _data_dst[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1]; + _data_dst[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1] = elMatVec_2 + _data_dst[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1]; + _data_dst[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))] = elMatVec_3 + _data_dst[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))]; + } + } + } + } +} +} // namespace operatorgeneration + +} // namespace hyteg diff --git a/operators/grad_rho_by_rho_dot_u/avx/P2VectorToP1ElementwiseGradRhoByRhoDotU_apply_P2VectorToP1ElementwiseGradRhoByRhoDotU_macro_2D.cpp b/operators/grad_rho_by_rho_dot_u/avx/P2VectorToP1ElementwiseGradRhoByRhoDotU_apply_P2VectorToP1ElementwiseGradRhoByRhoDotU_macro_2D.cpp new file mode 100644 index 0000000000000000000000000000000000000000..adea928761758829abaf337d1b8c3d87c23941d6 --- /dev/null +++ b/operators/grad_rho_by_rho_dot_u/avx/P2VectorToP1ElementwiseGradRhoByRhoDotU_apply_P2VectorToP1ElementwiseGradRhoByRhoDotU_macro_2D.cpp @@ -0,0 +1,874 @@ +/* +* Copyright (c) 2017-2024 Nils Kohl, Daniel Bauer, Fabian Böhm. +* +* This file is part of HyTeG +* (see https://i10git.cs.fau.de/hyteg/hyteg). +* +* This program is free software: you can redistribute it and/or modify +* it under the terms of the GNU General Public License as published by +* the Free Software Foundation, either version 3 of the License, or +* (at your option) any later version. +* +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU General Public License for more details. +* +* You should have received a copy of the GNU General Public License +* along with this program. If not, see <http://www.gnu.org/licenses/>. +*/ + +/* +* The entire file was generated with the HyTeG Operator Generator. +* +* Avoid modifying this file. If buggy, consider fixing the generator itself. +*/ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +#include "../P2VectorToP1ElementwiseGradRhoByRhoDotU.hpp" + +#define FUNC_PREFIX + +namespace hyteg { + +namespace operatorgeneration { + +void P2VectorToP1ElementwiseGradRhoByRhoDotU::apply_P2VectorToP1ElementwiseGradRhoByRhoDotU_macro_2D( real_t * RESTRICT _data_dst, real_t * RESTRICT _data_rhoEdge, real_t * RESTRICT _data_rhoVertex, real_t * RESTRICT _data_src_edge_0, real_t * RESTRICT _data_src_edge_1, real_t * RESTRICT _data_src_vertex_0, real_t * RESTRICT _data_src_vertex_1, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +{ + { + const real_t _data_q_w [] = {-0.28125, 0.26041666666666669, 0.26041666666666669, 0.26041666666666669}; + + const real_t _data_q_p_0 [] = {0.33333333333333331, 0.20000000000000001, 0.59999999999999998, 0.20000000000000001}; + + const real_t _data_q_p_1 [] = {0.33333333333333331, 0.59999999999999998, 0.20000000000000001, 0.20000000000000001}; + + const real_t tmp_coords_jac_0_GRAY = 1.0 / (micro_edges_per_macro_edge_float)*1.0; + const real_t p_affine_const_0_0_GRAY = macro_vertex_coord_id_0comp0; + const real_t p_affine_const_0_1_GRAY = macro_vertex_coord_id_0comp1; + const real_t p_affine_const_1_0_GRAY = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0); + const real_t p_affine_const_1_1_GRAY = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1); + const real_t p_affine_const_2_0_GRAY = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0); + const real_t p_affine_const_2_1_GRAY = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1); + const real_t jac_affine_0_0_GRAY = -p_affine_const_0_0_GRAY + p_affine_const_1_0_GRAY; + const real_t jac_affine_0_1_GRAY = -p_affine_const_0_0_GRAY + p_affine_const_2_0_GRAY; + const real_t jac_affine_1_0_GRAY = -p_affine_const_0_1_GRAY + p_affine_const_1_1_GRAY; + const real_t jac_affine_1_1_GRAY = -p_affine_const_0_1_GRAY + p_affine_const_2_1_GRAY; + const real_t tmp_coords_jac_1_GRAY = jac_affine_0_0_GRAY*jac_affine_1_1_GRAY - jac_affine_0_1_GRAY*jac_affine_1_0_GRAY; + const real_t tmp_coords_jac_2_GRAY = 1.0 / (tmp_coords_jac_1_GRAY); + const real_t jac_affine_inv_0_0_GRAY = jac_affine_1_1_GRAY*tmp_coords_jac_2_GRAY; + const real_t jac_affine_inv_0_1_GRAY = -jac_affine_0_1_GRAY*tmp_coords_jac_2_GRAY; + const real_t jac_affine_inv_1_0_GRAY = -jac_affine_1_0_GRAY*tmp_coords_jac_2_GRAY; + const real_t jac_affine_inv_1_1_GRAY = jac_affine_0_0_GRAY*tmp_coords_jac_2_GRAY; + const real_t abs_det_jac_affine_GRAY = abs(tmp_coords_jac_1_GRAY); + { + /* FaceType.GRAY */ + for (int64_t ctr_1 = 0; ctr_1 < micro_edges_per_macro_edge; ctr_1 += 1) + { + for (int64_t ctr_0 = 0; ctr_0 < (int64_t)((-ctr_1 + micro_edges_per_macro_edge) / (4)) * (4); ctr_0 += 4) + { + + const int64_t phantom_ctr_0 = ctr_0; + real_t _data_float_loop_ctr_array_dim_0[4]; + _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0; + _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1; + _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2; + _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3; + real_t _data_float_loop_ctr_array_dim_1[4]; + _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1; + + const __m256d p_affine_0_0 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)); + const __m256d p_affine_0_1 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)); + const __m256d p_affine_1_0 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)); + const __m256d p_affine_1_1 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)); + const __m256d p_affine_2_0 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)); + const __m256d p_affine_2_1 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)); + const __m256d src_dof_0 = _mm256_loadu_pd(& _data_src_vertex_0[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))]); + const __m256d src_dof_1 = _mm256_loadu_pd(& _data_src_vertex_0[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]); + const __m256d src_dof_2 = _mm256_loadu_pd(& _data_src_vertex_0[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]); + const __m256d src_dof_3 = _mm256_loadu_pd(& _data_src_edge_0[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]); + const __m256d src_dof_4 = _mm256_loadu_pd(& _data_src_edge_0[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]); + const __m256d src_dof_5 = _mm256_loadu_pd(& _data_src_edge_0[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))]); + const __m256d src_dof_6 = _mm256_loadu_pd(& _data_src_vertex_1[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))]); + const __m256d src_dof_7 = _mm256_loadu_pd(& _data_src_vertex_1[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]); + const __m256d src_dof_8 = _mm256_loadu_pd(& _data_src_vertex_1[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]); + const __m256d src_dof_9 = _mm256_loadu_pd(& _data_src_edge_1[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]); + const __m256d src_dof_10 = _mm256_loadu_pd(& _data_src_edge_1[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]); + const __m256d src_dof_11 = _mm256_loadu_pd(& _data_src_edge_1[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))]); + const __m256d rho_dof_0 = _mm256_loadu_pd(& _data_rhoVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))]); + const __m256d rho_dof_1 = _mm256_loadu_pd(& _data_rhoVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]); + const __m256d rho_dof_2 = _mm256_loadu_pd(& _data_rhoVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]); + const __m256d rho_dof_3 = _mm256_loadu_pd(& _data_rhoEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]); + const __m256d rho_dof_4 = _mm256_loadu_pd(& _data_rhoEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]); + const __m256d rho_dof_5 = _mm256_loadu_pd(& _data_rhoEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))]); + __m256d q_acc_0_0 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_1 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_2 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_3 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_4 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_5 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_6 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_7 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_8 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_9 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_10 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_11 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_0 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_1 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_2 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_3 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_4 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_5 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_6 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_7 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_8 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_9 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_10 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_11 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_0 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_1 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_2 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_3 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_4 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_5 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_6 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_7 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_8 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_9 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_10 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_11 = _mm256_set_pd(0.0,0.0,0.0,0.0); + for (int64_t q = 0; q < 4; q += 1) + { + const __m256d tmp_qloop_0 = _mm256_mul_pd(_mm256_set_pd(4.0,4.0,4.0,4.0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])); + const __m256d tmp_qloop_1 = _mm256_mul_pd(tmp_qloop_0,_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q])); + const __m256d tmp_qloop_2 = _mm256_mul_pd(_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q]),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])); + const __m256d tmp_qloop_3 = _mm256_mul_pd(tmp_qloop_2,_mm256_set_pd(2.0,2.0,2.0,2.0)); + const __m256d tmp_qloop_4 = _mm256_mul_pd(_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q])); + const __m256d tmp_qloop_5 = _mm256_mul_pd(tmp_qloop_4,_mm256_set_pd(2.0,2.0,2.0,2.0)); + const __m256d tmp_qloop_6 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-3.0,-3.0,-3.0,-3.0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),_mm256_mul_pd(_mm256_set_pd(-3.0,-3.0,-3.0,-3.0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),tmp_qloop_1),tmp_qloop_3),tmp_qloop_5); + const __m256d tmp_qloop_7 = _mm256_mul_pd(_mm256_set_pd(4.0,4.0,4.0,4.0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q])); + const __m256d tmp_qloop_8 = _mm256_mul_pd(rho_dof_0,_mm256_add_pd(_mm256_add_pd(_mm256_set_pd(-3.0,-3.0,-3.0,-3.0),tmp_qloop_0),tmp_qloop_7)); + const __m256d tmp_qloop_9 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(rho_dof_1,_mm256_add_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),tmp_qloop_0)),_mm256_mul_pd(rho_dof_5,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-8.0,-8.0,-8.0,-8.0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),_mm256_mul_pd(tmp_qloop_7,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_set_pd(4.0,4.0,4.0,4.0)))),_mm256_mul_pd(rho_dof_3,tmp_qloop_7)),_mm256_mul_pd(_mm256_mul_pd(rho_dof_4,tmp_qloop_7),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_8); + const __m256d tmp_qloop_10 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(rho_dof_2,_mm256_add_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),tmp_qloop_7)),_mm256_mul_pd(rho_dof_4,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-8.0,-8.0,-8.0,-8.0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q])),_mm256_mul_pd(tmp_qloop_0,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_set_pd(4.0,4.0,4.0,4.0)))),_mm256_mul_pd(rho_dof_3,tmp_qloop_0)),_mm256_mul_pd(_mm256_mul_pd(rho_dof_5,tmp_qloop_0),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_8); + const __m256d tmp_qloop_11 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_9,_mm256_set_pd(jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY)),_mm256_mul_pd(tmp_qloop_10,_mm256_set_pd(jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY))); + const __m256d tmp_qloop_12 = _mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),tmp_qloop_3); + const __m256d tmp_qloop_13 = _mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q])),tmp_qloop_5); + const __m256d tmp_qloop_14 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_4,_mm256_set_pd(-4.0,-4.0,-4.0,-4.0)),_mm256_mul_pd(tmp_qloop_1,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_7); + const __m256d tmp_qloop_15 = _mm256_mul_pd(tmp_qloop_2,_mm256_set_pd(4.0,4.0,4.0,4.0)); + const __m256d tmp_qloop_16 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_1,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_15,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_0); + const __m256d tmp_qloop_17 = _mm256_mul_pd(_mm256_mul_pd(_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(rho_dof_0,tmp_qloop_6),_mm256_mul_pd(rho_dof_1,tmp_qloop_12)),_mm256_mul_pd(rho_dof_2,tmp_qloop_13)),_mm256_mul_pd(rho_dof_3,tmp_qloop_1)),_mm256_mul_pd(rho_dof_4,tmp_qloop_14)),_mm256_mul_pd(rho_dof_5,tmp_qloop_16))),_mm256_set_pd(_data_q_w[q],_data_q_w[q],_data_q_w[q],_data_q_w[q])),_mm256_set_pd(abs_det_jac_affine_GRAY,abs_det_jac_affine_GRAY,abs_det_jac_affine_GRAY,abs_det_jac_affine_GRAY)); + const __m256d tmp_qloop_18 = _mm256_mul_pd(tmp_qloop_17,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]))),_mm256_set_pd(1.0,1.0,1.0,1.0))); + const __m256d tmp_qloop_19 = _mm256_mul_pd(tmp_qloop_11,tmp_qloop_18); + const __m256d tmp_qloop_20 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_9,_mm256_set_pd(jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY)),_mm256_mul_pd(tmp_qloop_10,_mm256_set_pd(jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY))); + const __m256d tmp_qloop_21 = _mm256_mul_pd(tmp_qloop_18,tmp_qloop_20); + const __m256d tmp_qloop_22 = _mm256_mul_pd(tmp_qloop_17,_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])); + const __m256d tmp_qloop_23 = _mm256_mul_pd(tmp_qloop_11,tmp_qloop_22); + const __m256d tmp_qloop_24 = _mm256_mul_pd(tmp_qloop_17,_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q])); + const __m256d tmp_qloop_25 = _mm256_mul_pd(tmp_qloop_11,tmp_qloop_24); + const __m256d tmp_qloop_26 = _mm256_mul_pd(tmp_qloop_20,tmp_qloop_22); + const __m256d tmp_qloop_27 = _mm256_mul_pd(tmp_qloop_20,tmp_qloop_24); + const __m256d tmp_qloop_28 = _mm256_mul_pd(_mm256_mul_pd(tmp_qloop_0,tmp_qloop_17),tmp_qloop_4); + const __m256d q_tmp_0_0 = _mm256_mul_pd(tmp_qloop_19,tmp_qloop_6); + const __m256d q_tmp_0_1 = _mm256_mul_pd(tmp_qloop_12,tmp_qloop_19); + const __m256d q_tmp_0_2 = _mm256_mul_pd(tmp_qloop_13,tmp_qloop_19); + const __m256d q_tmp_0_3 = _mm256_mul_pd(tmp_qloop_1,tmp_qloop_19); + const __m256d q_tmp_0_4 = _mm256_mul_pd(tmp_qloop_14,tmp_qloop_19); + const __m256d q_tmp_0_5 = _mm256_mul_pd(tmp_qloop_16,tmp_qloop_19); + const __m256d q_tmp_0_6 = _mm256_mul_pd(tmp_qloop_21,tmp_qloop_6); + const __m256d q_tmp_0_7 = _mm256_mul_pd(tmp_qloop_12,tmp_qloop_21); + const __m256d q_tmp_0_8 = _mm256_mul_pd(tmp_qloop_13,tmp_qloop_21); + const __m256d q_tmp_0_9 = _mm256_mul_pd(tmp_qloop_1,tmp_qloop_21); + const __m256d q_tmp_0_10 = _mm256_mul_pd(tmp_qloop_14,tmp_qloop_21); + const __m256d q_tmp_0_11 = _mm256_mul_pd(tmp_qloop_16,tmp_qloop_21); + const __m256d q_tmp_1_0 = _mm256_mul_pd(tmp_qloop_23,tmp_qloop_6); + const __m256d q_tmp_1_1 = _mm256_mul_pd(tmp_qloop_12,tmp_qloop_23); + const __m256d q_tmp_1_2 = _mm256_mul_pd(tmp_qloop_13,tmp_qloop_23); + const __m256d q_tmp_1_3 = _mm256_mul_pd(tmp_qloop_15,tmp_qloop_25); + const __m256d q_tmp_1_4 = _mm256_mul_pd(tmp_qloop_14,tmp_qloop_23); + const __m256d q_tmp_1_5 = _mm256_mul_pd(tmp_qloop_16,tmp_qloop_23); + const __m256d q_tmp_1_6 = _mm256_mul_pd(tmp_qloop_26,tmp_qloop_6); + const __m256d q_tmp_1_7 = _mm256_mul_pd(tmp_qloop_12,tmp_qloop_26); + const __m256d q_tmp_1_8 = _mm256_mul_pd(tmp_qloop_13,tmp_qloop_26); + const __m256d q_tmp_1_9 = _mm256_mul_pd(tmp_qloop_15,tmp_qloop_27); + const __m256d q_tmp_1_10 = _mm256_mul_pd(tmp_qloop_14,tmp_qloop_26); + const __m256d q_tmp_1_11 = _mm256_mul_pd(tmp_qloop_16,tmp_qloop_26); + const __m256d q_tmp_2_0 = _mm256_mul_pd(tmp_qloop_25,tmp_qloop_6); + const __m256d q_tmp_2_1 = _mm256_mul_pd(tmp_qloop_12,tmp_qloop_25); + const __m256d q_tmp_2_2 = _mm256_mul_pd(tmp_qloop_13,tmp_qloop_25); + const __m256d q_tmp_2_3 = _mm256_mul_pd(tmp_qloop_11,tmp_qloop_28); + const __m256d q_tmp_2_4 = _mm256_mul_pd(tmp_qloop_14,tmp_qloop_25); + const __m256d q_tmp_2_5 = _mm256_mul_pd(tmp_qloop_16,tmp_qloop_25); + const __m256d q_tmp_2_6 = _mm256_mul_pd(tmp_qloop_27,tmp_qloop_6); + const __m256d q_tmp_2_7 = _mm256_mul_pd(tmp_qloop_12,tmp_qloop_27); + const __m256d q_tmp_2_8 = _mm256_mul_pd(tmp_qloop_13,tmp_qloop_27); + const __m256d q_tmp_2_9 = _mm256_mul_pd(tmp_qloop_20,tmp_qloop_28); + const __m256d q_tmp_2_10 = _mm256_mul_pd(tmp_qloop_14,tmp_qloop_27); + const __m256d q_tmp_2_11 = _mm256_mul_pd(tmp_qloop_16,tmp_qloop_27); + q_acc_0_0 = _mm256_add_pd(q_acc_0_0,q_tmp_0_0); + q_acc_0_1 = _mm256_add_pd(q_acc_0_1,q_tmp_0_1); + q_acc_0_2 = _mm256_add_pd(q_acc_0_2,q_tmp_0_2); + q_acc_0_3 = _mm256_add_pd(q_acc_0_3,q_tmp_0_3); + q_acc_0_4 = _mm256_add_pd(q_acc_0_4,q_tmp_0_4); + q_acc_0_5 = _mm256_add_pd(q_acc_0_5,q_tmp_0_5); + q_acc_0_6 = _mm256_add_pd(q_acc_0_6,q_tmp_0_6); + q_acc_0_7 = _mm256_add_pd(q_acc_0_7,q_tmp_0_7); + q_acc_0_8 = _mm256_add_pd(q_acc_0_8,q_tmp_0_8); + q_acc_0_9 = _mm256_add_pd(q_acc_0_9,q_tmp_0_9); + q_acc_0_10 = _mm256_add_pd(q_acc_0_10,q_tmp_0_10); + q_acc_0_11 = _mm256_add_pd(q_acc_0_11,q_tmp_0_11); + q_acc_1_0 = _mm256_add_pd(q_acc_1_0,q_tmp_1_0); + q_acc_1_1 = _mm256_add_pd(q_acc_1_1,q_tmp_1_1); + q_acc_1_2 = _mm256_add_pd(q_acc_1_2,q_tmp_1_2); + q_acc_1_3 = _mm256_add_pd(q_acc_1_3,q_tmp_1_3); + q_acc_1_4 = _mm256_add_pd(q_acc_1_4,q_tmp_1_4); + q_acc_1_5 = _mm256_add_pd(q_acc_1_5,q_tmp_1_5); + q_acc_1_6 = _mm256_add_pd(q_acc_1_6,q_tmp_1_6); + q_acc_1_7 = _mm256_add_pd(q_acc_1_7,q_tmp_1_7); + q_acc_1_8 = _mm256_add_pd(q_acc_1_8,q_tmp_1_8); + q_acc_1_9 = _mm256_add_pd(q_acc_1_9,q_tmp_1_9); + q_acc_1_10 = _mm256_add_pd(q_acc_1_10,q_tmp_1_10); + q_acc_1_11 = _mm256_add_pd(q_acc_1_11,q_tmp_1_11); + q_acc_2_0 = _mm256_add_pd(q_acc_2_0,q_tmp_2_0); + q_acc_2_1 = _mm256_add_pd(q_acc_2_1,q_tmp_2_1); + q_acc_2_2 = _mm256_add_pd(q_acc_2_2,q_tmp_2_2); + q_acc_2_3 = _mm256_add_pd(q_acc_2_3,q_tmp_2_3); + q_acc_2_4 = _mm256_add_pd(q_acc_2_4,q_tmp_2_4); + q_acc_2_5 = _mm256_add_pd(q_acc_2_5,q_tmp_2_5); + q_acc_2_6 = _mm256_add_pd(q_acc_2_6,q_tmp_2_6); + q_acc_2_7 = _mm256_add_pd(q_acc_2_7,q_tmp_2_7); + q_acc_2_8 = _mm256_add_pd(q_acc_2_8,q_tmp_2_8); + q_acc_2_9 = _mm256_add_pd(q_acc_2_9,q_tmp_2_9); + q_acc_2_10 = _mm256_add_pd(q_acc_2_10,q_tmp_2_10); + q_acc_2_11 = _mm256_add_pd(q_acc_2_11,q_tmp_2_11); + } + const __m256d elMatVec_0 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(q_acc_0_0,src_dof_0),_mm256_mul_pd(q_acc_0_1,src_dof_1)),_mm256_mul_pd(q_acc_0_10,src_dof_10)),_mm256_mul_pd(q_acc_0_11,src_dof_11)),_mm256_mul_pd(q_acc_0_2,src_dof_2)),_mm256_mul_pd(q_acc_0_3,src_dof_3)),_mm256_mul_pd(q_acc_0_4,src_dof_4)),_mm256_mul_pd(q_acc_0_5,src_dof_5)),_mm256_mul_pd(q_acc_0_6,src_dof_6)),_mm256_mul_pd(q_acc_0_7,src_dof_7)),_mm256_mul_pd(q_acc_0_8,src_dof_8)),_mm256_mul_pd(q_acc_0_9,src_dof_9)); + const __m256d elMatVec_1 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(q_acc_1_0,src_dof_0),_mm256_mul_pd(q_acc_1_1,src_dof_1)),_mm256_mul_pd(q_acc_1_10,src_dof_10)),_mm256_mul_pd(q_acc_1_11,src_dof_11)),_mm256_mul_pd(q_acc_1_2,src_dof_2)),_mm256_mul_pd(q_acc_1_3,src_dof_3)),_mm256_mul_pd(q_acc_1_4,src_dof_4)),_mm256_mul_pd(q_acc_1_5,src_dof_5)),_mm256_mul_pd(q_acc_1_6,src_dof_6)),_mm256_mul_pd(q_acc_1_7,src_dof_7)),_mm256_mul_pd(q_acc_1_8,src_dof_8)),_mm256_mul_pd(q_acc_1_9,src_dof_9)); + const __m256d elMatVec_2 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(q_acc_2_0,src_dof_0),_mm256_mul_pd(q_acc_2_1,src_dof_1)),_mm256_mul_pd(q_acc_2_10,src_dof_10)),_mm256_mul_pd(q_acc_2_11,src_dof_11)),_mm256_mul_pd(q_acc_2_2,src_dof_2)),_mm256_mul_pd(q_acc_2_3,src_dof_3)),_mm256_mul_pd(q_acc_2_4,src_dof_4)),_mm256_mul_pd(q_acc_2_5,src_dof_5)),_mm256_mul_pd(q_acc_2_6,src_dof_6)),_mm256_mul_pd(q_acc_2_7,src_dof_7)),_mm256_mul_pd(q_acc_2_8,src_dof_8)),_mm256_mul_pd(q_acc_2_9,src_dof_9)); + _mm256_storeu_pd(&_data_dst[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))],_mm256_add_pd(elMatVec_0,_mm256_loadu_pd(& _data_dst[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))]))); + _mm256_storeu_pd(&_data_dst[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1],_mm256_add_pd(elMatVec_1,_mm256_loadu_pd(& _data_dst[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]))); + _mm256_storeu_pd(&_data_dst[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))],_mm256_add_pd(elMatVec_2,_mm256_loadu_pd(& _data_dst[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]))); + } + for (int64_t ctr_0 = (int64_t)((-ctr_1 + micro_edges_per_macro_edge) / (4)) * (4); ctr_0 < -ctr_1 + micro_edges_per_macro_edge; ctr_0 += 1) + { + + const int64_t phantom_ctr_0 = ctr_0; + real_t _data_float_loop_ctr_array_dim_0[4]; + _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0; + _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1; + _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2; + _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3; + real_t _data_float_loop_ctr_array_dim_1[4]; + _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1; + + const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]; + const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]; + const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]; + const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]; + const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0; + const real_t src_dof_0 = _data_src_vertex_0[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))]; + const real_t src_dof_1 = _data_src_vertex_0[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]; + const real_t src_dof_2 = _data_src_vertex_0[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]; + const real_t src_dof_3 = _data_src_edge_0[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]; + const real_t src_dof_4 = _data_src_edge_0[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]; + const real_t src_dof_5 = _data_src_edge_0[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))]; + const real_t src_dof_6 = _data_src_vertex_1[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))]; + const real_t src_dof_7 = _data_src_vertex_1[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]; + const real_t src_dof_8 = _data_src_vertex_1[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]; + const real_t src_dof_9 = _data_src_edge_1[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]; + const real_t src_dof_10 = _data_src_edge_1[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]; + const real_t src_dof_11 = _data_src_edge_1[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))]; + const real_t rho_dof_0 = _data_rhoVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))]; + const real_t rho_dof_1 = _data_rhoVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]; + const real_t rho_dof_2 = _data_rhoVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]; + const real_t rho_dof_3 = _data_rhoEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]; + const real_t rho_dof_4 = _data_rhoEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]; + const real_t rho_dof_5 = _data_rhoEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))]; + real_t q_acc_0_0 = 0.0; + real_t q_acc_0_1 = 0.0; + real_t q_acc_0_2 = 0.0; + real_t q_acc_0_3 = 0.0; + real_t q_acc_0_4 = 0.0; + real_t q_acc_0_5 = 0.0; + real_t q_acc_0_6 = 0.0; + real_t q_acc_0_7 = 0.0; + real_t q_acc_0_8 = 0.0; + real_t q_acc_0_9 = 0.0; + real_t q_acc_0_10 = 0.0; + real_t q_acc_0_11 = 0.0; + real_t q_acc_1_0 = 0.0; + real_t q_acc_1_1 = 0.0; + real_t q_acc_1_2 = 0.0; + real_t q_acc_1_3 = 0.0; + real_t q_acc_1_4 = 0.0; + real_t q_acc_1_5 = 0.0; + real_t q_acc_1_6 = 0.0; + real_t q_acc_1_7 = 0.0; + real_t q_acc_1_8 = 0.0; + real_t q_acc_1_9 = 0.0; + real_t q_acc_1_10 = 0.0; + real_t q_acc_1_11 = 0.0; + real_t q_acc_2_0 = 0.0; + real_t q_acc_2_1 = 0.0; + real_t q_acc_2_2 = 0.0; + real_t q_acc_2_3 = 0.0; + real_t q_acc_2_4 = 0.0; + real_t q_acc_2_5 = 0.0; + real_t q_acc_2_6 = 0.0; + real_t q_acc_2_7 = 0.0; + real_t q_acc_2_8 = 0.0; + real_t q_acc_2_9 = 0.0; + real_t q_acc_2_10 = 0.0; + real_t q_acc_2_11 = 0.0; + for (int64_t q = 0; q < 4; q += 1) + { + const real_t tmp_qloop_0 = 4.0*_data_q_p_0[q]; + const real_t tmp_qloop_1 = tmp_qloop_0*_data_q_p_1[q]; + const real_t tmp_qloop_2 = (_data_q_p_0[q]*_data_q_p_0[q]); + const real_t tmp_qloop_3 = tmp_qloop_2*2.0; + const real_t tmp_qloop_4 = (_data_q_p_1[q]*_data_q_p_1[q]); + const real_t tmp_qloop_5 = tmp_qloop_4*2.0; + const real_t tmp_qloop_6 = tmp_qloop_1 + tmp_qloop_3 + tmp_qloop_5 - 3.0*_data_q_p_0[q] - 3.0*_data_q_p_1[q] + 1.0; + const real_t tmp_qloop_7 = 4.0*_data_q_p_1[q]; + const real_t tmp_qloop_8 = rho_dof_0*(tmp_qloop_0 + tmp_qloop_7 - 3.0); + const real_t tmp_qloop_9 = rho_dof_1*(tmp_qloop_0 - 1.0) + rho_dof_3*tmp_qloop_7 - rho_dof_4*tmp_qloop_7 + rho_dof_5*(-tmp_qloop_7 - 8.0*_data_q_p_0[q] + 4.0) + tmp_qloop_8; + const real_t tmp_qloop_10 = rho_dof_2*(tmp_qloop_7 - 1.0) + rho_dof_3*tmp_qloop_0 + rho_dof_4*(-tmp_qloop_0 - 8.0*_data_q_p_1[q] + 4.0) - rho_dof_5*tmp_qloop_0 + tmp_qloop_8; + const real_t tmp_qloop_11 = jac_affine_inv_0_0_GRAY*tmp_qloop_9 + jac_affine_inv_1_0_GRAY*tmp_qloop_10; + const real_t tmp_qloop_12 = tmp_qloop_3 - _data_q_p_0[q]; + const real_t tmp_qloop_13 = tmp_qloop_5 - _data_q_p_1[q]; + const real_t tmp_qloop_14 = -tmp_qloop_1 + tmp_qloop_4*-4.0 + tmp_qloop_7; + const real_t tmp_qloop_15 = tmp_qloop_2*4.0; + const real_t tmp_qloop_16 = tmp_qloop_0 - tmp_qloop_1 - tmp_qloop_15; + const real_t tmp_qloop_17 = abs_det_jac_affine_GRAY*1.0 / (rho_dof_0*tmp_qloop_6 + rho_dof_1*tmp_qloop_12 + rho_dof_2*tmp_qloop_13 + rho_dof_3*tmp_qloop_1 + rho_dof_4*tmp_qloop_14 + rho_dof_5*tmp_qloop_16)*_data_q_w[q]; + const real_t tmp_qloop_18 = tmp_qloop_17*(1.0 - _data_q_p_0[q] - _data_q_p_1[q]); + const real_t tmp_qloop_19 = tmp_qloop_11*tmp_qloop_18; + const real_t tmp_qloop_20 = jac_affine_inv_0_1_GRAY*tmp_qloop_9 + jac_affine_inv_1_1_GRAY*tmp_qloop_10; + const real_t tmp_qloop_21 = tmp_qloop_18*tmp_qloop_20; + const real_t tmp_qloop_22 = tmp_qloop_17*_data_q_p_0[q]; + const real_t tmp_qloop_23 = tmp_qloop_11*tmp_qloop_22; + const real_t tmp_qloop_24 = tmp_qloop_17*_data_q_p_1[q]; + const real_t tmp_qloop_25 = tmp_qloop_11*tmp_qloop_24; + const real_t tmp_qloop_26 = tmp_qloop_20*tmp_qloop_22; + const real_t tmp_qloop_27 = tmp_qloop_20*tmp_qloop_24; + const real_t tmp_qloop_28 = tmp_qloop_0*tmp_qloop_17*tmp_qloop_4; + const real_t q_tmp_0_0 = tmp_qloop_19*tmp_qloop_6; + const real_t q_tmp_0_1 = tmp_qloop_12*tmp_qloop_19; + const real_t q_tmp_0_2 = tmp_qloop_13*tmp_qloop_19; + const real_t q_tmp_0_3 = tmp_qloop_1*tmp_qloop_19; + const real_t q_tmp_0_4 = tmp_qloop_14*tmp_qloop_19; + const real_t q_tmp_0_5 = tmp_qloop_16*tmp_qloop_19; + const real_t q_tmp_0_6 = tmp_qloop_21*tmp_qloop_6; + const real_t q_tmp_0_7 = tmp_qloop_12*tmp_qloop_21; + const real_t q_tmp_0_8 = tmp_qloop_13*tmp_qloop_21; + const real_t q_tmp_0_9 = tmp_qloop_1*tmp_qloop_21; + const real_t q_tmp_0_10 = tmp_qloop_14*tmp_qloop_21; + const real_t q_tmp_0_11 = tmp_qloop_16*tmp_qloop_21; + const real_t q_tmp_1_0 = tmp_qloop_23*tmp_qloop_6; + const real_t q_tmp_1_1 = tmp_qloop_12*tmp_qloop_23; + const real_t q_tmp_1_2 = tmp_qloop_13*tmp_qloop_23; + const real_t q_tmp_1_3 = tmp_qloop_15*tmp_qloop_25; + const real_t q_tmp_1_4 = tmp_qloop_14*tmp_qloop_23; + const real_t q_tmp_1_5 = tmp_qloop_16*tmp_qloop_23; + const real_t q_tmp_1_6 = tmp_qloop_26*tmp_qloop_6; + const real_t q_tmp_1_7 = tmp_qloop_12*tmp_qloop_26; + const real_t q_tmp_1_8 = tmp_qloop_13*tmp_qloop_26; + const real_t q_tmp_1_9 = tmp_qloop_15*tmp_qloop_27; + const real_t q_tmp_1_10 = tmp_qloop_14*tmp_qloop_26; + const real_t q_tmp_1_11 = tmp_qloop_16*tmp_qloop_26; + const real_t q_tmp_2_0 = tmp_qloop_25*tmp_qloop_6; + const real_t q_tmp_2_1 = tmp_qloop_12*tmp_qloop_25; + const real_t q_tmp_2_2 = tmp_qloop_13*tmp_qloop_25; + const real_t q_tmp_2_3 = tmp_qloop_11*tmp_qloop_28; + const real_t q_tmp_2_4 = tmp_qloop_14*tmp_qloop_25; + const real_t q_tmp_2_5 = tmp_qloop_16*tmp_qloop_25; + const real_t q_tmp_2_6 = tmp_qloop_27*tmp_qloop_6; + const real_t q_tmp_2_7 = tmp_qloop_12*tmp_qloop_27; + const real_t q_tmp_2_8 = tmp_qloop_13*tmp_qloop_27; + const real_t q_tmp_2_9 = tmp_qloop_20*tmp_qloop_28; + const real_t q_tmp_2_10 = tmp_qloop_14*tmp_qloop_27; + const real_t q_tmp_2_11 = tmp_qloop_16*tmp_qloop_27; + q_acc_0_0 = q_acc_0_0 + q_tmp_0_0; + q_acc_0_1 = q_acc_0_1 + q_tmp_0_1; + q_acc_0_2 = q_acc_0_2 + q_tmp_0_2; + q_acc_0_3 = q_acc_0_3 + q_tmp_0_3; + q_acc_0_4 = q_acc_0_4 + q_tmp_0_4; + q_acc_0_5 = q_acc_0_5 + q_tmp_0_5; + q_acc_0_6 = q_acc_0_6 + q_tmp_0_6; + q_acc_0_7 = q_acc_0_7 + q_tmp_0_7; + q_acc_0_8 = q_acc_0_8 + q_tmp_0_8; + q_acc_0_9 = q_acc_0_9 + q_tmp_0_9; + q_acc_0_10 = q_acc_0_10 + q_tmp_0_10; + q_acc_0_11 = q_acc_0_11 + q_tmp_0_11; + q_acc_1_0 = q_acc_1_0 + q_tmp_1_0; + q_acc_1_1 = q_acc_1_1 + q_tmp_1_1; + q_acc_1_2 = q_acc_1_2 + q_tmp_1_2; + q_acc_1_3 = q_acc_1_3 + q_tmp_1_3; + q_acc_1_4 = q_acc_1_4 + q_tmp_1_4; + q_acc_1_5 = q_acc_1_5 + q_tmp_1_5; + q_acc_1_6 = q_acc_1_6 + q_tmp_1_6; + q_acc_1_7 = q_acc_1_7 + q_tmp_1_7; + q_acc_1_8 = q_acc_1_8 + q_tmp_1_8; + q_acc_1_9 = q_acc_1_9 + q_tmp_1_9; + q_acc_1_10 = q_acc_1_10 + q_tmp_1_10; + q_acc_1_11 = q_acc_1_11 + q_tmp_1_11; + q_acc_2_0 = q_acc_2_0 + q_tmp_2_0; + q_acc_2_1 = q_acc_2_1 + q_tmp_2_1; + q_acc_2_2 = q_acc_2_2 + q_tmp_2_2; + q_acc_2_3 = q_acc_2_3 + q_tmp_2_3; + q_acc_2_4 = q_acc_2_4 + q_tmp_2_4; + q_acc_2_5 = q_acc_2_5 + q_tmp_2_5; + q_acc_2_6 = q_acc_2_6 + q_tmp_2_6; + q_acc_2_7 = q_acc_2_7 + q_tmp_2_7; + q_acc_2_8 = q_acc_2_8 + q_tmp_2_8; + q_acc_2_9 = q_acc_2_9 + q_tmp_2_9; + q_acc_2_10 = q_acc_2_10 + q_tmp_2_10; + q_acc_2_11 = q_acc_2_11 + q_tmp_2_11; + } + const real_t elMatVec_0 = q_acc_0_0*src_dof_0 + q_acc_0_1*src_dof_1 + q_acc_0_10*src_dof_10 + q_acc_0_11*src_dof_11 + q_acc_0_2*src_dof_2 + q_acc_0_3*src_dof_3 + q_acc_0_4*src_dof_4 + q_acc_0_5*src_dof_5 + q_acc_0_6*src_dof_6 + q_acc_0_7*src_dof_7 + q_acc_0_8*src_dof_8 + q_acc_0_9*src_dof_9; + const real_t elMatVec_1 = q_acc_1_0*src_dof_0 + q_acc_1_1*src_dof_1 + q_acc_1_10*src_dof_10 + q_acc_1_11*src_dof_11 + q_acc_1_2*src_dof_2 + q_acc_1_3*src_dof_3 + q_acc_1_4*src_dof_4 + q_acc_1_5*src_dof_5 + q_acc_1_6*src_dof_6 + q_acc_1_7*src_dof_7 + q_acc_1_8*src_dof_8 + q_acc_1_9*src_dof_9; + const real_t elMatVec_2 = q_acc_2_0*src_dof_0 + q_acc_2_1*src_dof_1 + q_acc_2_10*src_dof_10 + q_acc_2_11*src_dof_11 + q_acc_2_2*src_dof_2 + q_acc_2_3*src_dof_3 + q_acc_2_4*src_dof_4 + q_acc_2_5*src_dof_5 + q_acc_2_6*src_dof_6 + q_acc_2_7*src_dof_7 + q_acc_2_8*src_dof_8 + q_acc_2_9*src_dof_9; + _data_dst[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))] = elMatVec_0 + _data_dst[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))]; + _data_dst[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1] = elMatVec_1 + _data_dst[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]; + _data_dst[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))] = elMatVec_2 + _data_dst[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]; + } + } + } + const real_t tmp_coords_jac_0_BLUE = 1.0 / (micro_edges_per_macro_edge_float)*1.0; + const real_t tmp_coords_jac_1_BLUE = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0); + const real_t tmp_coords_jac_2_BLUE = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1); + const real_t tmp_coords_jac_3_BLUE = tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0); + const real_t tmp_coords_jac_4_BLUE = tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1); + const real_t p_affine_const_0_0_BLUE = tmp_coords_jac_1_BLUE; + const real_t p_affine_const_0_1_BLUE = tmp_coords_jac_2_BLUE; + const real_t p_affine_const_1_0_BLUE = macro_vertex_coord_id_0comp0 + tmp_coords_jac_3_BLUE; + const real_t p_affine_const_1_1_BLUE = macro_vertex_coord_id_0comp1 + tmp_coords_jac_4_BLUE; + const real_t p_affine_const_2_0_BLUE = tmp_coords_jac_1_BLUE + tmp_coords_jac_3_BLUE; + const real_t p_affine_const_2_1_BLUE = tmp_coords_jac_2_BLUE + tmp_coords_jac_4_BLUE; + const real_t jac_affine_0_0_BLUE = -p_affine_const_0_0_BLUE + p_affine_const_1_0_BLUE; + const real_t jac_affine_0_1_BLUE = -p_affine_const_0_0_BLUE + p_affine_const_2_0_BLUE; + const real_t jac_affine_1_0_BLUE = -p_affine_const_0_1_BLUE + p_affine_const_1_1_BLUE; + const real_t jac_affine_1_1_BLUE = -p_affine_const_0_1_BLUE + p_affine_const_2_1_BLUE; + const real_t tmp_coords_jac_5_BLUE = jac_affine_0_0_BLUE*jac_affine_1_1_BLUE - jac_affine_0_1_BLUE*jac_affine_1_0_BLUE; + const real_t tmp_coords_jac_6_BLUE = 1.0 / (tmp_coords_jac_5_BLUE); + const real_t jac_affine_inv_0_0_BLUE = jac_affine_1_1_BLUE*tmp_coords_jac_6_BLUE; + const real_t jac_affine_inv_0_1_BLUE = -jac_affine_0_1_BLUE*tmp_coords_jac_6_BLUE; + const real_t jac_affine_inv_1_0_BLUE = -jac_affine_1_0_BLUE*tmp_coords_jac_6_BLUE; + const real_t jac_affine_inv_1_1_BLUE = jac_affine_0_0_BLUE*tmp_coords_jac_6_BLUE; + const real_t abs_det_jac_affine_BLUE = abs(tmp_coords_jac_5_BLUE); + { + /* FaceType.BLUE */ + for (int64_t ctr_1 = 0; ctr_1 < micro_edges_per_macro_edge; ctr_1 += 1) + { + for (int64_t ctr_0 = 0; ctr_0 < (int64_t)((-ctr_1 + micro_edges_per_macro_edge - 1) / (4)) * (4); ctr_0 += 4) + { + + const int64_t phantom_ctr_0 = ctr_0; + real_t _data_float_loop_ctr_array_dim_0[4]; + _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0; + _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1; + _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2; + _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3; + real_t _data_float_loop_ctr_array_dim_1[4]; + _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1; + + const __m256d p_affine_0_0 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)); + const __m256d p_affine_0_1 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)); + const __m256d p_affine_1_0 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)); + const __m256d p_affine_1_1 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)); + const __m256d p_affine_2_0 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)); + const __m256d p_affine_2_1 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)); + const __m256d src_dof_0 = _mm256_loadu_pd(& _data_src_vertex_0[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]); + const __m256d src_dof_1 = _mm256_loadu_pd(& _data_src_vertex_0[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]); + const __m256d src_dof_2 = _mm256_loadu_pd(& _data_src_vertex_0[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1]); + const __m256d src_dof_3 = _mm256_loadu_pd(& _data_src_edge_0[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]); + const __m256d src_dof_4 = _mm256_loadu_pd(& _data_src_edge_0[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1]); + const __m256d src_dof_5 = _mm256_loadu_pd(& _data_src_edge_0[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]); + const __m256d src_dof_6 = _mm256_loadu_pd(& _data_src_vertex_1[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]); + const __m256d src_dof_7 = _mm256_loadu_pd(& _data_src_vertex_1[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]); + const __m256d src_dof_8 = _mm256_loadu_pd(& _data_src_vertex_1[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1]); + const __m256d src_dof_9 = _mm256_loadu_pd(& _data_src_edge_1[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]); + const __m256d src_dof_10 = _mm256_loadu_pd(& _data_src_edge_1[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1]); + const __m256d src_dof_11 = _mm256_loadu_pd(& _data_src_edge_1[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]); + const __m256d rho_dof_0 = _mm256_loadu_pd(& _data_rhoVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]); + const __m256d rho_dof_1 = _mm256_loadu_pd(& _data_rhoVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]); + const __m256d rho_dof_2 = _mm256_loadu_pd(& _data_rhoVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1]); + const __m256d rho_dof_3 = _mm256_loadu_pd(& _data_rhoEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]); + const __m256d rho_dof_4 = _mm256_loadu_pd(& _data_rhoEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1]); + const __m256d rho_dof_5 = _mm256_loadu_pd(& _data_rhoEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]); + __m256d q_acc_0_0 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_1 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_2 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_3 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_4 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_5 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_6 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_7 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_8 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_9 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_10 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_11 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_0 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_1 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_2 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_3 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_4 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_5 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_6 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_7 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_8 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_9 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_10 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_11 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_0 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_1 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_2 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_3 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_4 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_5 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_6 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_7 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_8 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_9 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_10 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_11 = _mm256_set_pd(0.0,0.0,0.0,0.0); + for (int64_t q = 0; q < 4; q += 1) + { + const __m256d tmp_qloop_0 = _mm256_mul_pd(_mm256_set_pd(4.0,4.0,4.0,4.0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])); + const __m256d tmp_qloop_1 = _mm256_mul_pd(tmp_qloop_0,_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q])); + const __m256d tmp_qloop_2 = _mm256_mul_pd(_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q]),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])); + const __m256d tmp_qloop_3 = _mm256_mul_pd(tmp_qloop_2,_mm256_set_pd(2.0,2.0,2.0,2.0)); + const __m256d tmp_qloop_4 = _mm256_mul_pd(_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q])); + const __m256d tmp_qloop_5 = _mm256_mul_pd(tmp_qloop_4,_mm256_set_pd(2.0,2.0,2.0,2.0)); + const __m256d tmp_qloop_6 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-3.0,-3.0,-3.0,-3.0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),_mm256_mul_pd(_mm256_set_pd(-3.0,-3.0,-3.0,-3.0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),tmp_qloop_1),tmp_qloop_3),tmp_qloop_5); + const __m256d tmp_qloop_7 = _mm256_mul_pd(_mm256_set_pd(4.0,4.0,4.0,4.0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q])); + const __m256d tmp_qloop_8 = _mm256_mul_pd(rho_dof_0,_mm256_add_pd(_mm256_add_pd(_mm256_set_pd(-3.0,-3.0,-3.0,-3.0),tmp_qloop_0),tmp_qloop_7)); + const __m256d tmp_qloop_9 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(rho_dof_1,_mm256_add_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),tmp_qloop_0)),_mm256_mul_pd(rho_dof_5,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-8.0,-8.0,-8.0,-8.0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),_mm256_mul_pd(tmp_qloop_7,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_set_pd(4.0,4.0,4.0,4.0)))),_mm256_mul_pd(rho_dof_3,tmp_qloop_7)),_mm256_mul_pd(_mm256_mul_pd(rho_dof_4,tmp_qloop_7),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_8); + const __m256d tmp_qloop_10 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(rho_dof_2,_mm256_add_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),tmp_qloop_7)),_mm256_mul_pd(rho_dof_4,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-8.0,-8.0,-8.0,-8.0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q])),_mm256_mul_pd(tmp_qloop_0,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_set_pd(4.0,4.0,4.0,4.0)))),_mm256_mul_pd(rho_dof_3,tmp_qloop_0)),_mm256_mul_pd(_mm256_mul_pd(rho_dof_5,tmp_qloop_0),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_8); + const __m256d tmp_qloop_11 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_9,_mm256_set_pd(jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE)),_mm256_mul_pd(tmp_qloop_10,_mm256_set_pd(jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE))); + const __m256d tmp_qloop_12 = _mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),tmp_qloop_3); + const __m256d tmp_qloop_13 = _mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q])),tmp_qloop_5); + const __m256d tmp_qloop_14 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_4,_mm256_set_pd(-4.0,-4.0,-4.0,-4.0)),_mm256_mul_pd(tmp_qloop_1,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_7); + const __m256d tmp_qloop_15 = _mm256_mul_pd(tmp_qloop_2,_mm256_set_pd(4.0,4.0,4.0,4.0)); + const __m256d tmp_qloop_16 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_1,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_15,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_0); + const __m256d tmp_qloop_17 = _mm256_mul_pd(_mm256_mul_pd(_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(rho_dof_0,tmp_qloop_6),_mm256_mul_pd(rho_dof_1,tmp_qloop_12)),_mm256_mul_pd(rho_dof_2,tmp_qloop_13)),_mm256_mul_pd(rho_dof_3,tmp_qloop_1)),_mm256_mul_pd(rho_dof_4,tmp_qloop_14)),_mm256_mul_pd(rho_dof_5,tmp_qloop_16))),_mm256_set_pd(_data_q_w[q],_data_q_w[q],_data_q_w[q],_data_q_w[q])),_mm256_set_pd(abs_det_jac_affine_BLUE,abs_det_jac_affine_BLUE,abs_det_jac_affine_BLUE,abs_det_jac_affine_BLUE)); + const __m256d tmp_qloop_18 = _mm256_mul_pd(tmp_qloop_17,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]))),_mm256_set_pd(1.0,1.0,1.0,1.0))); + const __m256d tmp_qloop_19 = _mm256_mul_pd(tmp_qloop_11,tmp_qloop_18); + const __m256d tmp_qloop_20 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_9,_mm256_set_pd(jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE)),_mm256_mul_pd(tmp_qloop_10,_mm256_set_pd(jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE))); + const __m256d tmp_qloop_21 = _mm256_mul_pd(tmp_qloop_18,tmp_qloop_20); + const __m256d tmp_qloop_22 = _mm256_mul_pd(tmp_qloop_17,_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])); + const __m256d tmp_qloop_23 = _mm256_mul_pd(tmp_qloop_11,tmp_qloop_22); + const __m256d tmp_qloop_24 = _mm256_mul_pd(tmp_qloop_17,_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q])); + const __m256d tmp_qloop_25 = _mm256_mul_pd(tmp_qloop_11,tmp_qloop_24); + const __m256d tmp_qloop_26 = _mm256_mul_pd(tmp_qloop_20,tmp_qloop_22); + const __m256d tmp_qloop_27 = _mm256_mul_pd(tmp_qloop_20,tmp_qloop_24); + const __m256d tmp_qloop_28 = _mm256_mul_pd(_mm256_mul_pd(tmp_qloop_0,tmp_qloop_17),tmp_qloop_4); + const __m256d q_tmp_0_0 = _mm256_mul_pd(tmp_qloop_19,tmp_qloop_6); + const __m256d q_tmp_0_1 = _mm256_mul_pd(tmp_qloop_12,tmp_qloop_19); + const __m256d q_tmp_0_2 = _mm256_mul_pd(tmp_qloop_13,tmp_qloop_19); + const __m256d q_tmp_0_3 = _mm256_mul_pd(tmp_qloop_1,tmp_qloop_19); + const __m256d q_tmp_0_4 = _mm256_mul_pd(tmp_qloop_14,tmp_qloop_19); + const __m256d q_tmp_0_5 = _mm256_mul_pd(tmp_qloop_16,tmp_qloop_19); + const __m256d q_tmp_0_6 = _mm256_mul_pd(tmp_qloop_21,tmp_qloop_6); + const __m256d q_tmp_0_7 = _mm256_mul_pd(tmp_qloop_12,tmp_qloop_21); + const __m256d q_tmp_0_8 = _mm256_mul_pd(tmp_qloop_13,tmp_qloop_21); + const __m256d q_tmp_0_9 = _mm256_mul_pd(tmp_qloop_1,tmp_qloop_21); + const __m256d q_tmp_0_10 = _mm256_mul_pd(tmp_qloop_14,tmp_qloop_21); + const __m256d q_tmp_0_11 = _mm256_mul_pd(tmp_qloop_16,tmp_qloop_21); + const __m256d q_tmp_1_0 = _mm256_mul_pd(tmp_qloop_23,tmp_qloop_6); + const __m256d q_tmp_1_1 = _mm256_mul_pd(tmp_qloop_12,tmp_qloop_23); + const __m256d q_tmp_1_2 = _mm256_mul_pd(tmp_qloop_13,tmp_qloop_23); + const __m256d q_tmp_1_3 = _mm256_mul_pd(tmp_qloop_15,tmp_qloop_25); + const __m256d q_tmp_1_4 = _mm256_mul_pd(tmp_qloop_14,tmp_qloop_23); + const __m256d q_tmp_1_5 = _mm256_mul_pd(tmp_qloop_16,tmp_qloop_23); + const __m256d q_tmp_1_6 = _mm256_mul_pd(tmp_qloop_26,tmp_qloop_6); + const __m256d q_tmp_1_7 = _mm256_mul_pd(tmp_qloop_12,tmp_qloop_26); + const __m256d q_tmp_1_8 = _mm256_mul_pd(tmp_qloop_13,tmp_qloop_26); + const __m256d q_tmp_1_9 = _mm256_mul_pd(tmp_qloop_15,tmp_qloop_27); + const __m256d q_tmp_1_10 = _mm256_mul_pd(tmp_qloop_14,tmp_qloop_26); + const __m256d q_tmp_1_11 = _mm256_mul_pd(tmp_qloop_16,tmp_qloop_26); + const __m256d q_tmp_2_0 = _mm256_mul_pd(tmp_qloop_25,tmp_qloop_6); + const __m256d q_tmp_2_1 = _mm256_mul_pd(tmp_qloop_12,tmp_qloop_25); + const __m256d q_tmp_2_2 = _mm256_mul_pd(tmp_qloop_13,tmp_qloop_25); + const __m256d q_tmp_2_3 = _mm256_mul_pd(tmp_qloop_11,tmp_qloop_28); + const __m256d q_tmp_2_4 = _mm256_mul_pd(tmp_qloop_14,tmp_qloop_25); + const __m256d q_tmp_2_5 = _mm256_mul_pd(tmp_qloop_16,tmp_qloop_25); + const __m256d q_tmp_2_6 = _mm256_mul_pd(tmp_qloop_27,tmp_qloop_6); + const __m256d q_tmp_2_7 = _mm256_mul_pd(tmp_qloop_12,tmp_qloop_27); + const __m256d q_tmp_2_8 = _mm256_mul_pd(tmp_qloop_13,tmp_qloop_27); + const __m256d q_tmp_2_9 = _mm256_mul_pd(tmp_qloop_20,tmp_qloop_28); + const __m256d q_tmp_2_10 = _mm256_mul_pd(tmp_qloop_14,tmp_qloop_27); + const __m256d q_tmp_2_11 = _mm256_mul_pd(tmp_qloop_16,tmp_qloop_27); + q_acc_0_0 = _mm256_add_pd(q_acc_0_0,q_tmp_0_0); + q_acc_0_1 = _mm256_add_pd(q_acc_0_1,q_tmp_0_1); + q_acc_0_2 = _mm256_add_pd(q_acc_0_2,q_tmp_0_2); + q_acc_0_3 = _mm256_add_pd(q_acc_0_3,q_tmp_0_3); + q_acc_0_4 = _mm256_add_pd(q_acc_0_4,q_tmp_0_4); + q_acc_0_5 = _mm256_add_pd(q_acc_0_5,q_tmp_0_5); + q_acc_0_6 = _mm256_add_pd(q_acc_0_6,q_tmp_0_6); + q_acc_0_7 = _mm256_add_pd(q_acc_0_7,q_tmp_0_7); + q_acc_0_8 = _mm256_add_pd(q_acc_0_8,q_tmp_0_8); + q_acc_0_9 = _mm256_add_pd(q_acc_0_9,q_tmp_0_9); + q_acc_0_10 = _mm256_add_pd(q_acc_0_10,q_tmp_0_10); + q_acc_0_11 = _mm256_add_pd(q_acc_0_11,q_tmp_0_11); + q_acc_1_0 = _mm256_add_pd(q_acc_1_0,q_tmp_1_0); + q_acc_1_1 = _mm256_add_pd(q_acc_1_1,q_tmp_1_1); + q_acc_1_2 = _mm256_add_pd(q_acc_1_2,q_tmp_1_2); + q_acc_1_3 = _mm256_add_pd(q_acc_1_3,q_tmp_1_3); + q_acc_1_4 = _mm256_add_pd(q_acc_1_4,q_tmp_1_4); + q_acc_1_5 = _mm256_add_pd(q_acc_1_5,q_tmp_1_5); + q_acc_1_6 = _mm256_add_pd(q_acc_1_6,q_tmp_1_6); + q_acc_1_7 = _mm256_add_pd(q_acc_1_7,q_tmp_1_7); + q_acc_1_8 = _mm256_add_pd(q_acc_1_8,q_tmp_1_8); + q_acc_1_9 = _mm256_add_pd(q_acc_1_9,q_tmp_1_9); + q_acc_1_10 = _mm256_add_pd(q_acc_1_10,q_tmp_1_10); + q_acc_1_11 = _mm256_add_pd(q_acc_1_11,q_tmp_1_11); + q_acc_2_0 = _mm256_add_pd(q_acc_2_0,q_tmp_2_0); + q_acc_2_1 = _mm256_add_pd(q_acc_2_1,q_tmp_2_1); + q_acc_2_2 = _mm256_add_pd(q_acc_2_2,q_tmp_2_2); + q_acc_2_3 = _mm256_add_pd(q_acc_2_3,q_tmp_2_3); + q_acc_2_4 = _mm256_add_pd(q_acc_2_4,q_tmp_2_4); + q_acc_2_5 = _mm256_add_pd(q_acc_2_5,q_tmp_2_5); + q_acc_2_6 = _mm256_add_pd(q_acc_2_6,q_tmp_2_6); + q_acc_2_7 = _mm256_add_pd(q_acc_2_7,q_tmp_2_7); + q_acc_2_8 = _mm256_add_pd(q_acc_2_8,q_tmp_2_8); + q_acc_2_9 = _mm256_add_pd(q_acc_2_9,q_tmp_2_9); + q_acc_2_10 = _mm256_add_pd(q_acc_2_10,q_tmp_2_10); + q_acc_2_11 = _mm256_add_pd(q_acc_2_11,q_tmp_2_11); + } + const __m256d elMatVec_0 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(q_acc_0_0,src_dof_0),_mm256_mul_pd(q_acc_0_1,src_dof_1)),_mm256_mul_pd(q_acc_0_10,src_dof_10)),_mm256_mul_pd(q_acc_0_11,src_dof_11)),_mm256_mul_pd(q_acc_0_2,src_dof_2)),_mm256_mul_pd(q_acc_0_3,src_dof_3)),_mm256_mul_pd(q_acc_0_4,src_dof_4)),_mm256_mul_pd(q_acc_0_5,src_dof_5)),_mm256_mul_pd(q_acc_0_6,src_dof_6)),_mm256_mul_pd(q_acc_0_7,src_dof_7)),_mm256_mul_pd(q_acc_0_8,src_dof_8)),_mm256_mul_pd(q_acc_0_9,src_dof_9)); + const __m256d elMatVec_1 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(q_acc_1_0,src_dof_0),_mm256_mul_pd(q_acc_1_1,src_dof_1)),_mm256_mul_pd(q_acc_1_10,src_dof_10)),_mm256_mul_pd(q_acc_1_11,src_dof_11)),_mm256_mul_pd(q_acc_1_2,src_dof_2)),_mm256_mul_pd(q_acc_1_3,src_dof_3)),_mm256_mul_pd(q_acc_1_4,src_dof_4)),_mm256_mul_pd(q_acc_1_5,src_dof_5)),_mm256_mul_pd(q_acc_1_6,src_dof_6)),_mm256_mul_pd(q_acc_1_7,src_dof_7)),_mm256_mul_pd(q_acc_1_8,src_dof_8)),_mm256_mul_pd(q_acc_1_9,src_dof_9)); + const __m256d elMatVec_2 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(q_acc_2_0,src_dof_0),_mm256_mul_pd(q_acc_2_1,src_dof_1)),_mm256_mul_pd(q_acc_2_10,src_dof_10)),_mm256_mul_pd(q_acc_2_11,src_dof_11)),_mm256_mul_pd(q_acc_2_2,src_dof_2)),_mm256_mul_pd(q_acc_2_3,src_dof_3)),_mm256_mul_pd(q_acc_2_4,src_dof_4)),_mm256_mul_pd(q_acc_2_5,src_dof_5)),_mm256_mul_pd(q_acc_2_6,src_dof_6)),_mm256_mul_pd(q_acc_2_7,src_dof_7)),_mm256_mul_pd(q_acc_2_8,src_dof_8)),_mm256_mul_pd(q_acc_2_9,src_dof_9)); + _mm256_storeu_pd(&_data_dst[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1],_mm256_add_pd(elMatVec_0,_mm256_loadu_pd(& _data_dst[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]))); + _mm256_storeu_pd(&_data_dst[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))],_mm256_add_pd(elMatVec_1,_mm256_loadu_pd(& _data_dst[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]))); + _mm256_storeu_pd(&_data_dst[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1],_mm256_add_pd(elMatVec_2,_mm256_loadu_pd(& _data_dst[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1]))); + } + for (int64_t ctr_0 = (int64_t)((-ctr_1 + micro_edges_per_macro_edge - 1) / (4)) * (4); ctr_0 < -ctr_1 + micro_edges_per_macro_edge - 1; ctr_0 += 1) + { + + const int64_t phantom_ctr_0 = ctr_0; + real_t _data_float_loop_ctr_array_dim_0[4]; + _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0; + _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1; + _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2; + _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3; + real_t _data_float_loop_ctr_array_dim_1[4]; + _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1; + + const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]; + const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]; + const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0; + const real_t src_dof_0 = _data_src_vertex_0[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]; + const real_t src_dof_1 = _data_src_vertex_0[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]; + const real_t src_dof_2 = _data_src_vertex_0[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1]; + const real_t src_dof_3 = _data_src_edge_0[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]; + const real_t src_dof_4 = _data_src_edge_0[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1]; + const real_t src_dof_5 = _data_src_edge_0[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]; + const real_t src_dof_6 = _data_src_vertex_1[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]; + const real_t src_dof_7 = _data_src_vertex_1[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]; + const real_t src_dof_8 = _data_src_vertex_1[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1]; + const real_t src_dof_9 = _data_src_edge_1[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]; + const real_t src_dof_10 = _data_src_edge_1[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1]; + const real_t src_dof_11 = _data_src_edge_1[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]; + const real_t rho_dof_0 = _data_rhoVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]; + const real_t rho_dof_1 = _data_rhoVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]; + const real_t rho_dof_2 = _data_rhoVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1]; + const real_t rho_dof_3 = _data_rhoEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]; + const real_t rho_dof_4 = _data_rhoEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1]; + const real_t rho_dof_5 = _data_rhoEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]; + real_t q_acc_0_0 = 0.0; + real_t q_acc_0_1 = 0.0; + real_t q_acc_0_2 = 0.0; + real_t q_acc_0_3 = 0.0; + real_t q_acc_0_4 = 0.0; + real_t q_acc_0_5 = 0.0; + real_t q_acc_0_6 = 0.0; + real_t q_acc_0_7 = 0.0; + real_t q_acc_0_8 = 0.0; + real_t q_acc_0_9 = 0.0; + real_t q_acc_0_10 = 0.0; + real_t q_acc_0_11 = 0.0; + real_t q_acc_1_0 = 0.0; + real_t q_acc_1_1 = 0.0; + real_t q_acc_1_2 = 0.0; + real_t q_acc_1_3 = 0.0; + real_t q_acc_1_4 = 0.0; + real_t q_acc_1_5 = 0.0; + real_t q_acc_1_6 = 0.0; + real_t q_acc_1_7 = 0.0; + real_t q_acc_1_8 = 0.0; + real_t q_acc_1_9 = 0.0; + real_t q_acc_1_10 = 0.0; + real_t q_acc_1_11 = 0.0; + real_t q_acc_2_0 = 0.0; + real_t q_acc_2_1 = 0.0; + real_t q_acc_2_2 = 0.0; + real_t q_acc_2_3 = 0.0; + real_t q_acc_2_4 = 0.0; + real_t q_acc_2_5 = 0.0; + real_t q_acc_2_6 = 0.0; + real_t q_acc_2_7 = 0.0; + real_t q_acc_2_8 = 0.0; + real_t q_acc_2_9 = 0.0; + real_t q_acc_2_10 = 0.0; + real_t q_acc_2_11 = 0.0; + for (int64_t q = 0; q < 4; q += 1) + { + const real_t tmp_qloop_0 = 4.0*_data_q_p_0[q]; + const real_t tmp_qloop_1 = tmp_qloop_0*_data_q_p_1[q]; + const real_t tmp_qloop_2 = (_data_q_p_0[q]*_data_q_p_0[q]); + const real_t tmp_qloop_3 = tmp_qloop_2*2.0; + const real_t tmp_qloop_4 = (_data_q_p_1[q]*_data_q_p_1[q]); + const real_t tmp_qloop_5 = tmp_qloop_4*2.0; + const real_t tmp_qloop_6 = tmp_qloop_1 + tmp_qloop_3 + tmp_qloop_5 - 3.0*_data_q_p_0[q] - 3.0*_data_q_p_1[q] + 1.0; + const real_t tmp_qloop_7 = 4.0*_data_q_p_1[q]; + const real_t tmp_qloop_8 = rho_dof_0*(tmp_qloop_0 + tmp_qloop_7 - 3.0); + const real_t tmp_qloop_9 = rho_dof_1*(tmp_qloop_0 - 1.0) + rho_dof_3*tmp_qloop_7 - rho_dof_4*tmp_qloop_7 + rho_dof_5*(-tmp_qloop_7 - 8.0*_data_q_p_0[q] + 4.0) + tmp_qloop_8; + const real_t tmp_qloop_10 = rho_dof_2*(tmp_qloop_7 - 1.0) + rho_dof_3*tmp_qloop_0 + rho_dof_4*(-tmp_qloop_0 - 8.0*_data_q_p_1[q] + 4.0) - rho_dof_5*tmp_qloop_0 + tmp_qloop_8; + const real_t tmp_qloop_11 = jac_affine_inv_0_0_BLUE*tmp_qloop_9 + jac_affine_inv_1_0_BLUE*tmp_qloop_10; + const real_t tmp_qloop_12 = tmp_qloop_3 - _data_q_p_0[q]; + const real_t tmp_qloop_13 = tmp_qloop_5 - _data_q_p_1[q]; + const real_t tmp_qloop_14 = -tmp_qloop_1 + tmp_qloop_4*-4.0 + tmp_qloop_7; + const real_t tmp_qloop_15 = tmp_qloop_2*4.0; + const real_t tmp_qloop_16 = tmp_qloop_0 - tmp_qloop_1 - tmp_qloop_15; + const real_t tmp_qloop_17 = abs_det_jac_affine_BLUE*1.0 / (rho_dof_0*tmp_qloop_6 + rho_dof_1*tmp_qloop_12 + rho_dof_2*tmp_qloop_13 + rho_dof_3*tmp_qloop_1 + rho_dof_4*tmp_qloop_14 + rho_dof_5*tmp_qloop_16)*_data_q_w[q]; + const real_t tmp_qloop_18 = tmp_qloop_17*(1.0 - _data_q_p_0[q] - _data_q_p_1[q]); + const real_t tmp_qloop_19 = tmp_qloop_11*tmp_qloop_18; + const real_t tmp_qloop_20 = jac_affine_inv_0_1_BLUE*tmp_qloop_9 + jac_affine_inv_1_1_BLUE*tmp_qloop_10; + const real_t tmp_qloop_21 = tmp_qloop_18*tmp_qloop_20; + const real_t tmp_qloop_22 = tmp_qloop_17*_data_q_p_0[q]; + const real_t tmp_qloop_23 = tmp_qloop_11*tmp_qloop_22; + const real_t tmp_qloop_24 = tmp_qloop_17*_data_q_p_1[q]; + const real_t tmp_qloop_25 = tmp_qloop_11*tmp_qloop_24; + const real_t tmp_qloop_26 = tmp_qloop_20*tmp_qloop_22; + const real_t tmp_qloop_27 = tmp_qloop_20*tmp_qloop_24; + const real_t tmp_qloop_28 = tmp_qloop_0*tmp_qloop_17*tmp_qloop_4; + const real_t q_tmp_0_0 = tmp_qloop_19*tmp_qloop_6; + const real_t q_tmp_0_1 = tmp_qloop_12*tmp_qloop_19; + const real_t q_tmp_0_2 = tmp_qloop_13*tmp_qloop_19; + const real_t q_tmp_0_3 = tmp_qloop_1*tmp_qloop_19; + const real_t q_tmp_0_4 = tmp_qloop_14*tmp_qloop_19; + const real_t q_tmp_0_5 = tmp_qloop_16*tmp_qloop_19; + const real_t q_tmp_0_6 = tmp_qloop_21*tmp_qloop_6; + const real_t q_tmp_0_7 = tmp_qloop_12*tmp_qloop_21; + const real_t q_tmp_0_8 = tmp_qloop_13*tmp_qloop_21; + const real_t q_tmp_0_9 = tmp_qloop_1*tmp_qloop_21; + const real_t q_tmp_0_10 = tmp_qloop_14*tmp_qloop_21; + const real_t q_tmp_0_11 = tmp_qloop_16*tmp_qloop_21; + const real_t q_tmp_1_0 = tmp_qloop_23*tmp_qloop_6; + const real_t q_tmp_1_1 = tmp_qloop_12*tmp_qloop_23; + const real_t q_tmp_1_2 = tmp_qloop_13*tmp_qloop_23; + const real_t q_tmp_1_3 = tmp_qloop_15*tmp_qloop_25; + const real_t q_tmp_1_4 = tmp_qloop_14*tmp_qloop_23; + const real_t q_tmp_1_5 = tmp_qloop_16*tmp_qloop_23; + const real_t q_tmp_1_6 = tmp_qloop_26*tmp_qloop_6; + const real_t q_tmp_1_7 = tmp_qloop_12*tmp_qloop_26; + const real_t q_tmp_1_8 = tmp_qloop_13*tmp_qloop_26; + const real_t q_tmp_1_9 = tmp_qloop_15*tmp_qloop_27; + const real_t q_tmp_1_10 = tmp_qloop_14*tmp_qloop_26; + const real_t q_tmp_1_11 = tmp_qloop_16*tmp_qloop_26; + const real_t q_tmp_2_0 = tmp_qloop_25*tmp_qloop_6; + const real_t q_tmp_2_1 = tmp_qloop_12*tmp_qloop_25; + const real_t q_tmp_2_2 = tmp_qloop_13*tmp_qloop_25; + const real_t q_tmp_2_3 = tmp_qloop_11*tmp_qloop_28; + const real_t q_tmp_2_4 = tmp_qloop_14*tmp_qloop_25; + const real_t q_tmp_2_5 = tmp_qloop_16*tmp_qloop_25; + const real_t q_tmp_2_6 = tmp_qloop_27*tmp_qloop_6; + const real_t q_tmp_2_7 = tmp_qloop_12*tmp_qloop_27; + const real_t q_tmp_2_8 = tmp_qloop_13*tmp_qloop_27; + const real_t q_tmp_2_9 = tmp_qloop_20*tmp_qloop_28; + const real_t q_tmp_2_10 = tmp_qloop_14*tmp_qloop_27; + const real_t q_tmp_2_11 = tmp_qloop_16*tmp_qloop_27; + q_acc_0_0 = q_acc_0_0 + q_tmp_0_0; + q_acc_0_1 = q_acc_0_1 + q_tmp_0_1; + q_acc_0_2 = q_acc_0_2 + q_tmp_0_2; + q_acc_0_3 = q_acc_0_3 + q_tmp_0_3; + q_acc_0_4 = q_acc_0_4 + q_tmp_0_4; + q_acc_0_5 = q_acc_0_5 + q_tmp_0_5; + q_acc_0_6 = q_acc_0_6 + q_tmp_0_6; + q_acc_0_7 = q_acc_0_7 + q_tmp_0_7; + q_acc_0_8 = q_acc_0_8 + q_tmp_0_8; + q_acc_0_9 = q_acc_0_9 + q_tmp_0_9; + q_acc_0_10 = q_acc_0_10 + q_tmp_0_10; + q_acc_0_11 = q_acc_0_11 + q_tmp_0_11; + q_acc_1_0 = q_acc_1_0 + q_tmp_1_0; + q_acc_1_1 = q_acc_1_1 + q_tmp_1_1; + q_acc_1_2 = q_acc_1_2 + q_tmp_1_2; + q_acc_1_3 = q_acc_1_3 + q_tmp_1_3; + q_acc_1_4 = q_acc_1_4 + q_tmp_1_4; + q_acc_1_5 = q_acc_1_5 + q_tmp_1_5; + q_acc_1_6 = q_acc_1_6 + q_tmp_1_6; + q_acc_1_7 = q_acc_1_7 + q_tmp_1_7; + q_acc_1_8 = q_acc_1_8 + q_tmp_1_8; + q_acc_1_9 = q_acc_1_9 + q_tmp_1_9; + q_acc_1_10 = q_acc_1_10 + q_tmp_1_10; + q_acc_1_11 = q_acc_1_11 + q_tmp_1_11; + q_acc_2_0 = q_acc_2_0 + q_tmp_2_0; + q_acc_2_1 = q_acc_2_1 + q_tmp_2_1; + q_acc_2_2 = q_acc_2_2 + q_tmp_2_2; + q_acc_2_3 = q_acc_2_3 + q_tmp_2_3; + q_acc_2_4 = q_acc_2_4 + q_tmp_2_4; + q_acc_2_5 = q_acc_2_5 + q_tmp_2_5; + q_acc_2_6 = q_acc_2_6 + q_tmp_2_6; + q_acc_2_7 = q_acc_2_7 + q_tmp_2_7; + q_acc_2_8 = q_acc_2_8 + q_tmp_2_8; + q_acc_2_9 = q_acc_2_9 + q_tmp_2_9; + q_acc_2_10 = q_acc_2_10 + q_tmp_2_10; + q_acc_2_11 = q_acc_2_11 + q_tmp_2_11; + } + const real_t elMatVec_0 = q_acc_0_0*src_dof_0 + q_acc_0_1*src_dof_1 + q_acc_0_10*src_dof_10 + q_acc_0_11*src_dof_11 + q_acc_0_2*src_dof_2 + q_acc_0_3*src_dof_3 + q_acc_0_4*src_dof_4 + q_acc_0_5*src_dof_5 + q_acc_0_6*src_dof_6 + q_acc_0_7*src_dof_7 + q_acc_0_8*src_dof_8 + q_acc_0_9*src_dof_9; + const real_t elMatVec_1 = q_acc_1_0*src_dof_0 + q_acc_1_1*src_dof_1 + q_acc_1_10*src_dof_10 + q_acc_1_11*src_dof_11 + q_acc_1_2*src_dof_2 + q_acc_1_3*src_dof_3 + q_acc_1_4*src_dof_4 + q_acc_1_5*src_dof_5 + q_acc_1_6*src_dof_6 + q_acc_1_7*src_dof_7 + q_acc_1_8*src_dof_8 + q_acc_1_9*src_dof_9; + const real_t elMatVec_2 = q_acc_2_0*src_dof_0 + q_acc_2_1*src_dof_1 + q_acc_2_10*src_dof_10 + q_acc_2_11*src_dof_11 + q_acc_2_2*src_dof_2 + q_acc_2_3*src_dof_3 + q_acc_2_4*src_dof_4 + q_acc_2_5*src_dof_5 + q_acc_2_6*src_dof_6 + q_acc_2_7*src_dof_7 + q_acc_2_8*src_dof_8 + q_acc_2_9*src_dof_9; + _data_dst[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1] = elMatVec_0 + _data_dst[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]; + _data_dst[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))] = elMatVec_1 + _data_dst[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]; + _data_dst[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1] = elMatVec_2 + _data_dst[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1]; + } + } + } + } +} +} // namespace operatorgeneration + +} // namespace hyteg diff --git a/operators/grad_rho_by_rho_dot_u/avx/P2VectorToP1ElementwiseGradRhoByRhoDotU_apply_P2VectorToP1ElementwiseGradRhoByRhoDotU_macro_3D.cpp b/operators/grad_rho_by_rho_dot_u/avx/P2VectorToP1ElementwiseGradRhoByRhoDotU_apply_P2VectorToP1ElementwiseGradRhoByRhoDotU_macro_3D.cpp new file mode 100644 index 0000000000000000000000000000000000000000..d4b9475dad03e458e865791f3643311d68769b33 --- /dev/null +++ b/operators/grad_rho_by_rho_dot_u/avx/P2VectorToP1ElementwiseGradRhoByRhoDotU_apply_P2VectorToP1ElementwiseGradRhoByRhoDotU_macro_3D.cpp @@ -0,0 +1,6458 @@ +/* +* Copyright (c) 2017-2024 Nils Kohl, Daniel Bauer, Fabian Böhm. +* +* This file is part of HyTeG +* (see https://i10git.cs.fau.de/hyteg/hyteg). +* +* This program is free software: you can redistribute it and/or modify +* it under the terms of the GNU General Public License as published by +* the Free Software Foundation, either version 3 of the License, or +* (at your option) any later version. +* +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU General Public License for more details. +* +* You should have received a copy of the GNU General Public License +* along with this program. If not, see <http://www.gnu.org/licenses/>. +*/ + +/* +* The entire file was generated with the HyTeG Operator Generator. +* +* Avoid modifying this file. If buggy, consider fixing the generator itself. +*/ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +#include "../P2VectorToP1ElementwiseGradRhoByRhoDotU.hpp" + +#define FUNC_PREFIX + +namespace hyteg { + +namespace operatorgeneration { + +void P2VectorToP1ElementwiseGradRhoByRhoDotU::apply_P2VectorToP1ElementwiseGradRhoByRhoDotU_macro_3D( real_t * RESTRICT _data_dst, real_t * RESTRICT _data_rhoEdge, real_t * RESTRICT _data_rhoVertex, real_t * RESTRICT _data_src_edge_0, real_t * RESTRICT _data_src_edge_1, real_t * RESTRICT _data_src_edge_2, real_t * RESTRICT _data_src_vertex_0, real_t * RESTRICT _data_src_vertex_1, real_t * RESTRICT _data_src_vertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +{ + { + const real_t _data_q_w [] = {-0.1333333333333333, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983}; + + const real_t _data_q_p_0 [] = {0.25, 0.16666666666666666, 0.16666666666666666, 0.5, 0.16666666666666666}; + + const real_t _data_q_p_1 [] = {0.25, 0.16666666666666666, 0.5, 0.16666666666666666, 0.16666666666666666}; + + const real_t _data_q_p_2 [] = {0.25, 0.5, 0.16666666666666666, 0.16666666666666666, 0.16666666666666666}; + + const real_t tmp_coords_jac_0_WHITE_UP = 1.0 / (micro_edges_per_macro_edge_float)*1.0; + const real_t p_affine_const_0_0_WHITE_UP = macro_vertex_coord_id_0comp0; + const real_t p_affine_const_0_1_WHITE_UP = macro_vertex_coord_id_0comp1; + const real_t p_affine_const_0_2_WHITE_UP = macro_vertex_coord_id_0comp2; + const real_t p_affine_const_1_0_WHITE_UP = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_WHITE_UP*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0); + const real_t p_affine_const_1_1_WHITE_UP = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_WHITE_UP*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1); + const real_t p_affine_const_1_2_WHITE_UP = macro_vertex_coord_id_0comp2 + tmp_coords_jac_0_WHITE_UP*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2); + const real_t p_affine_const_2_0_WHITE_UP = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_WHITE_UP*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0); + const real_t p_affine_const_2_1_WHITE_UP = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_WHITE_UP*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1); + const real_t p_affine_const_2_2_WHITE_UP = macro_vertex_coord_id_0comp2 + tmp_coords_jac_0_WHITE_UP*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2); + const real_t p_affine_const_3_0_WHITE_UP = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_WHITE_UP*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0); + const real_t p_affine_const_3_1_WHITE_UP = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_WHITE_UP*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1); + const real_t p_affine_const_3_2_WHITE_UP = macro_vertex_coord_id_0comp2 + tmp_coords_jac_0_WHITE_UP*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2); + const real_t jac_affine_0_0_WHITE_UP = -p_affine_const_0_0_WHITE_UP + p_affine_const_1_0_WHITE_UP; + const real_t jac_affine_0_1_WHITE_UP = -p_affine_const_0_0_WHITE_UP + p_affine_const_2_0_WHITE_UP; + const real_t jac_affine_0_2_WHITE_UP = -p_affine_const_0_0_WHITE_UP + p_affine_const_3_0_WHITE_UP; + const real_t jac_affine_1_0_WHITE_UP = -p_affine_const_0_1_WHITE_UP + p_affine_const_1_1_WHITE_UP; + const real_t jac_affine_1_1_WHITE_UP = -p_affine_const_0_1_WHITE_UP + p_affine_const_2_1_WHITE_UP; + const real_t tmp_coords_jac_5_WHITE_UP = jac_affine_0_2_WHITE_UP*jac_affine_1_1_WHITE_UP; + const real_t jac_affine_1_2_WHITE_UP = -p_affine_const_0_1_WHITE_UP + p_affine_const_3_1_WHITE_UP; + const real_t tmp_coords_jac_3_WHITE_UP = jac_affine_0_1_WHITE_UP*jac_affine_1_2_WHITE_UP; + const real_t jac_affine_2_0_WHITE_UP = -p_affine_const_0_2_WHITE_UP + p_affine_const_1_2_WHITE_UP; + const real_t jac_affine_2_1_WHITE_UP = -p_affine_const_0_2_WHITE_UP + p_affine_const_2_2_WHITE_UP; + const real_t tmp_coords_jac_2_WHITE_UP = jac_affine_1_2_WHITE_UP*jac_affine_2_1_WHITE_UP; + const real_t jac_affine_2_2_WHITE_UP = -p_affine_const_0_2_WHITE_UP + p_affine_const_3_2_WHITE_UP; + const real_t tmp_coords_jac_1_WHITE_UP = jac_affine_1_1_WHITE_UP*jac_affine_2_2_WHITE_UP; + const real_t tmp_coords_jac_4_WHITE_UP = jac_affine_0_1_WHITE_UP*jac_affine_2_2_WHITE_UP; + const real_t tmp_coords_jac_6_WHITE_UP = jac_affine_0_0_WHITE_UP*tmp_coords_jac_1_WHITE_UP - jac_affine_0_0_WHITE_UP*tmp_coords_jac_2_WHITE_UP + jac_affine_0_2_WHITE_UP*jac_affine_1_0_WHITE_UP*jac_affine_2_1_WHITE_UP - jac_affine_1_0_WHITE_UP*tmp_coords_jac_4_WHITE_UP + jac_affine_2_0_WHITE_UP*tmp_coords_jac_3_WHITE_UP - jac_affine_2_0_WHITE_UP*tmp_coords_jac_5_WHITE_UP; + const real_t tmp_coords_jac_7_WHITE_UP = 1.0 / (tmp_coords_jac_6_WHITE_UP); + const real_t jac_affine_inv_0_0_WHITE_UP = tmp_coords_jac_7_WHITE_UP*(tmp_coords_jac_1_WHITE_UP - tmp_coords_jac_2_WHITE_UP); + const real_t jac_affine_inv_0_1_WHITE_UP = tmp_coords_jac_7_WHITE_UP*(jac_affine_0_2_WHITE_UP*jac_affine_2_1_WHITE_UP - tmp_coords_jac_4_WHITE_UP); + const real_t jac_affine_inv_0_2_WHITE_UP = tmp_coords_jac_7_WHITE_UP*(tmp_coords_jac_3_WHITE_UP - tmp_coords_jac_5_WHITE_UP); + const real_t jac_affine_inv_1_0_WHITE_UP = tmp_coords_jac_7_WHITE_UP*(-jac_affine_1_0_WHITE_UP*jac_affine_2_2_WHITE_UP + jac_affine_1_2_WHITE_UP*jac_affine_2_0_WHITE_UP); + const real_t jac_affine_inv_1_1_WHITE_UP = tmp_coords_jac_7_WHITE_UP*(jac_affine_0_0_WHITE_UP*jac_affine_2_2_WHITE_UP - jac_affine_0_2_WHITE_UP*jac_affine_2_0_WHITE_UP); + const real_t jac_affine_inv_1_2_WHITE_UP = tmp_coords_jac_7_WHITE_UP*(-jac_affine_0_0_WHITE_UP*jac_affine_1_2_WHITE_UP + jac_affine_0_2_WHITE_UP*jac_affine_1_0_WHITE_UP); + const real_t jac_affine_inv_2_0_WHITE_UP = tmp_coords_jac_7_WHITE_UP*(jac_affine_1_0_WHITE_UP*jac_affine_2_1_WHITE_UP - jac_affine_1_1_WHITE_UP*jac_affine_2_0_WHITE_UP); + const real_t jac_affine_inv_2_1_WHITE_UP = tmp_coords_jac_7_WHITE_UP*(-jac_affine_0_0_WHITE_UP*jac_affine_2_1_WHITE_UP + jac_affine_0_1_WHITE_UP*jac_affine_2_0_WHITE_UP); + const real_t jac_affine_inv_2_2_WHITE_UP = tmp_coords_jac_7_WHITE_UP*(jac_affine_0_0_WHITE_UP*jac_affine_1_1_WHITE_UP - jac_affine_0_1_WHITE_UP*jac_affine_1_0_WHITE_UP); + const real_t abs_det_jac_affine_WHITE_UP = abs(tmp_coords_jac_6_WHITE_UP); + { + /* CellType.WHITE_UP */ + for (int64_t ctr_2 = 0; ctr_2 < micro_edges_per_macro_edge; ctr_2 += 1) + for (int64_t ctr_1 = 0; ctr_1 < -ctr_2 + micro_edges_per_macro_edge; ctr_1 += 1) + { + for (int64_t ctr_0 = 0; ctr_0 < (int64_t)((-ctr_1 - ctr_2 + micro_edges_per_macro_edge) / (4)) * (4); ctr_0 += 4) + { + + const int64_t phantom_ctr_0 = ctr_0; + real_t _data_float_loop_ctr_array_dim_0[4]; + _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0; + _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1; + _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2; + _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3; + real_t _data_float_loop_ctr_array_dim_1[4]; + _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1; + real_t _data_float_loop_ctr_array_dim_2[4]; + _data_float_loop_ctr_array_dim_2[0] = (real_t) ctr_2; + _data_float_loop_ctr_array_dim_2[1] = (real_t) ctr_2; + _data_float_loop_ctr_array_dim_2[2] = (real_t) ctr_2; + _data_float_loop_ctr_array_dim_2[3] = (real_t) ctr_2; + + const __m256d p_affine_0_0 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_3comp0,macro_vertex_coord_id_3comp0,macro_vertex_coord_id_3comp0,macro_vertex_coord_id_3comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)); + const __m256d p_affine_0_1 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_3comp1,macro_vertex_coord_id_3comp1,macro_vertex_coord_id_3comp1,macro_vertex_coord_id_3comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)); + const __m256d p_affine_0_2 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)),_mm256_set_pd(macro_vertex_coord_id_1comp2,macro_vertex_coord_id_1comp2,macro_vertex_coord_id_1comp2,macro_vertex_coord_id_1comp2)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)),_mm256_set_pd(macro_vertex_coord_id_2comp2,macro_vertex_coord_id_2comp2,macro_vertex_coord_id_2comp2,macro_vertex_coord_id_2comp2)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)),_mm256_set_pd(macro_vertex_coord_id_3comp2,macro_vertex_coord_id_3comp2,macro_vertex_coord_id_3comp2,macro_vertex_coord_id_3comp2)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)); + const __m256d p_affine_1_0 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_3comp0,macro_vertex_coord_id_3comp0,macro_vertex_coord_id_3comp0,macro_vertex_coord_id_3comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)); + const __m256d p_affine_1_1 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_3comp1,macro_vertex_coord_id_3comp1,macro_vertex_coord_id_3comp1,macro_vertex_coord_id_3comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)); + const __m256d p_affine_1_2 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)),_mm256_set_pd(macro_vertex_coord_id_1comp2,macro_vertex_coord_id_1comp2,macro_vertex_coord_id_1comp2,macro_vertex_coord_id_1comp2)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)),_mm256_set_pd(macro_vertex_coord_id_2comp2,macro_vertex_coord_id_2comp2,macro_vertex_coord_id_2comp2,macro_vertex_coord_id_2comp2)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)),_mm256_set_pd(macro_vertex_coord_id_3comp2,macro_vertex_coord_id_3comp2,macro_vertex_coord_id_3comp2,macro_vertex_coord_id_3comp2)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)); + const __m256d p_affine_2_0 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_3comp0,macro_vertex_coord_id_3comp0,macro_vertex_coord_id_3comp0,macro_vertex_coord_id_3comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)); + const __m256d p_affine_2_1 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_3comp1,macro_vertex_coord_id_3comp1,macro_vertex_coord_id_3comp1,macro_vertex_coord_id_3comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)); + const __m256d p_affine_2_2 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)),_mm256_set_pd(macro_vertex_coord_id_1comp2,macro_vertex_coord_id_1comp2,macro_vertex_coord_id_1comp2,macro_vertex_coord_id_1comp2)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)),_mm256_set_pd(macro_vertex_coord_id_2comp2,macro_vertex_coord_id_2comp2,macro_vertex_coord_id_2comp2,macro_vertex_coord_id_2comp2)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)),_mm256_set_pd(macro_vertex_coord_id_3comp2,macro_vertex_coord_id_3comp2,macro_vertex_coord_id_3comp2,macro_vertex_coord_id_3comp2)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)); + const __m256d p_affine_3_0 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_3comp0,macro_vertex_coord_id_3comp0,macro_vertex_coord_id_3comp0,macro_vertex_coord_id_3comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)); + const __m256d p_affine_3_1 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_3comp1,macro_vertex_coord_id_3comp1,macro_vertex_coord_id_3comp1,macro_vertex_coord_id_3comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)); + const __m256d p_affine_3_2 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)),_mm256_set_pd(macro_vertex_coord_id_1comp2,macro_vertex_coord_id_1comp2,macro_vertex_coord_id_1comp2,macro_vertex_coord_id_1comp2)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)),_mm256_set_pd(macro_vertex_coord_id_2comp2,macro_vertex_coord_id_2comp2,macro_vertex_coord_id_2comp2,macro_vertex_coord_id_2comp2)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)),_mm256_set_pd(macro_vertex_coord_id_3comp2,macro_vertex_coord_id_3comp2,macro_vertex_coord_id_3comp2,macro_vertex_coord_id_3comp2)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)); + const __m256d src_dof_0 = _mm256_loadu_pd(& _data_src_vertex_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))]); + const __m256d src_dof_1 = _mm256_loadu_pd(& _data_src_vertex_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1]); + const __m256d src_dof_2 = _mm256_loadu_pd(& _data_src_vertex_0[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))]); + const __m256d src_dof_3 = _mm256_loadu_pd(& _data_src_vertex_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))]); + const __m256d src_dof_4 = _mm256_loadu_pd(& _data_src_edge_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]); + const __m256d src_dof_5 = _mm256_loadu_pd(& _data_src_edge_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 5*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]); + const __m256d src_dof_6 = _mm256_loadu_pd(& _data_src_edge_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 4*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]); + const __m256d src_dof_7 = _mm256_loadu_pd(& _data_src_edge_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 3*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]); + const __m256d src_dof_8 = _mm256_loadu_pd(& _data_src_edge_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]); + const __m256d src_dof_9 = _mm256_loadu_pd(& _data_src_edge_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]); + const __m256d src_dof_10 = _mm256_loadu_pd(& _data_src_vertex_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))]); + const __m256d src_dof_11 = _mm256_loadu_pd(& _data_src_vertex_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1]); + const __m256d src_dof_12 = _mm256_loadu_pd(& _data_src_vertex_1[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))]); + const __m256d src_dof_13 = _mm256_loadu_pd(& _data_src_vertex_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))]); + const __m256d src_dof_14 = _mm256_loadu_pd(& _data_src_edge_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]); + const __m256d src_dof_15 = _mm256_loadu_pd(& _data_src_edge_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 5*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]); + const __m256d src_dof_16 = _mm256_loadu_pd(& _data_src_edge_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 4*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]); + const __m256d src_dof_17 = _mm256_loadu_pd(& _data_src_edge_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 3*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]); + const __m256d src_dof_18 = _mm256_loadu_pd(& _data_src_edge_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]); + const __m256d src_dof_19 = _mm256_loadu_pd(& _data_src_edge_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]); + const __m256d src_dof_20 = _mm256_loadu_pd(& _data_src_vertex_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))]); + const __m256d src_dof_21 = _mm256_loadu_pd(& _data_src_vertex_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1]); + const __m256d src_dof_22 = _mm256_loadu_pd(& _data_src_vertex_2[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))]); + const __m256d src_dof_23 = _mm256_loadu_pd(& _data_src_vertex_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))]); + const __m256d src_dof_24 = _mm256_loadu_pd(& _data_src_edge_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]); + const __m256d src_dof_25 = _mm256_loadu_pd(& _data_src_edge_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 5*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]); + const __m256d src_dof_26 = _mm256_loadu_pd(& _data_src_edge_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 4*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]); + const __m256d src_dof_27 = _mm256_loadu_pd(& _data_src_edge_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 3*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]); + const __m256d src_dof_28 = _mm256_loadu_pd(& _data_src_edge_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]); + const __m256d src_dof_29 = _mm256_loadu_pd(& _data_src_edge_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]); + const __m256d rho_dof_0 = _mm256_loadu_pd(& _data_rhoVertex[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))]); + const __m256d rho_dof_1 = _mm256_loadu_pd(& _data_rhoVertex[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1]); + const __m256d rho_dof_2 = _mm256_loadu_pd(& _data_rhoVertex[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))]); + const __m256d rho_dof_3 = _mm256_loadu_pd(& _data_rhoVertex[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))]); + const __m256d rho_dof_4 = _mm256_loadu_pd(& _data_rhoEdge[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]); + const __m256d rho_dof_5 = _mm256_loadu_pd(& _data_rhoEdge[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 5*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]); + const __m256d rho_dof_6 = _mm256_loadu_pd(& _data_rhoEdge[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 4*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]); + const __m256d rho_dof_7 = _mm256_loadu_pd(& _data_rhoEdge[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 3*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]); + const __m256d rho_dof_8 = _mm256_loadu_pd(& _data_rhoEdge[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]); + const __m256d rho_dof_9 = _mm256_loadu_pd(& _data_rhoEdge[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]); + __m256d q_acc_0_0 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_1 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_2 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_3 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_4 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_5 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_6 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_7 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_8 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_9 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_10 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_11 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_12 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_13 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_14 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_15 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_16 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_17 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_18 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_19 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_20 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_21 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_22 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_23 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_24 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_25 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_26 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_27 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_28 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_29 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_0 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_1 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_2 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_3 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_4 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_5 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_6 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_7 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_8 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_9 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_10 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_11 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_12 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_13 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_14 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_15 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_16 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_17 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_18 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_19 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_20 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_21 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_22 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_23 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_24 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_25 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_26 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_27 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_28 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_29 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_0 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_1 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_2 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_3 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_4 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_5 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_6 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_7 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_8 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_9 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_10 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_11 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_12 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_13 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_14 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_15 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_16 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_17 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_18 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_19 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_20 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_21 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_22 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_23 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_24 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_25 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_26 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_27 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_28 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_29 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_0 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_1 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_2 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_3 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_4 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_5 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_6 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_7 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_8 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_9 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_10 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_11 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_12 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_13 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_14 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_15 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_16 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_17 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_18 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_19 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_20 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_21 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_22 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_23 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_24 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_25 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_26 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_27 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_28 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_29 = _mm256_set_pd(0.0,0.0,0.0,0.0); + for (int64_t q = 0; q < 5; q += 1) + { + const __m256d tmp_qloop_0 = _mm256_mul_pd(_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q]),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])); + const __m256d tmp_qloop_1 = _mm256_mul_pd(tmp_qloop_0,_mm256_set_pd(2.0,2.0,2.0,2.0)); + const __m256d tmp_qloop_2 = _mm256_mul_pd(_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q])); + const __m256d tmp_qloop_3 = _mm256_mul_pd(tmp_qloop_2,_mm256_set_pd(2.0,2.0,2.0,2.0)); + const __m256d tmp_qloop_4 = _mm256_mul_pd(_mm256_set_pd(_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q]),_mm256_set_pd(_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q])); + const __m256d tmp_qloop_5 = _mm256_mul_pd(tmp_qloop_4,_mm256_set_pd(2.0,2.0,2.0,2.0)); + const __m256d tmp_qloop_6 = _mm256_mul_pd(_mm256_set_pd(4.0,4.0,4.0,4.0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q])); + const __m256d tmp_qloop_7 = _mm256_mul_pd(tmp_qloop_6,_mm256_set_pd(_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q])); + const __m256d tmp_qloop_8 = _mm256_mul_pd(_mm256_set_pd(4.0,4.0,4.0,4.0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])); + const __m256d tmp_qloop_9 = _mm256_mul_pd(tmp_qloop_8,_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q])); + const __m256d tmp_qloop_10 = _mm256_mul_pd(tmp_qloop_8,_mm256_set_pd(_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q])); + const __m256d tmp_qloop_11 = _mm256_add_pd(tmp_qloop_10,tmp_qloop_9); + const __m256d tmp_qloop_12 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-3.0,-3.0,-3.0,-3.0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),_mm256_mul_pd(_mm256_set_pd(-3.0,-3.0,-3.0,-3.0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]))),_mm256_mul_pd(_mm256_set_pd(-3.0,-3.0,-3.0,-3.0),_mm256_set_pd(_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),tmp_qloop_1),tmp_qloop_11),tmp_qloop_3),tmp_qloop_5),tmp_qloop_7); + const __m256d tmp_qloop_13 = _mm256_mul_pd(_mm256_set_pd(4.0,4.0,4.0,4.0),_mm256_set_pd(_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q])); + const __m256d tmp_qloop_14 = _mm256_add_pd(tmp_qloop_13,tmp_qloop_6); + const __m256d tmp_qloop_15 = _mm256_mul_pd(_mm256_mul_pd(rho_dof_8,tmp_qloop_6),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)); + const __m256d tmp_qloop_16 = _mm256_mul_pd(rho_dof_0,_mm256_add_pd(_mm256_add_pd(_mm256_set_pd(-3.0,-3.0,-3.0,-3.0),tmp_qloop_14),tmp_qloop_8)); + const __m256d tmp_qloop_17 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(rho_dof_7,tmp_qloop_13),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),tmp_qloop_16); + const __m256d tmp_qloop_18 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(rho_dof_1,_mm256_add_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),tmp_qloop_8)),_mm256_mul_pd(rho_dof_9,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_14,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_set_pd(-8.0,-8.0,-8.0,-8.0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q]))),_mm256_set_pd(4.0,4.0,4.0,4.0)))),_mm256_mul_pd(rho_dof_5,tmp_qloop_13)),_mm256_mul_pd(rho_dof_6,tmp_qloop_6)),tmp_qloop_15),tmp_qloop_17); + const __m256d tmp_qloop_19 = _mm256_add_pd(_mm256_set_pd(-4.0,-4.0,-4.0,-4.0),tmp_qloop_8); + const __m256d tmp_qloop_20 = _mm256_mul_pd(_mm256_mul_pd(rho_dof_9,tmp_qloop_8),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)); + const __m256d tmp_qloop_21 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(rho_dof_2,_mm256_add_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),tmp_qloop_6)),_mm256_mul_pd(rho_dof_8,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_13,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_19,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_mul_pd(_mm256_set_pd(-8.0,-8.0,-8.0,-8.0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]))))),_mm256_mul_pd(rho_dof_4,tmp_qloop_13)),_mm256_mul_pd(rho_dof_6,tmp_qloop_8)),tmp_qloop_17),tmp_qloop_20); + const __m256d tmp_qloop_22 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(rho_dof_3,_mm256_add_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),tmp_qloop_13)),_mm256_mul_pd(rho_dof_7,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_19,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_6,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_mul_pd(_mm256_set_pd(-8.0,-8.0,-8.0,-8.0),_mm256_set_pd(_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q]))))),_mm256_mul_pd(rho_dof_4,tmp_qloop_6)),_mm256_mul_pd(rho_dof_5,tmp_qloop_8)),tmp_qloop_15),tmp_qloop_16),tmp_qloop_20); + const __m256d tmp_qloop_23 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_18,_mm256_set_pd(jac_affine_inv_0_0_WHITE_UP,jac_affine_inv_0_0_WHITE_UP,jac_affine_inv_0_0_WHITE_UP,jac_affine_inv_0_0_WHITE_UP)),_mm256_mul_pd(tmp_qloop_21,_mm256_set_pd(jac_affine_inv_1_0_WHITE_UP,jac_affine_inv_1_0_WHITE_UP,jac_affine_inv_1_0_WHITE_UP,jac_affine_inv_1_0_WHITE_UP))),_mm256_mul_pd(tmp_qloop_22,_mm256_set_pd(jac_affine_inv_2_0_WHITE_UP,jac_affine_inv_2_0_WHITE_UP,jac_affine_inv_2_0_WHITE_UP,jac_affine_inv_2_0_WHITE_UP))); + const __m256d tmp_qloop_24 = _mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),tmp_qloop_1); + const __m256d tmp_qloop_25 = _mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q])),tmp_qloop_3); + const __m256d tmp_qloop_26 = _mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q])),tmp_qloop_5); + const __m256d tmp_qloop_27 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_10,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_7,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_mul_pd(tmp_qloop_4,_mm256_set_pd(-4.0,-4.0,-4.0,-4.0))),tmp_qloop_13); + const __m256d tmp_qloop_28 = _mm256_mul_pd(tmp_qloop_2,_mm256_set_pd(4.0,4.0,4.0,4.0)); + const __m256d tmp_qloop_29 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_28,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_7,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_mul_pd(tmp_qloop_9,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_6); + const __m256d tmp_qloop_30 = _mm256_mul_pd(tmp_qloop_0,_mm256_set_pd(4.0,4.0,4.0,4.0)); + const __m256d tmp_qloop_31 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_11,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_30,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_8); + const __m256d tmp_qloop_32 = _mm256_mul_pd(_mm256_mul_pd(_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(rho_dof_0,tmp_qloop_12),_mm256_mul_pd(rho_dof_1,tmp_qloop_24)),_mm256_mul_pd(rho_dof_2,tmp_qloop_25)),_mm256_mul_pd(rho_dof_3,tmp_qloop_26)),_mm256_mul_pd(rho_dof_4,tmp_qloop_7)),_mm256_mul_pd(rho_dof_5,tmp_qloop_10)),_mm256_mul_pd(rho_dof_6,tmp_qloop_9)),_mm256_mul_pd(rho_dof_7,tmp_qloop_27)),_mm256_mul_pd(rho_dof_8,tmp_qloop_29)),_mm256_mul_pd(rho_dof_9,tmp_qloop_31))),_mm256_set_pd(_data_q_w[q],_data_q_w[q],_data_q_w[q],_data_q_w[q])),_mm256_set_pd(abs_det_jac_affine_WHITE_UP,abs_det_jac_affine_WHITE_UP,abs_det_jac_affine_WHITE_UP,abs_det_jac_affine_WHITE_UP)); + const __m256d tmp_qloop_33 = _mm256_mul_pd(tmp_qloop_32,_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]))),_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q]))),_mm256_set_pd(1.0,1.0,1.0,1.0))); + const __m256d tmp_qloop_34 = _mm256_mul_pd(tmp_qloop_23,tmp_qloop_33); + const __m256d tmp_qloop_35 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_18,_mm256_set_pd(jac_affine_inv_0_1_WHITE_UP,jac_affine_inv_0_1_WHITE_UP,jac_affine_inv_0_1_WHITE_UP,jac_affine_inv_0_1_WHITE_UP)),_mm256_mul_pd(tmp_qloop_21,_mm256_set_pd(jac_affine_inv_1_1_WHITE_UP,jac_affine_inv_1_1_WHITE_UP,jac_affine_inv_1_1_WHITE_UP,jac_affine_inv_1_1_WHITE_UP))),_mm256_mul_pd(tmp_qloop_22,_mm256_set_pd(jac_affine_inv_2_1_WHITE_UP,jac_affine_inv_2_1_WHITE_UP,jac_affine_inv_2_1_WHITE_UP,jac_affine_inv_2_1_WHITE_UP))); + const __m256d tmp_qloop_36 = _mm256_mul_pd(tmp_qloop_33,tmp_qloop_35); + const __m256d tmp_qloop_37 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_18,_mm256_set_pd(jac_affine_inv_0_2_WHITE_UP,jac_affine_inv_0_2_WHITE_UP,jac_affine_inv_0_2_WHITE_UP,jac_affine_inv_0_2_WHITE_UP)),_mm256_mul_pd(tmp_qloop_21,_mm256_set_pd(jac_affine_inv_1_2_WHITE_UP,jac_affine_inv_1_2_WHITE_UP,jac_affine_inv_1_2_WHITE_UP,jac_affine_inv_1_2_WHITE_UP))),_mm256_mul_pd(tmp_qloop_22,_mm256_set_pd(jac_affine_inv_2_2_WHITE_UP,jac_affine_inv_2_2_WHITE_UP,jac_affine_inv_2_2_WHITE_UP,jac_affine_inv_2_2_WHITE_UP))); + const __m256d tmp_qloop_38 = _mm256_mul_pd(tmp_qloop_33,tmp_qloop_37); + const __m256d tmp_qloop_39 = _mm256_mul_pd(tmp_qloop_32,_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])); + const __m256d tmp_qloop_40 = _mm256_mul_pd(tmp_qloop_23,tmp_qloop_39); + const __m256d tmp_qloop_41 = _mm256_mul_pd(tmp_qloop_32,_mm256_set_pd(_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q])); + const __m256d tmp_qloop_42 = _mm256_mul_pd(tmp_qloop_23,tmp_qloop_41); + const __m256d tmp_qloop_43 = _mm256_mul_pd(tmp_qloop_42,tmp_qloop_9); + const __m256d tmp_qloop_44 = _mm256_mul_pd(tmp_qloop_32,_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q])); + const __m256d tmp_qloop_45 = _mm256_mul_pd(tmp_qloop_23,tmp_qloop_44); + const __m256d tmp_qloop_46 = _mm256_mul_pd(tmp_qloop_35,tmp_qloop_39); + const __m256d tmp_qloop_47 = _mm256_mul_pd(tmp_qloop_35,tmp_qloop_41); + const __m256d tmp_qloop_48 = _mm256_mul_pd(tmp_qloop_47,tmp_qloop_9); + const __m256d tmp_qloop_49 = _mm256_mul_pd(tmp_qloop_35,tmp_qloop_44); + const __m256d tmp_qloop_50 = _mm256_mul_pd(tmp_qloop_37,tmp_qloop_39); + const __m256d tmp_qloop_51 = _mm256_mul_pd(tmp_qloop_37,tmp_qloop_41); + const __m256d tmp_qloop_52 = _mm256_mul_pd(tmp_qloop_51,tmp_qloop_9); + const __m256d tmp_qloop_53 = _mm256_mul_pd(tmp_qloop_37,tmp_qloop_44); + const __m256d tmp_qloop_54 = _mm256_mul_pd(tmp_qloop_23,tmp_qloop_32); + const __m256d tmp_qloop_55 = _mm256_mul_pd(tmp_qloop_2,tmp_qloop_8); + const __m256d tmp_qloop_56 = _mm256_mul_pd(tmp_qloop_32,tmp_qloop_55); + const __m256d tmp_qloop_57 = _mm256_mul_pd(tmp_qloop_4,tmp_qloop_54); + const __m256d tmp_qloop_58 = _mm256_mul_pd(tmp_qloop_32,tmp_qloop_4); + const __m256d tmp_qloop_59 = _mm256_mul_pd(tmp_qloop_35,tmp_qloop_58); + const __m256d tmp_qloop_60 = _mm256_mul_pd(tmp_qloop_37,tmp_qloop_58); + const __m256d q_tmp_0_0 = _mm256_mul_pd(tmp_qloop_12,tmp_qloop_34); + const __m256d q_tmp_0_1 = _mm256_mul_pd(tmp_qloop_24,tmp_qloop_34); + const __m256d q_tmp_0_2 = _mm256_mul_pd(tmp_qloop_25,tmp_qloop_34); + const __m256d q_tmp_0_3 = _mm256_mul_pd(tmp_qloop_26,tmp_qloop_34); + const __m256d q_tmp_0_4 = _mm256_mul_pd(tmp_qloop_34,tmp_qloop_7); + const __m256d q_tmp_0_5 = _mm256_mul_pd(tmp_qloop_10,tmp_qloop_34); + const __m256d q_tmp_0_6 = _mm256_mul_pd(tmp_qloop_34,tmp_qloop_9); + const __m256d q_tmp_0_7 = _mm256_mul_pd(tmp_qloop_27,tmp_qloop_34); + const __m256d q_tmp_0_8 = _mm256_mul_pd(tmp_qloop_29,tmp_qloop_34); + const __m256d q_tmp_0_9 = _mm256_mul_pd(tmp_qloop_31,tmp_qloop_34); + const __m256d q_tmp_0_10 = _mm256_mul_pd(tmp_qloop_12,tmp_qloop_36); + const __m256d q_tmp_0_11 = _mm256_mul_pd(tmp_qloop_24,tmp_qloop_36); + const __m256d q_tmp_0_12 = _mm256_mul_pd(tmp_qloop_25,tmp_qloop_36); + const __m256d q_tmp_0_13 = _mm256_mul_pd(tmp_qloop_26,tmp_qloop_36); + const __m256d q_tmp_0_14 = _mm256_mul_pd(tmp_qloop_36,tmp_qloop_7); + const __m256d q_tmp_0_15 = _mm256_mul_pd(tmp_qloop_10,tmp_qloop_36); + const __m256d q_tmp_0_16 = _mm256_mul_pd(tmp_qloop_36,tmp_qloop_9); + const __m256d q_tmp_0_17 = _mm256_mul_pd(tmp_qloop_27,tmp_qloop_36); + const __m256d q_tmp_0_18 = _mm256_mul_pd(tmp_qloop_29,tmp_qloop_36); + const __m256d q_tmp_0_19 = _mm256_mul_pd(tmp_qloop_31,tmp_qloop_36); + const __m256d q_tmp_0_20 = _mm256_mul_pd(tmp_qloop_12,tmp_qloop_38); + const __m256d q_tmp_0_21 = _mm256_mul_pd(tmp_qloop_24,tmp_qloop_38); + const __m256d q_tmp_0_22 = _mm256_mul_pd(tmp_qloop_25,tmp_qloop_38); + const __m256d q_tmp_0_23 = _mm256_mul_pd(tmp_qloop_26,tmp_qloop_38); + const __m256d q_tmp_0_24 = _mm256_mul_pd(tmp_qloop_38,tmp_qloop_7); + const __m256d q_tmp_0_25 = _mm256_mul_pd(tmp_qloop_10,tmp_qloop_38); + const __m256d q_tmp_0_26 = _mm256_mul_pd(tmp_qloop_38,tmp_qloop_9); + const __m256d q_tmp_0_27 = _mm256_mul_pd(tmp_qloop_27,tmp_qloop_38); + const __m256d q_tmp_0_28 = _mm256_mul_pd(tmp_qloop_29,tmp_qloop_38); + const __m256d q_tmp_0_29 = _mm256_mul_pd(tmp_qloop_31,tmp_qloop_38); + const __m256d q_tmp_1_0 = _mm256_mul_pd(tmp_qloop_12,tmp_qloop_40); + const __m256d q_tmp_1_1 = _mm256_mul_pd(tmp_qloop_24,tmp_qloop_40); + const __m256d q_tmp_1_2 = _mm256_mul_pd(tmp_qloop_25,tmp_qloop_40); + const __m256d q_tmp_1_3 = _mm256_mul_pd(tmp_qloop_26,tmp_qloop_40); + const __m256d q_tmp_1_4 = tmp_qloop_43; + const __m256d q_tmp_1_5 = _mm256_mul_pd(tmp_qloop_30,tmp_qloop_42); + const __m256d q_tmp_1_6 = _mm256_mul_pd(tmp_qloop_30,tmp_qloop_45); + const __m256d q_tmp_1_7 = _mm256_mul_pd(tmp_qloop_27,tmp_qloop_40); + const __m256d q_tmp_1_8 = _mm256_mul_pd(tmp_qloop_29,tmp_qloop_40); + const __m256d q_tmp_1_9 = _mm256_mul_pd(tmp_qloop_31,tmp_qloop_40); + const __m256d q_tmp_1_10 = _mm256_mul_pd(tmp_qloop_12,tmp_qloop_46); + const __m256d q_tmp_1_11 = _mm256_mul_pd(tmp_qloop_24,tmp_qloop_46); + const __m256d q_tmp_1_12 = _mm256_mul_pd(tmp_qloop_25,tmp_qloop_46); + const __m256d q_tmp_1_13 = _mm256_mul_pd(tmp_qloop_26,tmp_qloop_46); + const __m256d q_tmp_1_14 = tmp_qloop_48; + const __m256d q_tmp_1_15 = _mm256_mul_pd(tmp_qloop_30,tmp_qloop_47); + const __m256d q_tmp_1_16 = _mm256_mul_pd(tmp_qloop_30,tmp_qloop_49); + const __m256d q_tmp_1_17 = _mm256_mul_pd(tmp_qloop_27,tmp_qloop_46); + const __m256d q_tmp_1_18 = _mm256_mul_pd(tmp_qloop_29,tmp_qloop_46); + const __m256d q_tmp_1_19 = _mm256_mul_pd(tmp_qloop_31,tmp_qloop_46); + const __m256d q_tmp_1_20 = _mm256_mul_pd(tmp_qloop_12,tmp_qloop_50); + const __m256d q_tmp_1_21 = _mm256_mul_pd(tmp_qloop_24,tmp_qloop_50); + const __m256d q_tmp_1_22 = _mm256_mul_pd(tmp_qloop_25,tmp_qloop_50); + const __m256d q_tmp_1_23 = _mm256_mul_pd(tmp_qloop_26,tmp_qloop_50); + const __m256d q_tmp_1_24 = tmp_qloop_52; + const __m256d q_tmp_1_25 = _mm256_mul_pd(tmp_qloop_30,tmp_qloop_51); + const __m256d q_tmp_1_26 = _mm256_mul_pd(tmp_qloop_30,tmp_qloop_53); + const __m256d q_tmp_1_27 = _mm256_mul_pd(tmp_qloop_27,tmp_qloop_50); + const __m256d q_tmp_1_28 = _mm256_mul_pd(tmp_qloop_29,tmp_qloop_50); + const __m256d q_tmp_1_29 = _mm256_mul_pd(tmp_qloop_31,tmp_qloop_50); + const __m256d q_tmp_2_0 = _mm256_mul_pd(tmp_qloop_12,tmp_qloop_45); + const __m256d q_tmp_2_1 = _mm256_mul_pd(tmp_qloop_24,tmp_qloop_45); + const __m256d q_tmp_2_2 = _mm256_mul_pd(tmp_qloop_25,tmp_qloop_45); + const __m256d q_tmp_2_3 = _mm256_mul_pd(tmp_qloop_26,tmp_qloop_45); + const __m256d q_tmp_2_4 = _mm256_mul_pd(tmp_qloop_28,tmp_qloop_42); + const __m256d q_tmp_2_5 = tmp_qloop_43; + const __m256d q_tmp_2_6 = _mm256_mul_pd(tmp_qloop_54,tmp_qloop_55); + const __m256d q_tmp_2_7 = _mm256_mul_pd(tmp_qloop_27,tmp_qloop_45); + const __m256d q_tmp_2_8 = _mm256_mul_pd(tmp_qloop_29,tmp_qloop_45); + const __m256d q_tmp_2_9 = _mm256_mul_pd(tmp_qloop_31,tmp_qloop_45); + const __m256d q_tmp_2_10 = _mm256_mul_pd(tmp_qloop_12,tmp_qloop_49); + const __m256d q_tmp_2_11 = _mm256_mul_pd(tmp_qloop_24,tmp_qloop_49); + const __m256d q_tmp_2_12 = _mm256_mul_pd(tmp_qloop_25,tmp_qloop_49); + const __m256d q_tmp_2_13 = _mm256_mul_pd(tmp_qloop_26,tmp_qloop_49); + const __m256d q_tmp_2_14 = _mm256_mul_pd(tmp_qloop_28,tmp_qloop_47); + const __m256d q_tmp_2_15 = tmp_qloop_48; + const __m256d q_tmp_2_16 = _mm256_mul_pd(tmp_qloop_35,tmp_qloop_56); + const __m256d q_tmp_2_17 = _mm256_mul_pd(tmp_qloop_27,tmp_qloop_49); + const __m256d q_tmp_2_18 = _mm256_mul_pd(tmp_qloop_29,tmp_qloop_49); + const __m256d q_tmp_2_19 = _mm256_mul_pd(tmp_qloop_31,tmp_qloop_49); + const __m256d q_tmp_2_20 = _mm256_mul_pd(tmp_qloop_12,tmp_qloop_53); + const __m256d q_tmp_2_21 = _mm256_mul_pd(tmp_qloop_24,tmp_qloop_53); + const __m256d q_tmp_2_22 = _mm256_mul_pd(tmp_qloop_25,tmp_qloop_53); + const __m256d q_tmp_2_23 = _mm256_mul_pd(tmp_qloop_26,tmp_qloop_53); + const __m256d q_tmp_2_24 = _mm256_mul_pd(tmp_qloop_28,tmp_qloop_51); + const __m256d q_tmp_2_25 = tmp_qloop_52; + const __m256d q_tmp_2_26 = _mm256_mul_pd(tmp_qloop_37,tmp_qloop_56); + const __m256d q_tmp_2_27 = _mm256_mul_pd(tmp_qloop_27,tmp_qloop_53); + const __m256d q_tmp_2_28 = _mm256_mul_pd(tmp_qloop_29,tmp_qloop_53); + const __m256d q_tmp_2_29 = _mm256_mul_pd(tmp_qloop_31,tmp_qloop_53); + const __m256d q_tmp_3_0 = _mm256_mul_pd(tmp_qloop_12,tmp_qloop_42); + const __m256d q_tmp_3_1 = _mm256_mul_pd(tmp_qloop_24,tmp_qloop_42); + const __m256d q_tmp_3_2 = _mm256_mul_pd(tmp_qloop_25,tmp_qloop_42); + const __m256d q_tmp_3_3 = _mm256_mul_pd(tmp_qloop_26,tmp_qloop_42); + const __m256d q_tmp_3_4 = _mm256_mul_pd(tmp_qloop_57,tmp_qloop_6); + const __m256d q_tmp_3_5 = _mm256_mul_pd(tmp_qloop_57,tmp_qloop_8); + const __m256d q_tmp_3_6 = tmp_qloop_43; + const __m256d q_tmp_3_7 = _mm256_mul_pd(tmp_qloop_27,tmp_qloop_42); + const __m256d q_tmp_3_8 = _mm256_mul_pd(tmp_qloop_29,tmp_qloop_42); + const __m256d q_tmp_3_9 = _mm256_mul_pd(tmp_qloop_31,tmp_qloop_42); + const __m256d q_tmp_3_10 = _mm256_mul_pd(tmp_qloop_12,tmp_qloop_47); + const __m256d q_tmp_3_11 = _mm256_mul_pd(tmp_qloop_24,tmp_qloop_47); + const __m256d q_tmp_3_12 = _mm256_mul_pd(tmp_qloop_25,tmp_qloop_47); + const __m256d q_tmp_3_13 = _mm256_mul_pd(tmp_qloop_26,tmp_qloop_47); + const __m256d q_tmp_3_14 = _mm256_mul_pd(tmp_qloop_59,tmp_qloop_6); + const __m256d q_tmp_3_15 = _mm256_mul_pd(tmp_qloop_59,tmp_qloop_8); + const __m256d q_tmp_3_16 = tmp_qloop_48; + const __m256d q_tmp_3_17 = _mm256_mul_pd(tmp_qloop_27,tmp_qloop_47); + const __m256d q_tmp_3_18 = _mm256_mul_pd(tmp_qloop_29,tmp_qloop_47); + const __m256d q_tmp_3_19 = _mm256_mul_pd(tmp_qloop_31,tmp_qloop_47); + const __m256d q_tmp_3_20 = _mm256_mul_pd(tmp_qloop_12,tmp_qloop_51); + const __m256d q_tmp_3_21 = _mm256_mul_pd(tmp_qloop_24,tmp_qloop_51); + const __m256d q_tmp_3_22 = _mm256_mul_pd(tmp_qloop_25,tmp_qloop_51); + const __m256d q_tmp_3_23 = _mm256_mul_pd(tmp_qloop_26,tmp_qloop_51); + const __m256d q_tmp_3_24 = _mm256_mul_pd(tmp_qloop_6,tmp_qloop_60); + const __m256d q_tmp_3_25 = _mm256_mul_pd(tmp_qloop_60,tmp_qloop_8); + const __m256d q_tmp_3_26 = tmp_qloop_52; + const __m256d q_tmp_3_27 = _mm256_mul_pd(tmp_qloop_27,tmp_qloop_51); + const __m256d q_tmp_3_28 = _mm256_mul_pd(tmp_qloop_29,tmp_qloop_51); + const __m256d q_tmp_3_29 = _mm256_mul_pd(tmp_qloop_31,tmp_qloop_51); + q_acc_0_0 = _mm256_add_pd(q_acc_0_0,q_tmp_0_0); + q_acc_0_1 = _mm256_add_pd(q_acc_0_1,q_tmp_0_1); + q_acc_0_2 = _mm256_add_pd(q_acc_0_2,q_tmp_0_2); + q_acc_0_3 = _mm256_add_pd(q_acc_0_3,q_tmp_0_3); + q_acc_0_4 = _mm256_add_pd(q_acc_0_4,q_tmp_0_4); + q_acc_0_5 = _mm256_add_pd(q_acc_0_5,q_tmp_0_5); + q_acc_0_6 = _mm256_add_pd(q_acc_0_6,q_tmp_0_6); + q_acc_0_7 = _mm256_add_pd(q_acc_0_7,q_tmp_0_7); + q_acc_0_8 = _mm256_add_pd(q_acc_0_8,q_tmp_0_8); + q_acc_0_9 = _mm256_add_pd(q_acc_0_9,q_tmp_0_9); + q_acc_0_10 = _mm256_add_pd(q_acc_0_10,q_tmp_0_10); + q_acc_0_11 = _mm256_add_pd(q_acc_0_11,q_tmp_0_11); + q_acc_0_12 = _mm256_add_pd(q_acc_0_12,q_tmp_0_12); + q_acc_0_13 = _mm256_add_pd(q_acc_0_13,q_tmp_0_13); + q_acc_0_14 = _mm256_add_pd(q_acc_0_14,q_tmp_0_14); + q_acc_0_15 = _mm256_add_pd(q_acc_0_15,q_tmp_0_15); + q_acc_0_16 = _mm256_add_pd(q_acc_0_16,q_tmp_0_16); + q_acc_0_17 = _mm256_add_pd(q_acc_0_17,q_tmp_0_17); + q_acc_0_18 = _mm256_add_pd(q_acc_0_18,q_tmp_0_18); + q_acc_0_19 = _mm256_add_pd(q_acc_0_19,q_tmp_0_19); + q_acc_0_20 = _mm256_add_pd(q_acc_0_20,q_tmp_0_20); + q_acc_0_21 = _mm256_add_pd(q_acc_0_21,q_tmp_0_21); + q_acc_0_22 = _mm256_add_pd(q_acc_0_22,q_tmp_0_22); + q_acc_0_23 = _mm256_add_pd(q_acc_0_23,q_tmp_0_23); + q_acc_0_24 = _mm256_add_pd(q_acc_0_24,q_tmp_0_24); + q_acc_0_25 = _mm256_add_pd(q_acc_0_25,q_tmp_0_25); + q_acc_0_26 = _mm256_add_pd(q_acc_0_26,q_tmp_0_26); + q_acc_0_27 = _mm256_add_pd(q_acc_0_27,q_tmp_0_27); + q_acc_0_28 = _mm256_add_pd(q_acc_0_28,q_tmp_0_28); + q_acc_0_29 = _mm256_add_pd(q_acc_0_29,q_tmp_0_29); + q_acc_1_0 = _mm256_add_pd(q_acc_1_0,q_tmp_1_0); + q_acc_1_1 = _mm256_add_pd(q_acc_1_1,q_tmp_1_1); + q_acc_1_2 = _mm256_add_pd(q_acc_1_2,q_tmp_1_2); + q_acc_1_3 = _mm256_add_pd(q_acc_1_3,q_tmp_1_3); + q_acc_1_4 = _mm256_add_pd(q_acc_1_4,q_tmp_1_4); + q_acc_1_5 = _mm256_add_pd(q_acc_1_5,q_tmp_1_5); + q_acc_1_6 = _mm256_add_pd(q_acc_1_6,q_tmp_1_6); + q_acc_1_7 = _mm256_add_pd(q_acc_1_7,q_tmp_1_7); + q_acc_1_8 = _mm256_add_pd(q_acc_1_8,q_tmp_1_8); + q_acc_1_9 = _mm256_add_pd(q_acc_1_9,q_tmp_1_9); + q_acc_1_10 = _mm256_add_pd(q_acc_1_10,q_tmp_1_10); + q_acc_1_11 = _mm256_add_pd(q_acc_1_11,q_tmp_1_11); + q_acc_1_12 = _mm256_add_pd(q_acc_1_12,q_tmp_1_12); + q_acc_1_13 = _mm256_add_pd(q_acc_1_13,q_tmp_1_13); + q_acc_1_14 = _mm256_add_pd(q_acc_1_14,q_tmp_1_14); + q_acc_1_15 = _mm256_add_pd(q_acc_1_15,q_tmp_1_15); + q_acc_1_16 = _mm256_add_pd(q_acc_1_16,q_tmp_1_16); + q_acc_1_17 = _mm256_add_pd(q_acc_1_17,q_tmp_1_17); + q_acc_1_18 = _mm256_add_pd(q_acc_1_18,q_tmp_1_18); + q_acc_1_19 = _mm256_add_pd(q_acc_1_19,q_tmp_1_19); + q_acc_1_20 = _mm256_add_pd(q_acc_1_20,q_tmp_1_20); + q_acc_1_21 = _mm256_add_pd(q_acc_1_21,q_tmp_1_21); + q_acc_1_22 = _mm256_add_pd(q_acc_1_22,q_tmp_1_22); + q_acc_1_23 = _mm256_add_pd(q_acc_1_23,q_tmp_1_23); + q_acc_1_24 = _mm256_add_pd(q_acc_1_24,q_tmp_1_24); + q_acc_1_25 = _mm256_add_pd(q_acc_1_25,q_tmp_1_25); + q_acc_1_26 = _mm256_add_pd(q_acc_1_26,q_tmp_1_26); + q_acc_1_27 = _mm256_add_pd(q_acc_1_27,q_tmp_1_27); + q_acc_1_28 = _mm256_add_pd(q_acc_1_28,q_tmp_1_28); + q_acc_1_29 = _mm256_add_pd(q_acc_1_29,q_tmp_1_29); + q_acc_2_0 = _mm256_add_pd(q_acc_2_0,q_tmp_2_0); + q_acc_2_1 = _mm256_add_pd(q_acc_2_1,q_tmp_2_1); + q_acc_2_2 = _mm256_add_pd(q_acc_2_2,q_tmp_2_2); + q_acc_2_3 = _mm256_add_pd(q_acc_2_3,q_tmp_2_3); + q_acc_2_4 = _mm256_add_pd(q_acc_2_4,q_tmp_2_4); + q_acc_2_5 = _mm256_add_pd(q_acc_2_5,q_tmp_2_5); + q_acc_2_6 = _mm256_add_pd(q_acc_2_6,q_tmp_2_6); + q_acc_2_7 = _mm256_add_pd(q_acc_2_7,q_tmp_2_7); + q_acc_2_8 = _mm256_add_pd(q_acc_2_8,q_tmp_2_8); + q_acc_2_9 = _mm256_add_pd(q_acc_2_9,q_tmp_2_9); + q_acc_2_10 = _mm256_add_pd(q_acc_2_10,q_tmp_2_10); + q_acc_2_11 = _mm256_add_pd(q_acc_2_11,q_tmp_2_11); + q_acc_2_12 = _mm256_add_pd(q_acc_2_12,q_tmp_2_12); + q_acc_2_13 = _mm256_add_pd(q_acc_2_13,q_tmp_2_13); + q_acc_2_14 = _mm256_add_pd(q_acc_2_14,q_tmp_2_14); + q_acc_2_15 = _mm256_add_pd(q_acc_2_15,q_tmp_2_15); + q_acc_2_16 = _mm256_add_pd(q_acc_2_16,q_tmp_2_16); + q_acc_2_17 = _mm256_add_pd(q_acc_2_17,q_tmp_2_17); + q_acc_2_18 = _mm256_add_pd(q_acc_2_18,q_tmp_2_18); + q_acc_2_19 = _mm256_add_pd(q_acc_2_19,q_tmp_2_19); + q_acc_2_20 = _mm256_add_pd(q_acc_2_20,q_tmp_2_20); + q_acc_2_21 = _mm256_add_pd(q_acc_2_21,q_tmp_2_21); + q_acc_2_22 = _mm256_add_pd(q_acc_2_22,q_tmp_2_22); + q_acc_2_23 = _mm256_add_pd(q_acc_2_23,q_tmp_2_23); + q_acc_2_24 = _mm256_add_pd(q_acc_2_24,q_tmp_2_24); + q_acc_2_25 = _mm256_add_pd(q_acc_2_25,q_tmp_2_25); + q_acc_2_26 = _mm256_add_pd(q_acc_2_26,q_tmp_2_26); + q_acc_2_27 = _mm256_add_pd(q_acc_2_27,q_tmp_2_27); + q_acc_2_28 = _mm256_add_pd(q_acc_2_28,q_tmp_2_28); + q_acc_2_29 = _mm256_add_pd(q_acc_2_29,q_tmp_2_29); + q_acc_3_0 = _mm256_add_pd(q_acc_3_0,q_tmp_3_0); + q_acc_3_1 = _mm256_add_pd(q_acc_3_1,q_tmp_3_1); + q_acc_3_2 = _mm256_add_pd(q_acc_3_2,q_tmp_3_2); + q_acc_3_3 = _mm256_add_pd(q_acc_3_3,q_tmp_3_3); + q_acc_3_4 = _mm256_add_pd(q_acc_3_4,q_tmp_3_4); + q_acc_3_5 = _mm256_add_pd(q_acc_3_5,q_tmp_3_5); + q_acc_3_6 = _mm256_add_pd(q_acc_3_6,q_tmp_3_6); + q_acc_3_7 = _mm256_add_pd(q_acc_3_7,q_tmp_3_7); + q_acc_3_8 = _mm256_add_pd(q_acc_3_8,q_tmp_3_8); + q_acc_3_9 = _mm256_add_pd(q_acc_3_9,q_tmp_3_9); + q_acc_3_10 = _mm256_add_pd(q_acc_3_10,q_tmp_3_10); + q_acc_3_11 = _mm256_add_pd(q_acc_3_11,q_tmp_3_11); + q_acc_3_12 = _mm256_add_pd(q_acc_3_12,q_tmp_3_12); + q_acc_3_13 = _mm256_add_pd(q_acc_3_13,q_tmp_3_13); + q_acc_3_14 = _mm256_add_pd(q_acc_3_14,q_tmp_3_14); + q_acc_3_15 = _mm256_add_pd(q_acc_3_15,q_tmp_3_15); + q_acc_3_16 = _mm256_add_pd(q_acc_3_16,q_tmp_3_16); + q_acc_3_17 = _mm256_add_pd(q_acc_3_17,q_tmp_3_17); + q_acc_3_18 = _mm256_add_pd(q_acc_3_18,q_tmp_3_18); + q_acc_3_19 = _mm256_add_pd(q_acc_3_19,q_tmp_3_19); + q_acc_3_20 = _mm256_add_pd(q_acc_3_20,q_tmp_3_20); + q_acc_3_21 = _mm256_add_pd(q_acc_3_21,q_tmp_3_21); + q_acc_3_22 = _mm256_add_pd(q_acc_3_22,q_tmp_3_22); + q_acc_3_23 = _mm256_add_pd(q_acc_3_23,q_tmp_3_23); + q_acc_3_24 = _mm256_add_pd(q_acc_3_24,q_tmp_3_24); + q_acc_3_25 = _mm256_add_pd(q_acc_3_25,q_tmp_3_25); + q_acc_3_26 = _mm256_add_pd(q_acc_3_26,q_tmp_3_26); + q_acc_3_27 = _mm256_add_pd(q_acc_3_27,q_tmp_3_27); + q_acc_3_28 = _mm256_add_pd(q_acc_3_28,q_tmp_3_28); + q_acc_3_29 = _mm256_add_pd(q_acc_3_29,q_tmp_3_29); + } + const __m256d elMatVec_0 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(q_acc_0_0,src_dof_0),_mm256_mul_pd(q_acc_0_1,src_dof_1)),_mm256_mul_pd(q_acc_0_10,src_dof_10)),_mm256_mul_pd(q_acc_0_11,src_dof_11)),_mm256_mul_pd(q_acc_0_12,src_dof_12)),_mm256_mul_pd(q_acc_0_13,src_dof_13)),_mm256_mul_pd(q_acc_0_14,src_dof_14)),_mm256_mul_pd(q_acc_0_15,src_dof_15)),_mm256_mul_pd(q_acc_0_16,src_dof_16)),_mm256_mul_pd(q_acc_0_17,src_dof_17)),_mm256_mul_pd(q_acc_0_18,src_dof_18)),_mm256_mul_pd(q_acc_0_19,src_dof_19)),_mm256_mul_pd(q_acc_0_2,src_dof_2)),_mm256_mul_pd(q_acc_0_20,src_dof_20)),_mm256_mul_pd(q_acc_0_21,src_dof_21)),_mm256_mul_pd(q_acc_0_22,src_dof_22)),_mm256_mul_pd(q_acc_0_23,src_dof_23)),_mm256_mul_pd(q_acc_0_24,src_dof_24)),_mm256_mul_pd(q_acc_0_25,src_dof_25)),_mm256_mul_pd(q_acc_0_26,src_dof_26)),_mm256_mul_pd(q_acc_0_27,src_dof_27)),_mm256_mul_pd(q_acc_0_28,src_dof_28)),_mm256_mul_pd(q_acc_0_29,src_dof_29)),_mm256_mul_pd(q_acc_0_3,src_dof_3)),_mm256_mul_pd(q_acc_0_4,src_dof_4)),_mm256_mul_pd(q_acc_0_5,src_dof_5)),_mm256_mul_pd(q_acc_0_6,src_dof_6)),_mm256_mul_pd(q_acc_0_7,src_dof_7)),_mm256_mul_pd(q_acc_0_8,src_dof_8)),_mm256_mul_pd(q_acc_0_9,src_dof_9)); + const __m256d elMatVec_1 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(q_acc_1_0,src_dof_0),_mm256_mul_pd(q_acc_1_1,src_dof_1)),_mm256_mul_pd(q_acc_1_10,src_dof_10)),_mm256_mul_pd(q_acc_1_11,src_dof_11)),_mm256_mul_pd(q_acc_1_12,src_dof_12)),_mm256_mul_pd(q_acc_1_13,src_dof_13)),_mm256_mul_pd(q_acc_1_14,src_dof_14)),_mm256_mul_pd(q_acc_1_15,src_dof_15)),_mm256_mul_pd(q_acc_1_16,src_dof_16)),_mm256_mul_pd(q_acc_1_17,src_dof_17)),_mm256_mul_pd(q_acc_1_18,src_dof_18)),_mm256_mul_pd(q_acc_1_19,src_dof_19)),_mm256_mul_pd(q_acc_1_2,src_dof_2)),_mm256_mul_pd(q_acc_1_20,src_dof_20)),_mm256_mul_pd(q_acc_1_21,src_dof_21)),_mm256_mul_pd(q_acc_1_22,src_dof_22)),_mm256_mul_pd(q_acc_1_23,src_dof_23)),_mm256_mul_pd(q_acc_1_24,src_dof_24)),_mm256_mul_pd(q_acc_1_25,src_dof_25)),_mm256_mul_pd(q_acc_1_26,src_dof_26)),_mm256_mul_pd(q_acc_1_27,src_dof_27)),_mm256_mul_pd(q_acc_1_28,src_dof_28)),_mm256_mul_pd(q_acc_1_29,src_dof_29)),_mm256_mul_pd(q_acc_1_3,src_dof_3)),_mm256_mul_pd(q_acc_1_4,src_dof_4)),_mm256_mul_pd(q_acc_1_5,src_dof_5)),_mm256_mul_pd(q_acc_1_6,src_dof_6)),_mm256_mul_pd(q_acc_1_7,src_dof_7)),_mm256_mul_pd(q_acc_1_8,src_dof_8)),_mm256_mul_pd(q_acc_1_9,src_dof_9)); + const __m256d elMatVec_2 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(q_acc_2_0,src_dof_0),_mm256_mul_pd(q_acc_2_1,src_dof_1)),_mm256_mul_pd(q_acc_2_10,src_dof_10)),_mm256_mul_pd(q_acc_2_11,src_dof_11)),_mm256_mul_pd(q_acc_2_12,src_dof_12)),_mm256_mul_pd(q_acc_2_13,src_dof_13)),_mm256_mul_pd(q_acc_2_14,src_dof_14)),_mm256_mul_pd(q_acc_2_15,src_dof_15)),_mm256_mul_pd(q_acc_2_16,src_dof_16)),_mm256_mul_pd(q_acc_2_17,src_dof_17)),_mm256_mul_pd(q_acc_2_18,src_dof_18)),_mm256_mul_pd(q_acc_2_19,src_dof_19)),_mm256_mul_pd(q_acc_2_2,src_dof_2)),_mm256_mul_pd(q_acc_2_20,src_dof_20)),_mm256_mul_pd(q_acc_2_21,src_dof_21)),_mm256_mul_pd(q_acc_2_22,src_dof_22)),_mm256_mul_pd(q_acc_2_23,src_dof_23)),_mm256_mul_pd(q_acc_2_24,src_dof_24)),_mm256_mul_pd(q_acc_2_25,src_dof_25)),_mm256_mul_pd(q_acc_2_26,src_dof_26)),_mm256_mul_pd(q_acc_2_27,src_dof_27)),_mm256_mul_pd(q_acc_2_28,src_dof_28)),_mm256_mul_pd(q_acc_2_29,src_dof_29)),_mm256_mul_pd(q_acc_2_3,src_dof_3)),_mm256_mul_pd(q_acc_2_4,src_dof_4)),_mm256_mul_pd(q_acc_2_5,src_dof_5)),_mm256_mul_pd(q_acc_2_6,src_dof_6)),_mm256_mul_pd(q_acc_2_7,src_dof_7)),_mm256_mul_pd(q_acc_2_8,src_dof_8)),_mm256_mul_pd(q_acc_2_9,src_dof_9)); + const __m256d elMatVec_3 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(q_acc_3_0,src_dof_0),_mm256_mul_pd(q_acc_3_1,src_dof_1)),_mm256_mul_pd(q_acc_3_10,src_dof_10)),_mm256_mul_pd(q_acc_3_11,src_dof_11)),_mm256_mul_pd(q_acc_3_12,src_dof_12)),_mm256_mul_pd(q_acc_3_13,src_dof_13)),_mm256_mul_pd(q_acc_3_14,src_dof_14)),_mm256_mul_pd(q_acc_3_15,src_dof_15)),_mm256_mul_pd(q_acc_3_16,src_dof_16)),_mm256_mul_pd(q_acc_3_17,src_dof_17)),_mm256_mul_pd(q_acc_3_18,src_dof_18)),_mm256_mul_pd(q_acc_3_19,src_dof_19)),_mm256_mul_pd(q_acc_3_2,src_dof_2)),_mm256_mul_pd(q_acc_3_20,src_dof_20)),_mm256_mul_pd(q_acc_3_21,src_dof_21)),_mm256_mul_pd(q_acc_3_22,src_dof_22)),_mm256_mul_pd(q_acc_3_23,src_dof_23)),_mm256_mul_pd(q_acc_3_24,src_dof_24)),_mm256_mul_pd(q_acc_3_25,src_dof_25)),_mm256_mul_pd(q_acc_3_26,src_dof_26)),_mm256_mul_pd(q_acc_3_27,src_dof_27)),_mm256_mul_pd(q_acc_3_28,src_dof_28)),_mm256_mul_pd(q_acc_3_29,src_dof_29)),_mm256_mul_pd(q_acc_3_3,src_dof_3)),_mm256_mul_pd(q_acc_3_4,src_dof_4)),_mm256_mul_pd(q_acc_3_5,src_dof_5)),_mm256_mul_pd(q_acc_3_6,src_dof_6)),_mm256_mul_pd(q_acc_3_7,src_dof_7)),_mm256_mul_pd(q_acc_3_8,src_dof_8)),_mm256_mul_pd(q_acc_3_9,src_dof_9)); + _mm256_storeu_pd(&_data_dst[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))],_mm256_add_pd(elMatVec_0,_mm256_loadu_pd(& _data_dst[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))]))); + _mm256_storeu_pd(&_data_dst[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1],_mm256_add_pd(elMatVec_1,_mm256_loadu_pd(& _data_dst[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1]))); + _mm256_storeu_pd(&_data_dst[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))],_mm256_add_pd(elMatVec_2,_mm256_loadu_pd(& _data_dst[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))]))); + _mm256_storeu_pd(&_data_dst[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))],_mm256_add_pd(elMatVec_3,_mm256_loadu_pd(& _data_dst[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))]))); + } + for (int64_t ctr_0 = (int64_t)((-ctr_1 - ctr_2 + micro_edges_per_macro_edge) / (4)) * (4); ctr_0 < -ctr_1 - ctr_2 + micro_edges_per_macro_edge; ctr_0 += 1) + { + + const int64_t phantom_ctr_0 = ctr_0; + real_t _data_float_loop_ctr_array_dim_0[4]; + _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0; + _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1; + _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2; + _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3; + real_t _data_float_loop_ctr_array_dim_1[4]; + _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1; + real_t _data_float_loop_ctr_array_dim_2[4]; + _data_float_loop_ctr_array_dim_2[0] = (real_t) ctr_2; + _data_float_loop_ctr_array_dim_2[1] = (real_t) ctr_2; + _data_float_loop_ctr_array_dim_2[2] = (real_t) ctr_2; + _data_float_loop_ctr_array_dim_2[3] = (real_t) ctr_2; + + const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]; + const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]; + const real_t p_affine_0_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]; + const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]; + const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]; + const real_t p_affine_1_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]; + const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]; + const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]; + const real_t p_affine_2_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]; + const real_t p_affine_3_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_3_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_3_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0; + const real_t src_dof_0 = _data_src_vertex_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))]; + const real_t src_dof_1 = _data_src_vertex_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1]; + const real_t src_dof_2 = _data_src_vertex_0[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))]; + const real_t src_dof_3 = _data_src_vertex_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))]; + const real_t src_dof_4 = _data_src_edge_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]; + const real_t src_dof_5 = _data_src_edge_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 5*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]; + const real_t src_dof_6 = _data_src_edge_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 4*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]; + const real_t src_dof_7 = _data_src_edge_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 3*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]; + const real_t src_dof_8 = _data_src_edge_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]; + const real_t src_dof_9 = _data_src_edge_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]; + const real_t src_dof_10 = _data_src_vertex_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))]; + const real_t src_dof_11 = _data_src_vertex_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1]; + const real_t src_dof_12 = _data_src_vertex_1[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))]; + const real_t src_dof_13 = _data_src_vertex_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))]; + const real_t src_dof_14 = _data_src_edge_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]; + const real_t src_dof_15 = _data_src_edge_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 5*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]; + const real_t src_dof_16 = _data_src_edge_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 4*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]; + const real_t src_dof_17 = _data_src_edge_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 3*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]; + const real_t src_dof_18 = _data_src_edge_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]; + const real_t src_dof_19 = _data_src_edge_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]; + const real_t src_dof_20 = _data_src_vertex_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))]; + const real_t src_dof_21 = _data_src_vertex_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1]; + const real_t src_dof_22 = _data_src_vertex_2[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))]; + const real_t src_dof_23 = _data_src_vertex_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))]; + const real_t src_dof_24 = _data_src_edge_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]; + const real_t src_dof_25 = _data_src_edge_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 5*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]; + const real_t src_dof_26 = _data_src_edge_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 4*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]; + const real_t src_dof_27 = _data_src_edge_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 3*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]; + const real_t src_dof_28 = _data_src_edge_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]; + const real_t src_dof_29 = _data_src_edge_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]; + const real_t rho_dof_0 = _data_rhoVertex[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))]; + const real_t rho_dof_1 = _data_rhoVertex[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1]; + const real_t rho_dof_2 = _data_rhoVertex[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))]; + const real_t rho_dof_3 = _data_rhoVertex[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))]; + const real_t rho_dof_4 = _data_rhoEdge[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]; + const real_t rho_dof_5 = _data_rhoEdge[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 5*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]; + const real_t rho_dof_6 = _data_rhoEdge[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 4*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]; + const real_t rho_dof_7 = _data_rhoEdge[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 3*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]; + const real_t rho_dof_8 = _data_rhoEdge[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]; + const real_t rho_dof_9 = _data_rhoEdge[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]; + real_t q_acc_0_0 = 0.0; + real_t q_acc_0_1 = 0.0; + real_t q_acc_0_2 = 0.0; + real_t q_acc_0_3 = 0.0; + real_t q_acc_0_4 = 0.0; + real_t q_acc_0_5 = 0.0; + real_t q_acc_0_6 = 0.0; + real_t q_acc_0_7 = 0.0; + real_t q_acc_0_8 = 0.0; + real_t q_acc_0_9 = 0.0; + real_t q_acc_0_10 = 0.0; + real_t q_acc_0_11 = 0.0; + real_t q_acc_0_12 = 0.0; + real_t q_acc_0_13 = 0.0; + real_t q_acc_0_14 = 0.0; + real_t q_acc_0_15 = 0.0; + real_t q_acc_0_16 = 0.0; + real_t q_acc_0_17 = 0.0; + real_t q_acc_0_18 = 0.0; + real_t q_acc_0_19 = 0.0; + real_t q_acc_0_20 = 0.0; + real_t q_acc_0_21 = 0.0; + real_t q_acc_0_22 = 0.0; + real_t q_acc_0_23 = 0.0; + real_t q_acc_0_24 = 0.0; + real_t q_acc_0_25 = 0.0; + real_t q_acc_0_26 = 0.0; + real_t q_acc_0_27 = 0.0; + real_t q_acc_0_28 = 0.0; + real_t q_acc_0_29 = 0.0; + real_t q_acc_1_0 = 0.0; + real_t q_acc_1_1 = 0.0; + real_t q_acc_1_2 = 0.0; + real_t q_acc_1_3 = 0.0; + real_t q_acc_1_4 = 0.0; + real_t q_acc_1_5 = 0.0; + real_t q_acc_1_6 = 0.0; + real_t q_acc_1_7 = 0.0; + real_t q_acc_1_8 = 0.0; + real_t q_acc_1_9 = 0.0; + real_t q_acc_1_10 = 0.0; + real_t q_acc_1_11 = 0.0; + real_t q_acc_1_12 = 0.0; + real_t q_acc_1_13 = 0.0; + real_t q_acc_1_14 = 0.0; + real_t q_acc_1_15 = 0.0; + real_t q_acc_1_16 = 0.0; + real_t q_acc_1_17 = 0.0; + real_t q_acc_1_18 = 0.0; + real_t q_acc_1_19 = 0.0; + real_t q_acc_1_20 = 0.0; + real_t q_acc_1_21 = 0.0; + real_t q_acc_1_22 = 0.0; + real_t q_acc_1_23 = 0.0; + real_t q_acc_1_24 = 0.0; + real_t q_acc_1_25 = 0.0; + real_t q_acc_1_26 = 0.0; + real_t q_acc_1_27 = 0.0; + real_t q_acc_1_28 = 0.0; + real_t q_acc_1_29 = 0.0; + real_t q_acc_2_0 = 0.0; + real_t q_acc_2_1 = 0.0; + real_t q_acc_2_2 = 0.0; + real_t q_acc_2_3 = 0.0; + real_t q_acc_2_4 = 0.0; + real_t q_acc_2_5 = 0.0; + real_t q_acc_2_6 = 0.0; + real_t q_acc_2_7 = 0.0; + real_t q_acc_2_8 = 0.0; + real_t q_acc_2_9 = 0.0; + real_t q_acc_2_10 = 0.0; + real_t q_acc_2_11 = 0.0; + real_t q_acc_2_12 = 0.0; + real_t q_acc_2_13 = 0.0; + real_t q_acc_2_14 = 0.0; + real_t q_acc_2_15 = 0.0; + real_t q_acc_2_16 = 0.0; + real_t q_acc_2_17 = 0.0; + real_t q_acc_2_18 = 0.0; + real_t q_acc_2_19 = 0.0; + real_t q_acc_2_20 = 0.0; + real_t q_acc_2_21 = 0.0; + real_t q_acc_2_22 = 0.0; + real_t q_acc_2_23 = 0.0; + real_t q_acc_2_24 = 0.0; + real_t q_acc_2_25 = 0.0; + real_t q_acc_2_26 = 0.0; + real_t q_acc_2_27 = 0.0; + real_t q_acc_2_28 = 0.0; + real_t q_acc_2_29 = 0.0; + real_t q_acc_3_0 = 0.0; + real_t q_acc_3_1 = 0.0; + real_t q_acc_3_2 = 0.0; + real_t q_acc_3_3 = 0.0; + real_t q_acc_3_4 = 0.0; + real_t q_acc_3_5 = 0.0; + real_t q_acc_3_6 = 0.0; + real_t q_acc_3_7 = 0.0; + real_t q_acc_3_8 = 0.0; + real_t q_acc_3_9 = 0.0; + real_t q_acc_3_10 = 0.0; + real_t q_acc_3_11 = 0.0; + real_t q_acc_3_12 = 0.0; + real_t q_acc_3_13 = 0.0; + real_t q_acc_3_14 = 0.0; + real_t q_acc_3_15 = 0.0; + real_t q_acc_3_16 = 0.0; + real_t q_acc_3_17 = 0.0; + real_t q_acc_3_18 = 0.0; + real_t q_acc_3_19 = 0.0; + real_t q_acc_3_20 = 0.0; + real_t q_acc_3_21 = 0.0; + real_t q_acc_3_22 = 0.0; + real_t q_acc_3_23 = 0.0; + real_t q_acc_3_24 = 0.0; + real_t q_acc_3_25 = 0.0; + real_t q_acc_3_26 = 0.0; + real_t q_acc_3_27 = 0.0; + real_t q_acc_3_28 = 0.0; + real_t q_acc_3_29 = 0.0; + for (int64_t q = 0; q < 5; q += 1) + { + const real_t tmp_qloop_0 = (_data_q_p_0[q]*_data_q_p_0[q]); + const real_t tmp_qloop_1 = tmp_qloop_0*2.0; + const real_t tmp_qloop_2 = (_data_q_p_1[q]*_data_q_p_1[q]); + const real_t tmp_qloop_3 = tmp_qloop_2*2.0; + const real_t tmp_qloop_4 = (_data_q_p_2[q]*_data_q_p_2[q]); + const real_t tmp_qloop_5 = tmp_qloop_4*2.0; + const real_t tmp_qloop_6 = 4.0*_data_q_p_1[q]; + const real_t tmp_qloop_7 = tmp_qloop_6*_data_q_p_2[q]; + const real_t tmp_qloop_8 = 4.0*_data_q_p_0[q]; + const real_t tmp_qloop_9 = tmp_qloop_8*_data_q_p_1[q]; + const real_t tmp_qloop_10 = tmp_qloop_8*_data_q_p_2[q]; + const real_t tmp_qloop_11 = tmp_qloop_10 + tmp_qloop_9; + const real_t tmp_qloop_12 = tmp_qloop_1 + tmp_qloop_11 + tmp_qloop_3 + tmp_qloop_5 + tmp_qloop_7 - 3.0*_data_q_p_0[q] - 3.0*_data_q_p_1[q] - 3.0*_data_q_p_2[q] + 1.0; + const real_t tmp_qloop_13 = 4.0*_data_q_p_2[q]; + const real_t tmp_qloop_14 = tmp_qloop_13 + tmp_qloop_6; + const real_t tmp_qloop_15 = -rho_dof_8*tmp_qloop_6; + const real_t tmp_qloop_16 = rho_dof_0*(tmp_qloop_14 + tmp_qloop_8 - 3.0); + const real_t tmp_qloop_17 = -rho_dof_7*tmp_qloop_13 + tmp_qloop_16; + const real_t tmp_qloop_18 = rho_dof_1*(tmp_qloop_8 - 1.0) + rho_dof_5*tmp_qloop_13 + rho_dof_6*tmp_qloop_6 + rho_dof_9*(-tmp_qloop_14 - 8.0*_data_q_p_0[q] + 4.0) + tmp_qloop_15 + tmp_qloop_17; + const real_t tmp_qloop_19 = tmp_qloop_8 - 4.0; + const real_t tmp_qloop_20 = -rho_dof_9*tmp_qloop_8; + const real_t tmp_qloop_21 = rho_dof_2*(tmp_qloop_6 - 1.0) + rho_dof_4*tmp_qloop_13 + rho_dof_6*tmp_qloop_8 + rho_dof_8*(-tmp_qloop_13 - tmp_qloop_19 - 8.0*_data_q_p_1[q]) + tmp_qloop_17 + tmp_qloop_20; + const real_t tmp_qloop_22 = rho_dof_3*(tmp_qloop_13 - 1.0) + rho_dof_4*tmp_qloop_6 + rho_dof_5*tmp_qloop_8 + rho_dof_7*(-tmp_qloop_19 - tmp_qloop_6 - 8.0*_data_q_p_2[q]) + tmp_qloop_15 + tmp_qloop_16 + tmp_qloop_20; + const real_t tmp_qloop_23 = jac_affine_inv_0_0_WHITE_UP*tmp_qloop_18 + jac_affine_inv_1_0_WHITE_UP*tmp_qloop_21 + jac_affine_inv_2_0_WHITE_UP*tmp_qloop_22; + const real_t tmp_qloop_24 = tmp_qloop_1 - _data_q_p_0[q]; + const real_t tmp_qloop_25 = tmp_qloop_3 - _data_q_p_1[q]; + const real_t tmp_qloop_26 = tmp_qloop_5 - _data_q_p_2[q]; + const real_t tmp_qloop_27 = -tmp_qloop_10 + tmp_qloop_13 + tmp_qloop_4*-4.0 - tmp_qloop_7; + const real_t tmp_qloop_28 = tmp_qloop_2*4.0; + const real_t tmp_qloop_29 = -tmp_qloop_28 + tmp_qloop_6 - tmp_qloop_7 - tmp_qloop_9; + const real_t tmp_qloop_30 = tmp_qloop_0*4.0; + const real_t tmp_qloop_31 = -tmp_qloop_11 - tmp_qloop_30 + tmp_qloop_8; + const real_t tmp_qloop_32 = abs_det_jac_affine_WHITE_UP*1.0 / (rho_dof_0*tmp_qloop_12 + rho_dof_1*tmp_qloop_24 + rho_dof_2*tmp_qloop_25 + rho_dof_3*tmp_qloop_26 + rho_dof_4*tmp_qloop_7 + rho_dof_5*tmp_qloop_10 + rho_dof_6*tmp_qloop_9 + rho_dof_7*tmp_qloop_27 + rho_dof_8*tmp_qloop_29 + rho_dof_9*tmp_qloop_31)*_data_q_w[q]; + const real_t tmp_qloop_33 = tmp_qloop_32*(1.0 - _data_q_p_0[q] - _data_q_p_1[q] - _data_q_p_2[q]); + const real_t tmp_qloop_34 = tmp_qloop_23*tmp_qloop_33; + const real_t tmp_qloop_35 = jac_affine_inv_0_1_WHITE_UP*tmp_qloop_18 + jac_affine_inv_1_1_WHITE_UP*tmp_qloop_21 + jac_affine_inv_2_1_WHITE_UP*tmp_qloop_22; + const real_t tmp_qloop_36 = tmp_qloop_33*tmp_qloop_35; + const real_t tmp_qloop_37 = jac_affine_inv_0_2_WHITE_UP*tmp_qloop_18 + jac_affine_inv_1_2_WHITE_UP*tmp_qloop_21 + jac_affine_inv_2_2_WHITE_UP*tmp_qloop_22; + const real_t tmp_qloop_38 = tmp_qloop_33*tmp_qloop_37; + const real_t tmp_qloop_39 = tmp_qloop_32*_data_q_p_0[q]; + const real_t tmp_qloop_40 = tmp_qloop_23*tmp_qloop_39; + const real_t tmp_qloop_41 = tmp_qloop_32*_data_q_p_2[q]; + const real_t tmp_qloop_42 = tmp_qloop_23*tmp_qloop_41; + const real_t tmp_qloop_43 = tmp_qloop_42*tmp_qloop_9; + const real_t tmp_qloop_44 = tmp_qloop_32*_data_q_p_1[q]; + const real_t tmp_qloop_45 = tmp_qloop_23*tmp_qloop_44; + const real_t tmp_qloop_46 = tmp_qloop_35*tmp_qloop_39; + const real_t tmp_qloop_47 = tmp_qloop_35*tmp_qloop_41; + const real_t tmp_qloop_48 = tmp_qloop_47*tmp_qloop_9; + const real_t tmp_qloop_49 = tmp_qloop_35*tmp_qloop_44; + const real_t tmp_qloop_50 = tmp_qloop_37*tmp_qloop_39; + const real_t tmp_qloop_51 = tmp_qloop_37*tmp_qloop_41; + const real_t tmp_qloop_52 = tmp_qloop_51*tmp_qloop_9; + const real_t tmp_qloop_53 = tmp_qloop_37*tmp_qloop_44; + const real_t tmp_qloop_54 = tmp_qloop_23*tmp_qloop_32; + const real_t tmp_qloop_55 = tmp_qloop_2*tmp_qloop_8; + const real_t tmp_qloop_56 = tmp_qloop_32*tmp_qloop_55; + const real_t tmp_qloop_57 = tmp_qloop_4*tmp_qloop_54; + const real_t tmp_qloop_58 = tmp_qloop_32*tmp_qloop_4; + const real_t tmp_qloop_59 = tmp_qloop_35*tmp_qloop_58; + const real_t tmp_qloop_60 = tmp_qloop_37*tmp_qloop_58; + const real_t q_tmp_0_0 = tmp_qloop_12*tmp_qloop_34; + const real_t q_tmp_0_1 = tmp_qloop_24*tmp_qloop_34; + const real_t q_tmp_0_2 = tmp_qloop_25*tmp_qloop_34; + const real_t q_tmp_0_3 = tmp_qloop_26*tmp_qloop_34; + const real_t q_tmp_0_4 = tmp_qloop_34*tmp_qloop_7; + const real_t q_tmp_0_5 = tmp_qloop_10*tmp_qloop_34; + const real_t q_tmp_0_6 = tmp_qloop_34*tmp_qloop_9; + const real_t q_tmp_0_7 = tmp_qloop_27*tmp_qloop_34; + const real_t q_tmp_0_8 = tmp_qloop_29*tmp_qloop_34; + const real_t q_tmp_0_9 = tmp_qloop_31*tmp_qloop_34; + const real_t q_tmp_0_10 = tmp_qloop_12*tmp_qloop_36; + const real_t q_tmp_0_11 = tmp_qloop_24*tmp_qloop_36; + const real_t q_tmp_0_12 = tmp_qloop_25*tmp_qloop_36; + const real_t q_tmp_0_13 = tmp_qloop_26*tmp_qloop_36; + const real_t q_tmp_0_14 = tmp_qloop_36*tmp_qloop_7; + const real_t q_tmp_0_15 = tmp_qloop_10*tmp_qloop_36; + const real_t q_tmp_0_16 = tmp_qloop_36*tmp_qloop_9; + const real_t q_tmp_0_17 = tmp_qloop_27*tmp_qloop_36; + const real_t q_tmp_0_18 = tmp_qloop_29*tmp_qloop_36; + const real_t q_tmp_0_19 = tmp_qloop_31*tmp_qloop_36; + const real_t q_tmp_0_20 = tmp_qloop_12*tmp_qloop_38; + const real_t q_tmp_0_21 = tmp_qloop_24*tmp_qloop_38; + const real_t q_tmp_0_22 = tmp_qloop_25*tmp_qloop_38; + const real_t q_tmp_0_23 = tmp_qloop_26*tmp_qloop_38; + const real_t q_tmp_0_24 = tmp_qloop_38*tmp_qloop_7; + const real_t q_tmp_0_25 = tmp_qloop_10*tmp_qloop_38; + const real_t q_tmp_0_26 = tmp_qloop_38*tmp_qloop_9; + const real_t q_tmp_0_27 = tmp_qloop_27*tmp_qloop_38; + const real_t q_tmp_0_28 = tmp_qloop_29*tmp_qloop_38; + const real_t q_tmp_0_29 = tmp_qloop_31*tmp_qloop_38; + const real_t q_tmp_1_0 = tmp_qloop_12*tmp_qloop_40; + const real_t q_tmp_1_1 = tmp_qloop_24*tmp_qloop_40; + const real_t q_tmp_1_2 = tmp_qloop_25*tmp_qloop_40; + const real_t q_tmp_1_3 = tmp_qloop_26*tmp_qloop_40; + const real_t q_tmp_1_4 = tmp_qloop_43; + const real_t q_tmp_1_5 = tmp_qloop_30*tmp_qloop_42; + const real_t q_tmp_1_6 = tmp_qloop_30*tmp_qloop_45; + const real_t q_tmp_1_7 = tmp_qloop_27*tmp_qloop_40; + const real_t q_tmp_1_8 = tmp_qloop_29*tmp_qloop_40; + const real_t q_tmp_1_9 = tmp_qloop_31*tmp_qloop_40; + const real_t q_tmp_1_10 = tmp_qloop_12*tmp_qloop_46; + const real_t q_tmp_1_11 = tmp_qloop_24*tmp_qloop_46; + const real_t q_tmp_1_12 = tmp_qloop_25*tmp_qloop_46; + const real_t q_tmp_1_13 = tmp_qloop_26*tmp_qloop_46; + const real_t q_tmp_1_14 = tmp_qloop_48; + const real_t q_tmp_1_15 = tmp_qloop_30*tmp_qloop_47; + const real_t q_tmp_1_16 = tmp_qloop_30*tmp_qloop_49; + const real_t q_tmp_1_17 = tmp_qloop_27*tmp_qloop_46; + const real_t q_tmp_1_18 = tmp_qloop_29*tmp_qloop_46; + const real_t q_tmp_1_19 = tmp_qloop_31*tmp_qloop_46; + const real_t q_tmp_1_20 = tmp_qloop_12*tmp_qloop_50; + const real_t q_tmp_1_21 = tmp_qloop_24*tmp_qloop_50; + const real_t q_tmp_1_22 = tmp_qloop_25*tmp_qloop_50; + const real_t q_tmp_1_23 = tmp_qloop_26*tmp_qloop_50; + const real_t q_tmp_1_24 = tmp_qloop_52; + const real_t q_tmp_1_25 = tmp_qloop_30*tmp_qloop_51; + const real_t q_tmp_1_26 = tmp_qloop_30*tmp_qloop_53; + const real_t q_tmp_1_27 = tmp_qloop_27*tmp_qloop_50; + const real_t q_tmp_1_28 = tmp_qloop_29*tmp_qloop_50; + const real_t q_tmp_1_29 = tmp_qloop_31*tmp_qloop_50; + const real_t q_tmp_2_0 = tmp_qloop_12*tmp_qloop_45; + const real_t q_tmp_2_1 = tmp_qloop_24*tmp_qloop_45; + const real_t q_tmp_2_2 = tmp_qloop_25*tmp_qloop_45; + const real_t q_tmp_2_3 = tmp_qloop_26*tmp_qloop_45; + const real_t q_tmp_2_4 = tmp_qloop_28*tmp_qloop_42; + const real_t q_tmp_2_5 = tmp_qloop_43; + const real_t q_tmp_2_6 = tmp_qloop_54*tmp_qloop_55; + const real_t q_tmp_2_7 = tmp_qloop_27*tmp_qloop_45; + const real_t q_tmp_2_8 = tmp_qloop_29*tmp_qloop_45; + const real_t q_tmp_2_9 = tmp_qloop_31*tmp_qloop_45; + const real_t q_tmp_2_10 = tmp_qloop_12*tmp_qloop_49; + const real_t q_tmp_2_11 = tmp_qloop_24*tmp_qloop_49; + const real_t q_tmp_2_12 = tmp_qloop_25*tmp_qloop_49; + const real_t q_tmp_2_13 = tmp_qloop_26*tmp_qloop_49; + const real_t q_tmp_2_14 = tmp_qloop_28*tmp_qloop_47; + const real_t q_tmp_2_15 = tmp_qloop_48; + const real_t q_tmp_2_16 = tmp_qloop_35*tmp_qloop_56; + const real_t q_tmp_2_17 = tmp_qloop_27*tmp_qloop_49; + const real_t q_tmp_2_18 = tmp_qloop_29*tmp_qloop_49; + const real_t q_tmp_2_19 = tmp_qloop_31*tmp_qloop_49; + const real_t q_tmp_2_20 = tmp_qloop_12*tmp_qloop_53; + const real_t q_tmp_2_21 = tmp_qloop_24*tmp_qloop_53; + const real_t q_tmp_2_22 = tmp_qloop_25*tmp_qloop_53; + const real_t q_tmp_2_23 = tmp_qloop_26*tmp_qloop_53; + const real_t q_tmp_2_24 = tmp_qloop_28*tmp_qloop_51; + const real_t q_tmp_2_25 = tmp_qloop_52; + const real_t q_tmp_2_26 = tmp_qloop_37*tmp_qloop_56; + const real_t q_tmp_2_27 = tmp_qloop_27*tmp_qloop_53; + const real_t q_tmp_2_28 = tmp_qloop_29*tmp_qloop_53; + const real_t q_tmp_2_29 = tmp_qloop_31*tmp_qloop_53; + const real_t q_tmp_3_0 = tmp_qloop_12*tmp_qloop_42; + const real_t q_tmp_3_1 = tmp_qloop_24*tmp_qloop_42; + const real_t q_tmp_3_2 = tmp_qloop_25*tmp_qloop_42; + const real_t q_tmp_3_3 = tmp_qloop_26*tmp_qloop_42; + const real_t q_tmp_3_4 = tmp_qloop_57*tmp_qloop_6; + const real_t q_tmp_3_5 = tmp_qloop_57*tmp_qloop_8; + const real_t q_tmp_3_6 = tmp_qloop_43; + const real_t q_tmp_3_7 = tmp_qloop_27*tmp_qloop_42; + const real_t q_tmp_3_8 = tmp_qloop_29*tmp_qloop_42; + const real_t q_tmp_3_9 = tmp_qloop_31*tmp_qloop_42; + const real_t q_tmp_3_10 = tmp_qloop_12*tmp_qloop_47; + const real_t q_tmp_3_11 = tmp_qloop_24*tmp_qloop_47; + const real_t q_tmp_3_12 = tmp_qloop_25*tmp_qloop_47; + const real_t q_tmp_3_13 = tmp_qloop_26*tmp_qloop_47; + const real_t q_tmp_3_14 = tmp_qloop_59*tmp_qloop_6; + const real_t q_tmp_3_15 = tmp_qloop_59*tmp_qloop_8; + const real_t q_tmp_3_16 = tmp_qloop_48; + const real_t q_tmp_3_17 = tmp_qloop_27*tmp_qloop_47; + const real_t q_tmp_3_18 = tmp_qloop_29*tmp_qloop_47; + const real_t q_tmp_3_19 = tmp_qloop_31*tmp_qloop_47; + const real_t q_tmp_3_20 = tmp_qloop_12*tmp_qloop_51; + const real_t q_tmp_3_21 = tmp_qloop_24*tmp_qloop_51; + const real_t q_tmp_3_22 = tmp_qloop_25*tmp_qloop_51; + const real_t q_tmp_3_23 = tmp_qloop_26*tmp_qloop_51; + const real_t q_tmp_3_24 = tmp_qloop_6*tmp_qloop_60; + const real_t q_tmp_3_25 = tmp_qloop_60*tmp_qloop_8; + const real_t q_tmp_3_26 = tmp_qloop_52; + const real_t q_tmp_3_27 = tmp_qloop_27*tmp_qloop_51; + const real_t q_tmp_3_28 = tmp_qloop_29*tmp_qloop_51; + const real_t q_tmp_3_29 = tmp_qloop_31*tmp_qloop_51; + q_acc_0_0 = q_acc_0_0 + q_tmp_0_0; + q_acc_0_1 = q_acc_0_1 + q_tmp_0_1; + q_acc_0_2 = q_acc_0_2 + q_tmp_0_2; + q_acc_0_3 = q_acc_0_3 + q_tmp_0_3; + q_acc_0_4 = q_acc_0_4 + q_tmp_0_4; + q_acc_0_5 = q_acc_0_5 + q_tmp_0_5; + q_acc_0_6 = q_acc_0_6 + q_tmp_0_6; + q_acc_0_7 = q_acc_0_7 + q_tmp_0_7; + q_acc_0_8 = q_acc_0_8 + q_tmp_0_8; + q_acc_0_9 = q_acc_0_9 + q_tmp_0_9; + q_acc_0_10 = q_acc_0_10 + q_tmp_0_10; + q_acc_0_11 = q_acc_0_11 + q_tmp_0_11; + q_acc_0_12 = q_acc_0_12 + q_tmp_0_12; + q_acc_0_13 = q_acc_0_13 + q_tmp_0_13; + q_acc_0_14 = q_acc_0_14 + q_tmp_0_14; + q_acc_0_15 = q_acc_0_15 + q_tmp_0_15; + q_acc_0_16 = q_acc_0_16 + q_tmp_0_16; + q_acc_0_17 = q_acc_0_17 + q_tmp_0_17; + q_acc_0_18 = q_acc_0_18 + q_tmp_0_18; + q_acc_0_19 = q_acc_0_19 + q_tmp_0_19; + q_acc_0_20 = q_acc_0_20 + q_tmp_0_20; + q_acc_0_21 = q_acc_0_21 + q_tmp_0_21; + q_acc_0_22 = q_acc_0_22 + q_tmp_0_22; + q_acc_0_23 = q_acc_0_23 + q_tmp_0_23; + q_acc_0_24 = q_acc_0_24 + q_tmp_0_24; + q_acc_0_25 = q_acc_0_25 + q_tmp_0_25; + q_acc_0_26 = q_acc_0_26 + q_tmp_0_26; + q_acc_0_27 = q_acc_0_27 + q_tmp_0_27; + q_acc_0_28 = q_acc_0_28 + q_tmp_0_28; + q_acc_0_29 = q_acc_0_29 + q_tmp_0_29; + q_acc_1_0 = q_acc_1_0 + q_tmp_1_0; + q_acc_1_1 = q_acc_1_1 + q_tmp_1_1; + q_acc_1_2 = q_acc_1_2 + q_tmp_1_2; + q_acc_1_3 = q_acc_1_3 + q_tmp_1_3; + q_acc_1_4 = q_acc_1_4 + q_tmp_1_4; + q_acc_1_5 = q_acc_1_5 + q_tmp_1_5; + q_acc_1_6 = q_acc_1_6 + q_tmp_1_6; + q_acc_1_7 = q_acc_1_7 + q_tmp_1_7; + q_acc_1_8 = q_acc_1_8 + q_tmp_1_8; + q_acc_1_9 = q_acc_1_9 + q_tmp_1_9; + q_acc_1_10 = q_acc_1_10 + q_tmp_1_10; + q_acc_1_11 = q_acc_1_11 + q_tmp_1_11; + q_acc_1_12 = q_acc_1_12 + q_tmp_1_12; + q_acc_1_13 = q_acc_1_13 + q_tmp_1_13; + q_acc_1_14 = q_acc_1_14 + q_tmp_1_14; + q_acc_1_15 = q_acc_1_15 + q_tmp_1_15; + q_acc_1_16 = q_acc_1_16 + q_tmp_1_16; + q_acc_1_17 = q_acc_1_17 + q_tmp_1_17; + q_acc_1_18 = q_acc_1_18 + q_tmp_1_18; + q_acc_1_19 = q_acc_1_19 + q_tmp_1_19; + q_acc_1_20 = q_acc_1_20 + q_tmp_1_20; + q_acc_1_21 = q_acc_1_21 + q_tmp_1_21; + q_acc_1_22 = q_acc_1_22 + q_tmp_1_22; + q_acc_1_23 = q_acc_1_23 + q_tmp_1_23; + q_acc_1_24 = q_acc_1_24 + q_tmp_1_24; + q_acc_1_25 = q_acc_1_25 + q_tmp_1_25; + q_acc_1_26 = q_acc_1_26 + q_tmp_1_26; + q_acc_1_27 = q_acc_1_27 + q_tmp_1_27; + q_acc_1_28 = q_acc_1_28 + q_tmp_1_28; + q_acc_1_29 = q_acc_1_29 + q_tmp_1_29; + q_acc_2_0 = q_acc_2_0 + q_tmp_2_0; + q_acc_2_1 = q_acc_2_1 + q_tmp_2_1; + q_acc_2_2 = q_acc_2_2 + q_tmp_2_2; + q_acc_2_3 = q_acc_2_3 + q_tmp_2_3; + q_acc_2_4 = q_acc_2_4 + q_tmp_2_4; + q_acc_2_5 = q_acc_2_5 + q_tmp_2_5; + q_acc_2_6 = q_acc_2_6 + q_tmp_2_6; + q_acc_2_7 = q_acc_2_7 + q_tmp_2_7; + q_acc_2_8 = q_acc_2_8 + q_tmp_2_8; + q_acc_2_9 = q_acc_2_9 + q_tmp_2_9; + q_acc_2_10 = q_acc_2_10 + q_tmp_2_10; + q_acc_2_11 = q_acc_2_11 + q_tmp_2_11; + q_acc_2_12 = q_acc_2_12 + q_tmp_2_12; + q_acc_2_13 = q_acc_2_13 + q_tmp_2_13; + q_acc_2_14 = q_acc_2_14 + q_tmp_2_14; + q_acc_2_15 = q_acc_2_15 + q_tmp_2_15; + q_acc_2_16 = q_acc_2_16 + q_tmp_2_16; + q_acc_2_17 = q_acc_2_17 + q_tmp_2_17; + q_acc_2_18 = q_acc_2_18 + q_tmp_2_18; + q_acc_2_19 = q_acc_2_19 + q_tmp_2_19; + q_acc_2_20 = q_acc_2_20 + q_tmp_2_20; + q_acc_2_21 = q_acc_2_21 + q_tmp_2_21; + q_acc_2_22 = q_acc_2_22 + q_tmp_2_22; + q_acc_2_23 = q_acc_2_23 + q_tmp_2_23; + q_acc_2_24 = q_acc_2_24 + q_tmp_2_24; + q_acc_2_25 = q_acc_2_25 + q_tmp_2_25; + q_acc_2_26 = q_acc_2_26 + q_tmp_2_26; + q_acc_2_27 = q_acc_2_27 + q_tmp_2_27; + q_acc_2_28 = q_acc_2_28 + q_tmp_2_28; + q_acc_2_29 = q_acc_2_29 + q_tmp_2_29; + q_acc_3_0 = q_acc_3_0 + q_tmp_3_0; + q_acc_3_1 = q_acc_3_1 + q_tmp_3_1; + q_acc_3_2 = q_acc_3_2 + q_tmp_3_2; + q_acc_3_3 = q_acc_3_3 + q_tmp_3_3; + q_acc_3_4 = q_acc_3_4 + q_tmp_3_4; + q_acc_3_5 = q_acc_3_5 + q_tmp_3_5; + q_acc_3_6 = q_acc_3_6 + q_tmp_3_6; + q_acc_3_7 = q_acc_3_7 + q_tmp_3_7; + q_acc_3_8 = q_acc_3_8 + q_tmp_3_8; + q_acc_3_9 = q_acc_3_9 + q_tmp_3_9; + q_acc_3_10 = q_acc_3_10 + q_tmp_3_10; + q_acc_3_11 = q_acc_3_11 + q_tmp_3_11; + q_acc_3_12 = q_acc_3_12 + q_tmp_3_12; + q_acc_3_13 = q_acc_3_13 + q_tmp_3_13; + q_acc_3_14 = q_acc_3_14 + q_tmp_3_14; + q_acc_3_15 = q_acc_3_15 + q_tmp_3_15; + q_acc_3_16 = q_acc_3_16 + q_tmp_3_16; + q_acc_3_17 = q_acc_3_17 + q_tmp_3_17; + q_acc_3_18 = q_acc_3_18 + q_tmp_3_18; + q_acc_3_19 = q_acc_3_19 + q_tmp_3_19; + q_acc_3_20 = q_acc_3_20 + q_tmp_3_20; + q_acc_3_21 = q_acc_3_21 + q_tmp_3_21; + q_acc_3_22 = q_acc_3_22 + q_tmp_3_22; + q_acc_3_23 = q_acc_3_23 + q_tmp_3_23; + q_acc_3_24 = q_acc_3_24 + q_tmp_3_24; + q_acc_3_25 = q_acc_3_25 + q_tmp_3_25; + q_acc_3_26 = q_acc_3_26 + q_tmp_3_26; + q_acc_3_27 = q_acc_3_27 + q_tmp_3_27; + q_acc_3_28 = q_acc_3_28 + q_tmp_3_28; + q_acc_3_29 = q_acc_3_29 + q_tmp_3_29; + } + const real_t elMatVec_0 = q_acc_0_0*src_dof_0 + q_acc_0_1*src_dof_1 + q_acc_0_10*src_dof_10 + q_acc_0_11*src_dof_11 + q_acc_0_12*src_dof_12 + q_acc_0_13*src_dof_13 + q_acc_0_14*src_dof_14 + q_acc_0_15*src_dof_15 + q_acc_0_16*src_dof_16 + q_acc_0_17*src_dof_17 + q_acc_0_18*src_dof_18 + q_acc_0_19*src_dof_19 + q_acc_0_2*src_dof_2 + q_acc_0_20*src_dof_20 + q_acc_0_21*src_dof_21 + q_acc_0_22*src_dof_22 + q_acc_0_23*src_dof_23 + q_acc_0_24*src_dof_24 + q_acc_0_25*src_dof_25 + q_acc_0_26*src_dof_26 + q_acc_0_27*src_dof_27 + q_acc_0_28*src_dof_28 + q_acc_0_29*src_dof_29 + q_acc_0_3*src_dof_3 + q_acc_0_4*src_dof_4 + q_acc_0_5*src_dof_5 + q_acc_0_6*src_dof_6 + q_acc_0_7*src_dof_7 + q_acc_0_8*src_dof_8 + q_acc_0_9*src_dof_9; + const real_t elMatVec_1 = q_acc_1_0*src_dof_0 + q_acc_1_1*src_dof_1 + q_acc_1_10*src_dof_10 + q_acc_1_11*src_dof_11 + q_acc_1_12*src_dof_12 + q_acc_1_13*src_dof_13 + q_acc_1_14*src_dof_14 + q_acc_1_15*src_dof_15 + q_acc_1_16*src_dof_16 + q_acc_1_17*src_dof_17 + q_acc_1_18*src_dof_18 + q_acc_1_19*src_dof_19 + q_acc_1_2*src_dof_2 + q_acc_1_20*src_dof_20 + q_acc_1_21*src_dof_21 + q_acc_1_22*src_dof_22 + q_acc_1_23*src_dof_23 + q_acc_1_24*src_dof_24 + q_acc_1_25*src_dof_25 + q_acc_1_26*src_dof_26 + q_acc_1_27*src_dof_27 + q_acc_1_28*src_dof_28 + q_acc_1_29*src_dof_29 + q_acc_1_3*src_dof_3 + q_acc_1_4*src_dof_4 + q_acc_1_5*src_dof_5 + q_acc_1_6*src_dof_6 + q_acc_1_7*src_dof_7 + q_acc_1_8*src_dof_8 + q_acc_1_9*src_dof_9; + const real_t elMatVec_2 = q_acc_2_0*src_dof_0 + q_acc_2_1*src_dof_1 + q_acc_2_10*src_dof_10 + q_acc_2_11*src_dof_11 + q_acc_2_12*src_dof_12 + q_acc_2_13*src_dof_13 + q_acc_2_14*src_dof_14 + q_acc_2_15*src_dof_15 + q_acc_2_16*src_dof_16 + q_acc_2_17*src_dof_17 + q_acc_2_18*src_dof_18 + q_acc_2_19*src_dof_19 + q_acc_2_2*src_dof_2 + q_acc_2_20*src_dof_20 + q_acc_2_21*src_dof_21 + q_acc_2_22*src_dof_22 + q_acc_2_23*src_dof_23 + q_acc_2_24*src_dof_24 + q_acc_2_25*src_dof_25 + q_acc_2_26*src_dof_26 + q_acc_2_27*src_dof_27 + q_acc_2_28*src_dof_28 + q_acc_2_29*src_dof_29 + q_acc_2_3*src_dof_3 + q_acc_2_4*src_dof_4 + q_acc_2_5*src_dof_5 + q_acc_2_6*src_dof_6 + q_acc_2_7*src_dof_7 + q_acc_2_8*src_dof_8 + q_acc_2_9*src_dof_9; + const real_t elMatVec_3 = q_acc_3_0*src_dof_0 + q_acc_3_1*src_dof_1 + q_acc_3_10*src_dof_10 + q_acc_3_11*src_dof_11 + q_acc_3_12*src_dof_12 + q_acc_3_13*src_dof_13 + q_acc_3_14*src_dof_14 + q_acc_3_15*src_dof_15 + q_acc_3_16*src_dof_16 + q_acc_3_17*src_dof_17 + q_acc_3_18*src_dof_18 + q_acc_3_19*src_dof_19 + q_acc_3_2*src_dof_2 + q_acc_3_20*src_dof_20 + q_acc_3_21*src_dof_21 + q_acc_3_22*src_dof_22 + q_acc_3_23*src_dof_23 + q_acc_3_24*src_dof_24 + q_acc_3_25*src_dof_25 + q_acc_3_26*src_dof_26 + q_acc_3_27*src_dof_27 + q_acc_3_28*src_dof_28 + q_acc_3_29*src_dof_29 + q_acc_3_3*src_dof_3 + q_acc_3_4*src_dof_4 + q_acc_3_5*src_dof_5 + q_acc_3_6*src_dof_6 + q_acc_3_7*src_dof_7 + q_acc_3_8*src_dof_8 + q_acc_3_9*src_dof_9; + _data_dst[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))] = elMatVec_0 + _data_dst[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))]; + _data_dst[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1] = elMatVec_1 + _data_dst[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1]; + _data_dst[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))] = elMatVec_2 + _data_dst[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))]; + _data_dst[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))] = elMatVec_3 + _data_dst[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))]; + } + } + } + const real_t tmp_coords_jac_0_WHITE_DOWN = 1.0 / (micro_edges_per_macro_edge_float)*1.0; + const real_t tmp_coords_jac_1_WHITE_DOWN = tmp_coords_jac_0_WHITE_DOWN*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0); + const real_t tmp_coords_jac_2_WHITE_DOWN = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_WHITE_DOWN*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0); + const real_t tmp_coords_jac_3_WHITE_DOWN = tmp_coords_jac_1_WHITE_DOWN + tmp_coords_jac_2_WHITE_DOWN; + const real_t tmp_coords_jac_4_WHITE_DOWN = tmp_coords_jac_0_WHITE_DOWN*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1); + const real_t tmp_coords_jac_5_WHITE_DOWN = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_WHITE_DOWN*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1); + const real_t tmp_coords_jac_6_WHITE_DOWN = tmp_coords_jac_4_WHITE_DOWN + tmp_coords_jac_5_WHITE_DOWN; + const real_t tmp_coords_jac_7_WHITE_DOWN = tmp_coords_jac_0_WHITE_DOWN*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2); + const real_t tmp_coords_jac_8_WHITE_DOWN = macro_vertex_coord_id_0comp2 + tmp_coords_jac_0_WHITE_DOWN*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2); + const real_t tmp_coords_jac_9_WHITE_DOWN = tmp_coords_jac_7_WHITE_DOWN + tmp_coords_jac_8_WHITE_DOWN; + const real_t tmp_coords_jac_10_WHITE_DOWN = tmp_coords_jac_0_WHITE_DOWN*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0); + const real_t tmp_coords_jac_11_WHITE_DOWN = tmp_coords_jac_0_WHITE_DOWN*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1); + const real_t tmp_coords_jac_12_WHITE_DOWN = tmp_coords_jac_0_WHITE_DOWN*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2); + const real_t p_affine_const_0_0_WHITE_DOWN = tmp_coords_jac_3_WHITE_DOWN; + const real_t p_affine_const_0_1_WHITE_DOWN = tmp_coords_jac_6_WHITE_DOWN; + const real_t p_affine_const_0_2_WHITE_DOWN = tmp_coords_jac_9_WHITE_DOWN; + const real_t p_affine_const_1_0_WHITE_DOWN = tmp_coords_jac_10_WHITE_DOWN + tmp_coords_jac_2_WHITE_DOWN; + const real_t p_affine_const_1_1_WHITE_DOWN = tmp_coords_jac_11_WHITE_DOWN + tmp_coords_jac_5_WHITE_DOWN; + const real_t p_affine_const_1_2_WHITE_DOWN = tmp_coords_jac_12_WHITE_DOWN + tmp_coords_jac_8_WHITE_DOWN; + const real_t p_affine_const_2_0_WHITE_DOWN = macro_vertex_coord_id_0comp0 + tmp_coords_jac_10_WHITE_DOWN + tmp_coords_jac_1_WHITE_DOWN; + const real_t p_affine_const_2_1_WHITE_DOWN = macro_vertex_coord_id_0comp1 + tmp_coords_jac_11_WHITE_DOWN + tmp_coords_jac_4_WHITE_DOWN; + const real_t p_affine_const_2_2_WHITE_DOWN = macro_vertex_coord_id_0comp2 + tmp_coords_jac_12_WHITE_DOWN + tmp_coords_jac_7_WHITE_DOWN; + const real_t p_affine_const_3_0_WHITE_DOWN = tmp_coords_jac_10_WHITE_DOWN + tmp_coords_jac_3_WHITE_DOWN; + const real_t p_affine_const_3_1_WHITE_DOWN = tmp_coords_jac_11_WHITE_DOWN + tmp_coords_jac_6_WHITE_DOWN; + const real_t p_affine_const_3_2_WHITE_DOWN = tmp_coords_jac_12_WHITE_DOWN + tmp_coords_jac_9_WHITE_DOWN; + const real_t jac_affine_0_0_WHITE_DOWN = -p_affine_const_0_0_WHITE_DOWN + p_affine_const_1_0_WHITE_DOWN; + const real_t jac_affine_0_1_WHITE_DOWN = -p_affine_const_0_0_WHITE_DOWN + p_affine_const_2_0_WHITE_DOWN; + const real_t jac_affine_0_2_WHITE_DOWN = -p_affine_const_0_0_WHITE_DOWN + p_affine_const_3_0_WHITE_DOWN; + const real_t jac_affine_1_0_WHITE_DOWN = -p_affine_const_0_1_WHITE_DOWN + p_affine_const_1_1_WHITE_DOWN; + const real_t jac_affine_1_1_WHITE_DOWN = -p_affine_const_0_1_WHITE_DOWN + p_affine_const_2_1_WHITE_DOWN; + const real_t tmp_coords_jac_17_WHITE_DOWN = jac_affine_0_2_WHITE_DOWN*jac_affine_1_1_WHITE_DOWN; + const real_t jac_affine_1_2_WHITE_DOWN = -p_affine_const_0_1_WHITE_DOWN + p_affine_const_3_1_WHITE_DOWN; + const real_t tmp_coords_jac_15_WHITE_DOWN = jac_affine_0_1_WHITE_DOWN*jac_affine_1_2_WHITE_DOWN; + const real_t jac_affine_2_0_WHITE_DOWN = -p_affine_const_0_2_WHITE_DOWN + p_affine_const_1_2_WHITE_DOWN; + const real_t jac_affine_2_1_WHITE_DOWN = -p_affine_const_0_2_WHITE_DOWN + p_affine_const_2_2_WHITE_DOWN; + const real_t tmp_coords_jac_14_WHITE_DOWN = jac_affine_1_2_WHITE_DOWN*jac_affine_2_1_WHITE_DOWN; + const real_t jac_affine_2_2_WHITE_DOWN = -p_affine_const_0_2_WHITE_DOWN + p_affine_const_3_2_WHITE_DOWN; + const real_t tmp_coords_jac_13_WHITE_DOWN = jac_affine_1_1_WHITE_DOWN*jac_affine_2_2_WHITE_DOWN; + const real_t tmp_coords_jac_16_WHITE_DOWN = jac_affine_0_1_WHITE_DOWN*jac_affine_2_2_WHITE_DOWN; + const real_t tmp_coords_jac_18_WHITE_DOWN = jac_affine_0_0_WHITE_DOWN*tmp_coords_jac_13_WHITE_DOWN - jac_affine_0_0_WHITE_DOWN*tmp_coords_jac_14_WHITE_DOWN + jac_affine_0_2_WHITE_DOWN*jac_affine_1_0_WHITE_DOWN*jac_affine_2_1_WHITE_DOWN - jac_affine_1_0_WHITE_DOWN*tmp_coords_jac_16_WHITE_DOWN + jac_affine_2_0_WHITE_DOWN*tmp_coords_jac_15_WHITE_DOWN - jac_affine_2_0_WHITE_DOWN*tmp_coords_jac_17_WHITE_DOWN; + const real_t tmp_coords_jac_19_WHITE_DOWN = 1.0 / (tmp_coords_jac_18_WHITE_DOWN); + const real_t jac_affine_inv_0_0_WHITE_DOWN = tmp_coords_jac_19_WHITE_DOWN*(tmp_coords_jac_13_WHITE_DOWN - tmp_coords_jac_14_WHITE_DOWN); + const real_t jac_affine_inv_0_1_WHITE_DOWN = tmp_coords_jac_19_WHITE_DOWN*(jac_affine_0_2_WHITE_DOWN*jac_affine_2_1_WHITE_DOWN - tmp_coords_jac_16_WHITE_DOWN); + const real_t jac_affine_inv_0_2_WHITE_DOWN = tmp_coords_jac_19_WHITE_DOWN*(tmp_coords_jac_15_WHITE_DOWN - tmp_coords_jac_17_WHITE_DOWN); + const real_t jac_affine_inv_1_0_WHITE_DOWN = tmp_coords_jac_19_WHITE_DOWN*(-jac_affine_1_0_WHITE_DOWN*jac_affine_2_2_WHITE_DOWN + jac_affine_1_2_WHITE_DOWN*jac_affine_2_0_WHITE_DOWN); + const real_t jac_affine_inv_1_1_WHITE_DOWN = tmp_coords_jac_19_WHITE_DOWN*(jac_affine_0_0_WHITE_DOWN*jac_affine_2_2_WHITE_DOWN - jac_affine_0_2_WHITE_DOWN*jac_affine_2_0_WHITE_DOWN); + const real_t jac_affine_inv_1_2_WHITE_DOWN = tmp_coords_jac_19_WHITE_DOWN*(-jac_affine_0_0_WHITE_DOWN*jac_affine_1_2_WHITE_DOWN + jac_affine_0_2_WHITE_DOWN*jac_affine_1_0_WHITE_DOWN); + const real_t jac_affine_inv_2_0_WHITE_DOWN = tmp_coords_jac_19_WHITE_DOWN*(jac_affine_1_0_WHITE_DOWN*jac_affine_2_1_WHITE_DOWN - jac_affine_1_1_WHITE_DOWN*jac_affine_2_0_WHITE_DOWN); + const real_t jac_affine_inv_2_1_WHITE_DOWN = tmp_coords_jac_19_WHITE_DOWN*(-jac_affine_0_0_WHITE_DOWN*jac_affine_2_1_WHITE_DOWN + jac_affine_0_1_WHITE_DOWN*jac_affine_2_0_WHITE_DOWN); + const real_t jac_affine_inv_2_2_WHITE_DOWN = tmp_coords_jac_19_WHITE_DOWN*(jac_affine_0_0_WHITE_DOWN*jac_affine_1_1_WHITE_DOWN - jac_affine_0_1_WHITE_DOWN*jac_affine_1_0_WHITE_DOWN); + const real_t abs_det_jac_affine_WHITE_DOWN = abs(tmp_coords_jac_18_WHITE_DOWN); + { + /* CellType.WHITE_DOWN */ + for (int64_t ctr_2 = 0; ctr_2 < micro_edges_per_macro_edge; ctr_2 += 1) + for (int64_t ctr_1 = 0; ctr_1 < -ctr_2 + micro_edges_per_macro_edge; ctr_1 += 1) + { + for (int64_t ctr_0 = 0; ctr_0 < (int64_t)((-ctr_1 - ctr_2 + micro_edges_per_macro_edge - 2) / (4)) * (4); ctr_0 += 4) + { + + const int64_t phantom_ctr_0 = ctr_0; + real_t _data_float_loop_ctr_array_dim_0[4]; + _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0; + _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1; + _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2; + _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3; + real_t _data_float_loop_ctr_array_dim_1[4]; + _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1; + real_t _data_float_loop_ctr_array_dim_2[4]; + _data_float_loop_ctr_array_dim_2[0] = (real_t) ctr_2; + _data_float_loop_ctr_array_dim_2[1] = (real_t) ctr_2; + _data_float_loop_ctr_array_dim_2[2] = (real_t) ctr_2; + _data_float_loop_ctr_array_dim_2[3] = (real_t) ctr_2; + + const __m256d p_affine_0_0 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_3comp0,macro_vertex_coord_id_3comp0,macro_vertex_coord_id_3comp0,macro_vertex_coord_id_3comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)); + const __m256d p_affine_0_1 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_3comp1,macro_vertex_coord_id_3comp1,macro_vertex_coord_id_3comp1,macro_vertex_coord_id_3comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)); + const __m256d p_affine_0_2 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)),_mm256_set_pd(macro_vertex_coord_id_1comp2,macro_vertex_coord_id_1comp2,macro_vertex_coord_id_1comp2,macro_vertex_coord_id_1comp2)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)),_mm256_set_pd(macro_vertex_coord_id_2comp2,macro_vertex_coord_id_2comp2,macro_vertex_coord_id_2comp2,macro_vertex_coord_id_2comp2)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)),_mm256_set_pd(macro_vertex_coord_id_3comp2,macro_vertex_coord_id_3comp2,macro_vertex_coord_id_3comp2,macro_vertex_coord_id_3comp2)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)); + const __m256d p_affine_1_0 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_3comp0,macro_vertex_coord_id_3comp0,macro_vertex_coord_id_3comp0,macro_vertex_coord_id_3comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)); + const __m256d p_affine_1_1 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_3comp1,macro_vertex_coord_id_3comp1,macro_vertex_coord_id_3comp1,macro_vertex_coord_id_3comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)); + const __m256d p_affine_1_2 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)),_mm256_set_pd(macro_vertex_coord_id_1comp2,macro_vertex_coord_id_1comp2,macro_vertex_coord_id_1comp2,macro_vertex_coord_id_1comp2)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)),_mm256_set_pd(macro_vertex_coord_id_2comp2,macro_vertex_coord_id_2comp2,macro_vertex_coord_id_2comp2,macro_vertex_coord_id_2comp2)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)),_mm256_set_pd(macro_vertex_coord_id_3comp2,macro_vertex_coord_id_3comp2,macro_vertex_coord_id_3comp2,macro_vertex_coord_id_3comp2)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)); + const __m256d p_affine_2_0 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_3comp0,macro_vertex_coord_id_3comp0,macro_vertex_coord_id_3comp0,macro_vertex_coord_id_3comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)); + const __m256d p_affine_2_1 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_3comp1,macro_vertex_coord_id_3comp1,macro_vertex_coord_id_3comp1,macro_vertex_coord_id_3comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)); + const __m256d p_affine_2_2 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)),_mm256_set_pd(macro_vertex_coord_id_1comp2,macro_vertex_coord_id_1comp2,macro_vertex_coord_id_1comp2,macro_vertex_coord_id_1comp2)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)),_mm256_set_pd(macro_vertex_coord_id_2comp2,macro_vertex_coord_id_2comp2,macro_vertex_coord_id_2comp2,macro_vertex_coord_id_2comp2)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)),_mm256_set_pd(macro_vertex_coord_id_3comp2,macro_vertex_coord_id_3comp2,macro_vertex_coord_id_3comp2,macro_vertex_coord_id_3comp2)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)); + const __m256d p_affine_3_0 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_3comp0,macro_vertex_coord_id_3comp0,macro_vertex_coord_id_3comp0,macro_vertex_coord_id_3comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)); + const __m256d p_affine_3_1 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_3comp1,macro_vertex_coord_id_3comp1,macro_vertex_coord_id_3comp1,macro_vertex_coord_id_3comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)); + const __m256d p_affine_3_2 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)),_mm256_set_pd(macro_vertex_coord_id_1comp2,macro_vertex_coord_id_1comp2,macro_vertex_coord_id_1comp2,macro_vertex_coord_id_1comp2)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)),_mm256_set_pd(macro_vertex_coord_id_2comp2,macro_vertex_coord_id_2comp2,macro_vertex_coord_id_2comp2,macro_vertex_coord_id_2comp2)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)),_mm256_set_pd(macro_vertex_coord_id_3comp2,macro_vertex_coord_id_3comp2,macro_vertex_coord_id_3comp2,macro_vertex_coord_id_3comp2)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)); + const __m256d src_dof_0 = _mm256_loadu_pd(& _data_src_vertex_0[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1]); + const __m256d src_dof_1 = _mm256_loadu_pd(& _data_src_vertex_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1]); + const __m256d src_dof_2 = _mm256_loadu_pd(& _data_src_vertex_0[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))]); + const __m256d src_dof_3 = _mm256_loadu_pd(& _data_src_vertex_0[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1]); + const __m256d src_dof_4 = _mm256_loadu_pd(& _data_src_edge_0[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))]); + const __m256d src_dof_5 = _mm256_loadu_pd(& _data_src_edge_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6)) + 1]); + const __m256d src_dof_6 = _mm256_loadu_pd(& _data_src_edge_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + 4*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))]); + const __m256d src_dof_7 = _mm256_loadu_pd(& _data_src_edge_0[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 3*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + 1]); + const __m256d src_dof_8 = _mm256_loadu_pd(& _data_src_edge_0[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 5*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]); + const __m256d src_dof_9 = _mm256_loadu_pd(& _data_src_edge_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + 1]); + const __m256d src_dof_10 = _mm256_loadu_pd(& _data_src_vertex_1[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1]); + const __m256d src_dof_11 = _mm256_loadu_pd(& _data_src_vertex_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1]); + const __m256d src_dof_12 = _mm256_loadu_pd(& _data_src_vertex_1[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))]); + const __m256d src_dof_13 = _mm256_loadu_pd(& _data_src_vertex_1[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1]); + const __m256d src_dof_14 = _mm256_loadu_pd(& _data_src_edge_1[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))]); + const __m256d src_dof_15 = _mm256_loadu_pd(& _data_src_edge_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6)) + 1]); + const __m256d src_dof_16 = _mm256_loadu_pd(& _data_src_edge_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + 4*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))]); + const __m256d src_dof_17 = _mm256_loadu_pd(& _data_src_edge_1[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 3*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + 1]); + const __m256d src_dof_18 = _mm256_loadu_pd(& _data_src_edge_1[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 5*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]); + const __m256d src_dof_19 = _mm256_loadu_pd(& _data_src_edge_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + 1]); + const __m256d src_dof_20 = _mm256_loadu_pd(& _data_src_vertex_2[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1]); + const __m256d src_dof_21 = _mm256_loadu_pd(& _data_src_vertex_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1]); + const __m256d src_dof_22 = _mm256_loadu_pd(& _data_src_vertex_2[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))]); + const __m256d src_dof_23 = _mm256_loadu_pd(& _data_src_vertex_2[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1]); + const __m256d src_dof_24 = _mm256_loadu_pd(& _data_src_edge_2[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))]); + const __m256d src_dof_25 = _mm256_loadu_pd(& _data_src_edge_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6)) + 1]); + const __m256d src_dof_26 = _mm256_loadu_pd(& _data_src_edge_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + 4*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))]); + const __m256d src_dof_27 = _mm256_loadu_pd(& _data_src_edge_2[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 3*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + 1]); + const __m256d src_dof_28 = _mm256_loadu_pd(& _data_src_edge_2[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 5*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]); + const __m256d src_dof_29 = _mm256_loadu_pd(& _data_src_edge_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + 1]); + const __m256d rho_dof_0 = _mm256_loadu_pd(& _data_rhoVertex[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1]); + const __m256d rho_dof_1 = _mm256_loadu_pd(& _data_rhoVertex[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1]); + const __m256d rho_dof_2 = _mm256_loadu_pd(& _data_rhoVertex[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))]); + const __m256d rho_dof_3 = _mm256_loadu_pd(& _data_rhoVertex[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1]); + const __m256d rho_dof_4 = _mm256_loadu_pd(& _data_rhoEdge[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))]); + const __m256d rho_dof_5 = _mm256_loadu_pd(& _data_rhoEdge[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6)) + 1]); + const __m256d rho_dof_6 = _mm256_loadu_pd(& _data_rhoEdge[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + 4*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))]); + const __m256d rho_dof_7 = _mm256_loadu_pd(& _data_rhoEdge[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 3*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + 1]); + const __m256d rho_dof_8 = _mm256_loadu_pd(& _data_rhoEdge[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 5*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]); + const __m256d rho_dof_9 = _mm256_loadu_pd(& _data_rhoEdge[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + 1]); + __m256d q_acc_0_0 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_1 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_2 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_3 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_4 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_5 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_6 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_7 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_8 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_9 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_10 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_11 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_12 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_13 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_14 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_15 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_16 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_17 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_18 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_19 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_20 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_21 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_22 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_23 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_24 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_25 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_26 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_27 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_28 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_29 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_0 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_1 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_2 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_3 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_4 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_5 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_6 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_7 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_8 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_9 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_10 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_11 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_12 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_13 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_14 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_15 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_16 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_17 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_18 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_19 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_20 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_21 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_22 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_23 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_24 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_25 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_26 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_27 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_28 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_29 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_0 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_1 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_2 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_3 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_4 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_5 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_6 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_7 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_8 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_9 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_10 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_11 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_12 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_13 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_14 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_15 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_16 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_17 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_18 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_19 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_20 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_21 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_22 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_23 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_24 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_25 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_26 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_27 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_28 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_29 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_0 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_1 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_2 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_3 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_4 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_5 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_6 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_7 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_8 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_9 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_10 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_11 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_12 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_13 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_14 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_15 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_16 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_17 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_18 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_19 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_20 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_21 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_22 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_23 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_24 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_25 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_26 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_27 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_28 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_29 = _mm256_set_pd(0.0,0.0,0.0,0.0); + for (int64_t q = 0; q < 5; q += 1) + { + const __m256d tmp_qloop_0 = _mm256_mul_pd(_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q]),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])); + const __m256d tmp_qloop_1 = _mm256_mul_pd(tmp_qloop_0,_mm256_set_pd(2.0,2.0,2.0,2.0)); + const __m256d tmp_qloop_2 = _mm256_mul_pd(_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q])); + const __m256d tmp_qloop_3 = _mm256_mul_pd(tmp_qloop_2,_mm256_set_pd(2.0,2.0,2.0,2.0)); + const __m256d tmp_qloop_4 = _mm256_mul_pd(_mm256_set_pd(_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q]),_mm256_set_pd(_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q])); + const __m256d tmp_qloop_5 = _mm256_mul_pd(tmp_qloop_4,_mm256_set_pd(2.0,2.0,2.0,2.0)); + const __m256d tmp_qloop_6 = _mm256_mul_pd(_mm256_set_pd(4.0,4.0,4.0,4.0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q])); + const __m256d tmp_qloop_7 = _mm256_mul_pd(tmp_qloop_6,_mm256_set_pd(_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q])); + const __m256d tmp_qloop_8 = _mm256_mul_pd(_mm256_set_pd(4.0,4.0,4.0,4.0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])); + const __m256d tmp_qloop_9 = _mm256_mul_pd(tmp_qloop_8,_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q])); + const __m256d tmp_qloop_10 = _mm256_mul_pd(tmp_qloop_8,_mm256_set_pd(_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q])); + const __m256d tmp_qloop_11 = _mm256_add_pd(tmp_qloop_10,tmp_qloop_9); + const __m256d tmp_qloop_12 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-3.0,-3.0,-3.0,-3.0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),_mm256_mul_pd(_mm256_set_pd(-3.0,-3.0,-3.0,-3.0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]))),_mm256_mul_pd(_mm256_set_pd(-3.0,-3.0,-3.0,-3.0),_mm256_set_pd(_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),tmp_qloop_1),tmp_qloop_11),tmp_qloop_3),tmp_qloop_5),tmp_qloop_7); + const __m256d tmp_qloop_13 = _mm256_mul_pd(_mm256_set_pd(4.0,4.0,4.0,4.0),_mm256_set_pd(_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q])); + const __m256d tmp_qloop_14 = _mm256_add_pd(tmp_qloop_13,tmp_qloop_6); + const __m256d tmp_qloop_15 = _mm256_mul_pd(_mm256_mul_pd(rho_dof_8,tmp_qloop_6),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)); + const __m256d tmp_qloop_16 = _mm256_mul_pd(rho_dof_0,_mm256_add_pd(_mm256_add_pd(_mm256_set_pd(-3.0,-3.0,-3.0,-3.0),tmp_qloop_14),tmp_qloop_8)); + const __m256d tmp_qloop_17 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(rho_dof_7,tmp_qloop_13),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),tmp_qloop_16); + const __m256d tmp_qloop_18 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(rho_dof_1,_mm256_add_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),tmp_qloop_8)),_mm256_mul_pd(rho_dof_9,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_14,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_set_pd(-8.0,-8.0,-8.0,-8.0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q]))),_mm256_set_pd(4.0,4.0,4.0,4.0)))),_mm256_mul_pd(rho_dof_5,tmp_qloop_13)),_mm256_mul_pd(rho_dof_6,tmp_qloop_6)),tmp_qloop_15),tmp_qloop_17); + const __m256d tmp_qloop_19 = _mm256_add_pd(_mm256_set_pd(-4.0,-4.0,-4.0,-4.0),tmp_qloop_8); + const __m256d tmp_qloop_20 = _mm256_mul_pd(_mm256_mul_pd(rho_dof_9,tmp_qloop_8),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)); + const __m256d tmp_qloop_21 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(rho_dof_2,_mm256_add_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),tmp_qloop_6)),_mm256_mul_pd(rho_dof_8,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_13,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_19,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_mul_pd(_mm256_set_pd(-8.0,-8.0,-8.0,-8.0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]))))),_mm256_mul_pd(rho_dof_4,tmp_qloop_13)),_mm256_mul_pd(rho_dof_6,tmp_qloop_8)),tmp_qloop_17),tmp_qloop_20); + const __m256d tmp_qloop_22 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(rho_dof_3,_mm256_add_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),tmp_qloop_13)),_mm256_mul_pd(rho_dof_7,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_19,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_6,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_mul_pd(_mm256_set_pd(-8.0,-8.0,-8.0,-8.0),_mm256_set_pd(_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q]))))),_mm256_mul_pd(rho_dof_4,tmp_qloop_6)),_mm256_mul_pd(rho_dof_5,tmp_qloop_8)),tmp_qloop_15),tmp_qloop_16),tmp_qloop_20); + const __m256d tmp_qloop_23 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_18,_mm256_set_pd(jac_affine_inv_0_0_WHITE_DOWN,jac_affine_inv_0_0_WHITE_DOWN,jac_affine_inv_0_0_WHITE_DOWN,jac_affine_inv_0_0_WHITE_DOWN)),_mm256_mul_pd(tmp_qloop_21,_mm256_set_pd(jac_affine_inv_1_0_WHITE_DOWN,jac_affine_inv_1_0_WHITE_DOWN,jac_affine_inv_1_0_WHITE_DOWN,jac_affine_inv_1_0_WHITE_DOWN))),_mm256_mul_pd(tmp_qloop_22,_mm256_set_pd(jac_affine_inv_2_0_WHITE_DOWN,jac_affine_inv_2_0_WHITE_DOWN,jac_affine_inv_2_0_WHITE_DOWN,jac_affine_inv_2_0_WHITE_DOWN))); + const __m256d tmp_qloop_24 = _mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),tmp_qloop_1); + const __m256d tmp_qloop_25 = _mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q])),tmp_qloop_3); + const __m256d tmp_qloop_26 = _mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q])),tmp_qloop_5); + const __m256d tmp_qloop_27 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_10,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_7,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_mul_pd(tmp_qloop_4,_mm256_set_pd(-4.0,-4.0,-4.0,-4.0))),tmp_qloop_13); + const __m256d tmp_qloop_28 = _mm256_mul_pd(tmp_qloop_2,_mm256_set_pd(4.0,4.0,4.0,4.0)); + const __m256d tmp_qloop_29 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_28,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_7,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_mul_pd(tmp_qloop_9,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_6); + const __m256d tmp_qloop_30 = _mm256_mul_pd(tmp_qloop_0,_mm256_set_pd(4.0,4.0,4.0,4.0)); + const __m256d tmp_qloop_31 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_11,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_30,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_8); + const __m256d tmp_qloop_32 = _mm256_mul_pd(_mm256_mul_pd(_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(rho_dof_0,tmp_qloop_12),_mm256_mul_pd(rho_dof_1,tmp_qloop_24)),_mm256_mul_pd(rho_dof_2,tmp_qloop_25)),_mm256_mul_pd(rho_dof_3,tmp_qloop_26)),_mm256_mul_pd(rho_dof_4,tmp_qloop_7)),_mm256_mul_pd(rho_dof_5,tmp_qloop_10)),_mm256_mul_pd(rho_dof_6,tmp_qloop_9)),_mm256_mul_pd(rho_dof_7,tmp_qloop_27)),_mm256_mul_pd(rho_dof_8,tmp_qloop_29)),_mm256_mul_pd(rho_dof_9,tmp_qloop_31))),_mm256_set_pd(_data_q_w[q],_data_q_w[q],_data_q_w[q],_data_q_w[q])),_mm256_set_pd(abs_det_jac_affine_WHITE_DOWN,abs_det_jac_affine_WHITE_DOWN,abs_det_jac_affine_WHITE_DOWN,abs_det_jac_affine_WHITE_DOWN)); + const __m256d tmp_qloop_33 = _mm256_mul_pd(tmp_qloop_32,_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]))),_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q]))),_mm256_set_pd(1.0,1.0,1.0,1.0))); + const __m256d tmp_qloop_34 = _mm256_mul_pd(tmp_qloop_23,tmp_qloop_33); + const __m256d tmp_qloop_35 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_18,_mm256_set_pd(jac_affine_inv_0_1_WHITE_DOWN,jac_affine_inv_0_1_WHITE_DOWN,jac_affine_inv_0_1_WHITE_DOWN,jac_affine_inv_0_1_WHITE_DOWN)),_mm256_mul_pd(tmp_qloop_21,_mm256_set_pd(jac_affine_inv_1_1_WHITE_DOWN,jac_affine_inv_1_1_WHITE_DOWN,jac_affine_inv_1_1_WHITE_DOWN,jac_affine_inv_1_1_WHITE_DOWN))),_mm256_mul_pd(tmp_qloop_22,_mm256_set_pd(jac_affine_inv_2_1_WHITE_DOWN,jac_affine_inv_2_1_WHITE_DOWN,jac_affine_inv_2_1_WHITE_DOWN,jac_affine_inv_2_1_WHITE_DOWN))); + const __m256d tmp_qloop_36 = _mm256_mul_pd(tmp_qloop_33,tmp_qloop_35); + const __m256d tmp_qloop_37 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_18,_mm256_set_pd(jac_affine_inv_0_2_WHITE_DOWN,jac_affine_inv_0_2_WHITE_DOWN,jac_affine_inv_0_2_WHITE_DOWN,jac_affine_inv_0_2_WHITE_DOWN)),_mm256_mul_pd(tmp_qloop_21,_mm256_set_pd(jac_affine_inv_1_2_WHITE_DOWN,jac_affine_inv_1_2_WHITE_DOWN,jac_affine_inv_1_2_WHITE_DOWN,jac_affine_inv_1_2_WHITE_DOWN))),_mm256_mul_pd(tmp_qloop_22,_mm256_set_pd(jac_affine_inv_2_2_WHITE_DOWN,jac_affine_inv_2_2_WHITE_DOWN,jac_affine_inv_2_2_WHITE_DOWN,jac_affine_inv_2_2_WHITE_DOWN))); + const __m256d tmp_qloop_38 = _mm256_mul_pd(tmp_qloop_33,tmp_qloop_37); + const __m256d tmp_qloop_39 = _mm256_mul_pd(tmp_qloop_32,_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])); + const __m256d tmp_qloop_40 = _mm256_mul_pd(tmp_qloop_23,tmp_qloop_39); + const __m256d tmp_qloop_41 = _mm256_mul_pd(tmp_qloop_32,_mm256_set_pd(_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q])); + const __m256d tmp_qloop_42 = _mm256_mul_pd(tmp_qloop_23,tmp_qloop_41); + const __m256d tmp_qloop_43 = _mm256_mul_pd(tmp_qloop_42,tmp_qloop_9); + const __m256d tmp_qloop_44 = _mm256_mul_pd(tmp_qloop_32,_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q])); + const __m256d tmp_qloop_45 = _mm256_mul_pd(tmp_qloop_23,tmp_qloop_44); + const __m256d tmp_qloop_46 = _mm256_mul_pd(tmp_qloop_35,tmp_qloop_39); + const __m256d tmp_qloop_47 = _mm256_mul_pd(tmp_qloop_35,tmp_qloop_41); + const __m256d tmp_qloop_48 = _mm256_mul_pd(tmp_qloop_47,tmp_qloop_9); + const __m256d tmp_qloop_49 = _mm256_mul_pd(tmp_qloop_35,tmp_qloop_44); + const __m256d tmp_qloop_50 = _mm256_mul_pd(tmp_qloop_37,tmp_qloop_39); + const __m256d tmp_qloop_51 = _mm256_mul_pd(tmp_qloop_37,tmp_qloop_41); + const __m256d tmp_qloop_52 = _mm256_mul_pd(tmp_qloop_51,tmp_qloop_9); + const __m256d tmp_qloop_53 = _mm256_mul_pd(tmp_qloop_37,tmp_qloop_44); + const __m256d tmp_qloop_54 = _mm256_mul_pd(tmp_qloop_23,tmp_qloop_32); + const __m256d tmp_qloop_55 = _mm256_mul_pd(tmp_qloop_2,tmp_qloop_8); + const __m256d tmp_qloop_56 = _mm256_mul_pd(tmp_qloop_32,tmp_qloop_55); + const __m256d tmp_qloop_57 = _mm256_mul_pd(tmp_qloop_4,tmp_qloop_54); + const __m256d tmp_qloop_58 = _mm256_mul_pd(tmp_qloop_32,tmp_qloop_4); + const __m256d tmp_qloop_59 = _mm256_mul_pd(tmp_qloop_35,tmp_qloop_58); + const __m256d tmp_qloop_60 = _mm256_mul_pd(tmp_qloop_37,tmp_qloop_58); + const __m256d q_tmp_0_0 = _mm256_mul_pd(tmp_qloop_12,tmp_qloop_34); + const __m256d q_tmp_0_1 = _mm256_mul_pd(tmp_qloop_24,tmp_qloop_34); + const __m256d q_tmp_0_2 = _mm256_mul_pd(tmp_qloop_25,tmp_qloop_34); + const __m256d q_tmp_0_3 = _mm256_mul_pd(tmp_qloop_26,tmp_qloop_34); + const __m256d q_tmp_0_4 = _mm256_mul_pd(tmp_qloop_34,tmp_qloop_7); + const __m256d q_tmp_0_5 = _mm256_mul_pd(tmp_qloop_10,tmp_qloop_34); + const __m256d q_tmp_0_6 = _mm256_mul_pd(tmp_qloop_34,tmp_qloop_9); + const __m256d q_tmp_0_7 = _mm256_mul_pd(tmp_qloop_27,tmp_qloop_34); + const __m256d q_tmp_0_8 = _mm256_mul_pd(tmp_qloop_29,tmp_qloop_34); + const __m256d q_tmp_0_9 = _mm256_mul_pd(tmp_qloop_31,tmp_qloop_34); + const __m256d q_tmp_0_10 = _mm256_mul_pd(tmp_qloop_12,tmp_qloop_36); + const __m256d q_tmp_0_11 = _mm256_mul_pd(tmp_qloop_24,tmp_qloop_36); + const __m256d q_tmp_0_12 = _mm256_mul_pd(tmp_qloop_25,tmp_qloop_36); + const __m256d q_tmp_0_13 = _mm256_mul_pd(tmp_qloop_26,tmp_qloop_36); + const __m256d q_tmp_0_14 = _mm256_mul_pd(tmp_qloop_36,tmp_qloop_7); + const __m256d q_tmp_0_15 = _mm256_mul_pd(tmp_qloop_10,tmp_qloop_36); + const __m256d q_tmp_0_16 = _mm256_mul_pd(tmp_qloop_36,tmp_qloop_9); + const __m256d q_tmp_0_17 = _mm256_mul_pd(tmp_qloop_27,tmp_qloop_36); + const __m256d q_tmp_0_18 = _mm256_mul_pd(tmp_qloop_29,tmp_qloop_36); + const __m256d q_tmp_0_19 = _mm256_mul_pd(tmp_qloop_31,tmp_qloop_36); + const __m256d q_tmp_0_20 = _mm256_mul_pd(tmp_qloop_12,tmp_qloop_38); + const __m256d q_tmp_0_21 = _mm256_mul_pd(tmp_qloop_24,tmp_qloop_38); + const __m256d q_tmp_0_22 = _mm256_mul_pd(tmp_qloop_25,tmp_qloop_38); + const __m256d q_tmp_0_23 = _mm256_mul_pd(tmp_qloop_26,tmp_qloop_38); + const __m256d q_tmp_0_24 = _mm256_mul_pd(tmp_qloop_38,tmp_qloop_7); + const __m256d q_tmp_0_25 = _mm256_mul_pd(tmp_qloop_10,tmp_qloop_38); + const __m256d q_tmp_0_26 = _mm256_mul_pd(tmp_qloop_38,tmp_qloop_9); + const __m256d q_tmp_0_27 = _mm256_mul_pd(tmp_qloop_27,tmp_qloop_38); + const __m256d q_tmp_0_28 = _mm256_mul_pd(tmp_qloop_29,tmp_qloop_38); + const __m256d q_tmp_0_29 = _mm256_mul_pd(tmp_qloop_31,tmp_qloop_38); + const __m256d q_tmp_1_0 = _mm256_mul_pd(tmp_qloop_12,tmp_qloop_40); + const __m256d q_tmp_1_1 = _mm256_mul_pd(tmp_qloop_24,tmp_qloop_40); + const __m256d q_tmp_1_2 = _mm256_mul_pd(tmp_qloop_25,tmp_qloop_40); + const __m256d q_tmp_1_3 = _mm256_mul_pd(tmp_qloop_26,tmp_qloop_40); + const __m256d q_tmp_1_4 = tmp_qloop_43; + const __m256d q_tmp_1_5 = _mm256_mul_pd(tmp_qloop_30,tmp_qloop_42); + const __m256d q_tmp_1_6 = _mm256_mul_pd(tmp_qloop_30,tmp_qloop_45); + const __m256d q_tmp_1_7 = _mm256_mul_pd(tmp_qloop_27,tmp_qloop_40); + const __m256d q_tmp_1_8 = _mm256_mul_pd(tmp_qloop_29,tmp_qloop_40); + const __m256d q_tmp_1_9 = _mm256_mul_pd(tmp_qloop_31,tmp_qloop_40); + const __m256d q_tmp_1_10 = _mm256_mul_pd(tmp_qloop_12,tmp_qloop_46); + const __m256d q_tmp_1_11 = _mm256_mul_pd(tmp_qloop_24,tmp_qloop_46); + const __m256d q_tmp_1_12 = _mm256_mul_pd(tmp_qloop_25,tmp_qloop_46); + const __m256d q_tmp_1_13 = _mm256_mul_pd(tmp_qloop_26,tmp_qloop_46); + const __m256d q_tmp_1_14 = tmp_qloop_48; + const __m256d q_tmp_1_15 = _mm256_mul_pd(tmp_qloop_30,tmp_qloop_47); + const __m256d q_tmp_1_16 = _mm256_mul_pd(tmp_qloop_30,tmp_qloop_49); + const __m256d q_tmp_1_17 = _mm256_mul_pd(tmp_qloop_27,tmp_qloop_46); + const __m256d q_tmp_1_18 = _mm256_mul_pd(tmp_qloop_29,tmp_qloop_46); + const __m256d q_tmp_1_19 = _mm256_mul_pd(tmp_qloop_31,tmp_qloop_46); + const __m256d q_tmp_1_20 = _mm256_mul_pd(tmp_qloop_12,tmp_qloop_50); + const __m256d q_tmp_1_21 = _mm256_mul_pd(tmp_qloop_24,tmp_qloop_50); + const __m256d q_tmp_1_22 = _mm256_mul_pd(tmp_qloop_25,tmp_qloop_50); + const __m256d q_tmp_1_23 = _mm256_mul_pd(tmp_qloop_26,tmp_qloop_50); + const __m256d q_tmp_1_24 = tmp_qloop_52; + const __m256d q_tmp_1_25 = _mm256_mul_pd(tmp_qloop_30,tmp_qloop_51); + const __m256d q_tmp_1_26 = _mm256_mul_pd(tmp_qloop_30,tmp_qloop_53); + const __m256d q_tmp_1_27 = _mm256_mul_pd(tmp_qloop_27,tmp_qloop_50); + const __m256d q_tmp_1_28 = _mm256_mul_pd(tmp_qloop_29,tmp_qloop_50); + const __m256d q_tmp_1_29 = _mm256_mul_pd(tmp_qloop_31,tmp_qloop_50); + const __m256d q_tmp_2_0 = _mm256_mul_pd(tmp_qloop_12,tmp_qloop_45); + const __m256d q_tmp_2_1 = _mm256_mul_pd(tmp_qloop_24,tmp_qloop_45); + const __m256d q_tmp_2_2 = _mm256_mul_pd(tmp_qloop_25,tmp_qloop_45); + const __m256d q_tmp_2_3 = _mm256_mul_pd(tmp_qloop_26,tmp_qloop_45); + const __m256d q_tmp_2_4 = _mm256_mul_pd(tmp_qloop_28,tmp_qloop_42); + const __m256d q_tmp_2_5 = tmp_qloop_43; + const __m256d q_tmp_2_6 = _mm256_mul_pd(tmp_qloop_54,tmp_qloop_55); + const __m256d q_tmp_2_7 = _mm256_mul_pd(tmp_qloop_27,tmp_qloop_45); + const __m256d q_tmp_2_8 = _mm256_mul_pd(tmp_qloop_29,tmp_qloop_45); + const __m256d q_tmp_2_9 = _mm256_mul_pd(tmp_qloop_31,tmp_qloop_45); + const __m256d q_tmp_2_10 = _mm256_mul_pd(tmp_qloop_12,tmp_qloop_49); + const __m256d q_tmp_2_11 = _mm256_mul_pd(tmp_qloop_24,tmp_qloop_49); + const __m256d q_tmp_2_12 = _mm256_mul_pd(tmp_qloop_25,tmp_qloop_49); + const __m256d q_tmp_2_13 = _mm256_mul_pd(tmp_qloop_26,tmp_qloop_49); + const __m256d q_tmp_2_14 = _mm256_mul_pd(tmp_qloop_28,tmp_qloop_47); + const __m256d q_tmp_2_15 = tmp_qloop_48; + const __m256d q_tmp_2_16 = _mm256_mul_pd(tmp_qloop_35,tmp_qloop_56); + const __m256d q_tmp_2_17 = _mm256_mul_pd(tmp_qloop_27,tmp_qloop_49); + const __m256d q_tmp_2_18 = _mm256_mul_pd(tmp_qloop_29,tmp_qloop_49); + const __m256d q_tmp_2_19 = _mm256_mul_pd(tmp_qloop_31,tmp_qloop_49); + const __m256d q_tmp_2_20 = _mm256_mul_pd(tmp_qloop_12,tmp_qloop_53); + const __m256d q_tmp_2_21 = _mm256_mul_pd(tmp_qloop_24,tmp_qloop_53); + const __m256d q_tmp_2_22 = _mm256_mul_pd(tmp_qloop_25,tmp_qloop_53); + const __m256d q_tmp_2_23 = _mm256_mul_pd(tmp_qloop_26,tmp_qloop_53); + const __m256d q_tmp_2_24 = _mm256_mul_pd(tmp_qloop_28,tmp_qloop_51); + const __m256d q_tmp_2_25 = tmp_qloop_52; + const __m256d q_tmp_2_26 = _mm256_mul_pd(tmp_qloop_37,tmp_qloop_56); + const __m256d q_tmp_2_27 = _mm256_mul_pd(tmp_qloop_27,tmp_qloop_53); + const __m256d q_tmp_2_28 = _mm256_mul_pd(tmp_qloop_29,tmp_qloop_53); + const __m256d q_tmp_2_29 = _mm256_mul_pd(tmp_qloop_31,tmp_qloop_53); + const __m256d q_tmp_3_0 = _mm256_mul_pd(tmp_qloop_12,tmp_qloop_42); + const __m256d q_tmp_3_1 = _mm256_mul_pd(tmp_qloop_24,tmp_qloop_42); + const __m256d q_tmp_3_2 = _mm256_mul_pd(tmp_qloop_25,tmp_qloop_42); + const __m256d q_tmp_3_3 = _mm256_mul_pd(tmp_qloop_26,tmp_qloop_42); + const __m256d q_tmp_3_4 = _mm256_mul_pd(tmp_qloop_57,tmp_qloop_6); + const __m256d q_tmp_3_5 = _mm256_mul_pd(tmp_qloop_57,tmp_qloop_8); + const __m256d q_tmp_3_6 = tmp_qloop_43; + const __m256d q_tmp_3_7 = _mm256_mul_pd(tmp_qloop_27,tmp_qloop_42); + const __m256d q_tmp_3_8 = _mm256_mul_pd(tmp_qloop_29,tmp_qloop_42); + const __m256d q_tmp_3_9 = _mm256_mul_pd(tmp_qloop_31,tmp_qloop_42); + const __m256d q_tmp_3_10 = _mm256_mul_pd(tmp_qloop_12,tmp_qloop_47); + const __m256d q_tmp_3_11 = _mm256_mul_pd(tmp_qloop_24,tmp_qloop_47); + const __m256d q_tmp_3_12 = _mm256_mul_pd(tmp_qloop_25,tmp_qloop_47); + const __m256d q_tmp_3_13 = _mm256_mul_pd(tmp_qloop_26,tmp_qloop_47); + const __m256d q_tmp_3_14 = _mm256_mul_pd(tmp_qloop_59,tmp_qloop_6); + const __m256d q_tmp_3_15 = _mm256_mul_pd(tmp_qloop_59,tmp_qloop_8); + const __m256d q_tmp_3_16 = tmp_qloop_48; + const __m256d q_tmp_3_17 = _mm256_mul_pd(tmp_qloop_27,tmp_qloop_47); + const __m256d q_tmp_3_18 = _mm256_mul_pd(tmp_qloop_29,tmp_qloop_47); + const __m256d q_tmp_3_19 = _mm256_mul_pd(tmp_qloop_31,tmp_qloop_47); + const __m256d q_tmp_3_20 = _mm256_mul_pd(tmp_qloop_12,tmp_qloop_51); + const __m256d q_tmp_3_21 = _mm256_mul_pd(tmp_qloop_24,tmp_qloop_51); + const __m256d q_tmp_3_22 = _mm256_mul_pd(tmp_qloop_25,tmp_qloop_51); + const __m256d q_tmp_3_23 = _mm256_mul_pd(tmp_qloop_26,tmp_qloop_51); + const __m256d q_tmp_3_24 = _mm256_mul_pd(tmp_qloop_6,tmp_qloop_60); + const __m256d q_tmp_3_25 = _mm256_mul_pd(tmp_qloop_60,tmp_qloop_8); + const __m256d q_tmp_3_26 = tmp_qloop_52; + const __m256d q_tmp_3_27 = _mm256_mul_pd(tmp_qloop_27,tmp_qloop_51); + const __m256d q_tmp_3_28 = _mm256_mul_pd(tmp_qloop_29,tmp_qloop_51); + const __m256d q_tmp_3_29 = _mm256_mul_pd(tmp_qloop_31,tmp_qloop_51); + q_acc_0_0 = _mm256_add_pd(q_acc_0_0,q_tmp_0_0); + q_acc_0_1 = _mm256_add_pd(q_acc_0_1,q_tmp_0_1); + q_acc_0_2 = _mm256_add_pd(q_acc_0_2,q_tmp_0_2); + q_acc_0_3 = _mm256_add_pd(q_acc_0_3,q_tmp_0_3); + q_acc_0_4 = _mm256_add_pd(q_acc_0_4,q_tmp_0_4); + q_acc_0_5 = _mm256_add_pd(q_acc_0_5,q_tmp_0_5); + q_acc_0_6 = _mm256_add_pd(q_acc_0_6,q_tmp_0_6); + q_acc_0_7 = _mm256_add_pd(q_acc_0_7,q_tmp_0_7); + q_acc_0_8 = _mm256_add_pd(q_acc_0_8,q_tmp_0_8); + q_acc_0_9 = _mm256_add_pd(q_acc_0_9,q_tmp_0_9); + q_acc_0_10 = _mm256_add_pd(q_acc_0_10,q_tmp_0_10); + q_acc_0_11 = _mm256_add_pd(q_acc_0_11,q_tmp_0_11); + q_acc_0_12 = _mm256_add_pd(q_acc_0_12,q_tmp_0_12); + q_acc_0_13 = _mm256_add_pd(q_acc_0_13,q_tmp_0_13); + q_acc_0_14 = _mm256_add_pd(q_acc_0_14,q_tmp_0_14); + q_acc_0_15 = _mm256_add_pd(q_acc_0_15,q_tmp_0_15); + q_acc_0_16 = _mm256_add_pd(q_acc_0_16,q_tmp_0_16); + q_acc_0_17 = _mm256_add_pd(q_acc_0_17,q_tmp_0_17); + q_acc_0_18 = _mm256_add_pd(q_acc_0_18,q_tmp_0_18); + q_acc_0_19 = _mm256_add_pd(q_acc_0_19,q_tmp_0_19); + q_acc_0_20 = _mm256_add_pd(q_acc_0_20,q_tmp_0_20); + q_acc_0_21 = _mm256_add_pd(q_acc_0_21,q_tmp_0_21); + q_acc_0_22 = _mm256_add_pd(q_acc_0_22,q_tmp_0_22); + q_acc_0_23 = _mm256_add_pd(q_acc_0_23,q_tmp_0_23); + q_acc_0_24 = _mm256_add_pd(q_acc_0_24,q_tmp_0_24); + q_acc_0_25 = _mm256_add_pd(q_acc_0_25,q_tmp_0_25); + q_acc_0_26 = _mm256_add_pd(q_acc_0_26,q_tmp_0_26); + q_acc_0_27 = _mm256_add_pd(q_acc_0_27,q_tmp_0_27); + q_acc_0_28 = _mm256_add_pd(q_acc_0_28,q_tmp_0_28); + q_acc_0_29 = _mm256_add_pd(q_acc_0_29,q_tmp_0_29); + q_acc_1_0 = _mm256_add_pd(q_acc_1_0,q_tmp_1_0); + q_acc_1_1 = _mm256_add_pd(q_acc_1_1,q_tmp_1_1); + q_acc_1_2 = _mm256_add_pd(q_acc_1_2,q_tmp_1_2); + q_acc_1_3 = _mm256_add_pd(q_acc_1_3,q_tmp_1_3); + q_acc_1_4 = _mm256_add_pd(q_acc_1_4,q_tmp_1_4); + q_acc_1_5 = _mm256_add_pd(q_acc_1_5,q_tmp_1_5); + q_acc_1_6 = _mm256_add_pd(q_acc_1_6,q_tmp_1_6); + q_acc_1_7 = _mm256_add_pd(q_acc_1_7,q_tmp_1_7); + q_acc_1_8 = _mm256_add_pd(q_acc_1_8,q_tmp_1_8); + q_acc_1_9 = _mm256_add_pd(q_acc_1_9,q_tmp_1_9); + q_acc_1_10 = _mm256_add_pd(q_acc_1_10,q_tmp_1_10); + q_acc_1_11 = _mm256_add_pd(q_acc_1_11,q_tmp_1_11); + q_acc_1_12 = _mm256_add_pd(q_acc_1_12,q_tmp_1_12); + q_acc_1_13 = _mm256_add_pd(q_acc_1_13,q_tmp_1_13); + q_acc_1_14 = _mm256_add_pd(q_acc_1_14,q_tmp_1_14); + q_acc_1_15 = _mm256_add_pd(q_acc_1_15,q_tmp_1_15); + q_acc_1_16 = _mm256_add_pd(q_acc_1_16,q_tmp_1_16); + q_acc_1_17 = _mm256_add_pd(q_acc_1_17,q_tmp_1_17); + q_acc_1_18 = _mm256_add_pd(q_acc_1_18,q_tmp_1_18); + q_acc_1_19 = _mm256_add_pd(q_acc_1_19,q_tmp_1_19); + q_acc_1_20 = _mm256_add_pd(q_acc_1_20,q_tmp_1_20); + q_acc_1_21 = _mm256_add_pd(q_acc_1_21,q_tmp_1_21); + q_acc_1_22 = _mm256_add_pd(q_acc_1_22,q_tmp_1_22); + q_acc_1_23 = _mm256_add_pd(q_acc_1_23,q_tmp_1_23); + q_acc_1_24 = _mm256_add_pd(q_acc_1_24,q_tmp_1_24); + q_acc_1_25 = _mm256_add_pd(q_acc_1_25,q_tmp_1_25); + q_acc_1_26 = _mm256_add_pd(q_acc_1_26,q_tmp_1_26); + q_acc_1_27 = _mm256_add_pd(q_acc_1_27,q_tmp_1_27); + q_acc_1_28 = _mm256_add_pd(q_acc_1_28,q_tmp_1_28); + q_acc_1_29 = _mm256_add_pd(q_acc_1_29,q_tmp_1_29); + q_acc_2_0 = _mm256_add_pd(q_acc_2_0,q_tmp_2_0); + q_acc_2_1 = _mm256_add_pd(q_acc_2_1,q_tmp_2_1); + q_acc_2_2 = _mm256_add_pd(q_acc_2_2,q_tmp_2_2); + q_acc_2_3 = _mm256_add_pd(q_acc_2_3,q_tmp_2_3); + q_acc_2_4 = _mm256_add_pd(q_acc_2_4,q_tmp_2_4); + q_acc_2_5 = _mm256_add_pd(q_acc_2_5,q_tmp_2_5); + q_acc_2_6 = _mm256_add_pd(q_acc_2_6,q_tmp_2_6); + q_acc_2_7 = _mm256_add_pd(q_acc_2_7,q_tmp_2_7); + q_acc_2_8 = _mm256_add_pd(q_acc_2_8,q_tmp_2_8); + q_acc_2_9 = _mm256_add_pd(q_acc_2_9,q_tmp_2_9); + q_acc_2_10 = _mm256_add_pd(q_acc_2_10,q_tmp_2_10); + q_acc_2_11 = _mm256_add_pd(q_acc_2_11,q_tmp_2_11); + q_acc_2_12 = _mm256_add_pd(q_acc_2_12,q_tmp_2_12); + q_acc_2_13 = _mm256_add_pd(q_acc_2_13,q_tmp_2_13); + q_acc_2_14 = _mm256_add_pd(q_acc_2_14,q_tmp_2_14); + q_acc_2_15 = _mm256_add_pd(q_acc_2_15,q_tmp_2_15); + q_acc_2_16 = _mm256_add_pd(q_acc_2_16,q_tmp_2_16); + q_acc_2_17 = _mm256_add_pd(q_acc_2_17,q_tmp_2_17); + q_acc_2_18 = _mm256_add_pd(q_acc_2_18,q_tmp_2_18); + q_acc_2_19 = _mm256_add_pd(q_acc_2_19,q_tmp_2_19); + q_acc_2_20 = _mm256_add_pd(q_acc_2_20,q_tmp_2_20); + q_acc_2_21 = _mm256_add_pd(q_acc_2_21,q_tmp_2_21); + q_acc_2_22 = _mm256_add_pd(q_acc_2_22,q_tmp_2_22); + q_acc_2_23 = _mm256_add_pd(q_acc_2_23,q_tmp_2_23); + q_acc_2_24 = _mm256_add_pd(q_acc_2_24,q_tmp_2_24); + q_acc_2_25 = _mm256_add_pd(q_acc_2_25,q_tmp_2_25); + q_acc_2_26 = _mm256_add_pd(q_acc_2_26,q_tmp_2_26); + q_acc_2_27 = _mm256_add_pd(q_acc_2_27,q_tmp_2_27); + q_acc_2_28 = _mm256_add_pd(q_acc_2_28,q_tmp_2_28); + q_acc_2_29 = _mm256_add_pd(q_acc_2_29,q_tmp_2_29); + q_acc_3_0 = _mm256_add_pd(q_acc_3_0,q_tmp_3_0); + q_acc_3_1 = _mm256_add_pd(q_acc_3_1,q_tmp_3_1); + q_acc_3_2 = _mm256_add_pd(q_acc_3_2,q_tmp_3_2); + q_acc_3_3 = _mm256_add_pd(q_acc_3_3,q_tmp_3_3); + q_acc_3_4 = _mm256_add_pd(q_acc_3_4,q_tmp_3_4); + q_acc_3_5 = _mm256_add_pd(q_acc_3_5,q_tmp_3_5); + q_acc_3_6 = _mm256_add_pd(q_acc_3_6,q_tmp_3_6); + q_acc_3_7 = _mm256_add_pd(q_acc_3_7,q_tmp_3_7); + q_acc_3_8 = _mm256_add_pd(q_acc_3_8,q_tmp_3_8); + q_acc_3_9 = _mm256_add_pd(q_acc_3_9,q_tmp_3_9); + q_acc_3_10 = _mm256_add_pd(q_acc_3_10,q_tmp_3_10); + q_acc_3_11 = _mm256_add_pd(q_acc_3_11,q_tmp_3_11); + q_acc_3_12 = _mm256_add_pd(q_acc_3_12,q_tmp_3_12); + q_acc_3_13 = _mm256_add_pd(q_acc_3_13,q_tmp_3_13); + q_acc_3_14 = _mm256_add_pd(q_acc_3_14,q_tmp_3_14); + q_acc_3_15 = _mm256_add_pd(q_acc_3_15,q_tmp_3_15); + q_acc_3_16 = _mm256_add_pd(q_acc_3_16,q_tmp_3_16); + q_acc_3_17 = _mm256_add_pd(q_acc_3_17,q_tmp_3_17); + q_acc_3_18 = _mm256_add_pd(q_acc_3_18,q_tmp_3_18); + q_acc_3_19 = _mm256_add_pd(q_acc_3_19,q_tmp_3_19); + q_acc_3_20 = _mm256_add_pd(q_acc_3_20,q_tmp_3_20); + q_acc_3_21 = _mm256_add_pd(q_acc_3_21,q_tmp_3_21); + q_acc_3_22 = _mm256_add_pd(q_acc_3_22,q_tmp_3_22); + q_acc_3_23 = _mm256_add_pd(q_acc_3_23,q_tmp_3_23); + q_acc_3_24 = _mm256_add_pd(q_acc_3_24,q_tmp_3_24); + q_acc_3_25 = _mm256_add_pd(q_acc_3_25,q_tmp_3_25); + q_acc_3_26 = _mm256_add_pd(q_acc_3_26,q_tmp_3_26); + q_acc_3_27 = _mm256_add_pd(q_acc_3_27,q_tmp_3_27); + q_acc_3_28 = _mm256_add_pd(q_acc_3_28,q_tmp_3_28); + q_acc_3_29 = _mm256_add_pd(q_acc_3_29,q_tmp_3_29); + } + const __m256d elMatVec_0 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(q_acc_0_0,src_dof_0),_mm256_mul_pd(q_acc_0_1,src_dof_1)),_mm256_mul_pd(q_acc_0_10,src_dof_10)),_mm256_mul_pd(q_acc_0_11,src_dof_11)),_mm256_mul_pd(q_acc_0_12,src_dof_12)),_mm256_mul_pd(q_acc_0_13,src_dof_13)),_mm256_mul_pd(q_acc_0_14,src_dof_14)),_mm256_mul_pd(q_acc_0_15,src_dof_15)),_mm256_mul_pd(q_acc_0_16,src_dof_16)),_mm256_mul_pd(q_acc_0_17,src_dof_17)),_mm256_mul_pd(q_acc_0_18,src_dof_18)),_mm256_mul_pd(q_acc_0_19,src_dof_19)),_mm256_mul_pd(q_acc_0_2,src_dof_2)),_mm256_mul_pd(q_acc_0_20,src_dof_20)),_mm256_mul_pd(q_acc_0_21,src_dof_21)),_mm256_mul_pd(q_acc_0_22,src_dof_22)),_mm256_mul_pd(q_acc_0_23,src_dof_23)),_mm256_mul_pd(q_acc_0_24,src_dof_24)),_mm256_mul_pd(q_acc_0_25,src_dof_25)),_mm256_mul_pd(q_acc_0_26,src_dof_26)),_mm256_mul_pd(q_acc_0_27,src_dof_27)),_mm256_mul_pd(q_acc_0_28,src_dof_28)),_mm256_mul_pd(q_acc_0_29,src_dof_29)),_mm256_mul_pd(q_acc_0_3,src_dof_3)),_mm256_mul_pd(q_acc_0_4,src_dof_4)),_mm256_mul_pd(q_acc_0_5,src_dof_5)),_mm256_mul_pd(q_acc_0_6,src_dof_6)),_mm256_mul_pd(q_acc_0_7,src_dof_7)),_mm256_mul_pd(q_acc_0_8,src_dof_8)),_mm256_mul_pd(q_acc_0_9,src_dof_9)); + const __m256d elMatVec_1 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(q_acc_1_0,src_dof_0),_mm256_mul_pd(q_acc_1_1,src_dof_1)),_mm256_mul_pd(q_acc_1_10,src_dof_10)),_mm256_mul_pd(q_acc_1_11,src_dof_11)),_mm256_mul_pd(q_acc_1_12,src_dof_12)),_mm256_mul_pd(q_acc_1_13,src_dof_13)),_mm256_mul_pd(q_acc_1_14,src_dof_14)),_mm256_mul_pd(q_acc_1_15,src_dof_15)),_mm256_mul_pd(q_acc_1_16,src_dof_16)),_mm256_mul_pd(q_acc_1_17,src_dof_17)),_mm256_mul_pd(q_acc_1_18,src_dof_18)),_mm256_mul_pd(q_acc_1_19,src_dof_19)),_mm256_mul_pd(q_acc_1_2,src_dof_2)),_mm256_mul_pd(q_acc_1_20,src_dof_20)),_mm256_mul_pd(q_acc_1_21,src_dof_21)),_mm256_mul_pd(q_acc_1_22,src_dof_22)),_mm256_mul_pd(q_acc_1_23,src_dof_23)),_mm256_mul_pd(q_acc_1_24,src_dof_24)),_mm256_mul_pd(q_acc_1_25,src_dof_25)),_mm256_mul_pd(q_acc_1_26,src_dof_26)),_mm256_mul_pd(q_acc_1_27,src_dof_27)),_mm256_mul_pd(q_acc_1_28,src_dof_28)),_mm256_mul_pd(q_acc_1_29,src_dof_29)),_mm256_mul_pd(q_acc_1_3,src_dof_3)),_mm256_mul_pd(q_acc_1_4,src_dof_4)),_mm256_mul_pd(q_acc_1_5,src_dof_5)),_mm256_mul_pd(q_acc_1_6,src_dof_6)),_mm256_mul_pd(q_acc_1_7,src_dof_7)),_mm256_mul_pd(q_acc_1_8,src_dof_8)),_mm256_mul_pd(q_acc_1_9,src_dof_9)); + const __m256d elMatVec_2 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(q_acc_2_0,src_dof_0),_mm256_mul_pd(q_acc_2_1,src_dof_1)),_mm256_mul_pd(q_acc_2_10,src_dof_10)),_mm256_mul_pd(q_acc_2_11,src_dof_11)),_mm256_mul_pd(q_acc_2_12,src_dof_12)),_mm256_mul_pd(q_acc_2_13,src_dof_13)),_mm256_mul_pd(q_acc_2_14,src_dof_14)),_mm256_mul_pd(q_acc_2_15,src_dof_15)),_mm256_mul_pd(q_acc_2_16,src_dof_16)),_mm256_mul_pd(q_acc_2_17,src_dof_17)),_mm256_mul_pd(q_acc_2_18,src_dof_18)),_mm256_mul_pd(q_acc_2_19,src_dof_19)),_mm256_mul_pd(q_acc_2_2,src_dof_2)),_mm256_mul_pd(q_acc_2_20,src_dof_20)),_mm256_mul_pd(q_acc_2_21,src_dof_21)),_mm256_mul_pd(q_acc_2_22,src_dof_22)),_mm256_mul_pd(q_acc_2_23,src_dof_23)),_mm256_mul_pd(q_acc_2_24,src_dof_24)),_mm256_mul_pd(q_acc_2_25,src_dof_25)),_mm256_mul_pd(q_acc_2_26,src_dof_26)),_mm256_mul_pd(q_acc_2_27,src_dof_27)),_mm256_mul_pd(q_acc_2_28,src_dof_28)),_mm256_mul_pd(q_acc_2_29,src_dof_29)),_mm256_mul_pd(q_acc_2_3,src_dof_3)),_mm256_mul_pd(q_acc_2_4,src_dof_4)),_mm256_mul_pd(q_acc_2_5,src_dof_5)),_mm256_mul_pd(q_acc_2_6,src_dof_6)),_mm256_mul_pd(q_acc_2_7,src_dof_7)),_mm256_mul_pd(q_acc_2_8,src_dof_8)),_mm256_mul_pd(q_acc_2_9,src_dof_9)); + const __m256d elMatVec_3 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(q_acc_3_0,src_dof_0),_mm256_mul_pd(q_acc_3_1,src_dof_1)),_mm256_mul_pd(q_acc_3_10,src_dof_10)),_mm256_mul_pd(q_acc_3_11,src_dof_11)),_mm256_mul_pd(q_acc_3_12,src_dof_12)),_mm256_mul_pd(q_acc_3_13,src_dof_13)),_mm256_mul_pd(q_acc_3_14,src_dof_14)),_mm256_mul_pd(q_acc_3_15,src_dof_15)),_mm256_mul_pd(q_acc_3_16,src_dof_16)),_mm256_mul_pd(q_acc_3_17,src_dof_17)),_mm256_mul_pd(q_acc_3_18,src_dof_18)),_mm256_mul_pd(q_acc_3_19,src_dof_19)),_mm256_mul_pd(q_acc_3_2,src_dof_2)),_mm256_mul_pd(q_acc_3_20,src_dof_20)),_mm256_mul_pd(q_acc_3_21,src_dof_21)),_mm256_mul_pd(q_acc_3_22,src_dof_22)),_mm256_mul_pd(q_acc_3_23,src_dof_23)),_mm256_mul_pd(q_acc_3_24,src_dof_24)),_mm256_mul_pd(q_acc_3_25,src_dof_25)),_mm256_mul_pd(q_acc_3_26,src_dof_26)),_mm256_mul_pd(q_acc_3_27,src_dof_27)),_mm256_mul_pd(q_acc_3_28,src_dof_28)),_mm256_mul_pd(q_acc_3_29,src_dof_29)),_mm256_mul_pd(q_acc_3_3,src_dof_3)),_mm256_mul_pd(q_acc_3_4,src_dof_4)),_mm256_mul_pd(q_acc_3_5,src_dof_5)),_mm256_mul_pd(q_acc_3_6,src_dof_6)),_mm256_mul_pd(q_acc_3_7,src_dof_7)),_mm256_mul_pd(q_acc_3_8,src_dof_8)),_mm256_mul_pd(q_acc_3_9,src_dof_9)); + _mm256_storeu_pd(&_data_dst[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1],_mm256_add_pd(elMatVec_0,_mm256_loadu_pd(& _data_dst[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1]))); + _mm256_storeu_pd(&_data_dst[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1],_mm256_add_pd(elMatVec_1,_mm256_loadu_pd(& _data_dst[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1]))); + _mm256_storeu_pd(&_data_dst[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))],_mm256_add_pd(elMatVec_2,_mm256_loadu_pd(& _data_dst[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))]))); + _mm256_storeu_pd(&_data_dst[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1],_mm256_add_pd(elMatVec_3,_mm256_loadu_pd(& _data_dst[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1]))); + } + for (int64_t ctr_0 = (int64_t)((-ctr_1 - ctr_2 + micro_edges_per_macro_edge - 2) / (4)) * (4); ctr_0 < -ctr_1 - ctr_2 + micro_edges_per_macro_edge - 2; ctr_0 += 1) + { + + const int64_t phantom_ctr_0 = ctr_0; + real_t _data_float_loop_ctr_array_dim_0[4]; + _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0; + _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1; + _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2; + _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3; + real_t _data_float_loop_ctr_array_dim_1[4]; + _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1; + real_t _data_float_loop_ctr_array_dim_2[4]; + _data_float_loop_ctr_array_dim_2[0] = (real_t) ctr_2; + _data_float_loop_ctr_array_dim_2[1] = (real_t) ctr_2; + _data_float_loop_ctr_array_dim_2[2] = (real_t) ctr_2; + _data_float_loop_ctr_array_dim_2[3] = (real_t) ctr_2; + + const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]; + const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]; + const real_t p_affine_0_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]; + const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_1_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_2_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_3_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_3_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_3_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0; + const real_t src_dof_0 = _data_src_vertex_0[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1]; + const real_t src_dof_1 = _data_src_vertex_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1]; + const real_t src_dof_2 = _data_src_vertex_0[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))]; + const real_t src_dof_3 = _data_src_vertex_0[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1]; + const real_t src_dof_4 = _data_src_edge_0[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))]; + const real_t src_dof_5 = _data_src_edge_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6)) + 1]; + const real_t src_dof_6 = _data_src_edge_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + 4*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))]; + const real_t src_dof_7 = _data_src_edge_0[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 3*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + 1]; + const real_t src_dof_8 = _data_src_edge_0[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 5*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]; + const real_t src_dof_9 = _data_src_edge_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + 1]; + const real_t src_dof_10 = _data_src_vertex_1[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1]; + const real_t src_dof_11 = _data_src_vertex_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1]; + const real_t src_dof_12 = _data_src_vertex_1[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))]; + const real_t src_dof_13 = _data_src_vertex_1[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1]; + const real_t src_dof_14 = _data_src_edge_1[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))]; + const real_t src_dof_15 = _data_src_edge_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6)) + 1]; + const real_t src_dof_16 = _data_src_edge_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + 4*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))]; + const real_t src_dof_17 = _data_src_edge_1[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 3*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + 1]; + const real_t src_dof_18 = _data_src_edge_1[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 5*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]; + const real_t src_dof_19 = _data_src_edge_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + 1]; + const real_t src_dof_20 = _data_src_vertex_2[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1]; + const real_t src_dof_21 = _data_src_vertex_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1]; + const real_t src_dof_22 = _data_src_vertex_2[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))]; + const real_t src_dof_23 = _data_src_vertex_2[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1]; + const real_t src_dof_24 = _data_src_edge_2[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))]; + const real_t src_dof_25 = _data_src_edge_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6)) + 1]; + const real_t src_dof_26 = _data_src_edge_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + 4*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))]; + const real_t src_dof_27 = _data_src_edge_2[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 3*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + 1]; + const real_t src_dof_28 = _data_src_edge_2[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 5*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]; + const real_t src_dof_29 = _data_src_edge_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + 1]; + const real_t rho_dof_0 = _data_rhoVertex[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1]; + const real_t rho_dof_1 = _data_rhoVertex[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1]; + const real_t rho_dof_2 = _data_rhoVertex[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))]; + const real_t rho_dof_3 = _data_rhoVertex[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1]; + const real_t rho_dof_4 = _data_rhoEdge[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))]; + const real_t rho_dof_5 = _data_rhoEdge[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6)) + 1]; + const real_t rho_dof_6 = _data_rhoEdge[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + 4*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))]; + const real_t rho_dof_7 = _data_rhoEdge[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 3*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + 1]; + const real_t rho_dof_8 = _data_rhoEdge[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 5*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]; + const real_t rho_dof_9 = _data_rhoEdge[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + 1]; + real_t q_acc_0_0 = 0.0; + real_t q_acc_0_1 = 0.0; + real_t q_acc_0_2 = 0.0; + real_t q_acc_0_3 = 0.0; + real_t q_acc_0_4 = 0.0; + real_t q_acc_0_5 = 0.0; + real_t q_acc_0_6 = 0.0; + real_t q_acc_0_7 = 0.0; + real_t q_acc_0_8 = 0.0; + real_t q_acc_0_9 = 0.0; + real_t q_acc_0_10 = 0.0; + real_t q_acc_0_11 = 0.0; + real_t q_acc_0_12 = 0.0; + real_t q_acc_0_13 = 0.0; + real_t q_acc_0_14 = 0.0; + real_t q_acc_0_15 = 0.0; + real_t q_acc_0_16 = 0.0; + real_t q_acc_0_17 = 0.0; + real_t q_acc_0_18 = 0.0; + real_t q_acc_0_19 = 0.0; + real_t q_acc_0_20 = 0.0; + real_t q_acc_0_21 = 0.0; + real_t q_acc_0_22 = 0.0; + real_t q_acc_0_23 = 0.0; + real_t q_acc_0_24 = 0.0; + real_t q_acc_0_25 = 0.0; + real_t q_acc_0_26 = 0.0; + real_t q_acc_0_27 = 0.0; + real_t q_acc_0_28 = 0.0; + real_t q_acc_0_29 = 0.0; + real_t q_acc_1_0 = 0.0; + real_t q_acc_1_1 = 0.0; + real_t q_acc_1_2 = 0.0; + real_t q_acc_1_3 = 0.0; + real_t q_acc_1_4 = 0.0; + real_t q_acc_1_5 = 0.0; + real_t q_acc_1_6 = 0.0; + real_t q_acc_1_7 = 0.0; + real_t q_acc_1_8 = 0.0; + real_t q_acc_1_9 = 0.0; + real_t q_acc_1_10 = 0.0; + real_t q_acc_1_11 = 0.0; + real_t q_acc_1_12 = 0.0; + real_t q_acc_1_13 = 0.0; + real_t q_acc_1_14 = 0.0; + real_t q_acc_1_15 = 0.0; + real_t q_acc_1_16 = 0.0; + real_t q_acc_1_17 = 0.0; + real_t q_acc_1_18 = 0.0; + real_t q_acc_1_19 = 0.0; + real_t q_acc_1_20 = 0.0; + real_t q_acc_1_21 = 0.0; + real_t q_acc_1_22 = 0.0; + real_t q_acc_1_23 = 0.0; + real_t q_acc_1_24 = 0.0; + real_t q_acc_1_25 = 0.0; + real_t q_acc_1_26 = 0.0; + real_t q_acc_1_27 = 0.0; + real_t q_acc_1_28 = 0.0; + real_t q_acc_1_29 = 0.0; + real_t q_acc_2_0 = 0.0; + real_t q_acc_2_1 = 0.0; + real_t q_acc_2_2 = 0.0; + real_t q_acc_2_3 = 0.0; + real_t q_acc_2_4 = 0.0; + real_t q_acc_2_5 = 0.0; + real_t q_acc_2_6 = 0.0; + real_t q_acc_2_7 = 0.0; + real_t q_acc_2_8 = 0.0; + real_t q_acc_2_9 = 0.0; + real_t q_acc_2_10 = 0.0; + real_t q_acc_2_11 = 0.0; + real_t q_acc_2_12 = 0.0; + real_t q_acc_2_13 = 0.0; + real_t q_acc_2_14 = 0.0; + real_t q_acc_2_15 = 0.0; + real_t q_acc_2_16 = 0.0; + real_t q_acc_2_17 = 0.0; + real_t q_acc_2_18 = 0.0; + real_t q_acc_2_19 = 0.0; + real_t q_acc_2_20 = 0.0; + real_t q_acc_2_21 = 0.0; + real_t q_acc_2_22 = 0.0; + real_t q_acc_2_23 = 0.0; + real_t q_acc_2_24 = 0.0; + real_t q_acc_2_25 = 0.0; + real_t q_acc_2_26 = 0.0; + real_t q_acc_2_27 = 0.0; + real_t q_acc_2_28 = 0.0; + real_t q_acc_2_29 = 0.0; + real_t q_acc_3_0 = 0.0; + real_t q_acc_3_1 = 0.0; + real_t q_acc_3_2 = 0.0; + real_t q_acc_3_3 = 0.0; + real_t q_acc_3_4 = 0.0; + real_t q_acc_3_5 = 0.0; + real_t q_acc_3_6 = 0.0; + real_t q_acc_3_7 = 0.0; + real_t q_acc_3_8 = 0.0; + real_t q_acc_3_9 = 0.0; + real_t q_acc_3_10 = 0.0; + real_t q_acc_3_11 = 0.0; + real_t q_acc_3_12 = 0.0; + real_t q_acc_3_13 = 0.0; + real_t q_acc_3_14 = 0.0; + real_t q_acc_3_15 = 0.0; + real_t q_acc_3_16 = 0.0; + real_t q_acc_3_17 = 0.0; + real_t q_acc_3_18 = 0.0; + real_t q_acc_3_19 = 0.0; + real_t q_acc_3_20 = 0.0; + real_t q_acc_3_21 = 0.0; + real_t q_acc_3_22 = 0.0; + real_t q_acc_3_23 = 0.0; + real_t q_acc_3_24 = 0.0; + real_t q_acc_3_25 = 0.0; + real_t q_acc_3_26 = 0.0; + real_t q_acc_3_27 = 0.0; + real_t q_acc_3_28 = 0.0; + real_t q_acc_3_29 = 0.0; + for (int64_t q = 0; q < 5; q += 1) + { + const real_t tmp_qloop_0 = (_data_q_p_0[q]*_data_q_p_0[q]); + const real_t tmp_qloop_1 = tmp_qloop_0*2.0; + const real_t tmp_qloop_2 = (_data_q_p_1[q]*_data_q_p_1[q]); + const real_t tmp_qloop_3 = tmp_qloop_2*2.0; + const real_t tmp_qloop_4 = (_data_q_p_2[q]*_data_q_p_2[q]); + const real_t tmp_qloop_5 = tmp_qloop_4*2.0; + const real_t tmp_qloop_6 = 4.0*_data_q_p_1[q]; + const real_t tmp_qloop_7 = tmp_qloop_6*_data_q_p_2[q]; + const real_t tmp_qloop_8 = 4.0*_data_q_p_0[q]; + const real_t tmp_qloop_9 = tmp_qloop_8*_data_q_p_1[q]; + const real_t tmp_qloop_10 = tmp_qloop_8*_data_q_p_2[q]; + const real_t tmp_qloop_11 = tmp_qloop_10 + tmp_qloop_9; + const real_t tmp_qloop_12 = tmp_qloop_1 + tmp_qloop_11 + tmp_qloop_3 + tmp_qloop_5 + tmp_qloop_7 - 3.0*_data_q_p_0[q] - 3.0*_data_q_p_1[q] - 3.0*_data_q_p_2[q] + 1.0; + const real_t tmp_qloop_13 = 4.0*_data_q_p_2[q]; + const real_t tmp_qloop_14 = tmp_qloop_13 + tmp_qloop_6; + const real_t tmp_qloop_15 = -rho_dof_8*tmp_qloop_6; + const real_t tmp_qloop_16 = rho_dof_0*(tmp_qloop_14 + tmp_qloop_8 - 3.0); + const real_t tmp_qloop_17 = -rho_dof_7*tmp_qloop_13 + tmp_qloop_16; + const real_t tmp_qloop_18 = rho_dof_1*(tmp_qloop_8 - 1.0) + rho_dof_5*tmp_qloop_13 + rho_dof_6*tmp_qloop_6 + rho_dof_9*(-tmp_qloop_14 - 8.0*_data_q_p_0[q] + 4.0) + tmp_qloop_15 + tmp_qloop_17; + const real_t tmp_qloop_19 = tmp_qloop_8 - 4.0; + const real_t tmp_qloop_20 = -rho_dof_9*tmp_qloop_8; + const real_t tmp_qloop_21 = rho_dof_2*(tmp_qloop_6 - 1.0) + rho_dof_4*tmp_qloop_13 + rho_dof_6*tmp_qloop_8 + rho_dof_8*(-tmp_qloop_13 - tmp_qloop_19 - 8.0*_data_q_p_1[q]) + tmp_qloop_17 + tmp_qloop_20; + const real_t tmp_qloop_22 = rho_dof_3*(tmp_qloop_13 - 1.0) + rho_dof_4*tmp_qloop_6 + rho_dof_5*tmp_qloop_8 + rho_dof_7*(-tmp_qloop_19 - tmp_qloop_6 - 8.0*_data_q_p_2[q]) + tmp_qloop_15 + tmp_qloop_16 + tmp_qloop_20; + const real_t tmp_qloop_23 = jac_affine_inv_0_0_WHITE_DOWN*tmp_qloop_18 + jac_affine_inv_1_0_WHITE_DOWN*tmp_qloop_21 + jac_affine_inv_2_0_WHITE_DOWN*tmp_qloop_22; + const real_t tmp_qloop_24 = tmp_qloop_1 - _data_q_p_0[q]; + const real_t tmp_qloop_25 = tmp_qloop_3 - _data_q_p_1[q]; + const real_t tmp_qloop_26 = tmp_qloop_5 - _data_q_p_2[q]; + const real_t tmp_qloop_27 = -tmp_qloop_10 + tmp_qloop_13 + tmp_qloop_4*-4.0 - tmp_qloop_7; + const real_t tmp_qloop_28 = tmp_qloop_2*4.0; + const real_t tmp_qloop_29 = -tmp_qloop_28 + tmp_qloop_6 - tmp_qloop_7 - tmp_qloop_9; + const real_t tmp_qloop_30 = tmp_qloop_0*4.0; + const real_t tmp_qloop_31 = -tmp_qloop_11 - tmp_qloop_30 + tmp_qloop_8; + const real_t tmp_qloop_32 = abs_det_jac_affine_WHITE_DOWN*1.0 / (rho_dof_0*tmp_qloop_12 + rho_dof_1*tmp_qloop_24 + rho_dof_2*tmp_qloop_25 + rho_dof_3*tmp_qloop_26 + rho_dof_4*tmp_qloop_7 + rho_dof_5*tmp_qloop_10 + rho_dof_6*tmp_qloop_9 + rho_dof_7*tmp_qloop_27 + rho_dof_8*tmp_qloop_29 + rho_dof_9*tmp_qloop_31)*_data_q_w[q]; + const real_t tmp_qloop_33 = tmp_qloop_32*(1.0 - _data_q_p_0[q] - _data_q_p_1[q] - _data_q_p_2[q]); + const real_t tmp_qloop_34 = tmp_qloop_23*tmp_qloop_33; + const real_t tmp_qloop_35 = jac_affine_inv_0_1_WHITE_DOWN*tmp_qloop_18 + jac_affine_inv_1_1_WHITE_DOWN*tmp_qloop_21 + jac_affine_inv_2_1_WHITE_DOWN*tmp_qloop_22; + const real_t tmp_qloop_36 = tmp_qloop_33*tmp_qloop_35; + const real_t tmp_qloop_37 = jac_affine_inv_0_2_WHITE_DOWN*tmp_qloop_18 + jac_affine_inv_1_2_WHITE_DOWN*tmp_qloop_21 + jac_affine_inv_2_2_WHITE_DOWN*tmp_qloop_22; + const real_t tmp_qloop_38 = tmp_qloop_33*tmp_qloop_37; + const real_t tmp_qloop_39 = tmp_qloop_32*_data_q_p_0[q]; + const real_t tmp_qloop_40 = tmp_qloop_23*tmp_qloop_39; + const real_t tmp_qloop_41 = tmp_qloop_32*_data_q_p_2[q]; + const real_t tmp_qloop_42 = tmp_qloop_23*tmp_qloop_41; + const real_t tmp_qloop_43 = tmp_qloop_42*tmp_qloop_9; + const real_t tmp_qloop_44 = tmp_qloop_32*_data_q_p_1[q]; + const real_t tmp_qloop_45 = tmp_qloop_23*tmp_qloop_44; + const real_t tmp_qloop_46 = tmp_qloop_35*tmp_qloop_39; + const real_t tmp_qloop_47 = tmp_qloop_35*tmp_qloop_41; + const real_t tmp_qloop_48 = tmp_qloop_47*tmp_qloop_9; + const real_t tmp_qloop_49 = tmp_qloop_35*tmp_qloop_44; + const real_t tmp_qloop_50 = tmp_qloop_37*tmp_qloop_39; + const real_t tmp_qloop_51 = tmp_qloop_37*tmp_qloop_41; + const real_t tmp_qloop_52 = tmp_qloop_51*tmp_qloop_9; + const real_t tmp_qloop_53 = tmp_qloop_37*tmp_qloop_44; + const real_t tmp_qloop_54 = tmp_qloop_23*tmp_qloop_32; + const real_t tmp_qloop_55 = tmp_qloop_2*tmp_qloop_8; + const real_t tmp_qloop_56 = tmp_qloop_32*tmp_qloop_55; + const real_t tmp_qloop_57 = tmp_qloop_4*tmp_qloop_54; + const real_t tmp_qloop_58 = tmp_qloop_32*tmp_qloop_4; + const real_t tmp_qloop_59 = tmp_qloop_35*tmp_qloop_58; + const real_t tmp_qloop_60 = tmp_qloop_37*tmp_qloop_58; + const real_t q_tmp_0_0 = tmp_qloop_12*tmp_qloop_34; + const real_t q_tmp_0_1 = tmp_qloop_24*tmp_qloop_34; + const real_t q_tmp_0_2 = tmp_qloop_25*tmp_qloop_34; + const real_t q_tmp_0_3 = tmp_qloop_26*tmp_qloop_34; + const real_t q_tmp_0_4 = tmp_qloop_34*tmp_qloop_7; + const real_t q_tmp_0_5 = tmp_qloop_10*tmp_qloop_34; + const real_t q_tmp_0_6 = tmp_qloop_34*tmp_qloop_9; + const real_t q_tmp_0_7 = tmp_qloop_27*tmp_qloop_34; + const real_t q_tmp_0_8 = tmp_qloop_29*tmp_qloop_34; + const real_t q_tmp_0_9 = tmp_qloop_31*tmp_qloop_34; + const real_t q_tmp_0_10 = tmp_qloop_12*tmp_qloop_36; + const real_t q_tmp_0_11 = tmp_qloop_24*tmp_qloop_36; + const real_t q_tmp_0_12 = tmp_qloop_25*tmp_qloop_36; + const real_t q_tmp_0_13 = tmp_qloop_26*tmp_qloop_36; + const real_t q_tmp_0_14 = tmp_qloop_36*tmp_qloop_7; + const real_t q_tmp_0_15 = tmp_qloop_10*tmp_qloop_36; + const real_t q_tmp_0_16 = tmp_qloop_36*tmp_qloop_9; + const real_t q_tmp_0_17 = tmp_qloop_27*tmp_qloop_36; + const real_t q_tmp_0_18 = tmp_qloop_29*tmp_qloop_36; + const real_t q_tmp_0_19 = tmp_qloop_31*tmp_qloop_36; + const real_t q_tmp_0_20 = tmp_qloop_12*tmp_qloop_38; + const real_t q_tmp_0_21 = tmp_qloop_24*tmp_qloop_38; + const real_t q_tmp_0_22 = tmp_qloop_25*tmp_qloop_38; + const real_t q_tmp_0_23 = tmp_qloop_26*tmp_qloop_38; + const real_t q_tmp_0_24 = tmp_qloop_38*tmp_qloop_7; + const real_t q_tmp_0_25 = tmp_qloop_10*tmp_qloop_38; + const real_t q_tmp_0_26 = tmp_qloop_38*tmp_qloop_9; + const real_t q_tmp_0_27 = tmp_qloop_27*tmp_qloop_38; + const real_t q_tmp_0_28 = tmp_qloop_29*tmp_qloop_38; + const real_t q_tmp_0_29 = tmp_qloop_31*tmp_qloop_38; + const real_t q_tmp_1_0 = tmp_qloop_12*tmp_qloop_40; + const real_t q_tmp_1_1 = tmp_qloop_24*tmp_qloop_40; + const real_t q_tmp_1_2 = tmp_qloop_25*tmp_qloop_40; + const real_t q_tmp_1_3 = tmp_qloop_26*tmp_qloop_40; + const real_t q_tmp_1_4 = tmp_qloop_43; + const real_t q_tmp_1_5 = tmp_qloop_30*tmp_qloop_42; + const real_t q_tmp_1_6 = tmp_qloop_30*tmp_qloop_45; + const real_t q_tmp_1_7 = tmp_qloop_27*tmp_qloop_40; + const real_t q_tmp_1_8 = tmp_qloop_29*tmp_qloop_40; + const real_t q_tmp_1_9 = tmp_qloop_31*tmp_qloop_40; + const real_t q_tmp_1_10 = tmp_qloop_12*tmp_qloop_46; + const real_t q_tmp_1_11 = tmp_qloop_24*tmp_qloop_46; + const real_t q_tmp_1_12 = tmp_qloop_25*tmp_qloop_46; + const real_t q_tmp_1_13 = tmp_qloop_26*tmp_qloop_46; + const real_t q_tmp_1_14 = tmp_qloop_48; + const real_t q_tmp_1_15 = tmp_qloop_30*tmp_qloop_47; + const real_t q_tmp_1_16 = tmp_qloop_30*tmp_qloop_49; + const real_t q_tmp_1_17 = tmp_qloop_27*tmp_qloop_46; + const real_t q_tmp_1_18 = tmp_qloop_29*tmp_qloop_46; + const real_t q_tmp_1_19 = tmp_qloop_31*tmp_qloop_46; + const real_t q_tmp_1_20 = tmp_qloop_12*tmp_qloop_50; + const real_t q_tmp_1_21 = tmp_qloop_24*tmp_qloop_50; + const real_t q_tmp_1_22 = tmp_qloop_25*tmp_qloop_50; + const real_t q_tmp_1_23 = tmp_qloop_26*tmp_qloop_50; + const real_t q_tmp_1_24 = tmp_qloop_52; + const real_t q_tmp_1_25 = tmp_qloop_30*tmp_qloop_51; + const real_t q_tmp_1_26 = tmp_qloop_30*tmp_qloop_53; + const real_t q_tmp_1_27 = tmp_qloop_27*tmp_qloop_50; + const real_t q_tmp_1_28 = tmp_qloop_29*tmp_qloop_50; + const real_t q_tmp_1_29 = tmp_qloop_31*tmp_qloop_50; + const real_t q_tmp_2_0 = tmp_qloop_12*tmp_qloop_45; + const real_t q_tmp_2_1 = tmp_qloop_24*tmp_qloop_45; + const real_t q_tmp_2_2 = tmp_qloop_25*tmp_qloop_45; + const real_t q_tmp_2_3 = tmp_qloop_26*tmp_qloop_45; + const real_t q_tmp_2_4 = tmp_qloop_28*tmp_qloop_42; + const real_t q_tmp_2_5 = tmp_qloop_43; + const real_t q_tmp_2_6 = tmp_qloop_54*tmp_qloop_55; + const real_t q_tmp_2_7 = tmp_qloop_27*tmp_qloop_45; + const real_t q_tmp_2_8 = tmp_qloop_29*tmp_qloop_45; + const real_t q_tmp_2_9 = tmp_qloop_31*tmp_qloop_45; + const real_t q_tmp_2_10 = tmp_qloop_12*tmp_qloop_49; + const real_t q_tmp_2_11 = tmp_qloop_24*tmp_qloop_49; + const real_t q_tmp_2_12 = tmp_qloop_25*tmp_qloop_49; + const real_t q_tmp_2_13 = tmp_qloop_26*tmp_qloop_49; + const real_t q_tmp_2_14 = tmp_qloop_28*tmp_qloop_47; + const real_t q_tmp_2_15 = tmp_qloop_48; + const real_t q_tmp_2_16 = tmp_qloop_35*tmp_qloop_56; + const real_t q_tmp_2_17 = tmp_qloop_27*tmp_qloop_49; + const real_t q_tmp_2_18 = tmp_qloop_29*tmp_qloop_49; + const real_t q_tmp_2_19 = tmp_qloop_31*tmp_qloop_49; + const real_t q_tmp_2_20 = tmp_qloop_12*tmp_qloop_53; + const real_t q_tmp_2_21 = tmp_qloop_24*tmp_qloop_53; + const real_t q_tmp_2_22 = tmp_qloop_25*tmp_qloop_53; + const real_t q_tmp_2_23 = tmp_qloop_26*tmp_qloop_53; + const real_t q_tmp_2_24 = tmp_qloop_28*tmp_qloop_51; + const real_t q_tmp_2_25 = tmp_qloop_52; + const real_t q_tmp_2_26 = tmp_qloop_37*tmp_qloop_56; + const real_t q_tmp_2_27 = tmp_qloop_27*tmp_qloop_53; + const real_t q_tmp_2_28 = tmp_qloop_29*tmp_qloop_53; + const real_t q_tmp_2_29 = tmp_qloop_31*tmp_qloop_53; + const real_t q_tmp_3_0 = tmp_qloop_12*tmp_qloop_42; + const real_t q_tmp_3_1 = tmp_qloop_24*tmp_qloop_42; + const real_t q_tmp_3_2 = tmp_qloop_25*tmp_qloop_42; + const real_t q_tmp_3_3 = tmp_qloop_26*tmp_qloop_42; + const real_t q_tmp_3_4 = tmp_qloop_57*tmp_qloop_6; + const real_t q_tmp_3_5 = tmp_qloop_57*tmp_qloop_8; + const real_t q_tmp_3_6 = tmp_qloop_43; + const real_t q_tmp_3_7 = tmp_qloop_27*tmp_qloop_42; + const real_t q_tmp_3_8 = tmp_qloop_29*tmp_qloop_42; + const real_t q_tmp_3_9 = tmp_qloop_31*tmp_qloop_42; + const real_t q_tmp_3_10 = tmp_qloop_12*tmp_qloop_47; + const real_t q_tmp_3_11 = tmp_qloop_24*tmp_qloop_47; + const real_t q_tmp_3_12 = tmp_qloop_25*tmp_qloop_47; + const real_t q_tmp_3_13 = tmp_qloop_26*tmp_qloop_47; + const real_t q_tmp_3_14 = tmp_qloop_59*tmp_qloop_6; + const real_t q_tmp_3_15 = tmp_qloop_59*tmp_qloop_8; + const real_t q_tmp_3_16 = tmp_qloop_48; + const real_t q_tmp_3_17 = tmp_qloop_27*tmp_qloop_47; + const real_t q_tmp_3_18 = tmp_qloop_29*tmp_qloop_47; + const real_t q_tmp_3_19 = tmp_qloop_31*tmp_qloop_47; + const real_t q_tmp_3_20 = tmp_qloop_12*tmp_qloop_51; + const real_t q_tmp_3_21 = tmp_qloop_24*tmp_qloop_51; + const real_t q_tmp_3_22 = tmp_qloop_25*tmp_qloop_51; + const real_t q_tmp_3_23 = tmp_qloop_26*tmp_qloop_51; + const real_t q_tmp_3_24 = tmp_qloop_6*tmp_qloop_60; + const real_t q_tmp_3_25 = tmp_qloop_60*tmp_qloop_8; + const real_t q_tmp_3_26 = tmp_qloop_52; + const real_t q_tmp_3_27 = tmp_qloop_27*tmp_qloop_51; + const real_t q_tmp_3_28 = tmp_qloop_29*tmp_qloop_51; + const real_t q_tmp_3_29 = tmp_qloop_31*tmp_qloop_51; + q_acc_0_0 = q_acc_0_0 + q_tmp_0_0; + q_acc_0_1 = q_acc_0_1 + q_tmp_0_1; + q_acc_0_2 = q_acc_0_2 + q_tmp_0_2; + q_acc_0_3 = q_acc_0_3 + q_tmp_0_3; + q_acc_0_4 = q_acc_0_4 + q_tmp_0_4; + q_acc_0_5 = q_acc_0_5 + q_tmp_0_5; + q_acc_0_6 = q_acc_0_6 + q_tmp_0_6; + q_acc_0_7 = q_acc_0_7 + q_tmp_0_7; + q_acc_0_8 = q_acc_0_8 + q_tmp_0_8; + q_acc_0_9 = q_acc_0_9 + q_tmp_0_9; + q_acc_0_10 = q_acc_0_10 + q_tmp_0_10; + q_acc_0_11 = q_acc_0_11 + q_tmp_0_11; + q_acc_0_12 = q_acc_0_12 + q_tmp_0_12; + q_acc_0_13 = q_acc_0_13 + q_tmp_0_13; + q_acc_0_14 = q_acc_0_14 + q_tmp_0_14; + q_acc_0_15 = q_acc_0_15 + q_tmp_0_15; + q_acc_0_16 = q_acc_0_16 + q_tmp_0_16; + q_acc_0_17 = q_acc_0_17 + q_tmp_0_17; + q_acc_0_18 = q_acc_0_18 + q_tmp_0_18; + q_acc_0_19 = q_acc_0_19 + q_tmp_0_19; + q_acc_0_20 = q_acc_0_20 + q_tmp_0_20; + q_acc_0_21 = q_acc_0_21 + q_tmp_0_21; + q_acc_0_22 = q_acc_0_22 + q_tmp_0_22; + q_acc_0_23 = q_acc_0_23 + q_tmp_0_23; + q_acc_0_24 = q_acc_0_24 + q_tmp_0_24; + q_acc_0_25 = q_acc_0_25 + q_tmp_0_25; + q_acc_0_26 = q_acc_0_26 + q_tmp_0_26; + q_acc_0_27 = q_acc_0_27 + q_tmp_0_27; + q_acc_0_28 = q_acc_0_28 + q_tmp_0_28; + q_acc_0_29 = q_acc_0_29 + q_tmp_0_29; + q_acc_1_0 = q_acc_1_0 + q_tmp_1_0; + q_acc_1_1 = q_acc_1_1 + q_tmp_1_1; + q_acc_1_2 = q_acc_1_2 + q_tmp_1_2; + q_acc_1_3 = q_acc_1_3 + q_tmp_1_3; + q_acc_1_4 = q_acc_1_4 + q_tmp_1_4; + q_acc_1_5 = q_acc_1_5 + q_tmp_1_5; + q_acc_1_6 = q_acc_1_6 + q_tmp_1_6; + q_acc_1_7 = q_acc_1_7 + q_tmp_1_7; + q_acc_1_8 = q_acc_1_8 + q_tmp_1_8; + q_acc_1_9 = q_acc_1_9 + q_tmp_1_9; + q_acc_1_10 = q_acc_1_10 + q_tmp_1_10; + q_acc_1_11 = q_acc_1_11 + q_tmp_1_11; + q_acc_1_12 = q_acc_1_12 + q_tmp_1_12; + q_acc_1_13 = q_acc_1_13 + q_tmp_1_13; + q_acc_1_14 = q_acc_1_14 + q_tmp_1_14; + q_acc_1_15 = q_acc_1_15 + q_tmp_1_15; + q_acc_1_16 = q_acc_1_16 + q_tmp_1_16; + q_acc_1_17 = q_acc_1_17 + q_tmp_1_17; + q_acc_1_18 = q_acc_1_18 + q_tmp_1_18; + q_acc_1_19 = q_acc_1_19 + q_tmp_1_19; + q_acc_1_20 = q_acc_1_20 + q_tmp_1_20; + q_acc_1_21 = q_acc_1_21 + q_tmp_1_21; + q_acc_1_22 = q_acc_1_22 + q_tmp_1_22; + q_acc_1_23 = q_acc_1_23 + q_tmp_1_23; + q_acc_1_24 = q_acc_1_24 + q_tmp_1_24; + q_acc_1_25 = q_acc_1_25 + q_tmp_1_25; + q_acc_1_26 = q_acc_1_26 + q_tmp_1_26; + q_acc_1_27 = q_acc_1_27 + q_tmp_1_27; + q_acc_1_28 = q_acc_1_28 + q_tmp_1_28; + q_acc_1_29 = q_acc_1_29 + q_tmp_1_29; + q_acc_2_0 = q_acc_2_0 + q_tmp_2_0; + q_acc_2_1 = q_acc_2_1 + q_tmp_2_1; + q_acc_2_2 = q_acc_2_2 + q_tmp_2_2; + q_acc_2_3 = q_acc_2_3 + q_tmp_2_3; + q_acc_2_4 = q_acc_2_4 + q_tmp_2_4; + q_acc_2_5 = q_acc_2_5 + q_tmp_2_5; + q_acc_2_6 = q_acc_2_6 + q_tmp_2_6; + q_acc_2_7 = q_acc_2_7 + q_tmp_2_7; + q_acc_2_8 = q_acc_2_8 + q_tmp_2_8; + q_acc_2_9 = q_acc_2_9 + q_tmp_2_9; + q_acc_2_10 = q_acc_2_10 + q_tmp_2_10; + q_acc_2_11 = q_acc_2_11 + q_tmp_2_11; + q_acc_2_12 = q_acc_2_12 + q_tmp_2_12; + q_acc_2_13 = q_acc_2_13 + q_tmp_2_13; + q_acc_2_14 = q_acc_2_14 + q_tmp_2_14; + q_acc_2_15 = q_acc_2_15 + q_tmp_2_15; + q_acc_2_16 = q_acc_2_16 + q_tmp_2_16; + q_acc_2_17 = q_acc_2_17 + q_tmp_2_17; + q_acc_2_18 = q_acc_2_18 + q_tmp_2_18; + q_acc_2_19 = q_acc_2_19 + q_tmp_2_19; + q_acc_2_20 = q_acc_2_20 + q_tmp_2_20; + q_acc_2_21 = q_acc_2_21 + q_tmp_2_21; + q_acc_2_22 = q_acc_2_22 + q_tmp_2_22; + q_acc_2_23 = q_acc_2_23 + q_tmp_2_23; + q_acc_2_24 = q_acc_2_24 + q_tmp_2_24; + q_acc_2_25 = q_acc_2_25 + q_tmp_2_25; + q_acc_2_26 = q_acc_2_26 + q_tmp_2_26; + q_acc_2_27 = q_acc_2_27 + q_tmp_2_27; + q_acc_2_28 = q_acc_2_28 + q_tmp_2_28; + q_acc_2_29 = q_acc_2_29 + q_tmp_2_29; + q_acc_3_0 = q_acc_3_0 + q_tmp_3_0; + q_acc_3_1 = q_acc_3_1 + q_tmp_3_1; + q_acc_3_2 = q_acc_3_2 + q_tmp_3_2; + q_acc_3_3 = q_acc_3_3 + q_tmp_3_3; + q_acc_3_4 = q_acc_3_4 + q_tmp_3_4; + q_acc_3_5 = q_acc_3_5 + q_tmp_3_5; + q_acc_3_6 = q_acc_3_6 + q_tmp_3_6; + q_acc_3_7 = q_acc_3_7 + q_tmp_3_7; + q_acc_3_8 = q_acc_3_8 + q_tmp_3_8; + q_acc_3_9 = q_acc_3_9 + q_tmp_3_9; + q_acc_3_10 = q_acc_3_10 + q_tmp_3_10; + q_acc_3_11 = q_acc_3_11 + q_tmp_3_11; + q_acc_3_12 = q_acc_3_12 + q_tmp_3_12; + q_acc_3_13 = q_acc_3_13 + q_tmp_3_13; + q_acc_3_14 = q_acc_3_14 + q_tmp_3_14; + q_acc_3_15 = q_acc_3_15 + q_tmp_3_15; + q_acc_3_16 = q_acc_3_16 + q_tmp_3_16; + q_acc_3_17 = q_acc_3_17 + q_tmp_3_17; + q_acc_3_18 = q_acc_3_18 + q_tmp_3_18; + q_acc_3_19 = q_acc_3_19 + q_tmp_3_19; + q_acc_3_20 = q_acc_3_20 + q_tmp_3_20; + q_acc_3_21 = q_acc_3_21 + q_tmp_3_21; + q_acc_3_22 = q_acc_3_22 + q_tmp_3_22; + q_acc_3_23 = q_acc_3_23 + q_tmp_3_23; + q_acc_3_24 = q_acc_3_24 + q_tmp_3_24; + q_acc_3_25 = q_acc_3_25 + q_tmp_3_25; + q_acc_3_26 = q_acc_3_26 + q_tmp_3_26; + q_acc_3_27 = q_acc_3_27 + q_tmp_3_27; + q_acc_3_28 = q_acc_3_28 + q_tmp_3_28; + q_acc_3_29 = q_acc_3_29 + q_tmp_3_29; + } + const real_t elMatVec_0 = q_acc_0_0*src_dof_0 + q_acc_0_1*src_dof_1 + q_acc_0_10*src_dof_10 + q_acc_0_11*src_dof_11 + q_acc_0_12*src_dof_12 + q_acc_0_13*src_dof_13 + q_acc_0_14*src_dof_14 + q_acc_0_15*src_dof_15 + q_acc_0_16*src_dof_16 + q_acc_0_17*src_dof_17 + q_acc_0_18*src_dof_18 + q_acc_0_19*src_dof_19 + q_acc_0_2*src_dof_2 + q_acc_0_20*src_dof_20 + q_acc_0_21*src_dof_21 + q_acc_0_22*src_dof_22 + q_acc_0_23*src_dof_23 + q_acc_0_24*src_dof_24 + q_acc_0_25*src_dof_25 + q_acc_0_26*src_dof_26 + q_acc_0_27*src_dof_27 + q_acc_0_28*src_dof_28 + q_acc_0_29*src_dof_29 + q_acc_0_3*src_dof_3 + q_acc_0_4*src_dof_4 + q_acc_0_5*src_dof_5 + q_acc_0_6*src_dof_6 + q_acc_0_7*src_dof_7 + q_acc_0_8*src_dof_8 + q_acc_0_9*src_dof_9; + const real_t elMatVec_1 = q_acc_1_0*src_dof_0 + q_acc_1_1*src_dof_1 + q_acc_1_10*src_dof_10 + q_acc_1_11*src_dof_11 + q_acc_1_12*src_dof_12 + q_acc_1_13*src_dof_13 + q_acc_1_14*src_dof_14 + q_acc_1_15*src_dof_15 + q_acc_1_16*src_dof_16 + q_acc_1_17*src_dof_17 + q_acc_1_18*src_dof_18 + q_acc_1_19*src_dof_19 + q_acc_1_2*src_dof_2 + q_acc_1_20*src_dof_20 + q_acc_1_21*src_dof_21 + q_acc_1_22*src_dof_22 + q_acc_1_23*src_dof_23 + q_acc_1_24*src_dof_24 + q_acc_1_25*src_dof_25 + q_acc_1_26*src_dof_26 + q_acc_1_27*src_dof_27 + q_acc_1_28*src_dof_28 + q_acc_1_29*src_dof_29 + q_acc_1_3*src_dof_3 + q_acc_1_4*src_dof_4 + q_acc_1_5*src_dof_5 + q_acc_1_6*src_dof_6 + q_acc_1_7*src_dof_7 + q_acc_1_8*src_dof_8 + q_acc_1_9*src_dof_9; + const real_t elMatVec_2 = q_acc_2_0*src_dof_0 + q_acc_2_1*src_dof_1 + q_acc_2_10*src_dof_10 + q_acc_2_11*src_dof_11 + q_acc_2_12*src_dof_12 + q_acc_2_13*src_dof_13 + q_acc_2_14*src_dof_14 + q_acc_2_15*src_dof_15 + q_acc_2_16*src_dof_16 + q_acc_2_17*src_dof_17 + q_acc_2_18*src_dof_18 + q_acc_2_19*src_dof_19 + q_acc_2_2*src_dof_2 + q_acc_2_20*src_dof_20 + q_acc_2_21*src_dof_21 + q_acc_2_22*src_dof_22 + q_acc_2_23*src_dof_23 + q_acc_2_24*src_dof_24 + q_acc_2_25*src_dof_25 + q_acc_2_26*src_dof_26 + q_acc_2_27*src_dof_27 + q_acc_2_28*src_dof_28 + q_acc_2_29*src_dof_29 + q_acc_2_3*src_dof_3 + q_acc_2_4*src_dof_4 + q_acc_2_5*src_dof_5 + q_acc_2_6*src_dof_6 + q_acc_2_7*src_dof_7 + q_acc_2_8*src_dof_8 + q_acc_2_9*src_dof_9; + const real_t elMatVec_3 = q_acc_3_0*src_dof_0 + q_acc_3_1*src_dof_1 + q_acc_3_10*src_dof_10 + q_acc_3_11*src_dof_11 + q_acc_3_12*src_dof_12 + q_acc_3_13*src_dof_13 + q_acc_3_14*src_dof_14 + q_acc_3_15*src_dof_15 + q_acc_3_16*src_dof_16 + q_acc_3_17*src_dof_17 + q_acc_3_18*src_dof_18 + q_acc_3_19*src_dof_19 + q_acc_3_2*src_dof_2 + q_acc_3_20*src_dof_20 + q_acc_3_21*src_dof_21 + q_acc_3_22*src_dof_22 + q_acc_3_23*src_dof_23 + q_acc_3_24*src_dof_24 + q_acc_3_25*src_dof_25 + q_acc_3_26*src_dof_26 + q_acc_3_27*src_dof_27 + q_acc_3_28*src_dof_28 + q_acc_3_29*src_dof_29 + q_acc_3_3*src_dof_3 + q_acc_3_4*src_dof_4 + q_acc_3_5*src_dof_5 + q_acc_3_6*src_dof_6 + q_acc_3_7*src_dof_7 + q_acc_3_8*src_dof_8 + q_acc_3_9*src_dof_9; + _data_dst[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1] = elMatVec_0 + _data_dst[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1]; + _data_dst[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1] = elMatVec_1 + _data_dst[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1]; + _data_dst[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))] = elMatVec_2 + _data_dst[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))]; + _data_dst[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1] = elMatVec_3 + _data_dst[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1]; + } + } + } + const real_t tmp_coords_jac_0_BLUE_UP = 1.0 / (micro_edges_per_macro_edge_float)*1.0; + const real_t tmp_coords_jac_1_BLUE_UP = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_BLUE_UP*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0); + const real_t tmp_coords_jac_2_BLUE_UP = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_BLUE_UP*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1); + const real_t tmp_coords_jac_3_BLUE_UP = macro_vertex_coord_id_0comp2 + tmp_coords_jac_0_BLUE_UP*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2); + const real_t tmp_coords_jac_4_BLUE_UP = tmp_coords_jac_0_BLUE_UP*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0); + const real_t tmp_coords_jac_5_BLUE_UP = tmp_coords_jac_0_BLUE_UP*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1); + const real_t tmp_coords_jac_6_BLUE_UP = tmp_coords_jac_0_BLUE_UP*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2); + const real_t p_affine_const_0_0_BLUE_UP = tmp_coords_jac_1_BLUE_UP; + const real_t p_affine_const_0_1_BLUE_UP = tmp_coords_jac_2_BLUE_UP; + const real_t p_affine_const_0_2_BLUE_UP = tmp_coords_jac_3_BLUE_UP; + const real_t p_affine_const_1_0_BLUE_UP = macro_vertex_coord_id_0comp0 + tmp_coords_jac_4_BLUE_UP; + const real_t p_affine_const_1_1_BLUE_UP = macro_vertex_coord_id_0comp1 + tmp_coords_jac_5_BLUE_UP; + const real_t p_affine_const_1_2_BLUE_UP = macro_vertex_coord_id_0comp2 + tmp_coords_jac_6_BLUE_UP; + const real_t p_affine_const_2_0_BLUE_UP = tmp_coords_jac_1_BLUE_UP + tmp_coords_jac_4_BLUE_UP; + const real_t p_affine_const_2_1_BLUE_UP = tmp_coords_jac_2_BLUE_UP + tmp_coords_jac_5_BLUE_UP; + const real_t p_affine_const_2_2_BLUE_UP = tmp_coords_jac_3_BLUE_UP + tmp_coords_jac_6_BLUE_UP; + const real_t p_affine_const_3_0_BLUE_UP = tmp_coords_jac_0_BLUE_UP*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0) + tmp_coords_jac_1_BLUE_UP; + const real_t p_affine_const_3_1_BLUE_UP = tmp_coords_jac_0_BLUE_UP*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1) + tmp_coords_jac_2_BLUE_UP; + const real_t p_affine_const_3_2_BLUE_UP = tmp_coords_jac_0_BLUE_UP*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2) + tmp_coords_jac_3_BLUE_UP; + const real_t jac_affine_0_0_BLUE_UP = -p_affine_const_0_0_BLUE_UP + p_affine_const_1_0_BLUE_UP; + const real_t jac_affine_0_1_BLUE_UP = -p_affine_const_0_0_BLUE_UP + p_affine_const_2_0_BLUE_UP; + const real_t jac_affine_0_2_BLUE_UP = -p_affine_const_0_0_BLUE_UP + p_affine_const_3_0_BLUE_UP; + const real_t jac_affine_1_0_BLUE_UP = -p_affine_const_0_1_BLUE_UP + p_affine_const_1_1_BLUE_UP; + const real_t jac_affine_1_1_BLUE_UP = -p_affine_const_0_1_BLUE_UP + p_affine_const_2_1_BLUE_UP; + const real_t tmp_coords_jac_11_BLUE_UP = jac_affine_0_2_BLUE_UP*jac_affine_1_1_BLUE_UP; + const real_t jac_affine_1_2_BLUE_UP = -p_affine_const_0_1_BLUE_UP + p_affine_const_3_1_BLUE_UP; + const real_t tmp_coords_jac_9_BLUE_UP = jac_affine_0_1_BLUE_UP*jac_affine_1_2_BLUE_UP; + const real_t jac_affine_2_0_BLUE_UP = -p_affine_const_0_2_BLUE_UP + p_affine_const_1_2_BLUE_UP; + const real_t jac_affine_2_1_BLUE_UP = -p_affine_const_0_2_BLUE_UP + p_affine_const_2_2_BLUE_UP; + const real_t tmp_coords_jac_8_BLUE_UP = jac_affine_1_2_BLUE_UP*jac_affine_2_1_BLUE_UP; + const real_t jac_affine_2_2_BLUE_UP = -p_affine_const_0_2_BLUE_UP + p_affine_const_3_2_BLUE_UP; + const real_t tmp_coords_jac_7_BLUE_UP = jac_affine_1_1_BLUE_UP*jac_affine_2_2_BLUE_UP; + const real_t tmp_coords_jac_10_BLUE_UP = jac_affine_0_1_BLUE_UP*jac_affine_2_2_BLUE_UP; + const real_t tmp_coords_jac_12_BLUE_UP = jac_affine_0_0_BLUE_UP*tmp_coords_jac_7_BLUE_UP - jac_affine_0_0_BLUE_UP*tmp_coords_jac_8_BLUE_UP + jac_affine_0_2_BLUE_UP*jac_affine_1_0_BLUE_UP*jac_affine_2_1_BLUE_UP - jac_affine_1_0_BLUE_UP*tmp_coords_jac_10_BLUE_UP - jac_affine_2_0_BLUE_UP*tmp_coords_jac_11_BLUE_UP + jac_affine_2_0_BLUE_UP*tmp_coords_jac_9_BLUE_UP; + const real_t tmp_coords_jac_13_BLUE_UP = 1.0 / (tmp_coords_jac_12_BLUE_UP); + const real_t jac_affine_inv_0_0_BLUE_UP = tmp_coords_jac_13_BLUE_UP*(tmp_coords_jac_7_BLUE_UP - tmp_coords_jac_8_BLUE_UP); + const real_t jac_affine_inv_0_1_BLUE_UP = tmp_coords_jac_13_BLUE_UP*(jac_affine_0_2_BLUE_UP*jac_affine_2_1_BLUE_UP - tmp_coords_jac_10_BLUE_UP); + const real_t jac_affine_inv_0_2_BLUE_UP = tmp_coords_jac_13_BLUE_UP*(-tmp_coords_jac_11_BLUE_UP + tmp_coords_jac_9_BLUE_UP); + const real_t jac_affine_inv_1_0_BLUE_UP = tmp_coords_jac_13_BLUE_UP*(-jac_affine_1_0_BLUE_UP*jac_affine_2_2_BLUE_UP + jac_affine_1_2_BLUE_UP*jac_affine_2_0_BLUE_UP); + const real_t jac_affine_inv_1_1_BLUE_UP = tmp_coords_jac_13_BLUE_UP*(jac_affine_0_0_BLUE_UP*jac_affine_2_2_BLUE_UP - jac_affine_0_2_BLUE_UP*jac_affine_2_0_BLUE_UP); + const real_t jac_affine_inv_1_2_BLUE_UP = tmp_coords_jac_13_BLUE_UP*(-jac_affine_0_0_BLUE_UP*jac_affine_1_2_BLUE_UP + jac_affine_0_2_BLUE_UP*jac_affine_1_0_BLUE_UP); + const real_t jac_affine_inv_2_0_BLUE_UP = tmp_coords_jac_13_BLUE_UP*(jac_affine_1_0_BLUE_UP*jac_affine_2_1_BLUE_UP - jac_affine_1_1_BLUE_UP*jac_affine_2_0_BLUE_UP); + const real_t jac_affine_inv_2_1_BLUE_UP = tmp_coords_jac_13_BLUE_UP*(-jac_affine_0_0_BLUE_UP*jac_affine_2_1_BLUE_UP + jac_affine_0_1_BLUE_UP*jac_affine_2_0_BLUE_UP); + const real_t jac_affine_inv_2_2_BLUE_UP = tmp_coords_jac_13_BLUE_UP*(jac_affine_0_0_BLUE_UP*jac_affine_1_1_BLUE_UP - jac_affine_0_1_BLUE_UP*jac_affine_1_0_BLUE_UP); + const real_t abs_det_jac_affine_BLUE_UP = abs(tmp_coords_jac_12_BLUE_UP); + { + /* CellType.BLUE_UP */ + for (int64_t ctr_2 = 0; ctr_2 < micro_edges_per_macro_edge; ctr_2 += 1) + for (int64_t ctr_1 = 0; ctr_1 < -ctr_2 + micro_edges_per_macro_edge; ctr_1 += 1) + { + for (int64_t ctr_0 = 0; ctr_0 < (int64_t)((-ctr_1 - ctr_2 + micro_edges_per_macro_edge - 1) / (4)) * (4); ctr_0 += 4) + { + + const int64_t phantom_ctr_0 = ctr_0; + real_t _data_float_loop_ctr_array_dim_0[4]; + _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0; + _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1; + _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2; + _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3; + real_t _data_float_loop_ctr_array_dim_1[4]; + _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1; + real_t _data_float_loop_ctr_array_dim_2[4]; + _data_float_loop_ctr_array_dim_2[0] = (real_t) ctr_2; + _data_float_loop_ctr_array_dim_2[1] = (real_t) ctr_2; + _data_float_loop_ctr_array_dim_2[2] = (real_t) ctr_2; + _data_float_loop_ctr_array_dim_2[3] = (real_t) ctr_2; + + const __m256d p_affine_0_0 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_3comp0,macro_vertex_coord_id_3comp0,macro_vertex_coord_id_3comp0,macro_vertex_coord_id_3comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)); + const __m256d p_affine_0_1 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_3comp1,macro_vertex_coord_id_3comp1,macro_vertex_coord_id_3comp1,macro_vertex_coord_id_3comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)); + const __m256d p_affine_0_2 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)),_mm256_set_pd(macro_vertex_coord_id_1comp2,macro_vertex_coord_id_1comp2,macro_vertex_coord_id_1comp2,macro_vertex_coord_id_1comp2)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)),_mm256_set_pd(macro_vertex_coord_id_2comp2,macro_vertex_coord_id_2comp2,macro_vertex_coord_id_2comp2,macro_vertex_coord_id_2comp2)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)),_mm256_set_pd(macro_vertex_coord_id_3comp2,macro_vertex_coord_id_3comp2,macro_vertex_coord_id_3comp2,macro_vertex_coord_id_3comp2)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)); + const __m256d p_affine_1_0 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_3comp0,macro_vertex_coord_id_3comp0,macro_vertex_coord_id_3comp0,macro_vertex_coord_id_3comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)); + const __m256d p_affine_1_1 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_3comp1,macro_vertex_coord_id_3comp1,macro_vertex_coord_id_3comp1,macro_vertex_coord_id_3comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)); + const __m256d p_affine_1_2 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)),_mm256_set_pd(macro_vertex_coord_id_1comp2,macro_vertex_coord_id_1comp2,macro_vertex_coord_id_1comp2,macro_vertex_coord_id_1comp2)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)),_mm256_set_pd(macro_vertex_coord_id_2comp2,macro_vertex_coord_id_2comp2,macro_vertex_coord_id_2comp2,macro_vertex_coord_id_2comp2)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)),_mm256_set_pd(macro_vertex_coord_id_3comp2,macro_vertex_coord_id_3comp2,macro_vertex_coord_id_3comp2,macro_vertex_coord_id_3comp2)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)); + const __m256d p_affine_2_0 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_3comp0,macro_vertex_coord_id_3comp0,macro_vertex_coord_id_3comp0,macro_vertex_coord_id_3comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)); + const __m256d p_affine_2_1 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_3comp1,macro_vertex_coord_id_3comp1,macro_vertex_coord_id_3comp1,macro_vertex_coord_id_3comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)); + const __m256d p_affine_2_2 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)),_mm256_set_pd(macro_vertex_coord_id_1comp2,macro_vertex_coord_id_1comp2,macro_vertex_coord_id_1comp2,macro_vertex_coord_id_1comp2)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)),_mm256_set_pd(macro_vertex_coord_id_2comp2,macro_vertex_coord_id_2comp2,macro_vertex_coord_id_2comp2,macro_vertex_coord_id_2comp2)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)),_mm256_set_pd(macro_vertex_coord_id_3comp2,macro_vertex_coord_id_3comp2,macro_vertex_coord_id_3comp2,macro_vertex_coord_id_3comp2)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)); + const __m256d p_affine_3_0 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_3comp0,macro_vertex_coord_id_3comp0,macro_vertex_coord_id_3comp0,macro_vertex_coord_id_3comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)); + const __m256d p_affine_3_1 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_3comp1,macro_vertex_coord_id_3comp1,macro_vertex_coord_id_3comp1,macro_vertex_coord_id_3comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)); + const __m256d p_affine_3_2 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)),_mm256_set_pd(macro_vertex_coord_id_1comp2,macro_vertex_coord_id_1comp2,macro_vertex_coord_id_1comp2,macro_vertex_coord_id_1comp2)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)),_mm256_set_pd(macro_vertex_coord_id_2comp2,macro_vertex_coord_id_2comp2,macro_vertex_coord_id_2comp2,macro_vertex_coord_id_2comp2)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)),_mm256_set_pd(macro_vertex_coord_id_3comp2,macro_vertex_coord_id_3comp2,macro_vertex_coord_id_3comp2,macro_vertex_coord_id_3comp2)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)); + const __m256d src_dof_0 = _mm256_loadu_pd(& _data_src_vertex_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1]); + const __m256d src_dof_1 = _mm256_loadu_pd(& _data_src_vertex_0[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))]); + const __m256d src_dof_2 = _mm256_loadu_pd(& _data_src_vertex_0[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1]); + const __m256d src_dof_3 = _mm256_loadu_pd(& _data_src_vertex_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1]); + const __m256d src_dof_4 = _mm256_loadu_pd(& _data_src_edge_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + 1]); + const __m256d src_dof_5 = _mm256_loadu_pd(& _data_src_edge_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge - 1)*(micro_edges_per_macro_edge + 1)) / (6)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))]); + const __m256d src_dof_6 = _mm256_loadu_pd(& _data_src_edge_0[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]); + const __m256d src_dof_7 = _mm256_loadu_pd(& _data_src_edge_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 3*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + 1]); + const __m256d src_dof_8 = _mm256_loadu_pd(& _data_src_edge_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + 1]); + const __m256d src_dof_9 = _mm256_loadu_pd(& _data_src_edge_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 4*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]); + const __m256d src_dof_10 = _mm256_loadu_pd(& _data_src_vertex_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1]); + const __m256d src_dof_11 = _mm256_loadu_pd(& _data_src_vertex_1[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))]); + const __m256d src_dof_12 = _mm256_loadu_pd(& _data_src_vertex_1[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1]); + const __m256d src_dof_13 = _mm256_loadu_pd(& _data_src_vertex_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1]); + const __m256d src_dof_14 = _mm256_loadu_pd(& _data_src_edge_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + 1]); + const __m256d src_dof_15 = _mm256_loadu_pd(& _data_src_edge_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge - 1)*(micro_edges_per_macro_edge + 1)) / (6)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))]); + const __m256d src_dof_16 = _mm256_loadu_pd(& _data_src_edge_1[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]); + const __m256d src_dof_17 = _mm256_loadu_pd(& _data_src_edge_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 3*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + 1]); + const __m256d src_dof_18 = _mm256_loadu_pd(& _data_src_edge_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + 1]); + const __m256d src_dof_19 = _mm256_loadu_pd(& _data_src_edge_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 4*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]); + const __m256d src_dof_20 = _mm256_loadu_pd(& _data_src_vertex_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1]); + const __m256d src_dof_21 = _mm256_loadu_pd(& _data_src_vertex_2[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))]); + const __m256d src_dof_22 = _mm256_loadu_pd(& _data_src_vertex_2[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1]); + const __m256d src_dof_23 = _mm256_loadu_pd(& _data_src_vertex_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1]); + const __m256d src_dof_24 = _mm256_loadu_pd(& _data_src_edge_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + 1]); + const __m256d src_dof_25 = _mm256_loadu_pd(& _data_src_edge_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge - 1)*(micro_edges_per_macro_edge + 1)) / (6)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))]); + const __m256d src_dof_26 = _mm256_loadu_pd(& _data_src_edge_2[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]); + const __m256d src_dof_27 = _mm256_loadu_pd(& _data_src_edge_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 3*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + 1]); + const __m256d src_dof_28 = _mm256_loadu_pd(& _data_src_edge_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + 1]); + const __m256d src_dof_29 = _mm256_loadu_pd(& _data_src_edge_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 4*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]); + const __m256d rho_dof_0 = _mm256_loadu_pd(& _data_rhoVertex[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1]); + const __m256d rho_dof_1 = _mm256_loadu_pd(& _data_rhoVertex[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))]); + const __m256d rho_dof_2 = _mm256_loadu_pd(& _data_rhoVertex[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1]); + const __m256d rho_dof_3 = _mm256_loadu_pd(& _data_rhoVertex[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1]); + const __m256d rho_dof_4 = _mm256_loadu_pd(& _data_rhoEdge[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + 1]); + const __m256d rho_dof_5 = _mm256_loadu_pd(& _data_rhoEdge[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge - 1)*(micro_edges_per_macro_edge + 1)) / (6)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))]); + const __m256d rho_dof_6 = _mm256_loadu_pd(& _data_rhoEdge[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]); + const __m256d rho_dof_7 = _mm256_loadu_pd(& _data_rhoEdge[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 3*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + 1]); + const __m256d rho_dof_8 = _mm256_loadu_pd(& _data_rhoEdge[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + 1]); + const __m256d rho_dof_9 = _mm256_loadu_pd(& _data_rhoEdge[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 4*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]); + __m256d q_acc_0_0 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_1 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_2 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_3 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_4 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_5 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_6 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_7 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_8 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_9 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_10 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_11 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_12 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_13 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_14 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_15 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_16 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_17 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_18 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_19 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_20 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_21 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_22 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_23 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_24 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_25 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_26 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_27 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_28 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_29 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_0 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_1 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_2 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_3 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_4 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_5 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_6 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_7 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_8 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_9 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_10 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_11 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_12 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_13 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_14 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_15 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_16 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_17 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_18 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_19 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_20 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_21 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_22 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_23 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_24 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_25 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_26 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_27 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_28 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_29 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_0 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_1 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_2 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_3 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_4 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_5 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_6 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_7 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_8 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_9 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_10 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_11 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_12 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_13 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_14 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_15 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_16 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_17 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_18 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_19 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_20 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_21 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_22 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_23 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_24 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_25 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_26 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_27 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_28 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_29 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_0 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_1 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_2 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_3 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_4 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_5 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_6 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_7 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_8 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_9 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_10 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_11 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_12 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_13 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_14 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_15 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_16 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_17 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_18 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_19 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_20 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_21 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_22 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_23 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_24 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_25 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_26 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_27 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_28 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_29 = _mm256_set_pd(0.0,0.0,0.0,0.0); + for (int64_t q = 0; q < 5; q += 1) + { + const __m256d tmp_qloop_0 = _mm256_mul_pd(_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q]),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])); + const __m256d tmp_qloop_1 = _mm256_mul_pd(tmp_qloop_0,_mm256_set_pd(2.0,2.0,2.0,2.0)); + const __m256d tmp_qloop_2 = _mm256_mul_pd(_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q])); + const __m256d tmp_qloop_3 = _mm256_mul_pd(tmp_qloop_2,_mm256_set_pd(2.0,2.0,2.0,2.0)); + const __m256d tmp_qloop_4 = _mm256_mul_pd(_mm256_set_pd(_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q]),_mm256_set_pd(_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q])); + const __m256d tmp_qloop_5 = _mm256_mul_pd(tmp_qloop_4,_mm256_set_pd(2.0,2.0,2.0,2.0)); + const __m256d tmp_qloop_6 = _mm256_mul_pd(_mm256_set_pd(4.0,4.0,4.0,4.0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q])); + const __m256d tmp_qloop_7 = _mm256_mul_pd(tmp_qloop_6,_mm256_set_pd(_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q])); + const __m256d tmp_qloop_8 = _mm256_mul_pd(_mm256_set_pd(4.0,4.0,4.0,4.0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])); + const __m256d tmp_qloop_9 = _mm256_mul_pd(tmp_qloop_8,_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q])); + const __m256d tmp_qloop_10 = _mm256_mul_pd(tmp_qloop_8,_mm256_set_pd(_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q])); + const __m256d tmp_qloop_11 = _mm256_add_pd(tmp_qloop_10,tmp_qloop_9); + const __m256d tmp_qloop_12 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-3.0,-3.0,-3.0,-3.0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),_mm256_mul_pd(_mm256_set_pd(-3.0,-3.0,-3.0,-3.0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]))),_mm256_mul_pd(_mm256_set_pd(-3.0,-3.0,-3.0,-3.0),_mm256_set_pd(_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),tmp_qloop_1),tmp_qloop_11),tmp_qloop_3),tmp_qloop_5),tmp_qloop_7); + const __m256d tmp_qloop_13 = _mm256_mul_pd(_mm256_set_pd(4.0,4.0,4.0,4.0),_mm256_set_pd(_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q])); + const __m256d tmp_qloop_14 = _mm256_add_pd(tmp_qloop_13,tmp_qloop_6); + const __m256d tmp_qloop_15 = _mm256_mul_pd(_mm256_mul_pd(rho_dof_8,tmp_qloop_6),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)); + const __m256d tmp_qloop_16 = _mm256_mul_pd(rho_dof_0,_mm256_add_pd(_mm256_add_pd(_mm256_set_pd(-3.0,-3.0,-3.0,-3.0),tmp_qloop_14),tmp_qloop_8)); + const __m256d tmp_qloop_17 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(rho_dof_7,tmp_qloop_13),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),tmp_qloop_16); + const __m256d tmp_qloop_18 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(rho_dof_1,_mm256_add_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),tmp_qloop_8)),_mm256_mul_pd(rho_dof_9,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_14,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_set_pd(-8.0,-8.0,-8.0,-8.0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q]))),_mm256_set_pd(4.0,4.0,4.0,4.0)))),_mm256_mul_pd(rho_dof_5,tmp_qloop_13)),_mm256_mul_pd(rho_dof_6,tmp_qloop_6)),tmp_qloop_15),tmp_qloop_17); + const __m256d tmp_qloop_19 = _mm256_add_pd(_mm256_set_pd(-4.0,-4.0,-4.0,-4.0),tmp_qloop_8); + const __m256d tmp_qloop_20 = _mm256_mul_pd(_mm256_mul_pd(rho_dof_9,tmp_qloop_8),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)); + const __m256d tmp_qloop_21 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(rho_dof_2,_mm256_add_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),tmp_qloop_6)),_mm256_mul_pd(rho_dof_8,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_13,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_19,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_mul_pd(_mm256_set_pd(-8.0,-8.0,-8.0,-8.0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]))))),_mm256_mul_pd(rho_dof_4,tmp_qloop_13)),_mm256_mul_pd(rho_dof_6,tmp_qloop_8)),tmp_qloop_17),tmp_qloop_20); + const __m256d tmp_qloop_22 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(rho_dof_3,_mm256_add_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),tmp_qloop_13)),_mm256_mul_pd(rho_dof_7,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_19,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_6,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_mul_pd(_mm256_set_pd(-8.0,-8.0,-8.0,-8.0),_mm256_set_pd(_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q]))))),_mm256_mul_pd(rho_dof_4,tmp_qloop_6)),_mm256_mul_pd(rho_dof_5,tmp_qloop_8)),tmp_qloop_15),tmp_qloop_16),tmp_qloop_20); + const __m256d tmp_qloop_23 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_18,_mm256_set_pd(jac_affine_inv_0_0_BLUE_UP,jac_affine_inv_0_0_BLUE_UP,jac_affine_inv_0_0_BLUE_UP,jac_affine_inv_0_0_BLUE_UP)),_mm256_mul_pd(tmp_qloop_21,_mm256_set_pd(jac_affine_inv_1_0_BLUE_UP,jac_affine_inv_1_0_BLUE_UP,jac_affine_inv_1_0_BLUE_UP,jac_affine_inv_1_0_BLUE_UP))),_mm256_mul_pd(tmp_qloop_22,_mm256_set_pd(jac_affine_inv_2_0_BLUE_UP,jac_affine_inv_2_0_BLUE_UP,jac_affine_inv_2_0_BLUE_UP,jac_affine_inv_2_0_BLUE_UP))); + const __m256d tmp_qloop_24 = _mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),tmp_qloop_1); + const __m256d tmp_qloop_25 = _mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q])),tmp_qloop_3); + const __m256d tmp_qloop_26 = _mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q])),tmp_qloop_5); + const __m256d tmp_qloop_27 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_10,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_7,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_mul_pd(tmp_qloop_4,_mm256_set_pd(-4.0,-4.0,-4.0,-4.0))),tmp_qloop_13); + const __m256d tmp_qloop_28 = _mm256_mul_pd(tmp_qloop_2,_mm256_set_pd(4.0,4.0,4.0,4.0)); + const __m256d tmp_qloop_29 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_28,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_7,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_mul_pd(tmp_qloop_9,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_6); + const __m256d tmp_qloop_30 = _mm256_mul_pd(tmp_qloop_0,_mm256_set_pd(4.0,4.0,4.0,4.0)); + const __m256d tmp_qloop_31 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_11,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_30,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_8); + const __m256d tmp_qloop_32 = _mm256_mul_pd(_mm256_mul_pd(_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(rho_dof_0,tmp_qloop_12),_mm256_mul_pd(rho_dof_1,tmp_qloop_24)),_mm256_mul_pd(rho_dof_2,tmp_qloop_25)),_mm256_mul_pd(rho_dof_3,tmp_qloop_26)),_mm256_mul_pd(rho_dof_4,tmp_qloop_7)),_mm256_mul_pd(rho_dof_5,tmp_qloop_10)),_mm256_mul_pd(rho_dof_6,tmp_qloop_9)),_mm256_mul_pd(rho_dof_7,tmp_qloop_27)),_mm256_mul_pd(rho_dof_8,tmp_qloop_29)),_mm256_mul_pd(rho_dof_9,tmp_qloop_31))),_mm256_set_pd(_data_q_w[q],_data_q_w[q],_data_q_w[q],_data_q_w[q])),_mm256_set_pd(abs_det_jac_affine_BLUE_UP,abs_det_jac_affine_BLUE_UP,abs_det_jac_affine_BLUE_UP,abs_det_jac_affine_BLUE_UP)); + const __m256d tmp_qloop_33 = _mm256_mul_pd(tmp_qloop_32,_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]))),_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q]))),_mm256_set_pd(1.0,1.0,1.0,1.0))); + const __m256d tmp_qloop_34 = _mm256_mul_pd(tmp_qloop_23,tmp_qloop_33); + const __m256d tmp_qloop_35 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_18,_mm256_set_pd(jac_affine_inv_0_1_BLUE_UP,jac_affine_inv_0_1_BLUE_UP,jac_affine_inv_0_1_BLUE_UP,jac_affine_inv_0_1_BLUE_UP)),_mm256_mul_pd(tmp_qloop_21,_mm256_set_pd(jac_affine_inv_1_1_BLUE_UP,jac_affine_inv_1_1_BLUE_UP,jac_affine_inv_1_1_BLUE_UP,jac_affine_inv_1_1_BLUE_UP))),_mm256_mul_pd(tmp_qloop_22,_mm256_set_pd(jac_affine_inv_2_1_BLUE_UP,jac_affine_inv_2_1_BLUE_UP,jac_affine_inv_2_1_BLUE_UP,jac_affine_inv_2_1_BLUE_UP))); + const __m256d tmp_qloop_36 = _mm256_mul_pd(tmp_qloop_33,tmp_qloop_35); + const __m256d tmp_qloop_37 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_18,_mm256_set_pd(jac_affine_inv_0_2_BLUE_UP,jac_affine_inv_0_2_BLUE_UP,jac_affine_inv_0_2_BLUE_UP,jac_affine_inv_0_2_BLUE_UP)),_mm256_mul_pd(tmp_qloop_21,_mm256_set_pd(jac_affine_inv_1_2_BLUE_UP,jac_affine_inv_1_2_BLUE_UP,jac_affine_inv_1_2_BLUE_UP,jac_affine_inv_1_2_BLUE_UP))),_mm256_mul_pd(tmp_qloop_22,_mm256_set_pd(jac_affine_inv_2_2_BLUE_UP,jac_affine_inv_2_2_BLUE_UP,jac_affine_inv_2_2_BLUE_UP,jac_affine_inv_2_2_BLUE_UP))); + const __m256d tmp_qloop_38 = _mm256_mul_pd(tmp_qloop_33,tmp_qloop_37); + const __m256d tmp_qloop_39 = _mm256_mul_pd(tmp_qloop_32,_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])); + const __m256d tmp_qloop_40 = _mm256_mul_pd(tmp_qloop_23,tmp_qloop_39); + const __m256d tmp_qloop_41 = _mm256_mul_pd(tmp_qloop_32,_mm256_set_pd(_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q])); + const __m256d tmp_qloop_42 = _mm256_mul_pd(tmp_qloop_23,tmp_qloop_41); + const __m256d tmp_qloop_43 = _mm256_mul_pd(tmp_qloop_42,tmp_qloop_9); + const __m256d tmp_qloop_44 = _mm256_mul_pd(tmp_qloop_32,_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q])); + const __m256d tmp_qloop_45 = _mm256_mul_pd(tmp_qloop_23,tmp_qloop_44); + const __m256d tmp_qloop_46 = _mm256_mul_pd(tmp_qloop_35,tmp_qloop_39); + const __m256d tmp_qloop_47 = _mm256_mul_pd(tmp_qloop_35,tmp_qloop_41); + const __m256d tmp_qloop_48 = _mm256_mul_pd(tmp_qloop_47,tmp_qloop_9); + const __m256d tmp_qloop_49 = _mm256_mul_pd(tmp_qloop_35,tmp_qloop_44); + const __m256d tmp_qloop_50 = _mm256_mul_pd(tmp_qloop_37,tmp_qloop_39); + const __m256d tmp_qloop_51 = _mm256_mul_pd(tmp_qloop_37,tmp_qloop_41); + const __m256d tmp_qloop_52 = _mm256_mul_pd(tmp_qloop_51,tmp_qloop_9); + const __m256d tmp_qloop_53 = _mm256_mul_pd(tmp_qloop_37,tmp_qloop_44); + const __m256d tmp_qloop_54 = _mm256_mul_pd(tmp_qloop_23,tmp_qloop_32); + const __m256d tmp_qloop_55 = _mm256_mul_pd(tmp_qloop_2,tmp_qloop_8); + const __m256d tmp_qloop_56 = _mm256_mul_pd(tmp_qloop_32,tmp_qloop_55); + const __m256d tmp_qloop_57 = _mm256_mul_pd(tmp_qloop_4,tmp_qloop_54); + const __m256d tmp_qloop_58 = _mm256_mul_pd(tmp_qloop_32,tmp_qloop_4); + const __m256d tmp_qloop_59 = _mm256_mul_pd(tmp_qloop_35,tmp_qloop_58); + const __m256d tmp_qloop_60 = _mm256_mul_pd(tmp_qloop_37,tmp_qloop_58); + const __m256d q_tmp_0_0 = _mm256_mul_pd(tmp_qloop_12,tmp_qloop_34); + const __m256d q_tmp_0_1 = _mm256_mul_pd(tmp_qloop_24,tmp_qloop_34); + const __m256d q_tmp_0_2 = _mm256_mul_pd(tmp_qloop_25,tmp_qloop_34); + const __m256d q_tmp_0_3 = _mm256_mul_pd(tmp_qloop_26,tmp_qloop_34); + const __m256d q_tmp_0_4 = _mm256_mul_pd(tmp_qloop_34,tmp_qloop_7); + const __m256d q_tmp_0_5 = _mm256_mul_pd(tmp_qloop_10,tmp_qloop_34); + const __m256d q_tmp_0_6 = _mm256_mul_pd(tmp_qloop_34,tmp_qloop_9); + const __m256d q_tmp_0_7 = _mm256_mul_pd(tmp_qloop_27,tmp_qloop_34); + const __m256d q_tmp_0_8 = _mm256_mul_pd(tmp_qloop_29,tmp_qloop_34); + const __m256d q_tmp_0_9 = _mm256_mul_pd(tmp_qloop_31,tmp_qloop_34); + const __m256d q_tmp_0_10 = _mm256_mul_pd(tmp_qloop_12,tmp_qloop_36); + const __m256d q_tmp_0_11 = _mm256_mul_pd(tmp_qloop_24,tmp_qloop_36); + const __m256d q_tmp_0_12 = _mm256_mul_pd(tmp_qloop_25,tmp_qloop_36); + const __m256d q_tmp_0_13 = _mm256_mul_pd(tmp_qloop_26,tmp_qloop_36); + const __m256d q_tmp_0_14 = _mm256_mul_pd(tmp_qloop_36,tmp_qloop_7); + const __m256d q_tmp_0_15 = _mm256_mul_pd(tmp_qloop_10,tmp_qloop_36); + const __m256d q_tmp_0_16 = _mm256_mul_pd(tmp_qloop_36,tmp_qloop_9); + const __m256d q_tmp_0_17 = _mm256_mul_pd(tmp_qloop_27,tmp_qloop_36); + const __m256d q_tmp_0_18 = _mm256_mul_pd(tmp_qloop_29,tmp_qloop_36); + const __m256d q_tmp_0_19 = _mm256_mul_pd(tmp_qloop_31,tmp_qloop_36); + const __m256d q_tmp_0_20 = _mm256_mul_pd(tmp_qloop_12,tmp_qloop_38); + const __m256d q_tmp_0_21 = _mm256_mul_pd(tmp_qloop_24,tmp_qloop_38); + const __m256d q_tmp_0_22 = _mm256_mul_pd(tmp_qloop_25,tmp_qloop_38); + const __m256d q_tmp_0_23 = _mm256_mul_pd(tmp_qloop_26,tmp_qloop_38); + const __m256d q_tmp_0_24 = _mm256_mul_pd(tmp_qloop_38,tmp_qloop_7); + const __m256d q_tmp_0_25 = _mm256_mul_pd(tmp_qloop_10,tmp_qloop_38); + const __m256d q_tmp_0_26 = _mm256_mul_pd(tmp_qloop_38,tmp_qloop_9); + const __m256d q_tmp_0_27 = _mm256_mul_pd(tmp_qloop_27,tmp_qloop_38); + const __m256d q_tmp_0_28 = _mm256_mul_pd(tmp_qloop_29,tmp_qloop_38); + const __m256d q_tmp_0_29 = _mm256_mul_pd(tmp_qloop_31,tmp_qloop_38); + const __m256d q_tmp_1_0 = _mm256_mul_pd(tmp_qloop_12,tmp_qloop_40); + const __m256d q_tmp_1_1 = _mm256_mul_pd(tmp_qloop_24,tmp_qloop_40); + const __m256d q_tmp_1_2 = _mm256_mul_pd(tmp_qloop_25,tmp_qloop_40); + const __m256d q_tmp_1_3 = _mm256_mul_pd(tmp_qloop_26,tmp_qloop_40); + const __m256d q_tmp_1_4 = tmp_qloop_43; + const __m256d q_tmp_1_5 = _mm256_mul_pd(tmp_qloop_30,tmp_qloop_42); + const __m256d q_tmp_1_6 = _mm256_mul_pd(tmp_qloop_30,tmp_qloop_45); + const __m256d q_tmp_1_7 = _mm256_mul_pd(tmp_qloop_27,tmp_qloop_40); + const __m256d q_tmp_1_8 = _mm256_mul_pd(tmp_qloop_29,tmp_qloop_40); + const __m256d q_tmp_1_9 = _mm256_mul_pd(tmp_qloop_31,tmp_qloop_40); + const __m256d q_tmp_1_10 = _mm256_mul_pd(tmp_qloop_12,tmp_qloop_46); + const __m256d q_tmp_1_11 = _mm256_mul_pd(tmp_qloop_24,tmp_qloop_46); + const __m256d q_tmp_1_12 = _mm256_mul_pd(tmp_qloop_25,tmp_qloop_46); + const __m256d q_tmp_1_13 = _mm256_mul_pd(tmp_qloop_26,tmp_qloop_46); + const __m256d q_tmp_1_14 = tmp_qloop_48; + const __m256d q_tmp_1_15 = _mm256_mul_pd(tmp_qloop_30,tmp_qloop_47); + const __m256d q_tmp_1_16 = _mm256_mul_pd(tmp_qloop_30,tmp_qloop_49); + const __m256d q_tmp_1_17 = _mm256_mul_pd(tmp_qloop_27,tmp_qloop_46); + const __m256d q_tmp_1_18 = _mm256_mul_pd(tmp_qloop_29,tmp_qloop_46); + const __m256d q_tmp_1_19 = _mm256_mul_pd(tmp_qloop_31,tmp_qloop_46); + const __m256d q_tmp_1_20 = _mm256_mul_pd(tmp_qloop_12,tmp_qloop_50); + const __m256d q_tmp_1_21 = _mm256_mul_pd(tmp_qloop_24,tmp_qloop_50); + const __m256d q_tmp_1_22 = _mm256_mul_pd(tmp_qloop_25,tmp_qloop_50); + const __m256d q_tmp_1_23 = _mm256_mul_pd(tmp_qloop_26,tmp_qloop_50); + const __m256d q_tmp_1_24 = tmp_qloop_52; + const __m256d q_tmp_1_25 = _mm256_mul_pd(tmp_qloop_30,tmp_qloop_51); + const __m256d q_tmp_1_26 = _mm256_mul_pd(tmp_qloop_30,tmp_qloop_53); + const __m256d q_tmp_1_27 = _mm256_mul_pd(tmp_qloop_27,tmp_qloop_50); + const __m256d q_tmp_1_28 = _mm256_mul_pd(tmp_qloop_29,tmp_qloop_50); + const __m256d q_tmp_1_29 = _mm256_mul_pd(tmp_qloop_31,tmp_qloop_50); + const __m256d q_tmp_2_0 = _mm256_mul_pd(tmp_qloop_12,tmp_qloop_45); + const __m256d q_tmp_2_1 = _mm256_mul_pd(tmp_qloop_24,tmp_qloop_45); + const __m256d q_tmp_2_2 = _mm256_mul_pd(tmp_qloop_25,tmp_qloop_45); + const __m256d q_tmp_2_3 = _mm256_mul_pd(tmp_qloop_26,tmp_qloop_45); + const __m256d q_tmp_2_4 = _mm256_mul_pd(tmp_qloop_28,tmp_qloop_42); + const __m256d q_tmp_2_5 = tmp_qloop_43; + const __m256d q_tmp_2_6 = _mm256_mul_pd(tmp_qloop_54,tmp_qloop_55); + const __m256d q_tmp_2_7 = _mm256_mul_pd(tmp_qloop_27,tmp_qloop_45); + const __m256d q_tmp_2_8 = _mm256_mul_pd(tmp_qloop_29,tmp_qloop_45); + const __m256d q_tmp_2_9 = _mm256_mul_pd(tmp_qloop_31,tmp_qloop_45); + const __m256d q_tmp_2_10 = _mm256_mul_pd(tmp_qloop_12,tmp_qloop_49); + const __m256d q_tmp_2_11 = _mm256_mul_pd(tmp_qloop_24,tmp_qloop_49); + const __m256d q_tmp_2_12 = _mm256_mul_pd(tmp_qloop_25,tmp_qloop_49); + const __m256d q_tmp_2_13 = _mm256_mul_pd(tmp_qloop_26,tmp_qloop_49); + const __m256d q_tmp_2_14 = _mm256_mul_pd(tmp_qloop_28,tmp_qloop_47); + const __m256d q_tmp_2_15 = tmp_qloop_48; + const __m256d q_tmp_2_16 = _mm256_mul_pd(tmp_qloop_35,tmp_qloop_56); + const __m256d q_tmp_2_17 = _mm256_mul_pd(tmp_qloop_27,tmp_qloop_49); + const __m256d q_tmp_2_18 = _mm256_mul_pd(tmp_qloop_29,tmp_qloop_49); + const __m256d q_tmp_2_19 = _mm256_mul_pd(tmp_qloop_31,tmp_qloop_49); + const __m256d q_tmp_2_20 = _mm256_mul_pd(tmp_qloop_12,tmp_qloop_53); + const __m256d q_tmp_2_21 = _mm256_mul_pd(tmp_qloop_24,tmp_qloop_53); + const __m256d q_tmp_2_22 = _mm256_mul_pd(tmp_qloop_25,tmp_qloop_53); + const __m256d q_tmp_2_23 = _mm256_mul_pd(tmp_qloop_26,tmp_qloop_53); + const __m256d q_tmp_2_24 = _mm256_mul_pd(tmp_qloop_28,tmp_qloop_51); + const __m256d q_tmp_2_25 = tmp_qloop_52; + const __m256d q_tmp_2_26 = _mm256_mul_pd(tmp_qloop_37,tmp_qloop_56); + const __m256d q_tmp_2_27 = _mm256_mul_pd(tmp_qloop_27,tmp_qloop_53); + const __m256d q_tmp_2_28 = _mm256_mul_pd(tmp_qloop_29,tmp_qloop_53); + const __m256d q_tmp_2_29 = _mm256_mul_pd(tmp_qloop_31,tmp_qloop_53); + const __m256d q_tmp_3_0 = _mm256_mul_pd(tmp_qloop_12,tmp_qloop_42); + const __m256d q_tmp_3_1 = _mm256_mul_pd(tmp_qloop_24,tmp_qloop_42); + const __m256d q_tmp_3_2 = _mm256_mul_pd(tmp_qloop_25,tmp_qloop_42); + const __m256d q_tmp_3_3 = _mm256_mul_pd(tmp_qloop_26,tmp_qloop_42); + const __m256d q_tmp_3_4 = _mm256_mul_pd(tmp_qloop_57,tmp_qloop_6); + const __m256d q_tmp_3_5 = _mm256_mul_pd(tmp_qloop_57,tmp_qloop_8); + const __m256d q_tmp_3_6 = tmp_qloop_43; + const __m256d q_tmp_3_7 = _mm256_mul_pd(tmp_qloop_27,tmp_qloop_42); + const __m256d q_tmp_3_8 = _mm256_mul_pd(tmp_qloop_29,tmp_qloop_42); + const __m256d q_tmp_3_9 = _mm256_mul_pd(tmp_qloop_31,tmp_qloop_42); + const __m256d q_tmp_3_10 = _mm256_mul_pd(tmp_qloop_12,tmp_qloop_47); + const __m256d q_tmp_3_11 = _mm256_mul_pd(tmp_qloop_24,tmp_qloop_47); + const __m256d q_tmp_3_12 = _mm256_mul_pd(tmp_qloop_25,tmp_qloop_47); + const __m256d q_tmp_3_13 = _mm256_mul_pd(tmp_qloop_26,tmp_qloop_47); + const __m256d q_tmp_3_14 = _mm256_mul_pd(tmp_qloop_59,tmp_qloop_6); + const __m256d q_tmp_3_15 = _mm256_mul_pd(tmp_qloop_59,tmp_qloop_8); + const __m256d q_tmp_3_16 = tmp_qloop_48; + const __m256d q_tmp_3_17 = _mm256_mul_pd(tmp_qloop_27,tmp_qloop_47); + const __m256d q_tmp_3_18 = _mm256_mul_pd(tmp_qloop_29,tmp_qloop_47); + const __m256d q_tmp_3_19 = _mm256_mul_pd(tmp_qloop_31,tmp_qloop_47); + const __m256d q_tmp_3_20 = _mm256_mul_pd(tmp_qloop_12,tmp_qloop_51); + const __m256d q_tmp_3_21 = _mm256_mul_pd(tmp_qloop_24,tmp_qloop_51); + const __m256d q_tmp_3_22 = _mm256_mul_pd(tmp_qloop_25,tmp_qloop_51); + const __m256d q_tmp_3_23 = _mm256_mul_pd(tmp_qloop_26,tmp_qloop_51); + const __m256d q_tmp_3_24 = _mm256_mul_pd(tmp_qloop_6,tmp_qloop_60); + const __m256d q_tmp_3_25 = _mm256_mul_pd(tmp_qloop_60,tmp_qloop_8); + const __m256d q_tmp_3_26 = tmp_qloop_52; + const __m256d q_tmp_3_27 = _mm256_mul_pd(tmp_qloop_27,tmp_qloop_51); + const __m256d q_tmp_3_28 = _mm256_mul_pd(tmp_qloop_29,tmp_qloop_51); + const __m256d q_tmp_3_29 = _mm256_mul_pd(tmp_qloop_31,tmp_qloop_51); + q_acc_0_0 = _mm256_add_pd(q_acc_0_0,q_tmp_0_0); + q_acc_0_1 = _mm256_add_pd(q_acc_0_1,q_tmp_0_1); + q_acc_0_2 = _mm256_add_pd(q_acc_0_2,q_tmp_0_2); + q_acc_0_3 = _mm256_add_pd(q_acc_0_3,q_tmp_0_3); + q_acc_0_4 = _mm256_add_pd(q_acc_0_4,q_tmp_0_4); + q_acc_0_5 = _mm256_add_pd(q_acc_0_5,q_tmp_0_5); + q_acc_0_6 = _mm256_add_pd(q_acc_0_6,q_tmp_0_6); + q_acc_0_7 = _mm256_add_pd(q_acc_0_7,q_tmp_0_7); + q_acc_0_8 = _mm256_add_pd(q_acc_0_8,q_tmp_0_8); + q_acc_0_9 = _mm256_add_pd(q_acc_0_9,q_tmp_0_9); + q_acc_0_10 = _mm256_add_pd(q_acc_0_10,q_tmp_0_10); + q_acc_0_11 = _mm256_add_pd(q_acc_0_11,q_tmp_0_11); + q_acc_0_12 = _mm256_add_pd(q_acc_0_12,q_tmp_0_12); + q_acc_0_13 = _mm256_add_pd(q_acc_0_13,q_tmp_0_13); + q_acc_0_14 = _mm256_add_pd(q_acc_0_14,q_tmp_0_14); + q_acc_0_15 = _mm256_add_pd(q_acc_0_15,q_tmp_0_15); + q_acc_0_16 = _mm256_add_pd(q_acc_0_16,q_tmp_0_16); + q_acc_0_17 = _mm256_add_pd(q_acc_0_17,q_tmp_0_17); + q_acc_0_18 = _mm256_add_pd(q_acc_0_18,q_tmp_0_18); + q_acc_0_19 = _mm256_add_pd(q_acc_0_19,q_tmp_0_19); + q_acc_0_20 = _mm256_add_pd(q_acc_0_20,q_tmp_0_20); + q_acc_0_21 = _mm256_add_pd(q_acc_0_21,q_tmp_0_21); + q_acc_0_22 = _mm256_add_pd(q_acc_0_22,q_tmp_0_22); + q_acc_0_23 = _mm256_add_pd(q_acc_0_23,q_tmp_0_23); + q_acc_0_24 = _mm256_add_pd(q_acc_0_24,q_tmp_0_24); + q_acc_0_25 = _mm256_add_pd(q_acc_0_25,q_tmp_0_25); + q_acc_0_26 = _mm256_add_pd(q_acc_0_26,q_tmp_0_26); + q_acc_0_27 = _mm256_add_pd(q_acc_0_27,q_tmp_0_27); + q_acc_0_28 = _mm256_add_pd(q_acc_0_28,q_tmp_0_28); + q_acc_0_29 = _mm256_add_pd(q_acc_0_29,q_tmp_0_29); + q_acc_1_0 = _mm256_add_pd(q_acc_1_0,q_tmp_1_0); + q_acc_1_1 = _mm256_add_pd(q_acc_1_1,q_tmp_1_1); + q_acc_1_2 = _mm256_add_pd(q_acc_1_2,q_tmp_1_2); + q_acc_1_3 = _mm256_add_pd(q_acc_1_3,q_tmp_1_3); + q_acc_1_4 = _mm256_add_pd(q_acc_1_4,q_tmp_1_4); + q_acc_1_5 = _mm256_add_pd(q_acc_1_5,q_tmp_1_5); + q_acc_1_6 = _mm256_add_pd(q_acc_1_6,q_tmp_1_6); + q_acc_1_7 = _mm256_add_pd(q_acc_1_7,q_tmp_1_7); + q_acc_1_8 = _mm256_add_pd(q_acc_1_8,q_tmp_1_8); + q_acc_1_9 = _mm256_add_pd(q_acc_1_9,q_tmp_1_9); + q_acc_1_10 = _mm256_add_pd(q_acc_1_10,q_tmp_1_10); + q_acc_1_11 = _mm256_add_pd(q_acc_1_11,q_tmp_1_11); + q_acc_1_12 = _mm256_add_pd(q_acc_1_12,q_tmp_1_12); + q_acc_1_13 = _mm256_add_pd(q_acc_1_13,q_tmp_1_13); + q_acc_1_14 = _mm256_add_pd(q_acc_1_14,q_tmp_1_14); + q_acc_1_15 = _mm256_add_pd(q_acc_1_15,q_tmp_1_15); + q_acc_1_16 = _mm256_add_pd(q_acc_1_16,q_tmp_1_16); + q_acc_1_17 = _mm256_add_pd(q_acc_1_17,q_tmp_1_17); + q_acc_1_18 = _mm256_add_pd(q_acc_1_18,q_tmp_1_18); + q_acc_1_19 = _mm256_add_pd(q_acc_1_19,q_tmp_1_19); + q_acc_1_20 = _mm256_add_pd(q_acc_1_20,q_tmp_1_20); + q_acc_1_21 = _mm256_add_pd(q_acc_1_21,q_tmp_1_21); + q_acc_1_22 = _mm256_add_pd(q_acc_1_22,q_tmp_1_22); + q_acc_1_23 = _mm256_add_pd(q_acc_1_23,q_tmp_1_23); + q_acc_1_24 = _mm256_add_pd(q_acc_1_24,q_tmp_1_24); + q_acc_1_25 = _mm256_add_pd(q_acc_1_25,q_tmp_1_25); + q_acc_1_26 = _mm256_add_pd(q_acc_1_26,q_tmp_1_26); + q_acc_1_27 = _mm256_add_pd(q_acc_1_27,q_tmp_1_27); + q_acc_1_28 = _mm256_add_pd(q_acc_1_28,q_tmp_1_28); + q_acc_1_29 = _mm256_add_pd(q_acc_1_29,q_tmp_1_29); + q_acc_2_0 = _mm256_add_pd(q_acc_2_0,q_tmp_2_0); + q_acc_2_1 = _mm256_add_pd(q_acc_2_1,q_tmp_2_1); + q_acc_2_2 = _mm256_add_pd(q_acc_2_2,q_tmp_2_2); + q_acc_2_3 = _mm256_add_pd(q_acc_2_3,q_tmp_2_3); + q_acc_2_4 = _mm256_add_pd(q_acc_2_4,q_tmp_2_4); + q_acc_2_5 = _mm256_add_pd(q_acc_2_5,q_tmp_2_5); + q_acc_2_6 = _mm256_add_pd(q_acc_2_6,q_tmp_2_6); + q_acc_2_7 = _mm256_add_pd(q_acc_2_7,q_tmp_2_7); + q_acc_2_8 = _mm256_add_pd(q_acc_2_8,q_tmp_2_8); + q_acc_2_9 = _mm256_add_pd(q_acc_2_9,q_tmp_2_9); + q_acc_2_10 = _mm256_add_pd(q_acc_2_10,q_tmp_2_10); + q_acc_2_11 = _mm256_add_pd(q_acc_2_11,q_tmp_2_11); + q_acc_2_12 = _mm256_add_pd(q_acc_2_12,q_tmp_2_12); + q_acc_2_13 = _mm256_add_pd(q_acc_2_13,q_tmp_2_13); + q_acc_2_14 = _mm256_add_pd(q_acc_2_14,q_tmp_2_14); + q_acc_2_15 = _mm256_add_pd(q_acc_2_15,q_tmp_2_15); + q_acc_2_16 = _mm256_add_pd(q_acc_2_16,q_tmp_2_16); + q_acc_2_17 = _mm256_add_pd(q_acc_2_17,q_tmp_2_17); + q_acc_2_18 = _mm256_add_pd(q_acc_2_18,q_tmp_2_18); + q_acc_2_19 = _mm256_add_pd(q_acc_2_19,q_tmp_2_19); + q_acc_2_20 = _mm256_add_pd(q_acc_2_20,q_tmp_2_20); + q_acc_2_21 = _mm256_add_pd(q_acc_2_21,q_tmp_2_21); + q_acc_2_22 = _mm256_add_pd(q_acc_2_22,q_tmp_2_22); + q_acc_2_23 = _mm256_add_pd(q_acc_2_23,q_tmp_2_23); + q_acc_2_24 = _mm256_add_pd(q_acc_2_24,q_tmp_2_24); + q_acc_2_25 = _mm256_add_pd(q_acc_2_25,q_tmp_2_25); + q_acc_2_26 = _mm256_add_pd(q_acc_2_26,q_tmp_2_26); + q_acc_2_27 = _mm256_add_pd(q_acc_2_27,q_tmp_2_27); + q_acc_2_28 = _mm256_add_pd(q_acc_2_28,q_tmp_2_28); + q_acc_2_29 = _mm256_add_pd(q_acc_2_29,q_tmp_2_29); + q_acc_3_0 = _mm256_add_pd(q_acc_3_0,q_tmp_3_0); + q_acc_3_1 = _mm256_add_pd(q_acc_3_1,q_tmp_3_1); + q_acc_3_2 = _mm256_add_pd(q_acc_3_2,q_tmp_3_2); + q_acc_3_3 = _mm256_add_pd(q_acc_3_3,q_tmp_3_3); + q_acc_3_4 = _mm256_add_pd(q_acc_3_4,q_tmp_3_4); + q_acc_3_5 = _mm256_add_pd(q_acc_3_5,q_tmp_3_5); + q_acc_3_6 = _mm256_add_pd(q_acc_3_6,q_tmp_3_6); + q_acc_3_7 = _mm256_add_pd(q_acc_3_7,q_tmp_3_7); + q_acc_3_8 = _mm256_add_pd(q_acc_3_8,q_tmp_3_8); + q_acc_3_9 = _mm256_add_pd(q_acc_3_9,q_tmp_3_9); + q_acc_3_10 = _mm256_add_pd(q_acc_3_10,q_tmp_3_10); + q_acc_3_11 = _mm256_add_pd(q_acc_3_11,q_tmp_3_11); + q_acc_3_12 = _mm256_add_pd(q_acc_3_12,q_tmp_3_12); + q_acc_3_13 = _mm256_add_pd(q_acc_3_13,q_tmp_3_13); + q_acc_3_14 = _mm256_add_pd(q_acc_3_14,q_tmp_3_14); + q_acc_3_15 = _mm256_add_pd(q_acc_3_15,q_tmp_3_15); + q_acc_3_16 = _mm256_add_pd(q_acc_3_16,q_tmp_3_16); + q_acc_3_17 = _mm256_add_pd(q_acc_3_17,q_tmp_3_17); + q_acc_3_18 = _mm256_add_pd(q_acc_3_18,q_tmp_3_18); + q_acc_3_19 = _mm256_add_pd(q_acc_3_19,q_tmp_3_19); + q_acc_3_20 = _mm256_add_pd(q_acc_3_20,q_tmp_3_20); + q_acc_3_21 = _mm256_add_pd(q_acc_3_21,q_tmp_3_21); + q_acc_3_22 = _mm256_add_pd(q_acc_3_22,q_tmp_3_22); + q_acc_3_23 = _mm256_add_pd(q_acc_3_23,q_tmp_3_23); + q_acc_3_24 = _mm256_add_pd(q_acc_3_24,q_tmp_3_24); + q_acc_3_25 = _mm256_add_pd(q_acc_3_25,q_tmp_3_25); + q_acc_3_26 = _mm256_add_pd(q_acc_3_26,q_tmp_3_26); + q_acc_3_27 = _mm256_add_pd(q_acc_3_27,q_tmp_3_27); + q_acc_3_28 = _mm256_add_pd(q_acc_3_28,q_tmp_3_28); + q_acc_3_29 = _mm256_add_pd(q_acc_3_29,q_tmp_3_29); + } + const __m256d elMatVec_0 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(q_acc_0_0,src_dof_0),_mm256_mul_pd(q_acc_0_1,src_dof_1)),_mm256_mul_pd(q_acc_0_10,src_dof_10)),_mm256_mul_pd(q_acc_0_11,src_dof_11)),_mm256_mul_pd(q_acc_0_12,src_dof_12)),_mm256_mul_pd(q_acc_0_13,src_dof_13)),_mm256_mul_pd(q_acc_0_14,src_dof_14)),_mm256_mul_pd(q_acc_0_15,src_dof_15)),_mm256_mul_pd(q_acc_0_16,src_dof_16)),_mm256_mul_pd(q_acc_0_17,src_dof_17)),_mm256_mul_pd(q_acc_0_18,src_dof_18)),_mm256_mul_pd(q_acc_0_19,src_dof_19)),_mm256_mul_pd(q_acc_0_2,src_dof_2)),_mm256_mul_pd(q_acc_0_20,src_dof_20)),_mm256_mul_pd(q_acc_0_21,src_dof_21)),_mm256_mul_pd(q_acc_0_22,src_dof_22)),_mm256_mul_pd(q_acc_0_23,src_dof_23)),_mm256_mul_pd(q_acc_0_24,src_dof_24)),_mm256_mul_pd(q_acc_0_25,src_dof_25)),_mm256_mul_pd(q_acc_0_26,src_dof_26)),_mm256_mul_pd(q_acc_0_27,src_dof_27)),_mm256_mul_pd(q_acc_0_28,src_dof_28)),_mm256_mul_pd(q_acc_0_29,src_dof_29)),_mm256_mul_pd(q_acc_0_3,src_dof_3)),_mm256_mul_pd(q_acc_0_4,src_dof_4)),_mm256_mul_pd(q_acc_0_5,src_dof_5)),_mm256_mul_pd(q_acc_0_6,src_dof_6)),_mm256_mul_pd(q_acc_0_7,src_dof_7)),_mm256_mul_pd(q_acc_0_8,src_dof_8)),_mm256_mul_pd(q_acc_0_9,src_dof_9)); + const __m256d elMatVec_1 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(q_acc_1_0,src_dof_0),_mm256_mul_pd(q_acc_1_1,src_dof_1)),_mm256_mul_pd(q_acc_1_10,src_dof_10)),_mm256_mul_pd(q_acc_1_11,src_dof_11)),_mm256_mul_pd(q_acc_1_12,src_dof_12)),_mm256_mul_pd(q_acc_1_13,src_dof_13)),_mm256_mul_pd(q_acc_1_14,src_dof_14)),_mm256_mul_pd(q_acc_1_15,src_dof_15)),_mm256_mul_pd(q_acc_1_16,src_dof_16)),_mm256_mul_pd(q_acc_1_17,src_dof_17)),_mm256_mul_pd(q_acc_1_18,src_dof_18)),_mm256_mul_pd(q_acc_1_19,src_dof_19)),_mm256_mul_pd(q_acc_1_2,src_dof_2)),_mm256_mul_pd(q_acc_1_20,src_dof_20)),_mm256_mul_pd(q_acc_1_21,src_dof_21)),_mm256_mul_pd(q_acc_1_22,src_dof_22)),_mm256_mul_pd(q_acc_1_23,src_dof_23)),_mm256_mul_pd(q_acc_1_24,src_dof_24)),_mm256_mul_pd(q_acc_1_25,src_dof_25)),_mm256_mul_pd(q_acc_1_26,src_dof_26)),_mm256_mul_pd(q_acc_1_27,src_dof_27)),_mm256_mul_pd(q_acc_1_28,src_dof_28)),_mm256_mul_pd(q_acc_1_29,src_dof_29)),_mm256_mul_pd(q_acc_1_3,src_dof_3)),_mm256_mul_pd(q_acc_1_4,src_dof_4)),_mm256_mul_pd(q_acc_1_5,src_dof_5)),_mm256_mul_pd(q_acc_1_6,src_dof_6)),_mm256_mul_pd(q_acc_1_7,src_dof_7)),_mm256_mul_pd(q_acc_1_8,src_dof_8)),_mm256_mul_pd(q_acc_1_9,src_dof_9)); + const __m256d elMatVec_2 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(q_acc_2_0,src_dof_0),_mm256_mul_pd(q_acc_2_1,src_dof_1)),_mm256_mul_pd(q_acc_2_10,src_dof_10)),_mm256_mul_pd(q_acc_2_11,src_dof_11)),_mm256_mul_pd(q_acc_2_12,src_dof_12)),_mm256_mul_pd(q_acc_2_13,src_dof_13)),_mm256_mul_pd(q_acc_2_14,src_dof_14)),_mm256_mul_pd(q_acc_2_15,src_dof_15)),_mm256_mul_pd(q_acc_2_16,src_dof_16)),_mm256_mul_pd(q_acc_2_17,src_dof_17)),_mm256_mul_pd(q_acc_2_18,src_dof_18)),_mm256_mul_pd(q_acc_2_19,src_dof_19)),_mm256_mul_pd(q_acc_2_2,src_dof_2)),_mm256_mul_pd(q_acc_2_20,src_dof_20)),_mm256_mul_pd(q_acc_2_21,src_dof_21)),_mm256_mul_pd(q_acc_2_22,src_dof_22)),_mm256_mul_pd(q_acc_2_23,src_dof_23)),_mm256_mul_pd(q_acc_2_24,src_dof_24)),_mm256_mul_pd(q_acc_2_25,src_dof_25)),_mm256_mul_pd(q_acc_2_26,src_dof_26)),_mm256_mul_pd(q_acc_2_27,src_dof_27)),_mm256_mul_pd(q_acc_2_28,src_dof_28)),_mm256_mul_pd(q_acc_2_29,src_dof_29)),_mm256_mul_pd(q_acc_2_3,src_dof_3)),_mm256_mul_pd(q_acc_2_4,src_dof_4)),_mm256_mul_pd(q_acc_2_5,src_dof_5)),_mm256_mul_pd(q_acc_2_6,src_dof_6)),_mm256_mul_pd(q_acc_2_7,src_dof_7)),_mm256_mul_pd(q_acc_2_8,src_dof_8)),_mm256_mul_pd(q_acc_2_9,src_dof_9)); + const __m256d elMatVec_3 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(q_acc_3_0,src_dof_0),_mm256_mul_pd(q_acc_3_1,src_dof_1)),_mm256_mul_pd(q_acc_3_10,src_dof_10)),_mm256_mul_pd(q_acc_3_11,src_dof_11)),_mm256_mul_pd(q_acc_3_12,src_dof_12)),_mm256_mul_pd(q_acc_3_13,src_dof_13)),_mm256_mul_pd(q_acc_3_14,src_dof_14)),_mm256_mul_pd(q_acc_3_15,src_dof_15)),_mm256_mul_pd(q_acc_3_16,src_dof_16)),_mm256_mul_pd(q_acc_3_17,src_dof_17)),_mm256_mul_pd(q_acc_3_18,src_dof_18)),_mm256_mul_pd(q_acc_3_19,src_dof_19)),_mm256_mul_pd(q_acc_3_2,src_dof_2)),_mm256_mul_pd(q_acc_3_20,src_dof_20)),_mm256_mul_pd(q_acc_3_21,src_dof_21)),_mm256_mul_pd(q_acc_3_22,src_dof_22)),_mm256_mul_pd(q_acc_3_23,src_dof_23)),_mm256_mul_pd(q_acc_3_24,src_dof_24)),_mm256_mul_pd(q_acc_3_25,src_dof_25)),_mm256_mul_pd(q_acc_3_26,src_dof_26)),_mm256_mul_pd(q_acc_3_27,src_dof_27)),_mm256_mul_pd(q_acc_3_28,src_dof_28)),_mm256_mul_pd(q_acc_3_29,src_dof_29)),_mm256_mul_pd(q_acc_3_3,src_dof_3)),_mm256_mul_pd(q_acc_3_4,src_dof_4)),_mm256_mul_pd(q_acc_3_5,src_dof_5)),_mm256_mul_pd(q_acc_3_6,src_dof_6)),_mm256_mul_pd(q_acc_3_7,src_dof_7)),_mm256_mul_pd(q_acc_3_8,src_dof_8)),_mm256_mul_pd(q_acc_3_9,src_dof_9)); + _mm256_storeu_pd(&_data_dst[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1],_mm256_add_pd(elMatVec_0,_mm256_loadu_pd(& _data_dst[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1]))); + _mm256_storeu_pd(&_data_dst[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))],_mm256_add_pd(elMatVec_1,_mm256_loadu_pd(& _data_dst[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))]))); + _mm256_storeu_pd(&_data_dst[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1],_mm256_add_pd(elMatVec_2,_mm256_loadu_pd(& _data_dst[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1]))); + _mm256_storeu_pd(&_data_dst[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1],_mm256_add_pd(elMatVec_3,_mm256_loadu_pd(& _data_dst[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1]))); + } + for (int64_t ctr_0 = (int64_t)((-ctr_1 - ctr_2 + micro_edges_per_macro_edge - 1) / (4)) * (4); ctr_0 < -ctr_1 - ctr_2 + micro_edges_per_macro_edge - 1; ctr_0 += 1) + { + + const int64_t phantom_ctr_0 = ctr_0; + real_t _data_float_loop_ctr_array_dim_0[4]; + _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0; + _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1; + _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2; + _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3; + real_t _data_float_loop_ctr_array_dim_1[4]; + _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1; + real_t _data_float_loop_ctr_array_dim_2[4]; + _data_float_loop_ctr_array_dim_2[0] = (real_t) ctr_2; + _data_float_loop_ctr_array_dim_2[1] = (real_t) ctr_2; + _data_float_loop_ctr_array_dim_2[2] = (real_t) ctr_2; + _data_float_loop_ctr_array_dim_2[3] = (real_t) ctr_2; + + const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]; + const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]; + const real_t p_affine_0_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]; + const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]; + const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]; + const real_t p_affine_1_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]; + const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]; + const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]; + const real_t p_affine_2_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]; + const real_t p_affine_3_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_3_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_3_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0; + const real_t src_dof_0 = _data_src_vertex_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1]; + const real_t src_dof_1 = _data_src_vertex_0[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))]; + const real_t src_dof_2 = _data_src_vertex_0[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1]; + const real_t src_dof_3 = _data_src_vertex_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1]; + const real_t src_dof_4 = _data_src_edge_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + 1]; + const real_t src_dof_5 = _data_src_edge_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge - 1)*(micro_edges_per_macro_edge + 1)) / (6)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))]; + const real_t src_dof_6 = _data_src_edge_0[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]; + const real_t src_dof_7 = _data_src_edge_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 3*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + 1]; + const real_t src_dof_8 = _data_src_edge_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + 1]; + const real_t src_dof_9 = _data_src_edge_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 4*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]; + const real_t src_dof_10 = _data_src_vertex_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1]; + const real_t src_dof_11 = _data_src_vertex_1[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))]; + const real_t src_dof_12 = _data_src_vertex_1[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1]; + const real_t src_dof_13 = _data_src_vertex_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1]; + const real_t src_dof_14 = _data_src_edge_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + 1]; + const real_t src_dof_15 = _data_src_edge_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge - 1)*(micro_edges_per_macro_edge + 1)) / (6)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))]; + const real_t src_dof_16 = _data_src_edge_1[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]; + const real_t src_dof_17 = _data_src_edge_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 3*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + 1]; + const real_t src_dof_18 = _data_src_edge_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + 1]; + const real_t src_dof_19 = _data_src_edge_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 4*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]; + const real_t src_dof_20 = _data_src_vertex_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1]; + const real_t src_dof_21 = _data_src_vertex_2[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))]; + const real_t src_dof_22 = _data_src_vertex_2[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1]; + const real_t src_dof_23 = _data_src_vertex_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1]; + const real_t src_dof_24 = _data_src_edge_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + 1]; + const real_t src_dof_25 = _data_src_edge_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge - 1)*(micro_edges_per_macro_edge + 1)) / (6)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))]; + const real_t src_dof_26 = _data_src_edge_2[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]; + const real_t src_dof_27 = _data_src_edge_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 3*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + 1]; + const real_t src_dof_28 = _data_src_edge_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + 1]; + const real_t src_dof_29 = _data_src_edge_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 4*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]; + const real_t rho_dof_0 = _data_rhoVertex[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1]; + const real_t rho_dof_1 = _data_rhoVertex[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))]; + const real_t rho_dof_2 = _data_rhoVertex[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1]; + const real_t rho_dof_3 = _data_rhoVertex[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1]; + const real_t rho_dof_4 = _data_rhoEdge[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + 1]; + const real_t rho_dof_5 = _data_rhoEdge[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge - 1)*(micro_edges_per_macro_edge + 1)) / (6)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))]; + const real_t rho_dof_6 = _data_rhoEdge[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]; + const real_t rho_dof_7 = _data_rhoEdge[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 3*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + 1]; + const real_t rho_dof_8 = _data_rhoEdge[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + 1]; + const real_t rho_dof_9 = _data_rhoEdge[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 4*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]; + real_t q_acc_0_0 = 0.0; + real_t q_acc_0_1 = 0.0; + real_t q_acc_0_2 = 0.0; + real_t q_acc_0_3 = 0.0; + real_t q_acc_0_4 = 0.0; + real_t q_acc_0_5 = 0.0; + real_t q_acc_0_6 = 0.0; + real_t q_acc_0_7 = 0.0; + real_t q_acc_0_8 = 0.0; + real_t q_acc_0_9 = 0.0; + real_t q_acc_0_10 = 0.0; + real_t q_acc_0_11 = 0.0; + real_t q_acc_0_12 = 0.0; + real_t q_acc_0_13 = 0.0; + real_t q_acc_0_14 = 0.0; + real_t q_acc_0_15 = 0.0; + real_t q_acc_0_16 = 0.0; + real_t q_acc_0_17 = 0.0; + real_t q_acc_0_18 = 0.0; + real_t q_acc_0_19 = 0.0; + real_t q_acc_0_20 = 0.0; + real_t q_acc_0_21 = 0.0; + real_t q_acc_0_22 = 0.0; + real_t q_acc_0_23 = 0.0; + real_t q_acc_0_24 = 0.0; + real_t q_acc_0_25 = 0.0; + real_t q_acc_0_26 = 0.0; + real_t q_acc_0_27 = 0.0; + real_t q_acc_0_28 = 0.0; + real_t q_acc_0_29 = 0.0; + real_t q_acc_1_0 = 0.0; + real_t q_acc_1_1 = 0.0; + real_t q_acc_1_2 = 0.0; + real_t q_acc_1_3 = 0.0; + real_t q_acc_1_4 = 0.0; + real_t q_acc_1_5 = 0.0; + real_t q_acc_1_6 = 0.0; + real_t q_acc_1_7 = 0.0; + real_t q_acc_1_8 = 0.0; + real_t q_acc_1_9 = 0.0; + real_t q_acc_1_10 = 0.0; + real_t q_acc_1_11 = 0.0; + real_t q_acc_1_12 = 0.0; + real_t q_acc_1_13 = 0.0; + real_t q_acc_1_14 = 0.0; + real_t q_acc_1_15 = 0.0; + real_t q_acc_1_16 = 0.0; + real_t q_acc_1_17 = 0.0; + real_t q_acc_1_18 = 0.0; + real_t q_acc_1_19 = 0.0; + real_t q_acc_1_20 = 0.0; + real_t q_acc_1_21 = 0.0; + real_t q_acc_1_22 = 0.0; + real_t q_acc_1_23 = 0.0; + real_t q_acc_1_24 = 0.0; + real_t q_acc_1_25 = 0.0; + real_t q_acc_1_26 = 0.0; + real_t q_acc_1_27 = 0.0; + real_t q_acc_1_28 = 0.0; + real_t q_acc_1_29 = 0.0; + real_t q_acc_2_0 = 0.0; + real_t q_acc_2_1 = 0.0; + real_t q_acc_2_2 = 0.0; + real_t q_acc_2_3 = 0.0; + real_t q_acc_2_4 = 0.0; + real_t q_acc_2_5 = 0.0; + real_t q_acc_2_6 = 0.0; + real_t q_acc_2_7 = 0.0; + real_t q_acc_2_8 = 0.0; + real_t q_acc_2_9 = 0.0; + real_t q_acc_2_10 = 0.0; + real_t q_acc_2_11 = 0.0; + real_t q_acc_2_12 = 0.0; + real_t q_acc_2_13 = 0.0; + real_t q_acc_2_14 = 0.0; + real_t q_acc_2_15 = 0.0; + real_t q_acc_2_16 = 0.0; + real_t q_acc_2_17 = 0.0; + real_t q_acc_2_18 = 0.0; + real_t q_acc_2_19 = 0.0; + real_t q_acc_2_20 = 0.0; + real_t q_acc_2_21 = 0.0; + real_t q_acc_2_22 = 0.0; + real_t q_acc_2_23 = 0.0; + real_t q_acc_2_24 = 0.0; + real_t q_acc_2_25 = 0.0; + real_t q_acc_2_26 = 0.0; + real_t q_acc_2_27 = 0.0; + real_t q_acc_2_28 = 0.0; + real_t q_acc_2_29 = 0.0; + real_t q_acc_3_0 = 0.0; + real_t q_acc_3_1 = 0.0; + real_t q_acc_3_2 = 0.0; + real_t q_acc_3_3 = 0.0; + real_t q_acc_3_4 = 0.0; + real_t q_acc_3_5 = 0.0; + real_t q_acc_3_6 = 0.0; + real_t q_acc_3_7 = 0.0; + real_t q_acc_3_8 = 0.0; + real_t q_acc_3_9 = 0.0; + real_t q_acc_3_10 = 0.0; + real_t q_acc_3_11 = 0.0; + real_t q_acc_3_12 = 0.0; + real_t q_acc_3_13 = 0.0; + real_t q_acc_3_14 = 0.0; + real_t q_acc_3_15 = 0.0; + real_t q_acc_3_16 = 0.0; + real_t q_acc_3_17 = 0.0; + real_t q_acc_3_18 = 0.0; + real_t q_acc_3_19 = 0.0; + real_t q_acc_3_20 = 0.0; + real_t q_acc_3_21 = 0.0; + real_t q_acc_3_22 = 0.0; + real_t q_acc_3_23 = 0.0; + real_t q_acc_3_24 = 0.0; + real_t q_acc_3_25 = 0.0; + real_t q_acc_3_26 = 0.0; + real_t q_acc_3_27 = 0.0; + real_t q_acc_3_28 = 0.0; + real_t q_acc_3_29 = 0.0; + for (int64_t q = 0; q < 5; q += 1) + { + const real_t tmp_qloop_0 = (_data_q_p_0[q]*_data_q_p_0[q]); + const real_t tmp_qloop_1 = tmp_qloop_0*2.0; + const real_t tmp_qloop_2 = (_data_q_p_1[q]*_data_q_p_1[q]); + const real_t tmp_qloop_3 = tmp_qloop_2*2.0; + const real_t tmp_qloop_4 = (_data_q_p_2[q]*_data_q_p_2[q]); + const real_t tmp_qloop_5 = tmp_qloop_4*2.0; + const real_t tmp_qloop_6 = 4.0*_data_q_p_1[q]; + const real_t tmp_qloop_7 = tmp_qloop_6*_data_q_p_2[q]; + const real_t tmp_qloop_8 = 4.0*_data_q_p_0[q]; + const real_t tmp_qloop_9 = tmp_qloop_8*_data_q_p_1[q]; + const real_t tmp_qloop_10 = tmp_qloop_8*_data_q_p_2[q]; + const real_t tmp_qloop_11 = tmp_qloop_10 + tmp_qloop_9; + const real_t tmp_qloop_12 = tmp_qloop_1 + tmp_qloop_11 + tmp_qloop_3 + tmp_qloop_5 + tmp_qloop_7 - 3.0*_data_q_p_0[q] - 3.0*_data_q_p_1[q] - 3.0*_data_q_p_2[q] + 1.0; + const real_t tmp_qloop_13 = 4.0*_data_q_p_2[q]; + const real_t tmp_qloop_14 = tmp_qloop_13 + tmp_qloop_6; + const real_t tmp_qloop_15 = -rho_dof_8*tmp_qloop_6; + const real_t tmp_qloop_16 = rho_dof_0*(tmp_qloop_14 + tmp_qloop_8 - 3.0); + const real_t tmp_qloop_17 = -rho_dof_7*tmp_qloop_13 + tmp_qloop_16; + const real_t tmp_qloop_18 = rho_dof_1*(tmp_qloop_8 - 1.0) + rho_dof_5*tmp_qloop_13 + rho_dof_6*tmp_qloop_6 + rho_dof_9*(-tmp_qloop_14 - 8.0*_data_q_p_0[q] + 4.0) + tmp_qloop_15 + tmp_qloop_17; + const real_t tmp_qloop_19 = tmp_qloop_8 - 4.0; + const real_t tmp_qloop_20 = -rho_dof_9*tmp_qloop_8; + const real_t tmp_qloop_21 = rho_dof_2*(tmp_qloop_6 - 1.0) + rho_dof_4*tmp_qloop_13 + rho_dof_6*tmp_qloop_8 + rho_dof_8*(-tmp_qloop_13 - tmp_qloop_19 - 8.0*_data_q_p_1[q]) + tmp_qloop_17 + tmp_qloop_20; + const real_t tmp_qloop_22 = rho_dof_3*(tmp_qloop_13 - 1.0) + rho_dof_4*tmp_qloop_6 + rho_dof_5*tmp_qloop_8 + rho_dof_7*(-tmp_qloop_19 - tmp_qloop_6 - 8.0*_data_q_p_2[q]) + tmp_qloop_15 + tmp_qloop_16 + tmp_qloop_20; + const real_t tmp_qloop_23 = jac_affine_inv_0_0_BLUE_UP*tmp_qloop_18 + jac_affine_inv_1_0_BLUE_UP*tmp_qloop_21 + jac_affine_inv_2_0_BLUE_UP*tmp_qloop_22; + const real_t tmp_qloop_24 = tmp_qloop_1 - _data_q_p_0[q]; + const real_t tmp_qloop_25 = tmp_qloop_3 - _data_q_p_1[q]; + const real_t tmp_qloop_26 = tmp_qloop_5 - _data_q_p_2[q]; + const real_t tmp_qloop_27 = -tmp_qloop_10 + tmp_qloop_13 + tmp_qloop_4*-4.0 - tmp_qloop_7; + const real_t tmp_qloop_28 = tmp_qloop_2*4.0; + const real_t tmp_qloop_29 = -tmp_qloop_28 + tmp_qloop_6 - tmp_qloop_7 - tmp_qloop_9; + const real_t tmp_qloop_30 = tmp_qloop_0*4.0; + const real_t tmp_qloop_31 = -tmp_qloop_11 - tmp_qloop_30 + tmp_qloop_8; + const real_t tmp_qloop_32 = abs_det_jac_affine_BLUE_UP*1.0 / (rho_dof_0*tmp_qloop_12 + rho_dof_1*tmp_qloop_24 + rho_dof_2*tmp_qloop_25 + rho_dof_3*tmp_qloop_26 + rho_dof_4*tmp_qloop_7 + rho_dof_5*tmp_qloop_10 + rho_dof_6*tmp_qloop_9 + rho_dof_7*tmp_qloop_27 + rho_dof_8*tmp_qloop_29 + rho_dof_9*tmp_qloop_31)*_data_q_w[q]; + const real_t tmp_qloop_33 = tmp_qloop_32*(1.0 - _data_q_p_0[q] - _data_q_p_1[q] - _data_q_p_2[q]); + const real_t tmp_qloop_34 = tmp_qloop_23*tmp_qloop_33; + const real_t tmp_qloop_35 = jac_affine_inv_0_1_BLUE_UP*tmp_qloop_18 + jac_affine_inv_1_1_BLUE_UP*tmp_qloop_21 + jac_affine_inv_2_1_BLUE_UP*tmp_qloop_22; + const real_t tmp_qloop_36 = tmp_qloop_33*tmp_qloop_35; + const real_t tmp_qloop_37 = jac_affine_inv_0_2_BLUE_UP*tmp_qloop_18 + jac_affine_inv_1_2_BLUE_UP*tmp_qloop_21 + jac_affine_inv_2_2_BLUE_UP*tmp_qloop_22; + const real_t tmp_qloop_38 = tmp_qloop_33*tmp_qloop_37; + const real_t tmp_qloop_39 = tmp_qloop_32*_data_q_p_0[q]; + const real_t tmp_qloop_40 = tmp_qloop_23*tmp_qloop_39; + const real_t tmp_qloop_41 = tmp_qloop_32*_data_q_p_2[q]; + const real_t tmp_qloop_42 = tmp_qloop_23*tmp_qloop_41; + const real_t tmp_qloop_43 = tmp_qloop_42*tmp_qloop_9; + const real_t tmp_qloop_44 = tmp_qloop_32*_data_q_p_1[q]; + const real_t tmp_qloop_45 = tmp_qloop_23*tmp_qloop_44; + const real_t tmp_qloop_46 = tmp_qloop_35*tmp_qloop_39; + const real_t tmp_qloop_47 = tmp_qloop_35*tmp_qloop_41; + const real_t tmp_qloop_48 = tmp_qloop_47*tmp_qloop_9; + const real_t tmp_qloop_49 = tmp_qloop_35*tmp_qloop_44; + const real_t tmp_qloop_50 = tmp_qloop_37*tmp_qloop_39; + const real_t tmp_qloop_51 = tmp_qloop_37*tmp_qloop_41; + const real_t tmp_qloop_52 = tmp_qloop_51*tmp_qloop_9; + const real_t tmp_qloop_53 = tmp_qloop_37*tmp_qloop_44; + const real_t tmp_qloop_54 = tmp_qloop_23*tmp_qloop_32; + const real_t tmp_qloop_55 = tmp_qloop_2*tmp_qloop_8; + const real_t tmp_qloop_56 = tmp_qloop_32*tmp_qloop_55; + const real_t tmp_qloop_57 = tmp_qloop_4*tmp_qloop_54; + const real_t tmp_qloop_58 = tmp_qloop_32*tmp_qloop_4; + const real_t tmp_qloop_59 = tmp_qloop_35*tmp_qloop_58; + const real_t tmp_qloop_60 = tmp_qloop_37*tmp_qloop_58; + const real_t q_tmp_0_0 = tmp_qloop_12*tmp_qloop_34; + const real_t q_tmp_0_1 = tmp_qloop_24*tmp_qloop_34; + const real_t q_tmp_0_2 = tmp_qloop_25*tmp_qloop_34; + const real_t q_tmp_0_3 = tmp_qloop_26*tmp_qloop_34; + const real_t q_tmp_0_4 = tmp_qloop_34*tmp_qloop_7; + const real_t q_tmp_0_5 = tmp_qloop_10*tmp_qloop_34; + const real_t q_tmp_0_6 = tmp_qloop_34*tmp_qloop_9; + const real_t q_tmp_0_7 = tmp_qloop_27*tmp_qloop_34; + const real_t q_tmp_0_8 = tmp_qloop_29*tmp_qloop_34; + const real_t q_tmp_0_9 = tmp_qloop_31*tmp_qloop_34; + const real_t q_tmp_0_10 = tmp_qloop_12*tmp_qloop_36; + const real_t q_tmp_0_11 = tmp_qloop_24*tmp_qloop_36; + const real_t q_tmp_0_12 = tmp_qloop_25*tmp_qloop_36; + const real_t q_tmp_0_13 = tmp_qloop_26*tmp_qloop_36; + const real_t q_tmp_0_14 = tmp_qloop_36*tmp_qloop_7; + const real_t q_tmp_0_15 = tmp_qloop_10*tmp_qloop_36; + const real_t q_tmp_0_16 = tmp_qloop_36*tmp_qloop_9; + const real_t q_tmp_0_17 = tmp_qloop_27*tmp_qloop_36; + const real_t q_tmp_0_18 = tmp_qloop_29*tmp_qloop_36; + const real_t q_tmp_0_19 = tmp_qloop_31*tmp_qloop_36; + const real_t q_tmp_0_20 = tmp_qloop_12*tmp_qloop_38; + const real_t q_tmp_0_21 = tmp_qloop_24*tmp_qloop_38; + const real_t q_tmp_0_22 = tmp_qloop_25*tmp_qloop_38; + const real_t q_tmp_0_23 = tmp_qloop_26*tmp_qloop_38; + const real_t q_tmp_0_24 = tmp_qloop_38*tmp_qloop_7; + const real_t q_tmp_0_25 = tmp_qloop_10*tmp_qloop_38; + const real_t q_tmp_0_26 = tmp_qloop_38*tmp_qloop_9; + const real_t q_tmp_0_27 = tmp_qloop_27*tmp_qloop_38; + const real_t q_tmp_0_28 = tmp_qloop_29*tmp_qloop_38; + const real_t q_tmp_0_29 = tmp_qloop_31*tmp_qloop_38; + const real_t q_tmp_1_0 = tmp_qloop_12*tmp_qloop_40; + const real_t q_tmp_1_1 = tmp_qloop_24*tmp_qloop_40; + const real_t q_tmp_1_2 = tmp_qloop_25*tmp_qloop_40; + const real_t q_tmp_1_3 = tmp_qloop_26*tmp_qloop_40; + const real_t q_tmp_1_4 = tmp_qloop_43; + const real_t q_tmp_1_5 = tmp_qloop_30*tmp_qloop_42; + const real_t q_tmp_1_6 = tmp_qloop_30*tmp_qloop_45; + const real_t q_tmp_1_7 = tmp_qloop_27*tmp_qloop_40; + const real_t q_tmp_1_8 = tmp_qloop_29*tmp_qloop_40; + const real_t q_tmp_1_9 = tmp_qloop_31*tmp_qloop_40; + const real_t q_tmp_1_10 = tmp_qloop_12*tmp_qloop_46; + const real_t q_tmp_1_11 = tmp_qloop_24*tmp_qloop_46; + const real_t q_tmp_1_12 = tmp_qloop_25*tmp_qloop_46; + const real_t q_tmp_1_13 = tmp_qloop_26*tmp_qloop_46; + const real_t q_tmp_1_14 = tmp_qloop_48; + const real_t q_tmp_1_15 = tmp_qloop_30*tmp_qloop_47; + const real_t q_tmp_1_16 = tmp_qloop_30*tmp_qloop_49; + const real_t q_tmp_1_17 = tmp_qloop_27*tmp_qloop_46; + const real_t q_tmp_1_18 = tmp_qloop_29*tmp_qloop_46; + const real_t q_tmp_1_19 = tmp_qloop_31*tmp_qloop_46; + const real_t q_tmp_1_20 = tmp_qloop_12*tmp_qloop_50; + const real_t q_tmp_1_21 = tmp_qloop_24*tmp_qloop_50; + const real_t q_tmp_1_22 = tmp_qloop_25*tmp_qloop_50; + const real_t q_tmp_1_23 = tmp_qloop_26*tmp_qloop_50; + const real_t q_tmp_1_24 = tmp_qloop_52; + const real_t q_tmp_1_25 = tmp_qloop_30*tmp_qloop_51; + const real_t q_tmp_1_26 = tmp_qloop_30*tmp_qloop_53; + const real_t q_tmp_1_27 = tmp_qloop_27*tmp_qloop_50; + const real_t q_tmp_1_28 = tmp_qloop_29*tmp_qloop_50; + const real_t q_tmp_1_29 = tmp_qloop_31*tmp_qloop_50; + const real_t q_tmp_2_0 = tmp_qloop_12*tmp_qloop_45; + const real_t q_tmp_2_1 = tmp_qloop_24*tmp_qloop_45; + const real_t q_tmp_2_2 = tmp_qloop_25*tmp_qloop_45; + const real_t q_tmp_2_3 = tmp_qloop_26*tmp_qloop_45; + const real_t q_tmp_2_4 = tmp_qloop_28*tmp_qloop_42; + const real_t q_tmp_2_5 = tmp_qloop_43; + const real_t q_tmp_2_6 = tmp_qloop_54*tmp_qloop_55; + const real_t q_tmp_2_7 = tmp_qloop_27*tmp_qloop_45; + const real_t q_tmp_2_8 = tmp_qloop_29*tmp_qloop_45; + const real_t q_tmp_2_9 = tmp_qloop_31*tmp_qloop_45; + const real_t q_tmp_2_10 = tmp_qloop_12*tmp_qloop_49; + const real_t q_tmp_2_11 = tmp_qloop_24*tmp_qloop_49; + const real_t q_tmp_2_12 = tmp_qloop_25*tmp_qloop_49; + const real_t q_tmp_2_13 = tmp_qloop_26*tmp_qloop_49; + const real_t q_tmp_2_14 = tmp_qloop_28*tmp_qloop_47; + const real_t q_tmp_2_15 = tmp_qloop_48; + const real_t q_tmp_2_16 = tmp_qloop_35*tmp_qloop_56; + const real_t q_tmp_2_17 = tmp_qloop_27*tmp_qloop_49; + const real_t q_tmp_2_18 = tmp_qloop_29*tmp_qloop_49; + const real_t q_tmp_2_19 = tmp_qloop_31*tmp_qloop_49; + const real_t q_tmp_2_20 = tmp_qloop_12*tmp_qloop_53; + const real_t q_tmp_2_21 = tmp_qloop_24*tmp_qloop_53; + const real_t q_tmp_2_22 = tmp_qloop_25*tmp_qloop_53; + const real_t q_tmp_2_23 = tmp_qloop_26*tmp_qloop_53; + const real_t q_tmp_2_24 = tmp_qloop_28*tmp_qloop_51; + const real_t q_tmp_2_25 = tmp_qloop_52; + const real_t q_tmp_2_26 = tmp_qloop_37*tmp_qloop_56; + const real_t q_tmp_2_27 = tmp_qloop_27*tmp_qloop_53; + const real_t q_tmp_2_28 = tmp_qloop_29*tmp_qloop_53; + const real_t q_tmp_2_29 = tmp_qloop_31*tmp_qloop_53; + const real_t q_tmp_3_0 = tmp_qloop_12*tmp_qloop_42; + const real_t q_tmp_3_1 = tmp_qloop_24*tmp_qloop_42; + const real_t q_tmp_3_2 = tmp_qloop_25*tmp_qloop_42; + const real_t q_tmp_3_3 = tmp_qloop_26*tmp_qloop_42; + const real_t q_tmp_3_4 = tmp_qloop_57*tmp_qloop_6; + const real_t q_tmp_3_5 = tmp_qloop_57*tmp_qloop_8; + const real_t q_tmp_3_6 = tmp_qloop_43; + const real_t q_tmp_3_7 = tmp_qloop_27*tmp_qloop_42; + const real_t q_tmp_3_8 = tmp_qloop_29*tmp_qloop_42; + const real_t q_tmp_3_9 = tmp_qloop_31*tmp_qloop_42; + const real_t q_tmp_3_10 = tmp_qloop_12*tmp_qloop_47; + const real_t q_tmp_3_11 = tmp_qloop_24*tmp_qloop_47; + const real_t q_tmp_3_12 = tmp_qloop_25*tmp_qloop_47; + const real_t q_tmp_3_13 = tmp_qloop_26*tmp_qloop_47; + const real_t q_tmp_3_14 = tmp_qloop_59*tmp_qloop_6; + const real_t q_tmp_3_15 = tmp_qloop_59*tmp_qloop_8; + const real_t q_tmp_3_16 = tmp_qloop_48; + const real_t q_tmp_3_17 = tmp_qloop_27*tmp_qloop_47; + const real_t q_tmp_3_18 = tmp_qloop_29*tmp_qloop_47; + const real_t q_tmp_3_19 = tmp_qloop_31*tmp_qloop_47; + const real_t q_tmp_3_20 = tmp_qloop_12*tmp_qloop_51; + const real_t q_tmp_3_21 = tmp_qloop_24*tmp_qloop_51; + const real_t q_tmp_3_22 = tmp_qloop_25*tmp_qloop_51; + const real_t q_tmp_3_23 = tmp_qloop_26*tmp_qloop_51; + const real_t q_tmp_3_24 = tmp_qloop_6*tmp_qloop_60; + const real_t q_tmp_3_25 = tmp_qloop_60*tmp_qloop_8; + const real_t q_tmp_3_26 = tmp_qloop_52; + const real_t q_tmp_3_27 = tmp_qloop_27*tmp_qloop_51; + const real_t q_tmp_3_28 = tmp_qloop_29*tmp_qloop_51; + const real_t q_tmp_3_29 = tmp_qloop_31*tmp_qloop_51; + q_acc_0_0 = q_acc_0_0 + q_tmp_0_0; + q_acc_0_1 = q_acc_0_1 + q_tmp_0_1; + q_acc_0_2 = q_acc_0_2 + q_tmp_0_2; + q_acc_0_3 = q_acc_0_3 + q_tmp_0_3; + q_acc_0_4 = q_acc_0_4 + q_tmp_0_4; + q_acc_0_5 = q_acc_0_5 + q_tmp_0_5; + q_acc_0_6 = q_acc_0_6 + q_tmp_0_6; + q_acc_0_7 = q_acc_0_7 + q_tmp_0_7; + q_acc_0_8 = q_acc_0_8 + q_tmp_0_8; + q_acc_0_9 = q_acc_0_9 + q_tmp_0_9; + q_acc_0_10 = q_acc_0_10 + q_tmp_0_10; + q_acc_0_11 = q_acc_0_11 + q_tmp_0_11; + q_acc_0_12 = q_acc_0_12 + q_tmp_0_12; + q_acc_0_13 = q_acc_0_13 + q_tmp_0_13; + q_acc_0_14 = q_acc_0_14 + q_tmp_0_14; + q_acc_0_15 = q_acc_0_15 + q_tmp_0_15; + q_acc_0_16 = q_acc_0_16 + q_tmp_0_16; + q_acc_0_17 = q_acc_0_17 + q_tmp_0_17; + q_acc_0_18 = q_acc_0_18 + q_tmp_0_18; + q_acc_0_19 = q_acc_0_19 + q_tmp_0_19; + q_acc_0_20 = q_acc_0_20 + q_tmp_0_20; + q_acc_0_21 = q_acc_0_21 + q_tmp_0_21; + q_acc_0_22 = q_acc_0_22 + q_tmp_0_22; + q_acc_0_23 = q_acc_0_23 + q_tmp_0_23; + q_acc_0_24 = q_acc_0_24 + q_tmp_0_24; + q_acc_0_25 = q_acc_0_25 + q_tmp_0_25; + q_acc_0_26 = q_acc_0_26 + q_tmp_0_26; + q_acc_0_27 = q_acc_0_27 + q_tmp_0_27; + q_acc_0_28 = q_acc_0_28 + q_tmp_0_28; + q_acc_0_29 = q_acc_0_29 + q_tmp_0_29; + q_acc_1_0 = q_acc_1_0 + q_tmp_1_0; + q_acc_1_1 = q_acc_1_1 + q_tmp_1_1; + q_acc_1_2 = q_acc_1_2 + q_tmp_1_2; + q_acc_1_3 = q_acc_1_3 + q_tmp_1_3; + q_acc_1_4 = q_acc_1_4 + q_tmp_1_4; + q_acc_1_5 = q_acc_1_5 + q_tmp_1_5; + q_acc_1_6 = q_acc_1_6 + q_tmp_1_6; + q_acc_1_7 = q_acc_1_7 + q_tmp_1_7; + q_acc_1_8 = q_acc_1_8 + q_tmp_1_8; + q_acc_1_9 = q_acc_1_9 + q_tmp_1_9; + q_acc_1_10 = q_acc_1_10 + q_tmp_1_10; + q_acc_1_11 = q_acc_1_11 + q_tmp_1_11; + q_acc_1_12 = q_acc_1_12 + q_tmp_1_12; + q_acc_1_13 = q_acc_1_13 + q_tmp_1_13; + q_acc_1_14 = q_acc_1_14 + q_tmp_1_14; + q_acc_1_15 = q_acc_1_15 + q_tmp_1_15; + q_acc_1_16 = q_acc_1_16 + q_tmp_1_16; + q_acc_1_17 = q_acc_1_17 + q_tmp_1_17; + q_acc_1_18 = q_acc_1_18 + q_tmp_1_18; + q_acc_1_19 = q_acc_1_19 + q_tmp_1_19; + q_acc_1_20 = q_acc_1_20 + q_tmp_1_20; + q_acc_1_21 = q_acc_1_21 + q_tmp_1_21; + q_acc_1_22 = q_acc_1_22 + q_tmp_1_22; + q_acc_1_23 = q_acc_1_23 + q_tmp_1_23; + q_acc_1_24 = q_acc_1_24 + q_tmp_1_24; + q_acc_1_25 = q_acc_1_25 + q_tmp_1_25; + q_acc_1_26 = q_acc_1_26 + q_tmp_1_26; + q_acc_1_27 = q_acc_1_27 + q_tmp_1_27; + q_acc_1_28 = q_acc_1_28 + q_tmp_1_28; + q_acc_1_29 = q_acc_1_29 + q_tmp_1_29; + q_acc_2_0 = q_acc_2_0 + q_tmp_2_0; + q_acc_2_1 = q_acc_2_1 + q_tmp_2_1; + q_acc_2_2 = q_acc_2_2 + q_tmp_2_2; + q_acc_2_3 = q_acc_2_3 + q_tmp_2_3; + q_acc_2_4 = q_acc_2_4 + q_tmp_2_4; + q_acc_2_5 = q_acc_2_5 + q_tmp_2_5; + q_acc_2_6 = q_acc_2_6 + q_tmp_2_6; + q_acc_2_7 = q_acc_2_7 + q_tmp_2_7; + q_acc_2_8 = q_acc_2_8 + q_tmp_2_8; + q_acc_2_9 = q_acc_2_9 + q_tmp_2_9; + q_acc_2_10 = q_acc_2_10 + q_tmp_2_10; + q_acc_2_11 = q_acc_2_11 + q_tmp_2_11; + q_acc_2_12 = q_acc_2_12 + q_tmp_2_12; + q_acc_2_13 = q_acc_2_13 + q_tmp_2_13; + q_acc_2_14 = q_acc_2_14 + q_tmp_2_14; + q_acc_2_15 = q_acc_2_15 + q_tmp_2_15; + q_acc_2_16 = q_acc_2_16 + q_tmp_2_16; + q_acc_2_17 = q_acc_2_17 + q_tmp_2_17; + q_acc_2_18 = q_acc_2_18 + q_tmp_2_18; + q_acc_2_19 = q_acc_2_19 + q_tmp_2_19; + q_acc_2_20 = q_acc_2_20 + q_tmp_2_20; + q_acc_2_21 = q_acc_2_21 + q_tmp_2_21; + q_acc_2_22 = q_acc_2_22 + q_tmp_2_22; + q_acc_2_23 = q_acc_2_23 + q_tmp_2_23; + q_acc_2_24 = q_acc_2_24 + q_tmp_2_24; + q_acc_2_25 = q_acc_2_25 + q_tmp_2_25; + q_acc_2_26 = q_acc_2_26 + q_tmp_2_26; + q_acc_2_27 = q_acc_2_27 + q_tmp_2_27; + q_acc_2_28 = q_acc_2_28 + q_tmp_2_28; + q_acc_2_29 = q_acc_2_29 + q_tmp_2_29; + q_acc_3_0 = q_acc_3_0 + q_tmp_3_0; + q_acc_3_1 = q_acc_3_1 + q_tmp_3_1; + q_acc_3_2 = q_acc_3_2 + q_tmp_3_2; + q_acc_3_3 = q_acc_3_3 + q_tmp_3_3; + q_acc_3_4 = q_acc_3_4 + q_tmp_3_4; + q_acc_3_5 = q_acc_3_5 + q_tmp_3_5; + q_acc_3_6 = q_acc_3_6 + q_tmp_3_6; + q_acc_3_7 = q_acc_3_7 + q_tmp_3_7; + q_acc_3_8 = q_acc_3_8 + q_tmp_3_8; + q_acc_3_9 = q_acc_3_9 + q_tmp_3_9; + q_acc_3_10 = q_acc_3_10 + q_tmp_3_10; + q_acc_3_11 = q_acc_3_11 + q_tmp_3_11; + q_acc_3_12 = q_acc_3_12 + q_tmp_3_12; + q_acc_3_13 = q_acc_3_13 + q_tmp_3_13; + q_acc_3_14 = q_acc_3_14 + q_tmp_3_14; + q_acc_3_15 = q_acc_3_15 + q_tmp_3_15; + q_acc_3_16 = q_acc_3_16 + q_tmp_3_16; + q_acc_3_17 = q_acc_3_17 + q_tmp_3_17; + q_acc_3_18 = q_acc_3_18 + q_tmp_3_18; + q_acc_3_19 = q_acc_3_19 + q_tmp_3_19; + q_acc_3_20 = q_acc_3_20 + q_tmp_3_20; + q_acc_3_21 = q_acc_3_21 + q_tmp_3_21; + q_acc_3_22 = q_acc_3_22 + q_tmp_3_22; + q_acc_3_23 = q_acc_3_23 + q_tmp_3_23; + q_acc_3_24 = q_acc_3_24 + q_tmp_3_24; + q_acc_3_25 = q_acc_3_25 + q_tmp_3_25; + q_acc_3_26 = q_acc_3_26 + q_tmp_3_26; + q_acc_3_27 = q_acc_3_27 + q_tmp_3_27; + q_acc_3_28 = q_acc_3_28 + q_tmp_3_28; + q_acc_3_29 = q_acc_3_29 + q_tmp_3_29; + } + const real_t elMatVec_0 = q_acc_0_0*src_dof_0 + q_acc_0_1*src_dof_1 + q_acc_0_10*src_dof_10 + q_acc_0_11*src_dof_11 + q_acc_0_12*src_dof_12 + q_acc_0_13*src_dof_13 + q_acc_0_14*src_dof_14 + q_acc_0_15*src_dof_15 + q_acc_0_16*src_dof_16 + q_acc_0_17*src_dof_17 + q_acc_0_18*src_dof_18 + q_acc_0_19*src_dof_19 + q_acc_0_2*src_dof_2 + q_acc_0_20*src_dof_20 + q_acc_0_21*src_dof_21 + q_acc_0_22*src_dof_22 + q_acc_0_23*src_dof_23 + q_acc_0_24*src_dof_24 + q_acc_0_25*src_dof_25 + q_acc_0_26*src_dof_26 + q_acc_0_27*src_dof_27 + q_acc_0_28*src_dof_28 + q_acc_0_29*src_dof_29 + q_acc_0_3*src_dof_3 + q_acc_0_4*src_dof_4 + q_acc_0_5*src_dof_5 + q_acc_0_6*src_dof_6 + q_acc_0_7*src_dof_7 + q_acc_0_8*src_dof_8 + q_acc_0_9*src_dof_9; + const real_t elMatVec_1 = q_acc_1_0*src_dof_0 + q_acc_1_1*src_dof_1 + q_acc_1_10*src_dof_10 + q_acc_1_11*src_dof_11 + q_acc_1_12*src_dof_12 + q_acc_1_13*src_dof_13 + q_acc_1_14*src_dof_14 + q_acc_1_15*src_dof_15 + q_acc_1_16*src_dof_16 + q_acc_1_17*src_dof_17 + q_acc_1_18*src_dof_18 + q_acc_1_19*src_dof_19 + q_acc_1_2*src_dof_2 + q_acc_1_20*src_dof_20 + q_acc_1_21*src_dof_21 + q_acc_1_22*src_dof_22 + q_acc_1_23*src_dof_23 + q_acc_1_24*src_dof_24 + q_acc_1_25*src_dof_25 + q_acc_1_26*src_dof_26 + q_acc_1_27*src_dof_27 + q_acc_1_28*src_dof_28 + q_acc_1_29*src_dof_29 + q_acc_1_3*src_dof_3 + q_acc_1_4*src_dof_4 + q_acc_1_5*src_dof_5 + q_acc_1_6*src_dof_6 + q_acc_1_7*src_dof_7 + q_acc_1_8*src_dof_8 + q_acc_1_9*src_dof_9; + const real_t elMatVec_2 = q_acc_2_0*src_dof_0 + q_acc_2_1*src_dof_1 + q_acc_2_10*src_dof_10 + q_acc_2_11*src_dof_11 + q_acc_2_12*src_dof_12 + q_acc_2_13*src_dof_13 + q_acc_2_14*src_dof_14 + q_acc_2_15*src_dof_15 + q_acc_2_16*src_dof_16 + q_acc_2_17*src_dof_17 + q_acc_2_18*src_dof_18 + q_acc_2_19*src_dof_19 + q_acc_2_2*src_dof_2 + q_acc_2_20*src_dof_20 + q_acc_2_21*src_dof_21 + q_acc_2_22*src_dof_22 + q_acc_2_23*src_dof_23 + q_acc_2_24*src_dof_24 + q_acc_2_25*src_dof_25 + q_acc_2_26*src_dof_26 + q_acc_2_27*src_dof_27 + q_acc_2_28*src_dof_28 + q_acc_2_29*src_dof_29 + q_acc_2_3*src_dof_3 + q_acc_2_4*src_dof_4 + q_acc_2_5*src_dof_5 + q_acc_2_6*src_dof_6 + q_acc_2_7*src_dof_7 + q_acc_2_8*src_dof_8 + q_acc_2_9*src_dof_9; + const real_t elMatVec_3 = q_acc_3_0*src_dof_0 + q_acc_3_1*src_dof_1 + q_acc_3_10*src_dof_10 + q_acc_3_11*src_dof_11 + q_acc_3_12*src_dof_12 + q_acc_3_13*src_dof_13 + q_acc_3_14*src_dof_14 + q_acc_3_15*src_dof_15 + q_acc_3_16*src_dof_16 + q_acc_3_17*src_dof_17 + q_acc_3_18*src_dof_18 + q_acc_3_19*src_dof_19 + q_acc_3_2*src_dof_2 + q_acc_3_20*src_dof_20 + q_acc_3_21*src_dof_21 + q_acc_3_22*src_dof_22 + q_acc_3_23*src_dof_23 + q_acc_3_24*src_dof_24 + q_acc_3_25*src_dof_25 + q_acc_3_26*src_dof_26 + q_acc_3_27*src_dof_27 + q_acc_3_28*src_dof_28 + q_acc_3_29*src_dof_29 + q_acc_3_3*src_dof_3 + q_acc_3_4*src_dof_4 + q_acc_3_5*src_dof_5 + q_acc_3_6*src_dof_6 + q_acc_3_7*src_dof_7 + q_acc_3_8*src_dof_8 + q_acc_3_9*src_dof_9; + _data_dst[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1] = elMatVec_0 + _data_dst[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1]; + _data_dst[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))] = elMatVec_1 + _data_dst[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))]; + _data_dst[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1] = elMatVec_2 + _data_dst[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1]; + _data_dst[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1] = elMatVec_3 + _data_dst[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1]; + } + } + } + const real_t tmp_coords_jac_0_BLUE_DOWN = 1.0 / (micro_edges_per_macro_edge_float)*1.0; + const real_t tmp_coords_jac_1_BLUE_DOWN = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_BLUE_DOWN*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0); + const real_t tmp_coords_jac_2_BLUE_DOWN = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_BLUE_DOWN*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1); + const real_t tmp_coords_jac_3_BLUE_DOWN = macro_vertex_coord_id_0comp2 + tmp_coords_jac_0_BLUE_DOWN*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2); + const real_t tmp_coords_jac_4_BLUE_DOWN = tmp_coords_jac_0_BLUE_DOWN*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0); + const real_t tmp_coords_jac_5_BLUE_DOWN = macro_vertex_coord_id_0comp0 + tmp_coords_jac_4_BLUE_DOWN; + const real_t tmp_coords_jac_6_BLUE_DOWN = tmp_coords_jac_0_BLUE_DOWN*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1); + const real_t tmp_coords_jac_7_BLUE_DOWN = macro_vertex_coord_id_0comp1 + tmp_coords_jac_6_BLUE_DOWN; + const real_t tmp_coords_jac_8_BLUE_DOWN = tmp_coords_jac_0_BLUE_DOWN*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2); + const real_t tmp_coords_jac_9_BLUE_DOWN = macro_vertex_coord_id_0comp2 + tmp_coords_jac_8_BLUE_DOWN; + const real_t p_affine_const_0_0_BLUE_DOWN = tmp_coords_jac_1_BLUE_DOWN; + const real_t p_affine_const_0_1_BLUE_DOWN = tmp_coords_jac_2_BLUE_DOWN; + const real_t p_affine_const_0_2_BLUE_DOWN = tmp_coords_jac_3_BLUE_DOWN; + const real_t p_affine_const_1_0_BLUE_DOWN = tmp_coords_jac_5_BLUE_DOWN; + const real_t p_affine_const_1_1_BLUE_DOWN = tmp_coords_jac_7_BLUE_DOWN; + const real_t p_affine_const_1_2_BLUE_DOWN = tmp_coords_jac_9_BLUE_DOWN; + const real_t p_affine_const_2_0_BLUE_DOWN = tmp_coords_jac_0_BLUE_DOWN*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0) + tmp_coords_jac_5_BLUE_DOWN; + const real_t p_affine_const_2_1_BLUE_DOWN = tmp_coords_jac_0_BLUE_DOWN*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1) + tmp_coords_jac_7_BLUE_DOWN; + const real_t p_affine_const_2_2_BLUE_DOWN = tmp_coords_jac_0_BLUE_DOWN*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2) + tmp_coords_jac_9_BLUE_DOWN; + const real_t p_affine_const_3_0_BLUE_DOWN = tmp_coords_jac_1_BLUE_DOWN + tmp_coords_jac_4_BLUE_DOWN; + const real_t p_affine_const_3_1_BLUE_DOWN = tmp_coords_jac_2_BLUE_DOWN + tmp_coords_jac_6_BLUE_DOWN; + const real_t p_affine_const_3_2_BLUE_DOWN = tmp_coords_jac_3_BLUE_DOWN + tmp_coords_jac_8_BLUE_DOWN; + const real_t jac_affine_0_0_BLUE_DOWN = -p_affine_const_0_0_BLUE_DOWN + p_affine_const_1_0_BLUE_DOWN; + const real_t jac_affine_0_1_BLUE_DOWN = -p_affine_const_0_0_BLUE_DOWN + p_affine_const_2_0_BLUE_DOWN; + const real_t jac_affine_0_2_BLUE_DOWN = -p_affine_const_0_0_BLUE_DOWN + p_affine_const_3_0_BLUE_DOWN; + const real_t jac_affine_1_0_BLUE_DOWN = -p_affine_const_0_1_BLUE_DOWN + p_affine_const_1_1_BLUE_DOWN; + const real_t jac_affine_1_1_BLUE_DOWN = -p_affine_const_0_1_BLUE_DOWN + p_affine_const_2_1_BLUE_DOWN; + const real_t tmp_coords_jac_14_BLUE_DOWN = jac_affine_0_2_BLUE_DOWN*jac_affine_1_1_BLUE_DOWN; + const real_t jac_affine_1_2_BLUE_DOWN = -p_affine_const_0_1_BLUE_DOWN + p_affine_const_3_1_BLUE_DOWN; + const real_t tmp_coords_jac_12_BLUE_DOWN = jac_affine_0_1_BLUE_DOWN*jac_affine_1_2_BLUE_DOWN; + const real_t jac_affine_2_0_BLUE_DOWN = -p_affine_const_0_2_BLUE_DOWN + p_affine_const_1_2_BLUE_DOWN; + const real_t jac_affine_2_1_BLUE_DOWN = -p_affine_const_0_2_BLUE_DOWN + p_affine_const_2_2_BLUE_DOWN; + const real_t tmp_coords_jac_11_BLUE_DOWN = jac_affine_1_2_BLUE_DOWN*jac_affine_2_1_BLUE_DOWN; + const real_t jac_affine_2_2_BLUE_DOWN = -p_affine_const_0_2_BLUE_DOWN + p_affine_const_3_2_BLUE_DOWN; + const real_t tmp_coords_jac_10_BLUE_DOWN = jac_affine_1_1_BLUE_DOWN*jac_affine_2_2_BLUE_DOWN; + const real_t tmp_coords_jac_13_BLUE_DOWN = jac_affine_0_1_BLUE_DOWN*jac_affine_2_2_BLUE_DOWN; + const real_t tmp_coords_jac_15_BLUE_DOWN = jac_affine_0_0_BLUE_DOWN*tmp_coords_jac_10_BLUE_DOWN - jac_affine_0_0_BLUE_DOWN*tmp_coords_jac_11_BLUE_DOWN + jac_affine_0_2_BLUE_DOWN*jac_affine_1_0_BLUE_DOWN*jac_affine_2_1_BLUE_DOWN - jac_affine_1_0_BLUE_DOWN*tmp_coords_jac_13_BLUE_DOWN + jac_affine_2_0_BLUE_DOWN*tmp_coords_jac_12_BLUE_DOWN - jac_affine_2_0_BLUE_DOWN*tmp_coords_jac_14_BLUE_DOWN; + const real_t tmp_coords_jac_16_BLUE_DOWN = 1.0 / (tmp_coords_jac_15_BLUE_DOWN); + const real_t jac_affine_inv_0_0_BLUE_DOWN = tmp_coords_jac_16_BLUE_DOWN*(tmp_coords_jac_10_BLUE_DOWN - tmp_coords_jac_11_BLUE_DOWN); + const real_t jac_affine_inv_0_1_BLUE_DOWN = tmp_coords_jac_16_BLUE_DOWN*(jac_affine_0_2_BLUE_DOWN*jac_affine_2_1_BLUE_DOWN - tmp_coords_jac_13_BLUE_DOWN); + const real_t jac_affine_inv_0_2_BLUE_DOWN = tmp_coords_jac_16_BLUE_DOWN*(tmp_coords_jac_12_BLUE_DOWN - tmp_coords_jac_14_BLUE_DOWN); + const real_t jac_affine_inv_1_0_BLUE_DOWN = tmp_coords_jac_16_BLUE_DOWN*(-jac_affine_1_0_BLUE_DOWN*jac_affine_2_2_BLUE_DOWN + jac_affine_1_2_BLUE_DOWN*jac_affine_2_0_BLUE_DOWN); + const real_t jac_affine_inv_1_1_BLUE_DOWN = tmp_coords_jac_16_BLUE_DOWN*(jac_affine_0_0_BLUE_DOWN*jac_affine_2_2_BLUE_DOWN - jac_affine_0_2_BLUE_DOWN*jac_affine_2_0_BLUE_DOWN); + const real_t jac_affine_inv_1_2_BLUE_DOWN = tmp_coords_jac_16_BLUE_DOWN*(-jac_affine_0_0_BLUE_DOWN*jac_affine_1_2_BLUE_DOWN + jac_affine_0_2_BLUE_DOWN*jac_affine_1_0_BLUE_DOWN); + const real_t jac_affine_inv_2_0_BLUE_DOWN = tmp_coords_jac_16_BLUE_DOWN*(jac_affine_1_0_BLUE_DOWN*jac_affine_2_1_BLUE_DOWN - jac_affine_1_1_BLUE_DOWN*jac_affine_2_0_BLUE_DOWN); + const real_t jac_affine_inv_2_1_BLUE_DOWN = tmp_coords_jac_16_BLUE_DOWN*(-jac_affine_0_0_BLUE_DOWN*jac_affine_2_1_BLUE_DOWN + jac_affine_0_1_BLUE_DOWN*jac_affine_2_0_BLUE_DOWN); + const real_t jac_affine_inv_2_2_BLUE_DOWN = tmp_coords_jac_16_BLUE_DOWN*(jac_affine_0_0_BLUE_DOWN*jac_affine_1_1_BLUE_DOWN - jac_affine_0_1_BLUE_DOWN*jac_affine_1_0_BLUE_DOWN); + const real_t abs_det_jac_affine_BLUE_DOWN = abs(tmp_coords_jac_15_BLUE_DOWN); + { + /* CellType.BLUE_DOWN */ + for (int64_t ctr_2 = 0; ctr_2 < micro_edges_per_macro_edge; ctr_2 += 1) + for (int64_t ctr_1 = 0; ctr_1 < -ctr_2 + micro_edges_per_macro_edge; ctr_1 += 1) + { + for (int64_t ctr_0 = 0; ctr_0 < (int64_t)((-ctr_1 - ctr_2 + micro_edges_per_macro_edge - 1) / (4)) * (4); ctr_0 += 4) + { + + const int64_t phantom_ctr_0 = ctr_0; + real_t _data_float_loop_ctr_array_dim_0[4]; + _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0; + _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1; + _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2; + _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3; + real_t _data_float_loop_ctr_array_dim_1[4]; + _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1; + real_t _data_float_loop_ctr_array_dim_2[4]; + _data_float_loop_ctr_array_dim_2[0] = (real_t) ctr_2; + _data_float_loop_ctr_array_dim_2[1] = (real_t) ctr_2; + _data_float_loop_ctr_array_dim_2[2] = (real_t) ctr_2; + _data_float_loop_ctr_array_dim_2[3] = (real_t) ctr_2; + + const __m256d p_affine_0_0 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_3comp0,macro_vertex_coord_id_3comp0,macro_vertex_coord_id_3comp0,macro_vertex_coord_id_3comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)); + const __m256d p_affine_0_1 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_3comp1,macro_vertex_coord_id_3comp1,macro_vertex_coord_id_3comp1,macro_vertex_coord_id_3comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)); + const __m256d p_affine_0_2 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)),_mm256_set_pd(macro_vertex_coord_id_1comp2,macro_vertex_coord_id_1comp2,macro_vertex_coord_id_1comp2,macro_vertex_coord_id_1comp2)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)),_mm256_set_pd(macro_vertex_coord_id_2comp2,macro_vertex_coord_id_2comp2,macro_vertex_coord_id_2comp2,macro_vertex_coord_id_2comp2)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)),_mm256_set_pd(macro_vertex_coord_id_3comp2,macro_vertex_coord_id_3comp2,macro_vertex_coord_id_3comp2,macro_vertex_coord_id_3comp2)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)); + const __m256d p_affine_1_0 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_3comp0,macro_vertex_coord_id_3comp0,macro_vertex_coord_id_3comp0,macro_vertex_coord_id_3comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)); + const __m256d p_affine_1_1 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_3comp1,macro_vertex_coord_id_3comp1,macro_vertex_coord_id_3comp1,macro_vertex_coord_id_3comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)); + const __m256d p_affine_1_2 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)),_mm256_set_pd(macro_vertex_coord_id_1comp2,macro_vertex_coord_id_1comp2,macro_vertex_coord_id_1comp2,macro_vertex_coord_id_1comp2)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)),_mm256_set_pd(macro_vertex_coord_id_2comp2,macro_vertex_coord_id_2comp2,macro_vertex_coord_id_2comp2,macro_vertex_coord_id_2comp2)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)),_mm256_set_pd(macro_vertex_coord_id_3comp2,macro_vertex_coord_id_3comp2,macro_vertex_coord_id_3comp2,macro_vertex_coord_id_3comp2)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)); + const __m256d p_affine_2_0 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_3comp0,macro_vertex_coord_id_3comp0,macro_vertex_coord_id_3comp0,macro_vertex_coord_id_3comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)); + const __m256d p_affine_2_1 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_3comp1,macro_vertex_coord_id_3comp1,macro_vertex_coord_id_3comp1,macro_vertex_coord_id_3comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)); + const __m256d p_affine_2_2 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)),_mm256_set_pd(macro_vertex_coord_id_1comp2,macro_vertex_coord_id_1comp2,macro_vertex_coord_id_1comp2,macro_vertex_coord_id_1comp2)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)),_mm256_set_pd(macro_vertex_coord_id_2comp2,macro_vertex_coord_id_2comp2,macro_vertex_coord_id_2comp2,macro_vertex_coord_id_2comp2)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)),_mm256_set_pd(macro_vertex_coord_id_3comp2,macro_vertex_coord_id_3comp2,macro_vertex_coord_id_3comp2,macro_vertex_coord_id_3comp2)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)); + const __m256d p_affine_3_0 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_3comp0,macro_vertex_coord_id_3comp0,macro_vertex_coord_id_3comp0,macro_vertex_coord_id_3comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)); + const __m256d p_affine_3_1 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_3comp1,macro_vertex_coord_id_3comp1,macro_vertex_coord_id_3comp1,macro_vertex_coord_id_3comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)); + const __m256d p_affine_3_2 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)),_mm256_set_pd(macro_vertex_coord_id_1comp2,macro_vertex_coord_id_1comp2,macro_vertex_coord_id_1comp2,macro_vertex_coord_id_1comp2)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)),_mm256_set_pd(macro_vertex_coord_id_2comp2,macro_vertex_coord_id_2comp2,macro_vertex_coord_id_2comp2,macro_vertex_coord_id_2comp2)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)),_mm256_set_pd(macro_vertex_coord_id_3comp2,macro_vertex_coord_id_3comp2,macro_vertex_coord_id_3comp2,macro_vertex_coord_id_3comp2)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)); + const __m256d src_dof_0 = _mm256_loadu_pd(& _data_src_vertex_0[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))]); + const __m256d src_dof_1 = _mm256_loadu_pd(& _data_src_vertex_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))]); + const __m256d src_dof_2 = _mm256_loadu_pd(& _data_src_vertex_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1]); + const __m256d src_dof_3 = _mm256_loadu_pd(& _data_src_vertex_0[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))]); + const __m256d src_dof_4 = _mm256_loadu_pd(& _data_src_edge_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + 4*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))]); + const __m256d src_dof_5 = _mm256_loadu_pd(& _data_src_edge_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))]); + const __m256d src_dof_6 = _mm256_loadu_pd(& _data_src_edge_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))]); + const __m256d src_dof_7 = _mm256_loadu_pd(& _data_src_edge_0[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 3*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]); + const __m256d src_dof_8 = _mm256_loadu_pd(& _data_src_edge_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge - 1)*(micro_edges_per_macro_edge + 1)) / (6)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))]); + const __m256d src_dof_9 = _mm256_loadu_pd(& _data_src_edge_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]); + const __m256d src_dof_10 = _mm256_loadu_pd(& _data_src_vertex_1[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))]); + const __m256d src_dof_11 = _mm256_loadu_pd(& _data_src_vertex_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))]); + const __m256d src_dof_12 = _mm256_loadu_pd(& _data_src_vertex_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1]); + const __m256d src_dof_13 = _mm256_loadu_pd(& _data_src_vertex_1[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))]); + const __m256d src_dof_14 = _mm256_loadu_pd(& _data_src_edge_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + 4*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))]); + const __m256d src_dof_15 = _mm256_loadu_pd(& _data_src_edge_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))]); + const __m256d src_dof_16 = _mm256_loadu_pd(& _data_src_edge_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))]); + const __m256d src_dof_17 = _mm256_loadu_pd(& _data_src_edge_1[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 3*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]); + const __m256d src_dof_18 = _mm256_loadu_pd(& _data_src_edge_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge - 1)*(micro_edges_per_macro_edge + 1)) / (6)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))]); + const __m256d src_dof_19 = _mm256_loadu_pd(& _data_src_edge_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]); + const __m256d src_dof_20 = _mm256_loadu_pd(& _data_src_vertex_2[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))]); + const __m256d src_dof_21 = _mm256_loadu_pd(& _data_src_vertex_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))]); + const __m256d src_dof_22 = _mm256_loadu_pd(& _data_src_vertex_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1]); + const __m256d src_dof_23 = _mm256_loadu_pd(& _data_src_vertex_2[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))]); + const __m256d src_dof_24 = _mm256_loadu_pd(& _data_src_edge_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + 4*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))]); + const __m256d src_dof_25 = _mm256_loadu_pd(& _data_src_edge_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))]); + const __m256d src_dof_26 = _mm256_loadu_pd(& _data_src_edge_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))]); + const __m256d src_dof_27 = _mm256_loadu_pd(& _data_src_edge_2[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 3*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]); + const __m256d src_dof_28 = _mm256_loadu_pd(& _data_src_edge_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge - 1)*(micro_edges_per_macro_edge + 1)) / (6)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))]); + const __m256d src_dof_29 = _mm256_loadu_pd(& _data_src_edge_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]); + const __m256d rho_dof_0 = _mm256_loadu_pd(& _data_rhoVertex[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))]); + const __m256d rho_dof_1 = _mm256_loadu_pd(& _data_rhoVertex[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))]); + const __m256d rho_dof_2 = _mm256_loadu_pd(& _data_rhoVertex[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1]); + const __m256d rho_dof_3 = _mm256_loadu_pd(& _data_rhoVertex[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))]); + const __m256d rho_dof_4 = _mm256_loadu_pd(& _data_rhoEdge[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + 4*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))]); + const __m256d rho_dof_5 = _mm256_loadu_pd(& _data_rhoEdge[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))]); + const __m256d rho_dof_6 = _mm256_loadu_pd(& _data_rhoEdge[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))]); + const __m256d rho_dof_7 = _mm256_loadu_pd(& _data_rhoEdge[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 3*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]); + const __m256d rho_dof_8 = _mm256_loadu_pd(& _data_rhoEdge[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge - 1)*(micro_edges_per_macro_edge + 1)) / (6)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))]); + const __m256d rho_dof_9 = _mm256_loadu_pd(& _data_rhoEdge[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]); + __m256d q_acc_0_0 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_1 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_2 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_3 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_4 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_5 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_6 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_7 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_8 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_9 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_10 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_11 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_12 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_13 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_14 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_15 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_16 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_17 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_18 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_19 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_20 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_21 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_22 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_23 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_24 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_25 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_26 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_27 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_28 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_29 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_0 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_1 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_2 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_3 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_4 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_5 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_6 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_7 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_8 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_9 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_10 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_11 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_12 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_13 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_14 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_15 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_16 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_17 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_18 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_19 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_20 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_21 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_22 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_23 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_24 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_25 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_26 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_27 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_28 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_29 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_0 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_1 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_2 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_3 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_4 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_5 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_6 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_7 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_8 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_9 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_10 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_11 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_12 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_13 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_14 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_15 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_16 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_17 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_18 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_19 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_20 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_21 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_22 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_23 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_24 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_25 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_26 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_27 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_28 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_29 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_0 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_1 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_2 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_3 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_4 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_5 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_6 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_7 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_8 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_9 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_10 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_11 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_12 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_13 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_14 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_15 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_16 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_17 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_18 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_19 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_20 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_21 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_22 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_23 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_24 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_25 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_26 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_27 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_28 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_29 = _mm256_set_pd(0.0,0.0,0.0,0.0); + for (int64_t q = 0; q < 5; q += 1) + { + const __m256d tmp_qloop_0 = _mm256_mul_pd(_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q]),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])); + const __m256d tmp_qloop_1 = _mm256_mul_pd(tmp_qloop_0,_mm256_set_pd(2.0,2.0,2.0,2.0)); + const __m256d tmp_qloop_2 = _mm256_mul_pd(_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q])); + const __m256d tmp_qloop_3 = _mm256_mul_pd(tmp_qloop_2,_mm256_set_pd(2.0,2.0,2.0,2.0)); + const __m256d tmp_qloop_4 = _mm256_mul_pd(_mm256_set_pd(_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q]),_mm256_set_pd(_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q])); + const __m256d tmp_qloop_5 = _mm256_mul_pd(tmp_qloop_4,_mm256_set_pd(2.0,2.0,2.0,2.0)); + const __m256d tmp_qloop_6 = _mm256_mul_pd(_mm256_set_pd(4.0,4.0,4.0,4.0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q])); + const __m256d tmp_qloop_7 = _mm256_mul_pd(tmp_qloop_6,_mm256_set_pd(_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q])); + const __m256d tmp_qloop_8 = _mm256_mul_pd(_mm256_set_pd(4.0,4.0,4.0,4.0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])); + const __m256d tmp_qloop_9 = _mm256_mul_pd(tmp_qloop_8,_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q])); + const __m256d tmp_qloop_10 = _mm256_mul_pd(tmp_qloop_8,_mm256_set_pd(_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q])); + const __m256d tmp_qloop_11 = _mm256_add_pd(tmp_qloop_10,tmp_qloop_9); + const __m256d tmp_qloop_12 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-3.0,-3.0,-3.0,-3.0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),_mm256_mul_pd(_mm256_set_pd(-3.0,-3.0,-3.0,-3.0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]))),_mm256_mul_pd(_mm256_set_pd(-3.0,-3.0,-3.0,-3.0),_mm256_set_pd(_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),tmp_qloop_1),tmp_qloop_11),tmp_qloop_3),tmp_qloop_5),tmp_qloop_7); + const __m256d tmp_qloop_13 = _mm256_mul_pd(_mm256_set_pd(4.0,4.0,4.0,4.0),_mm256_set_pd(_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q])); + const __m256d tmp_qloop_14 = _mm256_add_pd(tmp_qloop_13,tmp_qloop_6); + const __m256d tmp_qloop_15 = _mm256_mul_pd(_mm256_mul_pd(rho_dof_8,tmp_qloop_6),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)); + const __m256d tmp_qloop_16 = _mm256_mul_pd(rho_dof_0,_mm256_add_pd(_mm256_add_pd(_mm256_set_pd(-3.0,-3.0,-3.0,-3.0),tmp_qloop_14),tmp_qloop_8)); + const __m256d tmp_qloop_17 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(rho_dof_7,tmp_qloop_13),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),tmp_qloop_16); + const __m256d tmp_qloop_18 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(rho_dof_1,_mm256_add_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),tmp_qloop_8)),_mm256_mul_pd(rho_dof_9,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_14,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_set_pd(-8.0,-8.0,-8.0,-8.0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q]))),_mm256_set_pd(4.0,4.0,4.0,4.0)))),_mm256_mul_pd(rho_dof_5,tmp_qloop_13)),_mm256_mul_pd(rho_dof_6,tmp_qloop_6)),tmp_qloop_15),tmp_qloop_17); + const __m256d tmp_qloop_19 = _mm256_add_pd(_mm256_set_pd(-4.0,-4.0,-4.0,-4.0),tmp_qloop_8); + const __m256d tmp_qloop_20 = _mm256_mul_pd(_mm256_mul_pd(rho_dof_9,tmp_qloop_8),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)); + const __m256d tmp_qloop_21 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(rho_dof_2,_mm256_add_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),tmp_qloop_6)),_mm256_mul_pd(rho_dof_8,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_13,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_19,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_mul_pd(_mm256_set_pd(-8.0,-8.0,-8.0,-8.0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]))))),_mm256_mul_pd(rho_dof_4,tmp_qloop_13)),_mm256_mul_pd(rho_dof_6,tmp_qloop_8)),tmp_qloop_17),tmp_qloop_20); + const __m256d tmp_qloop_22 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(rho_dof_3,_mm256_add_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),tmp_qloop_13)),_mm256_mul_pd(rho_dof_7,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_19,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_6,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_mul_pd(_mm256_set_pd(-8.0,-8.0,-8.0,-8.0),_mm256_set_pd(_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q]))))),_mm256_mul_pd(rho_dof_4,tmp_qloop_6)),_mm256_mul_pd(rho_dof_5,tmp_qloop_8)),tmp_qloop_15),tmp_qloop_16),tmp_qloop_20); + const __m256d tmp_qloop_23 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_18,_mm256_set_pd(jac_affine_inv_0_0_BLUE_DOWN,jac_affine_inv_0_0_BLUE_DOWN,jac_affine_inv_0_0_BLUE_DOWN,jac_affine_inv_0_0_BLUE_DOWN)),_mm256_mul_pd(tmp_qloop_21,_mm256_set_pd(jac_affine_inv_1_0_BLUE_DOWN,jac_affine_inv_1_0_BLUE_DOWN,jac_affine_inv_1_0_BLUE_DOWN,jac_affine_inv_1_0_BLUE_DOWN))),_mm256_mul_pd(tmp_qloop_22,_mm256_set_pd(jac_affine_inv_2_0_BLUE_DOWN,jac_affine_inv_2_0_BLUE_DOWN,jac_affine_inv_2_0_BLUE_DOWN,jac_affine_inv_2_0_BLUE_DOWN))); + const __m256d tmp_qloop_24 = _mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),tmp_qloop_1); + const __m256d tmp_qloop_25 = _mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q])),tmp_qloop_3); + const __m256d tmp_qloop_26 = _mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q])),tmp_qloop_5); + const __m256d tmp_qloop_27 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_10,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_7,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_mul_pd(tmp_qloop_4,_mm256_set_pd(-4.0,-4.0,-4.0,-4.0))),tmp_qloop_13); + const __m256d tmp_qloop_28 = _mm256_mul_pd(tmp_qloop_2,_mm256_set_pd(4.0,4.0,4.0,4.0)); + const __m256d tmp_qloop_29 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_28,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_7,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_mul_pd(tmp_qloop_9,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_6); + const __m256d tmp_qloop_30 = _mm256_mul_pd(tmp_qloop_0,_mm256_set_pd(4.0,4.0,4.0,4.0)); + const __m256d tmp_qloop_31 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_11,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_30,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_8); + const __m256d tmp_qloop_32 = _mm256_mul_pd(_mm256_mul_pd(_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(rho_dof_0,tmp_qloop_12),_mm256_mul_pd(rho_dof_1,tmp_qloop_24)),_mm256_mul_pd(rho_dof_2,tmp_qloop_25)),_mm256_mul_pd(rho_dof_3,tmp_qloop_26)),_mm256_mul_pd(rho_dof_4,tmp_qloop_7)),_mm256_mul_pd(rho_dof_5,tmp_qloop_10)),_mm256_mul_pd(rho_dof_6,tmp_qloop_9)),_mm256_mul_pd(rho_dof_7,tmp_qloop_27)),_mm256_mul_pd(rho_dof_8,tmp_qloop_29)),_mm256_mul_pd(rho_dof_9,tmp_qloop_31))),_mm256_set_pd(_data_q_w[q],_data_q_w[q],_data_q_w[q],_data_q_w[q])),_mm256_set_pd(abs_det_jac_affine_BLUE_DOWN,abs_det_jac_affine_BLUE_DOWN,abs_det_jac_affine_BLUE_DOWN,abs_det_jac_affine_BLUE_DOWN)); + const __m256d tmp_qloop_33 = _mm256_mul_pd(tmp_qloop_32,_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]))),_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q]))),_mm256_set_pd(1.0,1.0,1.0,1.0))); + const __m256d tmp_qloop_34 = _mm256_mul_pd(tmp_qloop_23,tmp_qloop_33); + const __m256d tmp_qloop_35 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_18,_mm256_set_pd(jac_affine_inv_0_1_BLUE_DOWN,jac_affine_inv_0_1_BLUE_DOWN,jac_affine_inv_0_1_BLUE_DOWN,jac_affine_inv_0_1_BLUE_DOWN)),_mm256_mul_pd(tmp_qloop_21,_mm256_set_pd(jac_affine_inv_1_1_BLUE_DOWN,jac_affine_inv_1_1_BLUE_DOWN,jac_affine_inv_1_1_BLUE_DOWN,jac_affine_inv_1_1_BLUE_DOWN))),_mm256_mul_pd(tmp_qloop_22,_mm256_set_pd(jac_affine_inv_2_1_BLUE_DOWN,jac_affine_inv_2_1_BLUE_DOWN,jac_affine_inv_2_1_BLUE_DOWN,jac_affine_inv_2_1_BLUE_DOWN))); + const __m256d tmp_qloop_36 = _mm256_mul_pd(tmp_qloop_33,tmp_qloop_35); + const __m256d tmp_qloop_37 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_18,_mm256_set_pd(jac_affine_inv_0_2_BLUE_DOWN,jac_affine_inv_0_2_BLUE_DOWN,jac_affine_inv_0_2_BLUE_DOWN,jac_affine_inv_0_2_BLUE_DOWN)),_mm256_mul_pd(tmp_qloop_21,_mm256_set_pd(jac_affine_inv_1_2_BLUE_DOWN,jac_affine_inv_1_2_BLUE_DOWN,jac_affine_inv_1_2_BLUE_DOWN,jac_affine_inv_1_2_BLUE_DOWN))),_mm256_mul_pd(tmp_qloop_22,_mm256_set_pd(jac_affine_inv_2_2_BLUE_DOWN,jac_affine_inv_2_2_BLUE_DOWN,jac_affine_inv_2_2_BLUE_DOWN,jac_affine_inv_2_2_BLUE_DOWN))); + const __m256d tmp_qloop_38 = _mm256_mul_pd(tmp_qloop_33,tmp_qloop_37); + const __m256d tmp_qloop_39 = _mm256_mul_pd(tmp_qloop_32,_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])); + const __m256d tmp_qloop_40 = _mm256_mul_pd(tmp_qloop_23,tmp_qloop_39); + const __m256d tmp_qloop_41 = _mm256_mul_pd(tmp_qloop_32,_mm256_set_pd(_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q])); + const __m256d tmp_qloop_42 = _mm256_mul_pd(tmp_qloop_23,tmp_qloop_41); + const __m256d tmp_qloop_43 = _mm256_mul_pd(tmp_qloop_42,tmp_qloop_9); + const __m256d tmp_qloop_44 = _mm256_mul_pd(tmp_qloop_32,_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q])); + const __m256d tmp_qloop_45 = _mm256_mul_pd(tmp_qloop_23,tmp_qloop_44); + const __m256d tmp_qloop_46 = _mm256_mul_pd(tmp_qloop_35,tmp_qloop_39); + const __m256d tmp_qloop_47 = _mm256_mul_pd(tmp_qloop_35,tmp_qloop_41); + const __m256d tmp_qloop_48 = _mm256_mul_pd(tmp_qloop_47,tmp_qloop_9); + const __m256d tmp_qloop_49 = _mm256_mul_pd(tmp_qloop_35,tmp_qloop_44); + const __m256d tmp_qloop_50 = _mm256_mul_pd(tmp_qloop_37,tmp_qloop_39); + const __m256d tmp_qloop_51 = _mm256_mul_pd(tmp_qloop_37,tmp_qloop_41); + const __m256d tmp_qloop_52 = _mm256_mul_pd(tmp_qloop_51,tmp_qloop_9); + const __m256d tmp_qloop_53 = _mm256_mul_pd(tmp_qloop_37,tmp_qloop_44); + const __m256d tmp_qloop_54 = _mm256_mul_pd(tmp_qloop_23,tmp_qloop_32); + const __m256d tmp_qloop_55 = _mm256_mul_pd(tmp_qloop_2,tmp_qloop_8); + const __m256d tmp_qloop_56 = _mm256_mul_pd(tmp_qloop_32,tmp_qloop_55); + const __m256d tmp_qloop_57 = _mm256_mul_pd(tmp_qloop_4,tmp_qloop_54); + const __m256d tmp_qloop_58 = _mm256_mul_pd(tmp_qloop_32,tmp_qloop_4); + const __m256d tmp_qloop_59 = _mm256_mul_pd(tmp_qloop_35,tmp_qloop_58); + const __m256d tmp_qloop_60 = _mm256_mul_pd(tmp_qloop_37,tmp_qloop_58); + const __m256d q_tmp_0_0 = _mm256_mul_pd(tmp_qloop_12,tmp_qloop_34); + const __m256d q_tmp_0_1 = _mm256_mul_pd(tmp_qloop_24,tmp_qloop_34); + const __m256d q_tmp_0_2 = _mm256_mul_pd(tmp_qloop_25,tmp_qloop_34); + const __m256d q_tmp_0_3 = _mm256_mul_pd(tmp_qloop_26,tmp_qloop_34); + const __m256d q_tmp_0_4 = _mm256_mul_pd(tmp_qloop_34,tmp_qloop_7); + const __m256d q_tmp_0_5 = _mm256_mul_pd(tmp_qloop_10,tmp_qloop_34); + const __m256d q_tmp_0_6 = _mm256_mul_pd(tmp_qloop_34,tmp_qloop_9); + const __m256d q_tmp_0_7 = _mm256_mul_pd(tmp_qloop_27,tmp_qloop_34); + const __m256d q_tmp_0_8 = _mm256_mul_pd(tmp_qloop_29,tmp_qloop_34); + const __m256d q_tmp_0_9 = _mm256_mul_pd(tmp_qloop_31,tmp_qloop_34); + const __m256d q_tmp_0_10 = _mm256_mul_pd(tmp_qloop_12,tmp_qloop_36); + const __m256d q_tmp_0_11 = _mm256_mul_pd(tmp_qloop_24,tmp_qloop_36); + const __m256d q_tmp_0_12 = _mm256_mul_pd(tmp_qloop_25,tmp_qloop_36); + const __m256d q_tmp_0_13 = _mm256_mul_pd(tmp_qloop_26,tmp_qloop_36); + const __m256d q_tmp_0_14 = _mm256_mul_pd(tmp_qloop_36,tmp_qloop_7); + const __m256d q_tmp_0_15 = _mm256_mul_pd(tmp_qloop_10,tmp_qloop_36); + const __m256d q_tmp_0_16 = _mm256_mul_pd(tmp_qloop_36,tmp_qloop_9); + const __m256d q_tmp_0_17 = _mm256_mul_pd(tmp_qloop_27,tmp_qloop_36); + const __m256d q_tmp_0_18 = _mm256_mul_pd(tmp_qloop_29,tmp_qloop_36); + const __m256d q_tmp_0_19 = _mm256_mul_pd(tmp_qloop_31,tmp_qloop_36); + const __m256d q_tmp_0_20 = _mm256_mul_pd(tmp_qloop_12,tmp_qloop_38); + const __m256d q_tmp_0_21 = _mm256_mul_pd(tmp_qloop_24,tmp_qloop_38); + const __m256d q_tmp_0_22 = _mm256_mul_pd(tmp_qloop_25,tmp_qloop_38); + const __m256d q_tmp_0_23 = _mm256_mul_pd(tmp_qloop_26,tmp_qloop_38); + const __m256d q_tmp_0_24 = _mm256_mul_pd(tmp_qloop_38,tmp_qloop_7); + const __m256d q_tmp_0_25 = _mm256_mul_pd(tmp_qloop_10,tmp_qloop_38); + const __m256d q_tmp_0_26 = _mm256_mul_pd(tmp_qloop_38,tmp_qloop_9); + const __m256d q_tmp_0_27 = _mm256_mul_pd(tmp_qloop_27,tmp_qloop_38); + const __m256d q_tmp_0_28 = _mm256_mul_pd(tmp_qloop_29,tmp_qloop_38); + const __m256d q_tmp_0_29 = _mm256_mul_pd(tmp_qloop_31,tmp_qloop_38); + const __m256d q_tmp_1_0 = _mm256_mul_pd(tmp_qloop_12,tmp_qloop_40); + const __m256d q_tmp_1_1 = _mm256_mul_pd(tmp_qloop_24,tmp_qloop_40); + const __m256d q_tmp_1_2 = _mm256_mul_pd(tmp_qloop_25,tmp_qloop_40); + const __m256d q_tmp_1_3 = _mm256_mul_pd(tmp_qloop_26,tmp_qloop_40); + const __m256d q_tmp_1_4 = tmp_qloop_43; + const __m256d q_tmp_1_5 = _mm256_mul_pd(tmp_qloop_30,tmp_qloop_42); + const __m256d q_tmp_1_6 = _mm256_mul_pd(tmp_qloop_30,tmp_qloop_45); + const __m256d q_tmp_1_7 = _mm256_mul_pd(tmp_qloop_27,tmp_qloop_40); + const __m256d q_tmp_1_8 = _mm256_mul_pd(tmp_qloop_29,tmp_qloop_40); + const __m256d q_tmp_1_9 = _mm256_mul_pd(tmp_qloop_31,tmp_qloop_40); + const __m256d q_tmp_1_10 = _mm256_mul_pd(tmp_qloop_12,tmp_qloop_46); + const __m256d q_tmp_1_11 = _mm256_mul_pd(tmp_qloop_24,tmp_qloop_46); + const __m256d q_tmp_1_12 = _mm256_mul_pd(tmp_qloop_25,tmp_qloop_46); + const __m256d q_tmp_1_13 = _mm256_mul_pd(tmp_qloop_26,tmp_qloop_46); + const __m256d q_tmp_1_14 = tmp_qloop_48; + const __m256d q_tmp_1_15 = _mm256_mul_pd(tmp_qloop_30,tmp_qloop_47); + const __m256d q_tmp_1_16 = _mm256_mul_pd(tmp_qloop_30,tmp_qloop_49); + const __m256d q_tmp_1_17 = _mm256_mul_pd(tmp_qloop_27,tmp_qloop_46); + const __m256d q_tmp_1_18 = _mm256_mul_pd(tmp_qloop_29,tmp_qloop_46); + const __m256d q_tmp_1_19 = _mm256_mul_pd(tmp_qloop_31,tmp_qloop_46); + const __m256d q_tmp_1_20 = _mm256_mul_pd(tmp_qloop_12,tmp_qloop_50); + const __m256d q_tmp_1_21 = _mm256_mul_pd(tmp_qloop_24,tmp_qloop_50); + const __m256d q_tmp_1_22 = _mm256_mul_pd(tmp_qloop_25,tmp_qloop_50); + const __m256d q_tmp_1_23 = _mm256_mul_pd(tmp_qloop_26,tmp_qloop_50); + const __m256d q_tmp_1_24 = tmp_qloop_52; + const __m256d q_tmp_1_25 = _mm256_mul_pd(tmp_qloop_30,tmp_qloop_51); + const __m256d q_tmp_1_26 = _mm256_mul_pd(tmp_qloop_30,tmp_qloop_53); + const __m256d q_tmp_1_27 = _mm256_mul_pd(tmp_qloop_27,tmp_qloop_50); + const __m256d q_tmp_1_28 = _mm256_mul_pd(tmp_qloop_29,tmp_qloop_50); + const __m256d q_tmp_1_29 = _mm256_mul_pd(tmp_qloop_31,tmp_qloop_50); + const __m256d q_tmp_2_0 = _mm256_mul_pd(tmp_qloop_12,tmp_qloop_45); + const __m256d q_tmp_2_1 = _mm256_mul_pd(tmp_qloop_24,tmp_qloop_45); + const __m256d q_tmp_2_2 = _mm256_mul_pd(tmp_qloop_25,tmp_qloop_45); + const __m256d q_tmp_2_3 = _mm256_mul_pd(tmp_qloop_26,tmp_qloop_45); + const __m256d q_tmp_2_4 = _mm256_mul_pd(tmp_qloop_28,tmp_qloop_42); + const __m256d q_tmp_2_5 = tmp_qloop_43; + const __m256d q_tmp_2_6 = _mm256_mul_pd(tmp_qloop_54,tmp_qloop_55); + const __m256d q_tmp_2_7 = _mm256_mul_pd(tmp_qloop_27,tmp_qloop_45); + const __m256d q_tmp_2_8 = _mm256_mul_pd(tmp_qloop_29,tmp_qloop_45); + const __m256d q_tmp_2_9 = _mm256_mul_pd(tmp_qloop_31,tmp_qloop_45); + const __m256d q_tmp_2_10 = _mm256_mul_pd(tmp_qloop_12,tmp_qloop_49); + const __m256d q_tmp_2_11 = _mm256_mul_pd(tmp_qloop_24,tmp_qloop_49); + const __m256d q_tmp_2_12 = _mm256_mul_pd(tmp_qloop_25,tmp_qloop_49); + const __m256d q_tmp_2_13 = _mm256_mul_pd(tmp_qloop_26,tmp_qloop_49); + const __m256d q_tmp_2_14 = _mm256_mul_pd(tmp_qloop_28,tmp_qloop_47); + const __m256d q_tmp_2_15 = tmp_qloop_48; + const __m256d q_tmp_2_16 = _mm256_mul_pd(tmp_qloop_35,tmp_qloop_56); + const __m256d q_tmp_2_17 = _mm256_mul_pd(tmp_qloop_27,tmp_qloop_49); + const __m256d q_tmp_2_18 = _mm256_mul_pd(tmp_qloop_29,tmp_qloop_49); + const __m256d q_tmp_2_19 = _mm256_mul_pd(tmp_qloop_31,tmp_qloop_49); + const __m256d q_tmp_2_20 = _mm256_mul_pd(tmp_qloop_12,tmp_qloop_53); + const __m256d q_tmp_2_21 = _mm256_mul_pd(tmp_qloop_24,tmp_qloop_53); + const __m256d q_tmp_2_22 = _mm256_mul_pd(tmp_qloop_25,tmp_qloop_53); + const __m256d q_tmp_2_23 = _mm256_mul_pd(tmp_qloop_26,tmp_qloop_53); + const __m256d q_tmp_2_24 = _mm256_mul_pd(tmp_qloop_28,tmp_qloop_51); + const __m256d q_tmp_2_25 = tmp_qloop_52; + const __m256d q_tmp_2_26 = _mm256_mul_pd(tmp_qloop_37,tmp_qloop_56); + const __m256d q_tmp_2_27 = _mm256_mul_pd(tmp_qloop_27,tmp_qloop_53); + const __m256d q_tmp_2_28 = _mm256_mul_pd(tmp_qloop_29,tmp_qloop_53); + const __m256d q_tmp_2_29 = _mm256_mul_pd(tmp_qloop_31,tmp_qloop_53); + const __m256d q_tmp_3_0 = _mm256_mul_pd(tmp_qloop_12,tmp_qloop_42); + const __m256d q_tmp_3_1 = _mm256_mul_pd(tmp_qloop_24,tmp_qloop_42); + const __m256d q_tmp_3_2 = _mm256_mul_pd(tmp_qloop_25,tmp_qloop_42); + const __m256d q_tmp_3_3 = _mm256_mul_pd(tmp_qloop_26,tmp_qloop_42); + const __m256d q_tmp_3_4 = _mm256_mul_pd(tmp_qloop_57,tmp_qloop_6); + const __m256d q_tmp_3_5 = _mm256_mul_pd(tmp_qloop_57,tmp_qloop_8); + const __m256d q_tmp_3_6 = tmp_qloop_43; + const __m256d q_tmp_3_7 = _mm256_mul_pd(tmp_qloop_27,tmp_qloop_42); + const __m256d q_tmp_3_8 = _mm256_mul_pd(tmp_qloop_29,tmp_qloop_42); + const __m256d q_tmp_3_9 = _mm256_mul_pd(tmp_qloop_31,tmp_qloop_42); + const __m256d q_tmp_3_10 = _mm256_mul_pd(tmp_qloop_12,tmp_qloop_47); + const __m256d q_tmp_3_11 = _mm256_mul_pd(tmp_qloop_24,tmp_qloop_47); + const __m256d q_tmp_3_12 = _mm256_mul_pd(tmp_qloop_25,tmp_qloop_47); + const __m256d q_tmp_3_13 = _mm256_mul_pd(tmp_qloop_26,tmp_qloop_47); + const __m256d q_tmp_3_14 = _mm256_mul_pd(tmp_qloop_59,tmp_qloop_6); + const __m256d q_tmp_3_15 = _mm256_mul_pd(tmp_qloop_59,tmp_qloop_8); + const __m256d q_tmp_3_16 = tmp_qloop_48; + const __m256d q_tmp_3_17 = _mm256_mul_pd(tmp_qloop_27,tmp_qloop_47); + const __m256d q_tmp_3_18 = _mm256_mul_pd(tmp_qloop_29,tmp_qloop_47); + const __m256d q_tmp_3_19 = _mm256_mul_pd(tmp_qloop_31,tmp_qloop_47); + const __m256d q_tmp_3_20 = _mm256_mul_pd(tmp_qloop_12,tmp_qloop_51); + const __m256d q_tmp_3_21 = _mm256_mul_pd(tmp_qloop_24,tmp_qloop_51); + const __m256d q_tmp_3_22 = _mm256_mul_pd(tmp_qloop_25,tmp_qloop_51); + const __m256d q_tmp_3_23 = _mm256_mul_pd(tmp_qloop_26,tmp_qloop_51); + const __m256d q_tmp_3_24 = _mm256_mul_pd(tmp_qloop_6,tmp_qloop_60); + const __m256d q_tmp_3_25 = _mm256_mul_pd(tmp_qloop_60,tmp_qloop_8); + const __m256d q_tmp_3_26 = tmp_qloop_52; + const __m256d q_tmp_3_27 = _mm256_mul_pd(tmp_qloop_27,tmp_qloop_51); + const __m256d q_tmp_3_28 = _mm256_mul_pd(tmp_qloop_29,tmp_qloop_51); + const __m256d q_tmp_3_29 = _mm256_mul_pd(tmp_qloop_31,tmp_qloop_51); + q_acc_0_0 = _mm256_add_pd(q_acc_0_0,q_tmp_0_0); + q_acc_0_1 = _mm256_add_pd(q_acc_0_1,q_tmp_0_1); + q_acc_0_2 = _mm256_add_pd(q_acc_0_2,q_tmp_0_2); + q_acc_0_3 = _mm256_add_pd(q_acc_0_3,q_tmp_0_3); + q_acc_0_4 = _mm256_add_pd(q_acc_0_4,q_tmp_0_4); + q_acc_0_5 = _mm256_add_pd(q_acc_0_5,q_tmp_0_5); + q_acc_0_6 = _mm256_add_pd(q_acc_0_6,q_tmp_0_6); + q_acc_0_7 = _mm256_add_pd(q_acc_0_7,q_tmp_0_7); + q_acc_0_8 = _mm256_add_pd(q_acc_0_8,q_tmp_0_8); + q_acc_0_9 = _mm256_add_pd(q_acc_0_9,q_tmp_0_9); + q_acc_0_10 = _mm256_add_pd(q_acc_0_10,q_tmp_0_10); + q_acc_0_11 = _mm256_add_pd(q_acc_0_11,q_tmp_0_11); + q_acc_0_12 = _mm256_add_pd(q_acc_0_12,q_tmp_0_12); + q_acc_0_13 = _mm256_add_pd(q_acc_0_13,q_tmp_0_13); + q_acc_0_14 = _mm256_add_pd(q_acc_0_14,q_tmp_0_14); + q_acc_0_15 = _mm256_add_pd(q_acc_0_15,q_tmp_0_15); + q_acc_0_16 = _mm256_add_pd(q_acc_0_16,q_tmp_0_16); + q_acc_0_17 = _mm256_add_pd(q_acc_0_17,q_tmp_0_17); + q_acc_0_18 = _mm256_add_pd(q_acc_0_18,q_tmp_0_18); + q_acc_0_19 = _mm256_add_pd(q_acc_0_19,q_tmp_0_19); + q_acc_0_20 = _mm256_add_pd(q_acc_0_20,q_tmp_0_20); + q_acc_0_21 = _mm256_add_pd(q_acc_0_21,q_tmp_0_21); + q_acc_0_22 = _mm256_add_pd(q_acc_0_22,q_tmp_0_22); + q_acc_0_23 = _mm256_add_pd(q_acc_0_23,q_tmp_0_23); + q_acc_0_24 = _mm256_add_pd(q_acc_0_24,q_tmp_0_24); + q_acc_0_25 = _mm256_add_pd(q_acc_0_25,q_tmp_0_25); + q_acc_0_26 = _mm256_add_pd(q_acc_0_26,q_tmp_0_26); + q_acc_0_27 = _mm256_add_pd(q_acc_0_27,q_tmp_0_27); + q_acc_0_28 = _mm256_add_pd(q_acc_0_28,q_tmp_0_28); + q_acc_0_29 = _mm256_add_pd(q_acc_0_29,q_tmp_0_29); + q_acc_1_0 = _mm256_add_pd(q_acc_1_0,q_tmp_1_0); + q_acc_1_1 = _mm256_add_pd(q_acc_1_1,q_tmp_1_1); + q_acc_1_2 = _mm256_add_pd(q_acc_1_2,q_tmp_1_2); + q_acc_1_3 = _mm256_add_pd(q_acc_1_3,q_tmp_1_3); + q_acc_1_4 = _mm256_add_pd(q_acc_1_4,q_tmp_1_4); + q_acc_1_5 = _mm256_add_pd(q_acc_1_5,q_tmp_1_5); + q_acc_1_6 = _mm256_add_pd(q_acc_1_6,q_tmp_1_6); + q_acc_1_7 = _mm256_add_pd(q_acc_1_7,q_tmp_1_7); + q_acc_1_8 = _mm256_add_pd(q_acc_1_8,q_tmp_1_8); + q_acc_1_9 = _mm256_add_pd(q_acc_1_9,q_tmp_1_9); + q_acc_1_10 = _mm256_add_pd(q_acc_1_10,q_tmp_1_10); + q_acc_1_11 = _mm256_add_pd(q_acc_1_11,q_tmp_1_11); + q_acc_1_12 = _mm256_add_pd(q_acc_1_12,q_tmp_1_12); + q_acc_1_13 = _mm256_add_pd(q_acc_1_13,q_tmp_1_13); + q_acc_1_14 = _mm256_add_pd(q_acc_1_14,q_tmp_1_14); + q_acc_1_15 = _mm256_add_pd(q_acc_1_15,q_tmp_1_15); + q_acc_1_16 = _mm256_add_pd(q_acc_1_16,q_tmp_1_16); + q_acc_1_17 = _mm256_add_pd(q_acc_1_17,q_tmp_1_17); + q_acc_1_18 = _mm256_add_pd(q_acc_1_18,q_tmp_1_18); + q_acc_1_19 = _mm256_add_pd(q_acc_1_19,q_tmp_1_19); + q_acc_1_20 = _mm256_add_pd(q_acc_1_20,q_tmp_1_20); + q_acc_1_21 = _mm256_add_pd(q_acc_1_21,q_tmp_1_21); + q_acc_1_22 = _mm256_add_pd(q_acc_1_22,q_tmp_1_22); + q_acc_1_23 = _mm256_add_pd(q_acc_1_23,q_tmp_1_23); + q_acc_1_24 = _mm256_add_pd(q_acc_1_24,q_tmp_1_24); + q_acc_1_25 = _mm256_add_pd(q_acc_1_25,q_tmp_1_25); + q_acc_1_26 = _mm256_add_pd(q_acc_1_26,q_tmp_1_26); + q_acc_1_27 = _mm256_add_pd(q_acc_1_27,q_tmp_1_27); + q_acc_1_28 = _mm256_add_pd(q_acc_1_28,q_tmp_1_28); + q_acc_1_29 = _mm256_add_pd(q_acc_1_29,q_tmp_1_29); + q_acc_2_0 = _mm256_add_pd(q_acc_2_0,q_tmp_2_0); + q_acc_2_1 = _mm256_add_pd(q_acc_2_1,q_tmp_2_1); + q_acc_2_2 = _mm256_add_pd(q_acc_2_2,q_tmp_2_2); + q_acc_2_3 = _mm256_add_pd(q_acc_2_3,q_tmp_2_3); + q_acc_2_4 = _mm256_add_pd(q_acc_2_4,q_tmp_2_4); + q_acc_2_5 = _mm256_add_pd(q_acc_2_5,q_tmp_2_5); + q_acc_2_6 = _mm256_add_pd(q_acc_2_6,q_tmp_2_6); + q_acc_2_7 = _mm256_add_pd(q_acc_2_7,q_tmp_2_7); + q_acc_2_8 = _mm256_add_pd(q_acc_2_8,q_tmp_2_8); + q_acc_2_9 = _mm256_add_pd(q_acc_2_9,q_tmp_2_9); + q_acc_2_10 = _mm256_add_pd(q_acc_2_10,q_tmp_2_10); + q_acc_2_11 = _mm256_add_pd(q_acc_2_11,q_tmp_2_11); + q_acc_2_12 = _mm256_add_pd(q_acc_2_12,q_tmp_2_12); + q_acc_2_13 = _mm256_add_pd(q_acc_2_13,q_tmp_2_13); + q_acc_2_14 = _mm256_add_pd(q_acc_2_14,q_tmp_2_14); + q_acc_2_15 = _mm256_add_pd(q_acc_2_15,q_tmp_2_15); + q_acc_2_16 = _mm256_add_pd(q_acc_2_16,q_tmp_2_16); + q_acc_2_17 = _mm256_add_pd(q_acc_2_17,q_tmp_2_17); + q_acc_2_18 = _mm256_add_pd(q_acc_2_18,q_tmp_2_18); + q_acc_2_19 = _mm256_add_pd(q_acc_2_19,q_tmp_2_19); + q_acc_2_20 = _mm256_add_pd(q_acc_2_20,q_tmp_2_20); + q_acc_2_21 = _mm256_add_pd(q_acc_2_21,q_tmp_2_21); + q_acc_2_22 = _mm256_add_pd(q_acc_2_22,q_tmp_2_22); + q_acc_2_23 = _mm256_add_pd(q_acc_2_23,q_tmp_2_23); + q_acc_2_24 = _mm256_add_pd(q_acc_2_24,q_tmp_2_24); + q_acc_2_25 = _mm256_add_pd(q_acc_2_25,q_tmp_2_25); + q_acc_2_26 = _mm256_add_pd(q_acc_2_26,q_tmp_2_26); + q_acc_2_27 = _mm256_add_pd(q_acc_2_27,q_tmp_2_27); + q_acc_2_28 = _mm256_add_pd(q_acc_2_28,q_tmp_2_28); + q_acc_2_29 = _mm256_add_pd(q_acc_2_29,q_tmp_2_29); + q_acc_3_0 = _mm256_add_pd(q_acc_3_0,q_tmp_3_0); + q_acc_3_1 = _mm256_add_pd(q_acc_3_1,q_tmp_3_1); + q_acc_3_2 = _mm256_add_pd(q_acc_3_2,q_tmp_3_2); + q_acc_3_3 = _mm256_add_pd(q_acc_3_3,q_tmp_3_3); + q_acc_3_4 = _mm256_add_pd(q_acc_3_4,q_tmp_3_4); + q_acc_3_5 = _mm256_add_pd(q_acc_3_5,q_tmp_3_5); + q_acc_3_6 = _mm256_add_pd(q_acc_3_6,q_tmp_3_6); + q_acc_3_7 = _mm256_add_pd(q_acc_3_7,q_tmp_3_7); + q_acc_3_8 = _mm256_add_pd(q_acc_3_8,q_tmp_3_8); + q_acc_3_9 = _mm256_add_pd(q_acc_3_9,q_tmp_3_9); + q_acc_3_10 = _mm256_add_pd(q_acc_3_10,q_tmp_3_10); + q_acc_3_11 = _mm256_add_pd(q_acc_3_11,q_tmp_3_11); + q_acc_3_12 = _mm256_add_pd(q_acc_3_12,q_tmp_3_12); + q_acc_3_13 = _mm256_add_pd(q_acc_3_13,q_tmp_3_13); + q_acc_3_14 = _mm256_add_pd(q_acc_3_14,q_tmp_3_14); + q_acc_3_15 = _mm256_add_pd(q_acc_3_15,q_tmp_3_15); + q_acc_3_16 = _mm256_add_pd(q_acc_3_16,q_tmp_3_16); + q_acc_3_17 = _mm256_add_pd(q_acc_3_17,q_tmp_3_17); + q_acc_3_18 = _mm256_add_pd(q_acc_3_18,q_tmp_3_18); + q_acc_3_19 = _mm256_add_pd(q_acc_3_19,q_tmp_3_19); + q_acc_3_20 = _mm256_add_pd(q_acc_3_20,q_tmp_3_20); + q_acc_3_21 = _mm256_add_pd(q_acc_3_21,q_tmp_3_21); + q_acc_3_22 = _mm256_add_pd(q_acc_3_22,q_tmp_3_22); + q_acc_3_23 = _mm256_add_pd(q_acc_3_23,q_tmp_3_23); + q_acc_3_24 = _mm256_add_pd(q_acc_3_24,q_tmp_3_24); + q_acc_3_25 = _mm256_add_pd(q_acc_3_25,q_tmp_3_25); + q_acc_3_26 = _mm256_add_pd(q_acc_3_26,q_tmp_3_26); + q_acc_3_27 = _mm256_add_pd(q_acc_3_27,q_tmp_3_27); + q_acc_3_28 = _mm256_add_pd(q_acc_3_28,q_tmp_3_28); + q_acc_3_29 = _mm256_add_pd(q_acc_3_29,q_tmp_3_29); + } + const __m256d elMatVec_0 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(q_acc_0_0,src_dof_0),_mm256_mul_pd(q_acc_0_1,src_dof_1)),_mm256_mul_pd(q_acc_0_10,src_dof_10)),_mm256_mul_pd(q_acc_0_11,src_dof_11)),_mm256_mul_pd(q_acc_0_12,src_dof_12)),_mm256_mul_pd(q_acc_0_13,src_dof_13)),_mm256_mul_pd(q_acc_0_14,src_dof_14)),_mm256_mul_pd(q_acc_0_15,src_dof_15)),_mm256_mul_pd(q_acc_0_16,src_dof_16)),_mm256_mul_pd(q_acc_0_17,src_dof_17)),_mm256_mul_pd(q_acc_0_18,src_dof_18)),_mm256_mul_pd(q_acc_0_19,src_dof_19)),_mm256_mul_pd(q_acc_0_2,src_dof_2)),_mm256_mul_pd(q_acc_0_20,src_dof_20)),_mm256_mul_pd(q_acc_0_21,src_dof_21)),_mm256_mul_pd(q_acc_0_22,src_dof_22)),_mm256_mul_pd(q_acc_0_23,src_dof_23)),_mm256_mul_pd(q_acc_0_24,src_dof_24)),_mm256_mul_pd(q_acc_0_25,src_dof_25)),_mm256_mul_pd(q_acc_0_26,src_dof_26)),_mm256_mul_pd(q_acc_0_27,src_dof_27)),_mm256_mul_pd(q_acc_0_28,src_dof_28)),_mm256_mul_pd(q_acc_0_29,src_dof_29)),_mm256_mul_pd(q_acc_0_3,src_dof_3)),_mm256_mul_pd(q_acc_0_4,src_dof_4)),_mm256_mul_pd(q_acc_0_5,src_dof_5)),_mm256_mul_pd(q_acc_0_6,src_dof_6)),_mm256_mul_pd(q_acc_0_7,src_dof_7)),_mm256_mul_pd(q_acc_0_8,src_dof_8)),_mm256_mul_pd(q_acc_0_9,src_dof_9)); + const __m256d elMatVec_1 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(q_acc_1_0,src_dof_0),_mm256_mul_pd(q_acc_1_1,src_dof_1)),_mm256_mul_pd(q_acc_1_10,src_dof_10)),_mm256_mul_pd(q_acc_1_11,src_dof_11)),_mm256_mul_pd(q_acc_1_12,src_dof_12)),_mm256_mul_pd(q_acc_1_13,src_dof_13)),_mm256_mul_pd(q_acc_1_14,src_dof_14)),_mm256_mul_pd(q_acc_1_15,src_dof_15)),_mm256_mul_pd(q_acc_1_16,src_dof_16)),_mm256_mul_pd(q_acc_1_17,src_dof_17)),_mm256_mul_pd(q_acc_1_18,src_dof_18)),_mm256_mul_pd(q_acc_1_19,src_dof_19)),_mm256_mul_pd(q_acc_1_2,src_dof_2)),_mm256_mul_pd(q_acc_1_20,src_dof_20)),_mm256_mul_pd(q_acc_1_21,src_dof_21)),_mm256_mul_pd(q_acc_1_22,src_dof_22)),_mm256_mul_pd(q_acc_1_23,src_dof_23)),_mm256_mul_pd(q_acc_1_24,src_dof_24)),_mm256_mul_pd(q_acc_1_25,src_dof_25)),_mm256_mul_pd(q_acc_1_26,src_dof_26)),_mm256_mul_pd(q_acc_1_27,src_dof_27)),_mm256_mul_pd(q_acc_1_28,src_dof_28)),_mm256_mul_pd(q_acc_1_29,src_dof_29)),_mm256_mul_pd(q_acc_1_3,src_dof_3)),_mm256_mul_pd(q_acc_1_4,src_dof_4)),_mm256_mul_pd(q_acc_1_5,src_dof_5)),_mm256_mul_pd(q_acc_1_6,src_dof_6)),_mm256_mul_pd(q_acc_1_7,src_dof_7)),_mm256_mul_pd(q_acc_1_8,src_dof_8)),_mm256_mul_pd(q_acc_1_9,src_dof_9)); + const __m256d elMatVec_2 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(q_acc_2_0,src_dof_0),_mm256_mul_pd(q_acc_2_1,src_dof_1)),_mm256_mul_pd(q_acc_2_10,src_dof_10)),_mm256_mul_pd(q_acc_2_11,src_dof_11)),_mm256_mul_pd(q_acc_2_12,src_dof_12)),_mm256_mul_pd(q_acc_2_13,src_dof_13)),_mm256_mul_pd(q_acc_2_14,src_dof_14)),_mm256_mul_pd(q_acc_2_15,src_dof_15)),_mm256_mul_pd(q_acc_2_16,src_dof_16)),_mm256_mul_pd(q_acc_2_17,src_dof_17)),_mm256_mul_pd(q_acc_2_18,src_dof_18)),_mm256_mul_pd(q_acc_2_19,src_dof_19)),_mm256_mul_pd(q_acc_2_2,src_dof_2)),_mm256_mul_pd(q_acc_2_20,src_dof_20)),_mm256_mul_pd(q_acc_2_21,src_dof_21)),_mm256_mul_pd(q_acc_2_22,src_dof_22)),_mm256_mul_pd(q_acc_2_23,src_dof_23)),_mm256_mul_pd(q_acc_2_24,src_dof_24)),_mm256_mul_pd(q_acc_2_25,src_dof_25)),_mm256_mul_pd(q_acc_2_26,src_dof_26)),_mm256_mul_pd(q_acc_2_27,src_dof_27)),_mm256_mul_pd(q_acc_2_28,src_dof_28)),_mm256_mul_pd(q_acc_2_29,src_dof_29)),_mm256_mul_pd(q_acc_2_3,src_dof_3)),_mm256_mul_pd(q_acc_2_4,src_dof_4)),_mm256_mul_pd(q_acc_2_5,src_dof_5)),_mm256_mul_pd(q_acc_2_6,src_dof_6)),_mm256_mul_pd(q_acc_2_7,src_dof_7)),_mm256_mul_pd(q_acc_2_8,src_dof_8)),_mm256_mul_pd(q_acc_2_9,src_dof_9)); + const __m256d elMatVec_3 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(q_acc_3_0,src_dof_0),_mm256_mul_pd(q_acc_3_1,src_dof_1)),_mm256_mul_pd(q_acc_3_10,src_dof_10)),_mm256_mul_pd(q_acc_3_11,src_dof_11)),_mm256_mul_pd(q_acc_3_12,src_dof_12)),_mm256_mul_pd(q_acc_3_13,src_dof_13)),_mm256_mul_pd(q_acc_3_14,src_dof_14)),_mm256_mul_pd(q_acc_3_15,src_dof_15)),_mm256_mul_pd(q_acc_3_16,src_dof_16)),_mm256_mul_pd(q_acc_3_17,src_dof_17)),_mm256_mul_pd(q_acc_3_18,src_dof_18)),_mm256_mul_pd(q_acc_3_19,src_dof_19)),_mm256_mul_pd(q_acc_3_2,src_dof_2)),_mm256_mul_pd(q_acc_3_20,src_dof_20)),_mm256_mul_pd(q_acc_3_21,src_dof_21)),_mm256_mul_pd(q_acc_3_22,src_dof_22)),_mm256_mul_pd(q_acc_3_23,src_dof_23)),_mm256_mul_pd(q_acc_3_24,src_dof_24)),_mm256_mul_pd(q_acc_3_25,src_dof_25)),_mm256_mul_pd(q_acc_3_26,src_dof_26)),_mm256_mul_pd(q_acc_3_27,src_dof_27)),_mm256_mul_pd(q_acc_3_28,src_dof_28)),_mm256_mul_pd(q_acc_3_29,src_dof_29)),_mm256_mul_pd(q_acc_3_3,src_dof_3)),_mm256_mul_pd(q_acc_3_4,src_dof_4)),_mm256_mul_pd(q_acc_3_5,src_dof_5)),_mm256_mul_pd(q_acc_3_6,src_dof_6)),_mm256_mul_pd(q_acc_3_7,src_dof_7)),_mm256_mul_pd(q_acc_3_8,src_dof_8)),_mm256_mul_pd(q_acc_3_9,src_dof_9)); + _mm256_storeu_pd(&_data_dst[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))],_mm256_add_pd(elMatVec_0,_mm256_loadu_pd(& _data_dst[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))]))); + _mm256_storeu_pd(&_data_dst[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))],_mm256_add_pd(elMatVec_1,_mm256_loadu_pd(& _data_dst[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))]))); + _mm256_storeu_pd(&_data_dst[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1],_mm256_add_pd(elMatVec_2,_mm256_loadu_pd(& _data_dst[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1]))); + _mm256_storeu_pd(&_data_dst[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))],_mm256_add_pd(elMatVec_3,_mm256_loadu_pd(& _data_dst[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))]))); + } + for (int64_t ctr_0 = (int64_t)((-ctr_1 - ctr_2 + micro_edges_per_macro_edge - 1) / (4)) * (4); ctr_0 < -ctr_1 - ctr_2 + micro_edges_per_macro_edge - 1; ctr_0 += 1) + { + + const int64_t phantom_ctr_0 = ctr_0; + real_t _data_float_loop_ctr_array_dim_0[4]; + _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0; + _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1; + _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2; + _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3; + real_t _data_float_loop_ctr_array_dim_1[4]; + _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1; + real_t _data_float_loop_ctr_array_dim_2[4]; + _data_float_loop_ctr_array_dim_2[0] = (real_t) ctr_2; + _data_float_loop_ctr_array_dim_2[1] = (real_t) ctr_2; + _data_float_loop_ctr_array_dim_2[2] = (real_t) ctr_2; + _data_float_loop_ctr_array_dim_2[3] = (real_t) ctr_2; + + const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]; + const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]; + const real_t p_affine_0_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]; + const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_1_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_2_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_3_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_3_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_3_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0; + const real_t src_dof_0 = _data_src_vertex_0[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))]; + const real_t src_dof_1 = _data_src_vertex_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))]; + const real_t src_dof_2 = _data_src_vertex_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1]; + const real_t src_dof_3 = _data_src_vertex_0[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))]; + const real_t src_dof_4 = _data_src_edge_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + 4*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))]; + const real_t src_dof_5 = _data_src_edge_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))]; + const real_t src_dof_6 = _data_src_edge_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))]; + const real_t src_dof_7 = _data_src_edge_0[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 3*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]; + const real_t src_dof_8 = _data_src_edge_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge - 1)*(micro_edges_per_macro_edge + 1)) / (6)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))]; + const real_t src_dof_9 = _data_src_edge_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]; + const real_t src_dof_10 = _data_src_vertex_1[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))]; + const real_t src_dof_11 = _data_src_vertex_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))]; + const real_t src_dof_12 = _data_src_vertex_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1]; + const real_t src_dof_13 = _data_src_vertex_1[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))]; + const real_t src_dof_14 = _data_src_edge_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + 4*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))]; + const real_t src_dof_15 = _data_src_edge_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))]; + const real_t src_dof_16 = _data_src_edge_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))]; + const real_t src_dof_17 = _data_src_edge_1[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 3*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]; + const real_t src_dof_18 = _data_src_edge_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge - 1)*(micro_edges_per_macro_edge + 1)) / (6)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))]; + const real_t src_dof_19 = _data_src_edge_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]; + const real_t src_dof_20 = _data_src_vertex_2[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))]; + const real_t src_dof_21 = _data_src_vertex_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))]; + const real_t src_dof_22 = _data_src_vertex_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1]; + const real_t src_dof_23 = _data_src_vertex_2[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))]; + const real_t src_dof_24 = _data_src_edge_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + 4*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))]; + const real_t src_dof_25 = _data_src_edge_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))]; + const real_t src_dof_26 = _data_src_edge_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))]; + const real_t src_dof_27 = _data_src_edge_2[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 3*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]; + const real_t src_dof_28 = _data_src_edge_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge - 1)*(micro_edges_per_macro_edge + 1)) / (6)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))]; + const real_t src_dof_29 = _data_src_edge_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]; + const real_t rho_dof_0 = _data_rhoVertex[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))]; + const real_t rho_dof_1 = _data_rhoVertex[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))]; + const real_t rho_dof_2 = _data_rhoVertex[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1]; + const real_t rho_dof_3 = _data_rhoVertex[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))]; + const real_t rho_dof_4 = _data_rhoEdge[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + 4*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))]; + const real_t rho_dof_5 = _data_rhoEdge[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))]; + const real_t rho_dof_6 = _data_rhoEdge[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))]; + const real_t rho_dof_7 = _data_rhoEdge[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 3*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]; + const real_t rho_dof_8 = _data_rhoEdge[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge - 1)*(micro_edges_per_macro_edge + 1)) / (6)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))]; + const real_t rho_dof_9 = _data_rhoEdge[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]; + real_t q_acc_0_0 = 0.0; + real_t q_acc_0_1 = 0.0; + real_t q_acc_0_2 = 0.0; + real_t q_acc_0_3 = 0.0; + real_t q_acc_0_4 = 0.0; + real_t q_acc_0_5 = 0.0; + real_t q_acc_0_6 = 0.0; + real_t q_acc_0_7 = 0.0; + real_t q_acc_0_8 = 0.0; + real_t q_acc_0_9 = 0.0; + real_t q_acc_0_10 = 0.0; + real_t q_acc_0_11 = 0.0; + real_t q_acc_0_12 = 0.0; + real_t q_acc_0_13 = 0.0; + real_t q_acc_0_14 = 0.0; + real_t q_acc_0_15 = 0.0; + real_t q_acc_0_16 = 0.0; + real_t q_acc_0_17 = 0.0; + real_t q_acc_0_18 = 0.0; + real_t q_acc_0_19 = 0.0; + real_t q_acc_0_20 = 0.0; + real_t q_acc_0_21 = 0.0; + real_t q_acc_0_22 = 0.0; + real_t q_acc_0_23 = 0.0; + real_t q_acc_0_24 = 0.0; + real_t q_acc_0_25 = 0.0; + real_t q_acc_0_26 = 0.0; + real_t q_acc_0_27 = 0.0; + real_t q_acc_0_28 = 0.0; + real_t q_acc_0_29 = 0.0; + real_t q_acc_1_0 = 0.0; + real_t q_acc_1_1 = 0.0; + real_t q_acc_1_2 = 0.0; + real_t q_acc_1_3 = 0.0; + real_t q_acc_1_4 = 0.0; + real_t q_acc_1_5 = 0.0; + real_t q_acc_1_6 = 0.0; + real_t q_acc_1_7 = 0.0; + real_t q_acc_1_8 = 0.0; + real_t q_acc_1_9 = 0.0; + real_t q_acc_1_10 = 0.0; + real_t q_acc_1_11 = 0.0; + real_t q_acc_1_12 = 0.0; + real_t q_acc_1_13 = 0.0; + real_t q_acc_1_14 = 0.0; + real_t q_acc_1_15 = 0.0; + real_t q_acc_1_16 = 0.0; + real_t q_acc_1_17 = 0.0; + real_t q_acc_1_18 = 0.0; + real_t q_acc_1_19 = 0.0; + real_t q_acc_1_20 = 0.0; + real_t q_acc_1_21 = 0.0; + real_t q_acc_1_22 = 0.0; + real_t q_acc_1_23 = 0.0; + real_t q_acc_1_24 = 0.0; + real_t q_acc_1_25 = 0.0; + real_t q_acc_1_26 = 0.0; + real_t q_acc_1_27 = 0.0; + real_t q_acc_1_28 = 0.0; + real_t q_acc_1_29 = 0.0; + real_t q_acc_2_0 = 0.0; + real_t q_acc_2_1 = 0.0; + real_t q_acc_2_2 = 0.0; + real_t q_acc_2_3 = 0.0; + real_t q_acc_2_4 = 0.0; + real_t q_acc_2_5 = 0.0; + real_t q_acc_2_6 = 0.0; + real_t q_acc_2_7 = 0.0; + real_t q_acc_2_8 = 0.0; + real_t q_acc_2_9 = 0.0; + real_t q_acc_2_10 = 0.0; + real_t q_acc_2_11 = 0.0; + real_t q_acc_2_12 = 0.0; + real_t q_acc_2_13 = 0.0; + real_t q_acc_2_14 = 0.0; + real_t q_acc_2_15 = 0.0; + real_t q_acc_2_16 = 0.0; + real_t q_acc_2_17 = 0.0; + real_t q_acc_2_18 = 0.0; + real_t q_acc_2_19 = 0.0; + real_t q_acc_2_20 = 0.0; + real_t q_acc_2_21 = 0.0; + real_t q_acc_2_22 = 0.0; + real_t q_acc_2_23 = 0.0; + real_t q_acc_2_24 = 0.0; + real_t q_acc_2_25 = 0.0; + real_t q_acc_2_26 = 0.0; + real_t q_acc_2_27 = 0.0; + real_t q_acc_2_28 = 0.0; + real_t q_acc_2_29 = 0.0; + real_t q_acc_3_0 = 0.0; + real_t q_acc_3_1 = 0.0; + real_t q_acc_3_2 = 0.0; + real_t q_acc_3_3 = 0.0; + real_t q_acc_3_4 = 0.0; + real_t q_acc_3_5 = 0.0; + real_t q_acc_3_6 = 0.0; + real_t q_acc_3_7 = 0.0; + real_t q_acc_3_8 = 0.0; + real_t q_acc_3_9 = 0.0; + real_t q_acc_3_10 = 0.0; + real_t q_acc_3_11 = 0.0; + real_t q_acc_3_12 = 0.0; + real_t q_acc_3_13 = 0.0; + real_t q_acc_3_14 = 0.0; + real_t q_acc_3_15 = 0.0; + real_t q_acc_3_16 = 0.0; + real_t q_acc_3_17 = 0.0; + real_t q_acc_3_18 = 0.0; + real_t q_acc_3_19 = 0.0; + real_t q_acc_3_20 = 0.0; + real_t q_acc_3_21 = 0.0; + real_t q_acc_3_22 = 0.0; + real_t q_acc_3_23 = 0.0; + real_t q_acc_3_24 = 0.0; + real_t q_acc_3_25 = 0.0; + real_t q_acc_3_26 = 0.0; + real_t q_acc_3_27 = 0.0; + real_t q_acc_3_28 = 0.0; + real_t q_acc_3_29 = 0.0; + for (int64_t q = 0; q < 5; q += 1) + { + const real_t tmp_qloop_0 = (_data_q_p_0[q]*_data_q_p_0[q]); + const real_t tmp_qloop_1 = tmp_qloop_0*2.0; + const real_t tmp_qloop_2 = (_data_q_p_1[q]*_data_q_p_1[q]); + const real_t tmp_qloop_3 = tmp_qloop_2*2.0; + const real_t tmp_qloop_4 = (_data_q_p_2[q]*_data_q_p_2[q]); + const real_t tmp_qloop_5 = tmp_qloop_4*2.0; + const real_t tmp_qloop_6 = 4.0*_data_q_p_1[q]; + const real_t tmp_qloop_7 = tmp_qloop_6*_data_q_p_2[q]; + const real_t tmp_qloop_8 = 4.0*_data_q_p_0[q]; + const real_t tmp_qloop_9 = tmp_qloop_8*_data_q_p_1[q]; + const real_t tmp_qloop_10 = tmp_qloop_8*_data_q_p_2[q]; + const real_t tmp_qloop_11 = tmp_qloop_10 + tmp_qloop_9; + const real_t tmp_qloop_12 = tmp_qloop_1 + tmp_qloop_11 + tmp_qloop_3 + tmp_qloop_5 + tmp_qloop_7 - 3.0*_data_q_p_0[q] - 3.0*_data_q_p_1[q] - 3.0*_data_q_p_2[q] + 1.0; + const real_t tmp_qloop_13 = 4.0*_data_q_p_2[q]; + const real_t tmp_qloop_14 = tmp_qloop_13 + tmp_qloop_6; + const real_t tmp_qloop_15 = -rho_dof_8*tmp_qloop_6; + const real_t tmp_qloop_16 = rho_dof_0*(tmp_qloop_14 + tmp_qloop_8 - 3.0); + const real_t tmp_qloop_17 = -rho_dof_7*tmp_qloop_13 + tmp_qloop_16; + const real_t tmp_qloop_18 = rho_dof_1*(tmp_qloop_8 - 1.0) + rho_dof_5*tmp_qloop_13 + rho_dof_6*tmp_qloop_6 + rho_dof_9*(-tmp_qloop_14 - 8.0*_data_q_p_0[q] + 4.0) + tmp_qloop_15 + tmp_qloop_17; + const real_t tmp_qloop_19 = tmp_qloop_8 - 4.0; + const real_t tmp_qloop_20 = -rho_dof_9*tmp_qloop_8; + const real_t tmp_qloop_21 = rho_dof_2*(tmp_qloop_6 - 1.0) + rho_dof_4*tmp_qloop_13 + rho_dof_6*tmp_qloop_8 + rho_dof_8*(-tmp_qloop_13 - tmp_qloop_19 - 8.0*_data_q_p_1[q]) + tmp_qloop_17 + tmp_qloop_20; + const real_t tmp_qloop_22 = rho_dof_3*(tmp_qloop_13 - 1.0) + rho_dof_4*tmp_qloop_6 + rho_dof_5*tmp_qloop_8 + rho_dof_7*(-tmp_qloop_19 - tmp_qloop_6 - 8.0*_data_q_p_2[q]) + tmp_qloop_15 + tmp_qloop_16 + tmp_qloop_20; + const real_t tmp_qloop_23 = jac_affine_inv_0_0_BLUE_DOWN*tmp_qloop_18 + jac_affine_inv_1_0_BLUE_DOWN*tmp_qloop_21 + jac_affine_inv_2_0_BLUE_DOWN*tmp_qloop_22; + const real_t tmp_qloop_24 = tmp_qloop_1 - _data_q_p_0[q]; + const real_t tmp_qloop_25 = tmp_qloop_3 - _data_q_p_1[q]; + const real_t tmp_qloop_26 = tmp_qloop_5 - _data_q_p_2[q]; + const real_t tmp_qloop_27 = -tmp_qloop_10 + tmp_qloop_13 + tmp_qloop_4*-4.0 - tmp_qloop_7; + const real_t tmp_qloop_28 = tmp_qloop_2*4.0; + const real_t tmp_qloop_29 = -tmp_qloop_28 + tmp_qloop_6 - tmp_qloop_7 - tmp_qloop_9; + const real_t tmp_qloop_30 = tmp_qloop_0*4.0; + const real_t tmp_qloop_31 = -tmp_qloop_11 - tmp_qloop_30 + tmp_qloop_8; + const real_t tmp_qloop_32 = abs_det_jac_affine_BLUE_DOWN*1.0 / (rho_dof_0*tmp_qloop_12 + rho_dof_1*tmp_qloop_24 + rho_dof_2*tmp_qloop_25 + rho_dof_3*tmp_qloop_26 + rho_dof_4*tmp_qloop_7 + rho_dof_5*tmp_qloop_10 + rho_dof_6*tmp_qloop_9 + rho_dof_7*tmp_qloop_27 + rho_dof_8*tmp_qloop_29 + rho_dof_9*tmp_qloop_31)*_data_q_w[q]; + const real_t tmp_qloop_33 = tmp_qloop_32*(1.0 - _data_q_p_0[q] - _data_q_p_1[q] - _data_q_p_2[q]); + const real_t tmp_qloop_34 = tmp_qloop_23*tmp_qloop_33; + const real_t tmp_qloop_35 = jac_affine_inv_0_1_BLUE_DOWN*tmp_qloop_18 + jac_affine_inv_1_1_BLUE_DOWN*tmp_qloop_21 + jac_affine_inv_2_1_BLUE_DOWN*tmp_qloop_22; + const real_t tmp_qloop_36 = tmp_qloop_33*tmp_qloop_35; + const real_t tmp_qloop_37 = jac_affine_inv_0_2_BLUE_DOWN*tmp_qloop_18 + jac_affine_inv_1_2_BLUE_DOWN*tmp_qloop_21 + jac_affine_inv_2_2_BLUE_DOWN*tmp_qloop_22; + const real_t tmp_qloop_38 = tmp_qloop_33*tmp_qloop_37; + const real_t tmp_qloop_39 = tmp_qloop_32*_data_q_p_0[q]; + const real_t tmp_qloop_40 = tmp_qloop_23*tmp_qloop_39; + const real_t tmp_qloop_41 = tmp_qloop_32*_data_q_p_2[q]; + const real_t tmp_qloop_42 = tmp_qloop_23*tmp_qloop_41; + const real_t tmp_qloop_43 = tmp_qloop_42*tmp_qloop_9; + const real_t tmp_qloop_44 = tmp_qloop_32*_data_q_p_1[q]; + const real_t tmp_qloop_45 = tmp_qloop_23*tmp_qloop_44; + const real_t tmp_qloop_46 = tmp_qloop_35*tmp_qloop_39; + const real_t tmp_qloop_47 = tmp_qloop_35*tmp_qloop_41; + const real_t tmp_qloop_48 = tmp_qloop_47*tmp_qloop_9; + const real_t tmp_qloop_49 = tmp_qloop_35*tmp_qloop_44; + const real_t tmp_qloop_50 = tmp_qloop_37*tmp_qloop_39; + const real_t tmp_qloop_51 = tmp_qloop_37*tmp_qloop_41; + const real_t tmp_qloop_52 = tmp_qloop_51*tmp_qloop_9; + const real_t tmp_qloop_53 = tmp_qloop_37*tmp_qloop_44; + const real_t tmp_qloop_54 = tmp_qloop_23*tmp_qloop_32; + const real_t tmp_qloop_55 = tmp_qloop_2*tmp_qloop_8; + const real_t tmp_qloop_56 = tmp_qloop_32*tmp_qloop_55; + const real_t tmp_qloop_57 = tmp_qloop_4*tmp_qloop_54; + const real_t tmp_qloop_58 = tmp_qloop_32*tmp_qloop_4; + const real_t tmp_qloop_59 = tmp_qloop_35*tmp_qloop_58; + const real_t tmp_qloop_60 = tmp_qloop_37*tmp_qloop_58; + const real_t q_tmp_0_0 = tmp_qloop_12*tmp_qloop_34; + const real_t q_tmp_0_1 = tmp_qloop_24*tmp_qloop_34; + const real_t q_tmp_0_2 = tmp_qloop_25*tmp_qloop_34; + const real_t q_tmp_0_3 = tmp_qloop_26*tmp_qloop_34; + const real_t q_tmp_0_4 = tmp_qloop_34*tmp_qloop_7; + const real_t q_tmp_0_5 = tmp_qloop_10*tmp_qloop_34; + const real_t q_tmp_0_6 = tmp_qloop_34*tmp_qloop_9; + const real_t q_tmp_0_7 = tmp_qloop_27*tmp_qloop_34; + const real_t q_tmp_0_8 = tmp_qloop_29*tmp_qloop_34; + const real_t q_tmp_0_9 = tmp_qloop_31*tmp_qloop_34; + const real_t q_tmp_0_10 = tmp_qloop_12*tmp_qloop_36; + const real_t q_tmp_0_11 = tmp_qloop_24*tmp_qloop_36; + const real_t q_tmp_0_12 = tmp_qloop_25*tmp_qloop_36; + const real_t q_tmp_0_13 = tmp_qloop_26*tmp_qloop_36; + const real_t q_tmp_0_14 = tmp_qloop_36*tmp_qloop_7; + const real_t q_tmp_0_15 = tmp_qloop_10*tmp_qloop_36; + const real_t q_tmp_0_16 = tmp_qloop_36*tmp_qloop_9; + const real_t q_tmp_0_17 = tmp_qloop_27*tmp_qloop_36; + const real_t q_tmp_0_18 = tmp_qloop_29*tmp_qloop_36; + const real_t q_tmp_0_19 = tmp_qloop_31*tmp_qloop_36; + const real_t q_tmp_0_20 = tmp_qloop_12*tmp_qloop_38; + const real_t q_tmp_0_21 = tmp_qloop_24*tmp_qloop_38; + const real_t q_tmp_0_22 = tmp_qloop_25*tmp_qloop_38; + const real_t q_tmp_0_23 = tmp_qloop_26*tmp_qloop_38; + const real_t q_tmp_0_24 = tmp_qloop_38*tmp_qloop_7; + const real_t q_tmp_0_25 = tmp_qloop_10*tmp_qloop_38; + const real_t q_tmp_0_26 = tmp_qloop_38*tmp_qloop_9; + const real_t q_tmp_0_27 = tmp_qloop_27*tmp_qloop_38; + const real_t q_tmp_0_28 = tmp_qloop_29*tmp_qloop_38; + const real_t q_tmp_0_29 = tmp_qloop_31*tmp_qloop_38; + const real_t q_tmp_1_0 = tmp_qloop_12*tmp_qloop_40; + const real_t q_tmp_1_1 = tmp_qloop_24*tmp_qloop_40; + const real_t q_tmp_1_2 = tmp_qloop_25*tmp_qloop_40; + const real_t q_tmp_1_3 = tmp_qloop_26*tmp_qloop_40; + const real_t q_tmp_1_4 = tmp_qloop_43; + const real_t q_tmp_1_5 = tmp_qloop_30*tmp_qloop_42; + const real_t q_tmp_1_6 = tmp_qloop_30*tmp_qloop_45; + const real_t q_tmp_1_7 = tmp_qloop_27*tmp_qloop_40; + const real_t q_tmp_1_8 = tmp_qloop_29*tmp_qloop_40; + const real_t q_tmp_1_9 = tmp_qloop_31*tmp_qloop_40; + const real_t q_tmp_1_10 = tmp_qloop_12*tmp_qloop_46; + const real_t q_tmp_1_11 = tmp_qloop_24*tmp_qloop_46; + const real_t q_tmp_1_12 = tmp_qloop_25*tmp_qloop_46; + const real_t q_tmp_1_13 = tmp_qloop_26*tmp_qloop_46; + const real_t q_tmp_1_14 = tmp_qloop_48; + const real_t q_tmp_1_15 = tmp_qloop_30*tmp_qloop_47; + const real_t q_tmp_1_16 = tmp_qloop_30*tmp_qloop_49; + const real_t q_tmp_1_17 = tmp_qloop_27*tmp_qloop_46; + const real_t q_tmp_1_18 = tmp_qloop_29*tmp_qloop_46; + const real_t q_tmp_1_19 = tmp_qloop_31*tmp_qloop_46; + const real_t q_tmp_1_20 = tmp_qloop_12*tmp_qloop_50; + const real_t q_tmp_1_21 = tmp_qloop_24*tmp_qloop_50; + const real_t q_tmp_1_22 = tmp_qloop_25*tmp_qloop_50; + const real_t q_tmp_1_23 = tmp_qloop_26*tmp_qloop_50; + const real_t q_tmp_1_24 = tmp_qloop_52; + const real_t q_tmp_1_25 = tmp_qloop_30*tmp_qloop_51; + const real_t q_tmp_1_26 = tmp_qloop_30*tmp_qloop_53; + const real_t q_tmp_1_27 = tmp_qloop_27*tmp_qloop_50; + const real_t q_tmp_1_28 = tmp_qloop_29*tmp_qloop_50; + const real_t q_tmp_1_29 = tmp_qloop_31*tmp_qloop_50; + const real_t q_tmp_2_0 = tmp_qloop_12*tmp_qloop_45; + const real_t q_tmp_2_1 = tmp_qloop_24*tmp_qloop_45; + const real_t q_tmp_2_2 = tmp_qloop_25*tmp_qloop_45; + const real_t q_tmp_2_3 = tmp_qloop_26*tmp_qloop_45; + const real_t q_tmp_2_4 = tmp_qloop_28*tmp_qloop_42; + const real_t q_tmp_2_5 = tmp_qloop_43; + const real_t q_tmp_2_6 = tmp_qloop_54*tmp_qloop_55; + const real_t q_tmp_2_7 = tmp_qloop_27*tmp_qloop_45; + const real_t q_tmp_2_8 = tmp_qloop_29*tmp_qloop_45; + const real_t q_tmp_2_9 = tmp_qloop_31*tmp_qloop_45; + const real_t q_tmp_2_10 = tmp_qloop_12*tmp_qloop_49; + const real_t q_tmp_2_11 = tmp_qloop_24*tmp_qloop_49; + const real_t q_tmp_2_12 = tmp_qloop_25*tmp_qloop_49; + const real_t q_tmp_2_13 = tmp_qloop_26*tmp_qloop_49; + const real_t q_tmp_2_14 = tmp_qloop_28*tmp_qloop_47; + const real_t q_tmp_2_15 = tmp_qloop_48; + const real_t q_tmp_2_16 = tmp_qloop_35*tmp_qloop_56; + const real_t q_tmp_2_17 = tmp_qloop_27*tmp_qloop_49; + const real_t q_tmp_2_18 = tmp_qloop_29*tmp_qloop_49; + const real_t q_tmp_2_19 = tmp_qloop_31*tmp_qloop_49; + const real_t q_tmp_2_20 = tmp_qloop_12*tmp_qloop_53; + const real_t q_tmp_2_21 = tmp_qloop_24*tmp_qloop_53; + const real_t q_tmp_2_22 = tmp_qloop_25*tmp_qloop_53; + const real_t q_tmp_2_23 = tmp_qloop_26*tmp_qloop_53; + const real_t q_tmp_2_24 = tmp_qloop_28*tmp_qloop_51; + const real_t q_tmp_2_25 = tmp_qloop_52; + const real_t q_tmp_2_26 = tmp_qloop_37*tmp_qloop_56; + const real_t q_tmp_2_27 = tmp_qloop_27*tmp_qloop_53; + const real_t q_tmp_2_28 = tmp_qloop_29*tmp_qloop_53; + const real_t q_tmp_2_29 = tmp_qloop_31*tmp_qloop_53; + const real_t q_tmp_3_0 = tmp_qloop_12*tmp_qloop_42; + const real_t q_tmp_3_1 = tmp_qloop_24*tmp_qloop_42; + const real_t q_tmp_3_2 = tmp_qloop_25*tmp_qloop_42; + const real_t q_tmp_3_3 = tmp_qloop_26*tmp_qloop_42; + const real_t q_tmp_3_4 = tmp_qloop_57*tmp_qloop_6; + const real_t q_tmp_3_5 = tmp_qloop_57*tmp_qloop_8; + const real_t q_tmp_3_6 = tmp_qloop_43; + const real_t q_tmp_3_7 = tmp_qloop_27*tmp_qloop_42; + const real_t q_tmp_3_8 = tmp_qloop_29*tmp_qloop_42; + const real_t q_tmp_3_9 = tmp_qloop_31*tmp_qloop_42; + const real_t q_tmp_3_10 = tmp_qloop_12*tmp_qloop_47; + const real_t q_tmp_3_11 = tmp_qloop_24*tmp_qloop_47; + const real_t q_tmp_3_12 = tmp_qloop_25*tmp_qloop_47; + const real_t q_tmp_3_13 = tmp_qloop_26*tmp_qloop_47; + const real_t q_tmp_3_14 = tmp_qloop_59*tmp_qloop_6; + const real_t q_tmp_3_15 = tmp_qloop_59*tmp_qloop_8; + const real_t q_tmp_3_16 = tmp_qloop_48; + const real_t q_tmp_3_17 = tmp_qloop_27*tmp_qloop_47; + const real_t q_tmp_3_18 = tmp_qloop_29*tmp_qloop_47; + const real_t q_tmp_3_19 = tmp_qloop_31*tmp_qloop_47; + const real_t q_tmp_3_20 = tmp_qloop_12*tmp_qloop_51; + const real_t q_tmp_3_21 = tmp_qloop_24*tmp_qloop_51; + const real_t q_tmp_3_22 = tmp_qloop_25*tmp_qloop_51; + const real_t q_tmp_3_23 = tmp_qloop_26*tmp_qloop_51; + const real_t q_tmp_3_24 = tmp_qloop_6*tmp_qloop_60; + const real_t q_tmp_3_25 = tmp_qloop_60*tmp_qloop_8; + const real_t q_tmp_3_26 = tmp_qloop_52; + const real_t q_tmp_3_27 = tmp_qloop_27*tmp_qloop_51; + const real_t q_tmp_3_28 = tmp_qloop_29*tmp_qloop_51; + const real_t q_tmp_3_29 = tmp_qloop_31*tmp_qloop_51; + q_acc_0_0 = q_acc_0_0 + q_tmp_0_0; + q_acc_0_1 = q_acc_0_1 + q_tmp_0_1; + q_acc_0_2 = q_acc_0_2 + q_tmp_0_2; + q_acc_0_3 = q_acc_0_3 + q_tmp_0_3; + q_acc_0_4 = q_acc_0_4 + q_tmp_0_4; + q_acc_0_5 = q_acc_0_5 + q_tmp_0_5; + q_acc_0_6 = q_acc_0_6 + q_tmp_0_6; + q_acc_0_7 = q_acc_0_7 + q_tmp_0_7; + q_acc_0_8 = q_acc_0_8 + q_tmp_0_8; + q_acc_0_9 = q_acc_0_9 + q_tmp_0_9; + q_acc_0_10 = q_acc_0_10 + q_tmp_0_10; + q_acc_0_11 = q_acc_0_11 + q_tmp_0_11; + q_acc_0_12 = q_acc_0_12 + q_tmp_0_12; + q_acc_0_13 = q_acc_0_13 + q_tmp_0_13; + q_acc_0_14 = q_acc_0_14 + q_tmp_0_14; + q_acc_0_15 = q_acc_0_15 + q_tmp_0_15; + q_acc_0_16 = q_acc_0_16 + q_tmp_0_16; + q_acc_0_17 = q_acc_0_17 + q_tmp_0_17; + q_acc_0_18 = q_acc_0_18 + q_tmp_0_18; + q_acc_0_19 = q_acc_0_19 + q_tmp_0_19; + q_acc_0_20 = q_acc_0_20 + q_tmp_0_20; + q_acc_0_21 = q_acc_0_21 + q_tmp_0_21; + q_acc_0_22 = q_acc_0_22 + q_tmp_0_22; + q_acc_0_23 = q_acc_0_23 + q_tmp_0_23; + q_acc_0_24 = q_acc_0_24 + q_tmp_0_24; + q_acc_0_25 = q_acc_0_25 + q_tmp_0_25; + q_acc_0_26 = q_acc_0_26 + q_tmp_0_26; + q_acc_0_27 = q_acc_0_27 + q_tmp_0_27; + q_acc_0_28 = q_acc_0_28 + q_tmp_0_28; + q_acc_0_29 = q_acc_0_29 + q_tmp_0_29; + q_acc_1_0 = q_acc_1_0 + q_tmp_1_0; + q_acc_1_1 = q_acc_1_1 + q_tmp_1_1; + q_acc_1_2 = q_acc_1_2 + q_tmp_1_2; + q_acc_1_3 = q_acc_1_3 + q_tmp_1_3; + q_acc_1_4 = q_acc_1_4 + q_tmp_1_4; + q_acc_1_5 = q_acc_1_5 + q_tmp_1_5; + q_acc_1_6 = q_acc_1_6 + q_tmp_1_6; + q_acc_1_7 = q_acc_1_7 + q_tmp_1_7; + q_acc_1_8 = q_acc_1_8 + q_tmp_1_8; + q_acc_1_9 = q_acc_1_9 + q_tmp_1_9; + q_acc_1_10 = q_acc_1_10 + q_tmp_1_10; + q_acc_1_11 = q_acc_1_11 + q_tmp_1_11; + q_acc_1_12 = q_acc_1_12 + q_tmp_1_12; + q_acc_1_13 = q_acc_1_13 + q_tmp_1_13; + q_acc_1_14 = q_acc_1_14 + q_tmp_1_14; + q_acc_1_15 = q_acc_1_15 + q_tmp_1_15; + q_acc_1_16 = q_acc_1_16 + q_tmp_1_16; + q_acc_1_17 = q_acc_1_17 + q_tmp_1_17; + q_acc_1_18 = q_acc_1_18 + q_tmp_1_18; + q_acc_1_19 = q_acc_1_19 + q_tmp_1_19; + q_acc_1_20 = q_acc_1_20 + q_tmp_1_20; + q_acc_1_21 = q_acc_1_21 + q_tmp_1_21; + q_acc_1_22 = q_acc_1_22 + q_tmp_1_22; + q_acc_1_23 = q_acc_1_23 + q_tmp_1_23; + q_acc_1_24 = q_acc_1_24 + q_tmp_1_24; + q_acc_1_25 = q_acc_1_25 + q_tmp_1_25; + q_acc_1_26 = q_acc_1_26 + q_tmp_1_26; + q_acc_1_27 = q_acc_1_27 + q_tmp_1_27; + q_acc_1_28 = q_acc_1_28 + q_tmp_1_28; + q_acc_1_29 = q_acc_1_29 + q_tmp_1_29; + q_acc_2_0 = q_acc_2_0 + q_tmp_2_0; + q_acc_2_1 = q_acc_2_1 + q_tmp_2_1; + q_acc_2_2 = q_acc_2_2 + q_tmp_2_2; + q_acc_2_3 = q_acc_2_3 + q_tmp_2_3; + q_acc_2_4 = q_acc_2_4 + q_tmp_2_4; + q_acc_2_5 = q_acc_2_5 + q_tmp_2_5; + q_acc_2_6 = q_acc_2_6 + q_tmp_2_6; + q_acc_2_7 = q_acc_2_7 + q_tmp_2_7; + q_acc_2_8 = q_acc_2_8 + q_tmp_2_8; + q_acc_2_9 = q_acc_2_9 + q_tmp_2_9; + q_acc_2_10 = q_acc_2_10 + q_tmp_2_10; + q_acc_2_11 = q_acc_2_11 + q_tmp_2_11; + q_acc_2_12 = q_acc_2_12 + q_tmp_2_12; + q_acc_2_13 = q_acc_2_13 + q_tmp_2_13; + q_acc_2_14 = q_acc_2_14 + q_tmp_2_14; + q_acc_2_15 = q_acc_2_15 + q_tmp_2_15; + q_acc_2_16 = q_acc_2_16 + q_tmp_2_16; + q_acc_2_17 = q_acc_2_17 + q_tmp_2_17; + q_acc_2_18 = q_acc_2_18 + q_tmp_2_18; + q_acc_2_19 = q_acc_2_19 + q_tmp_2_19; + q_acc_2_20 = q_acc_2_20 + q_tmp_2_20; + q_acc_2_21 = q_acc_2_21 + q_tmp_2_21; + q_acc_2_22 = q_acc_2_22 + q_tmp_2_22; + q_acc_2_23 = q_acc_2_23 + q_tmp_2_23; + q_acc_2_24 = q_acc_2_24 + q_tmp_2_24; + q_acc_2_25 = q_acc_2_25 + q_tmp_2_25; + q_acc_2_26 = q_acc_2_26 + q_tmp_2_26; + q_acc_2_27 = q_acc_2_27 + q_tmp_2_27; + q_acc_2_28 = q_acc_2_28 + q_tmp_2_28; + q_acc_2_29 = q_acc_2_29 + q_tmp_2_29; + q_acc_3_0 = q_acc_3_0 + q_tmp_3_0; + q_acc_3_1 = q_acc_3_1 + q_tmp_3_1; + q_acc_3_2 = q_acc_3_2 + q_tmp_3_2; + q_acc_3_3 = q_acc_3_3 + q_tmp_3_3; + q_acc_3_4 = q_acc_3_4 + q_tmp_3_4; + q_acc_3_5 = q_acc_3_5 + q_tmp_3_5; + q_acc_3_6 = q_acc_3_6 + q_tmp_3_6; + q_acc_3_7 = q_acc_3_7 + q_tmp_3_7; + q_acc_3_8 = q_acc_3_8 + q_tmp_3_8; + q_acc_3_9 = q_acc_3_9 + q_tmp_3_9; + q_acc_3_10 = q_acc_3_10 + q_tmp_3_10; + q_acc_3_11 = q_acc_3_11 + q_tmp_3_11; + q_acc_3_12 = q_acc_3_12 + q_tmp_3_12; + q_acc_3_13 = q_acc_3_13 + q_tmp_3_13; + q_acc_3_14 = q_acc_3_14 + q_tmp_3_14; + q_acc_3_15 = q_acc_3_15 + q_tmp_3_15; + q_acc_3_16 = q_acc_3_16 + q_tmp_3_16; + q_acc_3_17 = q_acc_3_17 + q_tmp_3_17; + q_acc_3_18 = q_acc_3_18 + q_tmp_3_18; + q_acc_3_19 = q_acc_3_19 + q_tmp_3_19; + q_acc_3_20 = q_acc_3_20 + q_tmp_3_20; + q_acc_3_21 = q_acc_3_21 + q_tmp_3_21; + q_acc_3_22 = q_acc_3_22 + q_tmp_3_22; + q_acc_3_23 = q_acc_3_23 + q_tmp_3_23; + q_acc_3_24 = q_acc_3_24 + q_tmp_3_24; + q_acc_3_25 = q_acc_3_25 + q_tmp_3_25; + q_acc_3_26 = q_acc_3_26 + q_tmp_3_26; + q_acc_3_27 = q_acc_3_27 + q_tmp_3_27; + q_acc_3_28 = q_acc_3_28 + q_tmp_3_28; + q_acc_3_29 = q_acc_3_29 + q_tmp_3_29; + } + const real_t elMatVec_0 = q_acc_0_0*src_dof_0 + q_acc_0_1*src_dof_1 + q_acc_0_10*src_dof_10 + q_acc_0_11*src_dof_11 + q_acc_0_12*src_dof_12 + q_acc_0_13*src_dof_13 + q_acc_0_14*src_dof_14 + q_acc_0_15*src_dof_15 + q_acc_0_16*src_dof_16 + q_acc_0_17*src_dof_17 + q_acc_0_18*src_dof_18 + q_acc_0_19*src_dof_19 + q_acc_0_2*src_dof_2 + q_acc_0_20*src_dof_20 + q_acc_0_21*src_dof_21 + q_acc_0_22*src_dof_22 + q_acc_0_23*src_dof_23 + q_acc_0_24*src_dof_24 + q_acc_0_25*src_dof_25 + q_acc_0_26*src_dof_26 + q_acc_0_27*src_dof_27 + q_acc_0_28*src_dof_28 + q_acc_0_29*src_dof_29 + q_acc_0_3*src_dof_3 + q_acc_0_4*src_dof_4 + q_acc_0_5*src_dof_5 + q_acc_0_6*src_dof_6 + q_acc_0_7*src_dof_7 + q_acc_0_8*src_dof_8 + q_acc_0_9*src_dof_9; + const real_t elMatVec_1 = q_acc_1_0*src_dof_0 + q_acc_1_1*src_dof_1 + q_acc_1_10*src_dof_10 + q_acc_1_11*src_dof_11 + q_acc_1_12*src_dof_12 + q_acc_1_13*src_dof_13 + q_acc_1_14*src_dof_14 + q_acc_1_15*src_dof_15 + q_acc_1_16*src_dof_16 + q_acc_1_17*src_dof_17 + q_acc_1_18*src_dof_18 + q_acc_1_19*src_dof_19 + q_acc_1_2*src_dof_2 + q_acc_1_20*src_dof_20 + q_acc_1_21*src_dof_21 + q_acc_1_22*src_dof_22 + q_acc_1_23*src_dof_23 + q_acc_1_24*src_dof_24 + q_acc_1_25*src_dof_25 + q_acc_1_26*src_dof_26 + q_acc_1_27*src_dof_27 + q_acc_1_28*src_dof_28 + q_acc_1_29*src_dof_29 + q_acc_1_3*src_dof_3 + q_acc_1_4*src_dof_4 + q_acc_1_5*src_dof_5 + q_acc_1_6*src_dof_6 + q_acc_1_7*src_dof_7 + q_acc_1_8*src_dof_8 + q_acc_1_9*src_dof_9; + const real_t elMatVec_2 = q_acc_2_0*src_dof_0 + q_acc_2_1*src_dof_1 + q_acc_2_10*src_dof_10 + q_acc_2_11*src_dof_11 + q_acc_2_12*src_dof_12 + q_acc_2_13*src_dof_13 + q_acc_2_14*src_dof_14 + q_acc_2_15*src_dof_15 + q_acc_2_16*src_dof_16 + q_acc_2_17*src_dof_17 + q_acc_2_18*src_dof_18 + q_acc_2_19*src_dof_19 + q_acc_2_2*src_dof_2 + q_acc_2_20*src_dof_20 + q_acc_2_21*src_dof_21 + q_acc_2_22*src_dof_22 + q_acc_2_23*src_dof_23 + q_acc_2_24*src_dof_24 + q_acc_2_25*src_dof_25 + q_acc_2_26*src_dof_26 + q_acc_2_27*src_dof_27 + q_acc_2_28*src_dof_28 + q_acc_2_29*src_dof_29 + q_acc_2_3*src_dof_3 + q_acc_2_4*src_dof_4 + q_acc_2_5*src_dof_5 + q_acc_2_6*src_dof_6 + q_acc_2_7*src_dof_7 + q_acc_2_8*src_dof_8 + q_acc_2_9*src_dof_9; + const real_t elMatVec_3 = q_acc_3_0*src_dof_0 + q_acc_3_1*src_dof_1 + q_acc_3_10*src_dof_10 + q_acc_3_11*src_dof_11 + q_acc_3_12*src_dof_12 + q_acc_3_13*src_dof_13 + q_acc_3_14*src_dof_14 + q_acc_3_15*src_dof_15 + q_acc_3_16*src_dof_16 + q_acc_3_17*src_dof_17 + q_acc_3_18*src_dof_18 + q_acc_3_19*src_dof_19 + q_acc_3_2*src_dof_2 + q_acc_3_20*src_dof_20 + q_acc_3_21*src_dof_21 + q_acc_3_22*src_dof_22 + q_acc_3_23*src_dof_23 + q_acc_3_24*src_dof_24 + q_acc_3_25*src_dof_25 + q_acc_3_26*src_dof_26 + q_acc_3_27*src_dof_27 + q_acc_3_28*src_dof_28 + q_acc_3_29*src_dof_29 + q_acc_3_3*src_dof_3 + q_acc_3_4*src_dof_4 + q_acc_3_5*src_dof_5 + q_acc_3_6*src_dof_6 + q_acc_3_7*src_dof_7 + q_acc_3_8*src_dof_8 + q_acc_3_9*src_dof_9; + _data_dst[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))] = elMatVec_0 + _data_dst[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))]; + _data_dst[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))] = elMatVec_1 + _data_dst[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))]; + _data_dst[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1] = elMatVec_2 + _data_dst[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1]; + _data_dst[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))] = elMatVec_3 + _data_dst[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))]; + } + } + } + const real_t tmp_coords_jac_0_GREEN_UP = 1.0 / (micro_edges_per_macro_edge_float)*1.0; + const real_t tmp_coords_jac_1_GREEN_UP = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_GREEN_UP*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0); + const real_t tmp_coords_jac_2_GREEN_UP = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_GREEN_UP*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1); + const real_t tmp_coords_jac_3_GREEN_UP = macro_vertex_coord_id_0comp2 + tmp_coords_jac_0_GREEN_UP*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2); + const real_t tmp_coords_jac_4_GREEN_UP = tmp_coords_jac_0_GREEN_UP*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0); + const real_t tmp_coords_jac_5_GREEN_UP = tmp_coords_jac_0_GREEN_UP*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1); + const real_t tmp_coords_jac_6_GREEN_UP = tmp_coords_jac_0_GREEN_UP*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2); + const real_t p_affine_const_0_0_GREEN_UP = tmp_coords_jac_1_GREEN_UP; + const real_t p_affine_const_0_1_GREEN_UP = tmp_coords_jac_2_GREEN_UP; + const real_t p_affine_const_0_2_GREEN_UP = tmp_coords_jac_3_GREEN_UP; + const real_t p_affine_const_1_0_GREEN_UP = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_GREEN_UP*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0); + const real_t p_affine_const_1_1_GREEN_UP = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_GREEN_UP*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1); + const real_t p_affine_const_1_2_GREEN_UP = macro_vertex_coord_id_0comp2 + tmp_coords_jac_0_GREEN_UP*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2); + const real_t p_affine_const_2_0_GREEN_UP = macro_vertex_coord_id_0comp0 + tmp_coords_jac_4_GREEN_UP; + const real_t p_affine_const_2_1_GREEN_UP = macro_vertex_coord_id_0comp1 + tmp_coords_jac_5_GREEN_UP; + const real_t p_affine_const_2_2_GREEN_UP = macro_vertex_coord_id_0comp2 + tmp_coords_jac_6_GREEN_UP; + const real_t p_affine_const_3_0_GREEN_UP = tmp_coords_jac_1_GREEN_UP + tmp_coords_jac_4_GREEN_UP; + const real_t p_affine_const_3_1_GREEN_UP = tmp_coords_jac_2_GREEN_UP + tmp_coords_jac_5_GREEN_UP; + const real_t p_affine_const_3_2_GREEN_UP = tmp_coords_jac_3_GREEN_UP + tmp_coords_jac_6_GREEN_UP; + const real_t jac_affine_0_0_GREEN_UP = -p_affine_const_0_0_GREEN_UP + p_affine_const_1_0_GREEN_UP; + const real_t jac_affine_0_1_GREEN_UP = -p_affine_const_0_0_GREEN_UP + p_affine_const_2_0_GREEN_UP; + const real_t jac_affine_0_2_GREEN_UP = -p_affine_const_0_0_GREEN_UP + p_affine_const_3_0_GREEN_UP; + const real_t jac_affine_1_0_GREEN_UP = -p_affine_const_0_1_GREEN_UP + p_affine_const_1_1_GREEN_UP; + const real_t jac_affine_1_1_GREEN_UP = -p_affine_const_0_1_GREEN_UP + p_affine_const_2_1_GREEN_UP; + const real_t tmp_coords_jac_11_GREEN_UP = jac_affine_0_2_GREEN_UP*jac_affine_1_1_GREEN_UP; + const real_t jac_affine_1_2_GREEN_UP = -p_affine_const_0_1_GREEN_UP + p_affine_const_3_1_GREEN_UP; + const real_t tmp_coords_jac_9_GREEN_UP = jac_affine_0_1_GREEN_UP*jac_affine_1_2_GREEN_UP; + const real_t jac_affine_2_0_GREEN_UP = -p_affine_const_0_2_GREEN_UP + p_affine_const_1_2_GREEN_UP; + const real_t jac_affine_2_1_GREEN_UP = -p_affine_const_0_2_GREEN_UP + p_affine_const_2_2_GREEN_UP; + const real_t tmp_coords_jac_8_GREEN_UP = jac_affine_1_2_GREEN_UP*jac_affine_2_1_GREEN_UP; + const real_t jac_affine_2_2_GREEN_UP = -p_affine_const_0_2_GREEN_UP + p_affine_const_3_2_GREEN_UP; + const real_t tmp_coords_jac_7_GREEN_UP = jac_affine_1_1_GREEN_UP*jac_affine_2_2_GREEN_UP; + const real_t tmp_coords_jac_10_GREEN_UP = jac_affine_0_1_GREEN_UP*jac_affine_2_2_GREEN_UP; + const real_t tmp_coords_jac_12_GREEN_UP = jac_affine_0_0_GREEN_UP*tmp_coords_jac_7_GREEN_UP - jac_affine_0_0_GREEN_UP*tmp_coords_jac_8_GREEN_UP + jac_affine_0_2_GREEN_UP*jac_affine_1_0_GREEN_UP*jac_affine_2_1_GREEN_UP - jac_affine_1_0_GREEN_UP*tmp_coords_jac_10_GREEN_UP - jac_affine_2_0_GREEN_UP*tmp_coords_jac_11_GREEN_UP + jac_affine_2_0_GREEN_UP*tmp_coords_jac_9_GREEN_UP; + const real_t tmp_coords_jac_13_GREEN_UP = 1.0 / (tmp_coords_jac_12_GREEN_UP); + const real_t jac_affine_inv_0_0_GREEN_UP = tmp_coords_jac_13_GREEN_UP*(tmp_coords_jac_7_GREEN_UP - tmp_coords_jac_8_GREEN_UP); + const real_t jac_affine_inv_0_1_GREEN_UP = tmp_coords_jac_13_GREEN_UP*(jac_affine_0_2_GREEN_UP*jac_affine_2_1_GREEN_UP - tmp_coords_jac_10_GREEN_UP); + const real_t jac_affine_inv_0_2_GREEN_UP = tmp_coords_jac_13_GREEN_UP*(-tmp_coords_jac_11_GREEN_UP + tmp_coords_jac_9_GREEN_UP); + const real_t jac_affine_inv_1_0_GREEN_UP = tmp_coords_jac_13_GREEN_UP*(-jac_affine_1_0_GREEN_UP*jac_affine_2_2_GREEN_UP + jac_affine_1_2_GREEN_UP*jac_affine_2_0_GREEN_UP); + const real_t jac_affine_inv_1_1_GREEN_UP = tmp_coords_jac_13_GREEN_UP*(jac_affine_0_0_GREEN_UP*jac_affine_2_2_GREEN_UP - jac_affine_0_2_GREEN_UP*jac_affine_2_0_GREEN_UP); + const real_t jac_affine_inv_1_2_GREEN_UP = tmp_coords_jac_13_GREEN_UP*(-jac_affine_0_0_GREEN_UP*jac_affine_1_2_GREEN_UP + jac_affine_0_2_GREEN_UP*jac_affine_1_0_GREEN_UP); + const real_t jac_affine_inv_2_0_GREEN_UP = tmp_coords_jac_13_GREEN_UP*(jac_affine_1_0_GREEN_UP*jac_affine_2_1_GREEN_UP - jac_affine_1_1_GREEN_UP*jac_affine_2_0_GREEN_UP); + const real_t jac_affine_inv_2_1_GREEN_UP = tmp_coords_jac_13_GREEN_UP*(-jac_affine_0_0_GREEN_UP*jac_affine_2_1_GREEN_UP + jac_affine_0_1_GREEN_UP*jac_affine_2_0_GREEN_UP); + const real_t jac_affine_inv_2_2_GREEN_UP = tmp_coords_jac_13_GREEN_UP*(jac_affine_0_0_GREEN_UP*jac_affine_1_1_GREEN_UP - jac_affine_0_1_GREEN_UP*jac_affine_1_0_GREEN_UP); + const real_t abs_det_jac_affine_GREEN_UP = abs(tmp_coords_jac_12_GREEN_UP); + { + /* CellType.GREEN_UP */ + for (int64_t ctr_2 = 0; ctr_2 < micro_edges_per_macro_edge; ctr_2 += 1) + for (int64_t ctr_1 = 0; ctr_1 < -ctr_2 + micro_edges_per_macro_edge; ctr_1 += 1) + { + for (int64_t ctr_0 = 0; ctr_0 < (int64_t)((-ctr_1 - ctr_2 + micro_edges_per_macro_edge - 1) / (4)) * (4); ctr_0 += 4) + { + + const int64_t phantom_ctr_0 = ctr_0; + real_t _data_float_loop_ctr_array_dim_0[4]; + _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0; + _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1; + _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2; + _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3; + real_t _data_float_loop_ctr_array_dim_1[4]; + _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1; + real_t _data_float_loop_ctr_array_dim_2[4]; + _data_float_loop_ctr_array_dim_2[0] = (real_t) ctr_2; + _data_float_loop_ctr_array_dim_2[1] = (real_t) ctr_2; + _data_float_loop_ctr_array_dim_2[2] = (real_t) ctr_2; + _data_float_loop_ctr_array_dim_2[3] = (real_t) ctr_2; + + const __m256d p_affine_0_0 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_3comp0,macro_vertex_coord_id_3comp0,macro_vertex_coord_id_3comp0,macro_vertex_coord_id_3comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)); + const __m256d p_affine_0_1 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_3comp1,macro_vertex_coord_id_3comp1,macro_vertex_coord_id_3comp1,macro_vertex_coord_id_3comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)); + const __m256d p_affine_0_2 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)),_mm256_set_pd(macro_vertex_coord_id_1comp2,macro_vertex_coord_id_1comp2,macro_vertex_coord_id_1comp2,macro_vertex_coord_id_1comp2)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)),_mm256_set_pd(macro_vertex_coord_id_2comp2,macro_vertex_coord_id_2comp2,macro_vertex_coord_id_2comp2,macro_vertex_coord_id_2comp2)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)),_mm256_set_pd(macro_vertex_coord_id_3comp2,macro_vertex_coord_id_3comp2,macro_vertex_coord_id_3comp2,macro_vertex_coord_id_3comp2)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)); + const __m256d p_affine_1_0 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_3comp0,macro_vertex_coord_id_3comp0,macro_vertex_coord_id_3comp0,macro_vertex_coord_id_3comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)); + const __m256d p_affine_1_1 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_3comp1,macro_vertex_coord_id_3comp1,macro_vertex_coord_id_3comp1,macro_vertex_coord_id_3comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)); + const __m256d p_affine_1_2 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)),_mm256_set_pd(macro_vertex_coord_id_1comp2,macro_vertex_coord_id_1comp2,macro_vertex_coord_id_1comp2,macro_vertex_coord_id_1comp2)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)),_mm256_set_pd(macro_vertex_coord_id_2comp2,macro_vertex_coord_id_2comp2,macro_vertex_coord_id_2comp2,macro_vertex_coord_id_2comp2)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)),_mm256_set_pd(macro_vertex_coord_id_3comp2,macro_vertex_coord_id_3comp2,macro_vertex_coord_id_3comp2,macro_vertex_coord_id_3comp2)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)); + const __m256d p_affine_2_0 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_3comp0,macro_vertex_coord_id_3comp0,macro_vertex_coord_id_3comp0,macro_vertex_coord_id_3comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)); + const __m256d p_affine_2_1 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_3comp1,macro_vertex_coord_id_3comp1,macro_vertex_coord_id_3comp1,macro_vertex_coord_id_3comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)); + const __m256d p_affine_2_2 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)),_mm256_set_pd(macro_vertex_coord_id_1comp2,macro_vertex_coord_id_1comp2,macro_vertex_coord_id_1comp2,macro_vertex_coord_id_1comp2)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)),_mm256_set_pd(macro_vertex_coord_id_2comp2,macro_vertex_coord_id_2comp2,macro_vertex_coord_id_2comp2,macro_vertex_coord_id_2comp2)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)),_mm256_set_pd(macro_vertex_coord_id_3comp2,macro_vertex_coord_id_3comp2,macro_vertex_coord_id_3comp2,macro_vertex_coord_id_3comp2)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)); + const __m256d p_affine_3_0 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_3comp0,macro_vertex_coord_id_3comp0,macro_vertex_coord_id_3comp0,macro_vertex_coord_id_3comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)); + const __m256d p_affine_3_1 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_3comp1,macro_vertex_coord_id_3comp1,macro_vertex_coord_id_3comp1,macro_vertex_coord_id_3comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)); + const __m256d p_affine_3_2 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)),_mm256_set_pd(macro_vertex_coord_id_1comp2,macro_vertex_coord_id_1comp2,macro_vertex_coord_id_1comp2,macro_vertex_coord_id_1comp2)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)),_mm256_set_pd(macro_vertex_coord_id_2comp2,macro_vertex_coord_id_2comp2,macro_vertex_coord_id_2comp2,macro_vertex_coord_id_2comp2)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)),_mm256_set_pd(macro_vertex_coord_id_3comp2,macro_vertex_coord_id_3comp2,macro_vertex_coord_id_3comp2,macro_vertex_coord_id_3comp2)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)); + const __m256d src_dof_0 = _mm256_loadu_pd(& _data_src_vertex_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1]); + const __m256d src_dof_1 = _mm256_loadu_pd(& _data_src_vertex_0[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))]); + const __m256d src_dof_2 = _mm256_loadu_pd(& _data_src_vertex_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))]); + const __m256d src_dof_3 = _mm256_loadu_pd(& _data_src_vertex_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1]); + const __m256d src_dof_4 = _mm256_loadu_pd(& _data_src_edge_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))]); + const __m256d src_dof_5 = _mm256_loadu_pd(& _data_src_edge_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge - 1)*(micro_edges_per_macro_edge + 1)) / (6)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))]); + const __m256d src_dof_6 = _mm256_loadu_pd(& _data_src_edge_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]); + const __m256d src_dof_7 = _mm256_loadu_pd(& _data_src_edge_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 3*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + 1]); + const __m256d src_dof_8 = _mm256_loadu_pd(& _data_src_edge_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 5*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]); + const __m256d src_dof_9 = _mm256_loadu_pd(& _data_src_edge_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 4*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]); + const __m256d src_dof_10 = _mm256_loadu_pd(& _data_src_vertex_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1]); + const __m256d src_dof_11 = _mm256_loadu_pd(& _data_src_vertex_1[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))]); + const __m256d src_dof_12 = _mm256_loadu_pd(& _data_src_vertex_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))]); + const __m256d src_dof_13 = _mm256_loadu_pd(& _data_src_vertex_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1]); + const __m256d src_dof_14 = _mm256_loadu_pd(& _data_src_edge_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))]); + const __m256d src_dof_15 = _mm256_loadu_pd(& _data_src_edge_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge - 1)*(micro_edges_per_macro_edge + 1)) / (6)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))]); + const __m256d src_dof_16 = _mm256_loadu_pd(& _data_src_edge_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]); + const __m256d src_dof_17 = _mm256_loadu_pd(& _data_src_edge_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 3*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + 1]); + const __m256d src_dof_18 = _mm256_loadu_pd(& _data_src_edge_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 5*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]); + const __m256d src_dof_19 = _mm256_loadu_pd(& _data_src_edge_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 4*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]); + const __m256d src_dof_20 = _mm256_loadu_pd(& _data_src_vertex_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1]); + const __m256d src_dof_21 = _mm256_loadu_pd(& _data_src_vertex_2[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))]); + const __m256d src_dof_22 = _mm256_loadu_pd(& _data_src_vertex_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))]); + const __m256d src_dof_23 = _mm256_loadu_pd(& _data_src_vertex_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1]); + const __m256d src_dof_24 = _mm256_loadu_pd(& _data_src_edge_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))]); + const __m256d src_dof_25 = _mm256_loadu_pd(& _data_src_edge_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge - 1)*(micro_edges_per_macro_edge + 1)) / (6)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))]); + const __m256d src_dof_26 = _mm256_loadu_pd(& _data_src_edge_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]); + const __m256d src_dof_27 = _mm256_loadu_pd(& _data_src_edge_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 3*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + 1]); + const __m256d src_dof_28 = _mm256_loadu_pd(& _data_src_edge_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 5*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]); + const __m256d src_dof_29 = _mm256_loadu_pd(& _data_src_edge_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 4*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]); + const __m256d rho_dof_0 = _mm256_loadu_pd(& _data_rhoVertex[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1]); + const __m256d rho_dof_1 = _mm256_loadu_pd(& _data_rhoVertex[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))]); + const __m256d rho_dof_2 = _mm256_loadu_pd(& _data_rhoVertex[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))]); + const __m256d rho_dof_3 = _mm256_loadu_pd(& _data_rhoVertex[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1]); + const __m256d rho_dof_4 = _mm256_loadu_pd(& _data_rhoEdge[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))]); + const __m256d rho_dof_5 = _mm256_loadu_pd(& _data_rhoEdge[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge - 1)*(micro_edges_per_macro_edge + 1)) / (6)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))]); + const __m256d rho_dof_6 = _mm256_loadu_pd(& _data_rhoEdge[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]); + const __m256d rho_dof_7 = _mm256_loadu_pd(& _data_rhoEdge[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 3*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + 1]); + const __m256d rho_dof_8 = _mm256_loadu_pd(& _data_rhoEdge[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 5*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]); + const __m256d rho_dof_9 = _mm256_loadu_pd(& _data_rhoEdge[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 4*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]); + __m256d q_acc_0_0 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_1 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_2 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_3 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_4 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_5 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_6 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_7 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_8 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_9 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_10 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_11 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_12 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_13 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_14 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_15 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_16 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_17 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_18 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_19 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_20 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_21 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_22 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_23 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_24 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_25 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_26 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_27 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_28 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_29 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_0 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_1 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_2 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_3 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_4 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_5 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_6 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_7 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_8 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_9 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_10 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_11 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_12 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_13 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_14 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_15 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_16 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_17 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_18 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_19 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_20 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_21 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_22 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_23 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_24 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_25 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_26 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_27 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_28 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_29 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_0 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_1 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_2 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_3 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_4 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_5 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_6 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_7 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_8 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_9 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_10 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_11 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_12 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_13 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_14 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_15 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_16 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_17 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_18 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_19 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_20 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_21 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_22 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_23 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_24 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_25 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_26 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_27 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_28 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_29 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_0 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_1 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_2 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_3 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_4 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_5 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_6 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_7 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_8 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_9 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_10 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_11 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_12 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_13 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_14 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_15 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_16 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_17 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_18 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_19 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_20 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_21 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_22 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_23 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_24 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_25 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_26 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_27 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_28 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_29 = _mm256_set_pd(0.0,0.0,0.0,0.0); + for (int64_t q = 0; q < 5; q += 1) + { + const __m256d tmp_qloop_0 = _mm256_mul_pd(_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q]),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])); + const __m256d tmp_qloop_1 = _mm256_mul_pd(tmp_qloop_0,_mm256_set_pd(2.0,2.0,2.0,2.0)); + const __m256d tmp_qloop_2 = _mm256_mul_pd(_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q])); + const __m256d tmp_qloop_3 = _mm256_mul_pd(tmp_qloop_2,_mm256_set_pd(2.0,2.0,2.0,2.0)); + const __m256d tmp_qloop_4 = _mm256_mul_pd(_mm256_set_pd(_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q]),_mm256_set_pd(_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q])); + const __m256d tmp_qloop_5 = _mm256_mul_pd(tmp_qloop_4,_mm256_set_pd(2.0,2.0,2.0,2.0)); + const __m256d tmp_qloop_6 = _mm256_mul_pd(_mm256_set_pd(4.0,4.0,4.0,4.0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q])); + const __m256d tmp_qloop_7 = _mm256_mul_pd(tmp_qloop_6,_mm256_set_pd(_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q])); + const __m256d tmp_qloop_8 = _mm256_mul_pd(_mm256_set_pd(4.0,4.0,4.0,4.0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])); + const __m256d tmp_qloop_9 = _mm256_mul_pd(tmp_qloop_8,_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q])); + const __m256d tmp_qloop_10 = _mm256_mul_pd(tmp_qloop_8,_mm256_set_pd(_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q])); + const __m256d tmp_qloop_11 = _mm256_add_pd(tmp_qloop_10,tmp_qloop_9); + const __m256d tmp_qloop_12 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-3.0,-3.0,-3.0,-3.0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),_mm256_mul_pd(_mm256_set_pd(-3.0,-3.0,-3.0,-3.0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]))),_mm256_mul_pd(_mm256_set_pd(-3.0,-3.0,-3.0,-3.0),_mm256_set_pd(_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),tmp_qloop_1),tmp_qloop_11),tmp_qloop_3),tmp_qloop_5),tmp_qloop_7); + const __m256d tmp_qloop_13 = _mm256_mul_pd(_mm256_set_pd(4.0,4.0,4.0,4.0),_mm256_set_pd(_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q])); + const __m256d tmp_qloop_14 = _mm256_add_pd(tmp_qloop_13,tmp_qloop_6); + const __m256d tmp_qloop_15 = _mm256_mul_pd(_mm256_mul_pd(rho_dof_8,tmp_qloop_6),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)); + const __m256d tmp_qloop_16 = _mm256_mul_pd(rho_dof_0,_mm256_add_pd(_mm256_add_pd(_mm256_set_pd(-3.0,-3.0,-3.0,-3.0),tmp_qloop_14),tmp_qloop_8)); + const __m256d tmp_qloop_17 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(rho_dof_7,tmp_qloop_13),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),tmp_qloop_16); + const __m256d tmp_qloop_18 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(rho_dof_1,_mm256_add_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),tmp_qloop_8)),_mm256_mul_pd(rho_dof_9,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_14,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_set_pd(-8.0,-8.0,-8.0,-8.0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q]))),_mm256_set_pd(4.0,4.0,4.0,4.0)))),_mm256_mul_pd(rho_dof_5,tmp_qloop_13)),_mm256_mul_pd(rho_dof_6,tmp_qloop_6)),tmp_qloop_15),tmp_qloop_17); + const __m256d tmp_qloop_19 = _mm256_add_pd(_mm256_set_pd(-4.0,-4.0,-4.0,-4.0),tmp_qloop_8); + const __m256d tmp_qloop_20 = _mm256_mul_pd(_mm256_mul_pd(rho_dof_9,tmp_qloop_8),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)); + const __m256d tmp_qloop_21 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(rho_dof_2,_mm256_add_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),tmp_qloop_6)),_mm256_mul_pd(rho_dof_8,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_13,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_19,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_mul_pd(_mm256_set_pd(-8.0,-8.0,-8.0,-8.0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]))))),_mm256_mul_pd(rho_dof_4,tmp_qloop_13)),_mm256_mul_pd(rho_dof_6,tmp_qloop_8)),tmp_qloop_17),tmp_qloop_20); + const __m256d tmp_qloop_22 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(rho_dof_3,_mm256_add_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),tmp_qloop_13)),_mm256_mul_pd(rho_dof_7,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_19,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_6,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_mul_pd(_mm256_set_pd(-8.0,-8.0,-8.0,-8.0),_mm256_set_pd(_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q]))))),_mm256_mul_pd(rho_dof_4,tmp_qloop_6)),_mm256_mul_pd(rho_dof_5,tmp_qloop_8)),tmp_qloop_15),tmp_qloop_16),tmp_qloop_20); + const __m256d tmp_qloop_23 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_18,_mm256_set_pd(jac_affine_inv_0_0_GREEN_UP,jac_affine_inv_0_0_GREEN_UP,jac_affine_inv_0_0_GREEN_UP,jac_affine_inv_0_0_GREEN_UP)),_mm256_mul_pd(tmp_qloop_21,_mm256_set_pd(jac_affine_inv_1_0_GREEN_UP,jac_affine_inv_1_0_GREEN_UP,jac_affine_inv_1_0_GREEN_UP,jac_affine_inv_1_0_GREEN_UP))),_mm256_mul_pd(tmp_qloop_22,_mm256_set_pd(jac_affine_inv_2_0_GREEN_UP,jac_affine_inv_2_0_GREEN_UP,jac_affine_inv_2_0_GREEN_UP,jac_affine_inv_2_0_GREEN_UP))); + const __m256d tmp_qloop_24 = _mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),tmp_qloop_1); + const __m256d tmp_qloop_25 = _mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q])),tmp_qloop_3); + const __m256d tmp_qloop_26 = _mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q])),tmp_qloop_5); + const __m256d tmp_qloop_27 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_10,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_7,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_mul_pd(tmp_qloop_4,_mm256_set_pd(-4.0,-4.0,-4.0,-4.0))),tmp_qloop_13); + const __m256d tmp_qloop_28 = _mm256_mul_pd(tmp_qloop_2,_mm256_set_pd(4.0,4.0,4.0,4.0)); + const __m256d tmp_qloop_29 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_28,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_7,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_mul_pd(tmp_qloop_9,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_6); + const __m256d tmp_qloop_30 = _mm256_mul_pd(tmp_qloop_0,_mm256_set_pd(4.0,4.0,4.0,4.0)); + const __m256d tmp_qloop_31 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_11,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_30,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_8); + const __m256d tmp_qloop_32 = _mm256_mul_pd(_mm256_mul_pd(_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(rho_dof_0,tmp_qloop_12),_mm256_mul_pd(rho_dof_1,tmp_qloop_24)),_mm256_mul_pd(rho_dof_2,tmp_qloop_25)),_mm256_mul_pd(rho_dof_3,tmp_qloop_26)),_mm256_mul_pd(rho_dof_4,tmp_qloop_7)),_mm256_mul_pd(rho_dof_5,tmp_qloop_10)),_mm256_mul_pd(rho_dof_6,tmp_qloop_9)),_mm256_mul_pd(rho_dof_7,tmp_qloop_27)),_mm256_mul_pd(rho_dof_8,tmp_qloop_29)),_mm256_mul_pd(rho_dof_9,tmp_qloop_31))),_mm256_set_pd(_data_q_w[q],_data_q_w[q],_data_q_w[q],_data_q_w[q])),_mm256_set_pd(abs_det_jac_affine_GREEN_UP,abs_det_jac_affine_GREEN_UP,abs_det_jac_affine_GREEN_UP,abs_det_jac_affine_GREEN_UP)); + const __m256d tmp_qloop_33 = _mm256_mul_pd(tmp_qloop_32,_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]))),_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q]))),_mm256_set_pd(1.0,1.0,1.0,1.0))); + const __m256d tmp_qloop_34 = _mm256_mul_pd(tmp_qloop_23,tmp_qloop_33); + const __m256d tmp_qloop_35 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_18,_mm256_set_pd(jac_affine_inv_0_1_GREEN_UP,jac_affine_inv_0_1_GREEN_UP,jac_affine_inv_0_1_GREEN_UP,jac_affine_inv_0_1_GREEN_UP)),_mm256_mul_pd(tmp_qloop_21,_mm256_set_pd(jac_affine_inv_1_1_GREEN_UP,jac_affine_inv_1_1_GREEN_UP,jac_affine_inv_1_1_GREEN_UP,jac_affine_inv_1_1_GREEN_UP))),_mm256_mul_pd(tmp_qloop_22,_mm256_set_pd(jac_affine_inv_2_1_GREEN_UP,jac_affine_inv_2_1_GREEN_UP,jac_affine_inv_2_1_GREEN_UP,jac_affine_inv_2_1_GREEN_UP))); + const __m256d tmp_qloop_36 = _mm256_mul_pd(tmp_qloop_33,tmp_qloop_35); + const __m256d tmp_qloop_37 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_18,_mm256_set_pd(jac_affine_inv_0_2_GREEN_UP,jac_affine_inv_0_2_GREEN_UP,jac_affine_inv_0_2_GREEN_UP,jac_affine_inv_0_2_GREEN_UP)),_mm256_mul_pd(tmp_qloop_21,_mm256_set_pd(jac_affine_inv_1_2_GREEN_UP,jac_affine_inv_1_2_GREEN_UP,jac_affine_inv_1_2_GREEN_UP,jac_affine_inv_1_2_GREEN_UP))),_mm256_mul_pd(tmp_qloop_22,_mm256_set_pd(jac_affine_inv_2_2_GREEN_UP,jac_affine_inv_2_2_GREEN_UP,jac_affine_inv_2_2_GREEN_UP,jac_affine_inv_2_2_GREEN_UP))); + const __m256d tmp_qloop_38 = _mm256_mul_pd(tmp_qloop_33,tmp_qloop_37); + const __m256d tmp_qloop_39 = _mm256_mul_pd(tmp_qloop_32,_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])); + const __m256d tmp_qloop_40 = _mm256_mul_pd(tmp_qloop_23,tmp_qloop_39); + const __m256d tmp_qloop_41 = _mm256_mul_pd(tmp_qloop_32,_mm256_set_pd(_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q])); + const __m256d tmp_qloop_42 = _mm256_mul_pd(tmp_qloop_23,tmp_qloop_41); + const __m256d tmp_qloop_43 = _mm256_mul_pd(tmp_qloop_42,tmp_qloop_9); + const __m256d tmp_qloop_44 = _mm256_mul_pd(tmp_qloop_32,_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q])); + const __m256d tmp_qloop_45 = _mm256_mul_pd(tmp_qloop_23,tmp_qloop_44); + const __m256d tmp_qloop_46 = _mm256_mul_pd(tmp_qloop_35,tmp_qloop_39); + const __m256d tmp_qloop_47 = _mm256_mul_pd(tmp_qloop_35,tmp_qloop_41); + const __m256d tmp_qloop_48 = _mm256_mul_pd(tmp_qloop_47,tmp_qloop_9); + const __m256d tmp_qloop_49 = _mm256_mul_pd(tmp_qloop_35,tmp_qloop_44); + const __m256d tmp_qloop_50 = _mm256_mul_pd(tmp_qloop_37,tmp_qloop_39); + const __m256d tmp_qloop_51 = _mm256_mul_pd(tmp_qloop_37,tmp_qloop_41); + const __m256d tmp_qloop_52 = _mm256_mul_pd(tmp_qloop_51,tmp_qloop_9); + const __m256d tmp_qloop_53 = _mm256_mul_pd(tmp_qloop_37,tmp_qloop_44); + const __m256d tmp_qloop_54 = _mm256_mul_pd(tmp_qloop_23,tmp_qloop_32); + const __m256d tmp_qloop_55 = _mm256_mul_pd(tmp_qloop_2,tmp_qloop_8); + const __m256d tmp_qloop_56 = _mm256_mul_pd(tmp_qloop_32,tmp_qloop_55); + const __m256d tmp_qloop_57 = _mm256_mul_pd(tmp_qloop_4,tmp_qloop_54); + const __m256d tmp_qloop_58 = _mm256_mul_pd(tmp_qloop_32,tmp_qloop_4); + const __m256d tmp_qloop_59 = _mm256_mul_pd(tmp_qloop_35,tmp_qloop_58); + const __m256d tmp_qloop_60 = _mm256_mul_pd(tmp_qloop_37,tmp_qloop_58); + const __m256d q_tmp_0_0 = _mm256_mul_pd(tmp_qloop_12,tmp_qloop_34); + const __m256d q_tmp_0_1 = _mm256_mul_pd(tmp_qloop_24,tmp_qloop_34); + const __m256d q_tmp_0_2 = _mm256_mul_pd(tmp_qloop_25,tmp_qloop_34); + const __m256d q_tmp_0_3 = _mm256_mul_pd(tmp_qloop_26,tmp_qloop_34); + const __m256d q_tmp_0_4 = _mm256_mul_pd(tmp_qloop_34,tmp_qloop_7); + const __m256d q_tmp_0_5 = _mm256_mul_pd(tmp_qloop_10,tmp_qloop_34); + const __m256d q_tmp_0_6 = _mm256_mul_pd(tmp_qloop_34,tmp_qloop_9); + const __m256d q_tmp_0_7 = _mm256_mul_pd(tmp_qloop_27,tmp_qloop_34); + const __m256d q_tmp_0_8 = _mm256_mul_pd(tmp_qloop_29,tmp_qloop_34); + const __m256d q_tmp_0_9 = _mm256_mul_pd(tmp_qloop_31,tmp_qloop_34); + const __m256d q_tmp_0_10 = _mm256_mul_pd(tmp_qloop_12,tmp_qloop_36); + const __m256d q_tmp_0_11 = _mm256_mul_pd(tmp_qloop_24,tmp_qloop_36); + const __m256d q_tmp_0_12 = _mm256_mul_pd(tmp_qloop_25,tmp_qloop_36); + const __m256d q_tmp_0_13 = _mm256_mul_pd(tmp_qloop_26,tmp_qloop_36); + const __m256d q_tmp_0_14 = _mm256_mul_pd(tmp_qloop_36,tmp_qloop_7); + const __m256d q_tmp_0_15 = _mm256_mul_pd(tmp_qloop_10,tmp_qloop_36); + const __m256d q_tmp_0_16 = _mm256_mul_pd(tmp_qloop_36,tmp_qloop_9); + const __m256d q_tmp_0_17 = _mm256_mul_pd(tmp_qloop_27,tmp_qloop_36); + const __m256d q_tmp_0_18 = _mm256_mul_pd(tmp_qloop_29,tmp_qloop_36); + const __m256d q_tmp_0_19 = _mm256_mul_pd(tmp_qloop_31,tmp_qloop_36); + const __m256d q_tmp_0_20 = _mm256_mul_pd(tmp_qloop_12,tmp_qloop_38); + const __m256d q_tmp_0_21 = _mm256_mul_pd(tmp_qloop_24,tmp_qloop_38); + const __m256d q_tmp_0_22 = _mm256_mul_pd(tmp_qloop_25,tmp_qloop_38); + const __m256d q_tmp_0_23 = _mm256_mul_pd(tmp_qloop_26,tmp_qloop_38); + const __m256d q_tmp_0_24 = _mm256_mul_pd(tmp_qloop_38,tmp_qloop_7); + const __m256d q_tmp_0_25 = _mm256_mul_pd(tmp_qloop_10,tmp_qloop_38); + const __m256d q_tmp_0_26 = _mm256_mul_pd(tmp_qloop_38,tmp_qloop_9); + const __m256d q_tmp_0_27 = _mm256_mul_pd(tmp_qloop_27,tmp_qloop_38); + const __m256d q_tmp_0_28 = _mm256_mul_pd(tmp_qloop_29,tmp_qloop_38); + const __m256d q_tmp_0_29 = _mm256_mul_pd(tmp_qloop_31,tmp_qloop_38); + const __m256d q_tmp_1_0 = _mm256_mul_pd(tmp_qloop_12,tmp_qloop_40); + const __m256d q_tmp_1_1 = _mm256_mul_pd(tmp_qloop_24,tmp_qloop_40); + const __m256d q_tmp_1_2 = _mm256_mul_pd(tmp_qloop_25,tmp_qloop_40); + const __m256d q_tmp_1_3 = _mm256_mul_pd(tmp_qloop_26,tmp_qloop_40); + const __m256d q_tmp_1_4 = tmp_qloop_43; + const __m256d q_tmp_1_5 = _mm256_mul_pd(tmp_qloop_30,tmp_qloop_42); + const __m256d q_tmp_1_6 = _mm256_mul_pd(tmp_qloop_30,tmp_qloop_45); + const __m256d q_tmp_1_7 = _mm256_mul_pd(tmp_qloop_27,tmp_qloop_40); + const __m256d q_tmp_1_8 = _mm256_mul_pd(tmp_qloop_29,tmp_qloop_40); + const __m256d q_tmp_1_9 = _mm256_mul_pd(tmp_qloop_31,tmp_qloop_40); + const __m256d q_tmp_1_10 = _mm256_mul_pd(tmp_qloop_12,tmp_qloop_46); + const __m256d q_tmp_1_11 = _mm256_mul_pd(tmp_qloop_24,tmp_qloop_46); + const __m256d q_tmp_1_12 = _mm256_mul_pd(tmp_qloop_25,tmp_qloop_46); + const __m256d q_tmp_1_13 = _mm256_mul_pd(tmp_qloop_26,tmp_qloop_46); + const __m256d q_tmp_1_14 = tmp_qloop_48; + const __m256d q_tmp_1_15 = _mm256_mul_pd(tmp_qloop_30,tmp_qloop_47); + const __m256d q_tmp_1_16 = _mm256_mul_pd(tmp_qloop_30,tmp_qloop_49); + const __m256d q_tmp_1_17 = _mm256_mul_pd(tmp_qloop_27,tmp_qloop_46); + const __m256d q_tmp_1_18 = _mm256_mul_pd(tmp_qloop_29,tmp_qloop_46); + const __m256d q_tmp_1_19 = _mm256_mul_pd(tmp_qloop_31,tmp_qloop_46); + const __m256d q_tmp_1_20 = _mm256_mul_pd(tmp_qloop_12,tmp_qloop_50); + const __m256d q_tmp_1_21 = _mm256_mul_pd(tmp_qloop_24,tmp_qloop_50); + const __m256d q_tmp_1_22 = _mm256_mul_pd(tmp_qloop_25,tmp_qloop_50); + const __m256d q_tmp_1_23 = _mm256_mul_pd(tmp_qloop_26,tmp_qloop_50); + const __m256d q_tmp_1_24 = tmp_qloop_52; + const __m256d q_tmp_1_25 = _mm256_mul_pd(tmp_qloop_30,tmp_qloop_51); + const __m256d q_tmp_1_26 = _mm256_mul_pd(tmp_qloop_30,tmp_qloop_53); + const __m256d q_tmp_1_27 = _mm256_mul_pd(tmp_qloop_27,tmp_qloop_50); + const __m256d q_tmp_1_28 = _mm256_mul_pd(tmp_qloop_29,tmp_qloop_50); + const __m256d q_tmp_1_29 = _mm256_mul_pd(tmp_qloop_31,tmp_qloop_50); + const __m256d q_tmp_2_0 = _mm256_mul_pd(tmp_qloop_12,tmp_qloop_45); + const __m256d q_tmp_2_1 = _mm256_mul_pd(tmp_qloop_24,tmp_qloop_45); + const __m256d q_tmp_2_2 = _mm256_mul_pd(tmp_qloop_25,tmp_qloop_45); + const __m256d q_tmp_2_3 = _mm256_mul_pd(tmp_qloop_26,tmp_qloop_45); + const __m256d q_tmp_2_4 = _mm256_mul_pd(tmp_qloop_28,tmp_qloop_42); + const __m256d q_tmp_2_5 = tmp_qloop_43; + const __m256d q_tmp_2_6 = _mm256_mul_pd(tmp_qloop_54,tmp_qloop_55); + const __m256d q_tmp_2_7 = _mm256_mul_pd(tmp_qloop_27,tmp_qloop_45); + const __m256d q_tmp_2_8 = _mm256_mul_pd(tmp_qloop_29,tmp_qloop_45); + const __m256d q_tmp_2_9 = _mm256_mul_pd(tmp_qloop_31,tmp_qloop_45); + const __m256d q_tmp_2_10 = _mm256_mul_pd(tmp_qloop_12,tmp_qloop_49); + const __m256d q_tmp_2_11 = _mm256_mul_pd(tmp_qloop_24,tmp_qloop_49); + const __m256d q_tmp_2_12 = _mm256_mul_pd(tmp_qloop_25,tmp_qloop_49); + const __m256d q_tmp_2_13 = _mm256_mul_pd(tmp_qloop_26,tmp_qloop_49); + const __m256d q_tmp_2_14 = _mm256_mul_pd(tmp_qloop_28,tmp_qloop_47); + const __m256d q_tmp_2_15 = tmp_qloop_48; + const __m256d q_tmp_2_16 = _mm256_mul_pd(tmp_qloop_35,tmp_qloop_56); + const __m256d q_tmp_2_17 = _mm256_mul_pd(tmp_qloop_27,tmp_qloop_49); + const __m256d q_tmp_2_18 = _mm256_mul_pd(tmp_qloop_29,tmp_qloop_49); + const __m256d q_tmp_2_19 = _mm256_mul_pd(tmp_qloop_31,tmp_qloop_49); + const __m256d q_tmp_2_20 = _mm256_mul_pd(tmp_qloop_12,tmp_qloop_53); + const __m256d q_tmp_2_21 = _mm256_mul_pd(tmp_qloop_24,tmp_qloop_53); + const __m256d q_tmp_2_22 = _mm256_mul_pd(tmp_qloop_25,tmp_qloop_53); + const __m256d q_tmp_2_23 = _mm256_mul_pd(tmp_qloop_26,tmp_qloop_53); + const __m256d q_tmp_2_24 = _mm256_mul_pd(tmp_qloop_28,tmp_qloop_51); + const __m256d q_tmp_2_25 = tmp_qloop_52; + const __m256d q_tmp_2_26 = _mm256_mul_pd(tmp_qloop_37,tmp_qloop_56); + const __m256d q_tmp_2_27 = _mm256_mul_pd(tmp_qloop_27,tmp_qloop_53); + const __m256d q_tmp_2_28 = _mm256_mul_pd(tmp_qloop_29,tmp_qloop_53); + const __m256d q_tmp_2_29 = _mm256_mul_pd(tmp_qloop_31,tmp_qloop_53); + const __m256d q_tmp_3_0 = _mm256_mul_pd(tmp_qloop_12,tmp_qloop_42); + const __m256d q_tmp_3_1 = _mm256_mul_pd(tmp_qloop_24,tmp_qloop_42); + const __m256d q_tmp_3_2 = _mm256_mul_pd(tmp_qloop_25,tmp_qloop_42); + const __m256d q_tmp_3_3 = _mm256_mul_pd(tmp_qloop_26,tmp_qloop_42); + const __m256d q_tmp_3_4 = _mm256_mul_pd(tmp_qloop_57,tmp_qloop_6); + const __m256d q_tmp_3_5 = _mm256_mul_pd(tmp_qloop_57,tmp_qloop_8); + const __m256d q_tmp_3_6 = tmp_qloop_43; + const __m256d q_tmp_3_7 = _mm256_mul_pd(tmp_qloop_27,tmp_qloop_42); + const __m256d q_tmp_3_8 = _mm256_mul_pd(tmp_qloop_29,tmp_qloop_42); + const __m256d q_tmp_3_9 = _mm256_mul_pd(tmp_qloop_31,tmp_qloop_42); + const __m256d q_tmp_3_10 = _mm256_mul_pd(tmp_qloop_12,tmp_qloop_47); + const __m256d q_tmp_3_11 = _mm256_mul_pd(tmp_qloop_24,tmp_qloop_47); + const __m256d q_tmp_3_12 = _mm256_mul_pd(tmp_qloop_25,tmp_qloop_47); + const __m256d q_tmp_3_13 = _mm256_mul_pd(tmp_qloop_26,tmp_qloop_47); + const __m256d q_tmp_3_14 = _mm256_mul_pd(tmp_qloop_59,tmp_qloop_6); + const __m256d q_tmp_3_15 = _mm256_mul_pd(tmp_qloop_59,tmp_qloop_8); + const __m256d q_tmp_3_16 = tmp_qloop_48; + const __m256d q_tmp_3_17 = _mm256_mul_pd(tmp_qloop_27,tmp_qloop_47); + const __m256d q_tmp_3_18 = _mm256_mul_pd(tmp_qloop_29,tmp_qloop_47); + const __m256d q_tmp_3_19 = _mm256_mul_pd(tmp_qloop_31,tmp_qloop_47); + const __m256d q_tmp_3_20 = _mm256_mul_pd(tmp_qloop_12,tmp_qloop_51); + const __m256d q_tmp_3_21 = _mm256_mul_pd(tmp_qloop_24,tmp_qloop_51); + const __m256d q_tmp_3_22 = _mm256_mul_pd(tmp_qloop_25,tmp_qloop_51); + const __m256d q_tmp_3_23 = _mm256_mul_pd(tmp_qloop_26,tmp_qloop_51); + const __m256d q_tmp_3_24 = _mm256_mul_pd(tmp_qloop_6,tmp_qloop_60); + const __m256d q_tmp_3_25 = _mm256_mul_pd(tmp_qloop_60,tmp_qloop_8); + const __m256d q_tmp_3_26 = tmp_qloop_52; + const __m256d q_tmp_3_27 = _mm256_mul_pd(tmp_qloop_27,tmp_qloop_51); + const __m256d q_tmp_3_28 = _mm256_mul_pd(tmp_qloop_29,tmp_qloop_51); + const __m256d q_tmp_3_29 = _mm256_mul_pd(tmp_qloop_31,tmp_qloop_51); + q_acc_0_0 = _mm256_add_pd(q_acc_0_0,q_tmp_0_0); + q_acc_0_1 = _mm256_add_pd(q_acc_0_1,q_tmp_0_1); + q_acc_0_2 = _mm256_add_pd(q_acc_0_2,q_tmp_0_2); + q_acc_0_3 = _mm256_add_pd(q_acc_0_3,q_tmp_0_3); + q_acc_0_4 = _mm256_add_pd(q_acc_0_4,q_tmp_0_4); + q_acc_0_5 = _mm256_add_pd(q_acc_0_5,q_tmp_0_5); + q_acc_0_6 = _mm256_add_pd(q_acc_0_6,q_tmp_0_6); + q_acc_0_7 = _mm256_add_pd(q_acc_0_7,q_tmp_0_7); + q_acc_0_8 = _mm256_add_pd(q_acc_0_8,q_tmp_0_8); + q_acc_0_9 = _mm256_add_pd(q_acc_0_9,q_tmp_0_9); + q_acc_0_10 = _mm256_add_pd(q_acc_0_10,q_tmp_0_10); + q_acc_0_11 = _mm256_add_pd(q_acc_0_11,q_tmp_0_11); + q_acc_0_12 = _mm256_add_pd(q_acc_0_12,q_tmp_0_12); + q_acc_0_13 = _mm256_add_pd(q_acc_0_13,q_tmp_0_13); + q_acc_0_14 = _mm256_add_pd(q_acc_0_14,q_tmp_0_14); + q_acc_0_15 = _mm256_add_pd(q_acc_0_15,q_tmp_0_15); + q_acc_0_16 = _mm256_add_pd(q_acc_0_16,q_tmp_0_16); + q_acc_0_17 = _mm256_add_pd(q_acc_0_17,q_tmp_0_17); + q_acc_0_18 = _mm256_add_pd(q_acc_0_18,q_tmp_0_18); + q_acc_0_19 = _mm256_add_pd(q_acc_0_19,q_tmp_0_19); + q_acc_0_20 = _mm256_add_pd(q_acc_0_20,q_tmp_0_20); + q_acc_0_21 = _mm256_add_pd(q_acc_0_21,q_tmp_0_21); + q_acc_0_22 = _mm256_add_pd(q_acc_0_22,q_tmp_0_22); + q_acc_0_23 = _mm256_add_pd(q_acc_0_23,q_tmp_0_23); + q_acc_0_24 = _mm256_add_pd(q_acc_0_24,q_tmp_0_24); + q_acc_0_25 = _mm256_add_pd(q_acc_0_25,q_tmp_0_25); + q_acc_0_26 = _mm256_add_pd(q_acc_0_26,q_tmp_0_26); + q_acc_0_27 = _mm256_add_pd(q_acc_0_27,q_tmp_0_27); + q_acc_0_28 = _mm256_add_pd(q_acc_0_28,q_tmp_0_28); + q_acc_0_29 = _mm256_add_pd(q_acc_0_29,q_tmp_0_29); + q_acc_1_0 = _mm256_add_pd(q_acc_1_0,q_tmp_1_0); + q_acc_1_1 = _mm256_add_pd(q_acc_1_1,q_tmp_1_1); + q_acc_1_2 = _mm256_add_pd(q_acc_1_2,q_tmp_1_2); + q_acc_1_3 = _mm256_add_pd(q_acc_1_3,q_tmp_1_3); + q_acc_1_4 = _mm256_add_pd(q_acc_1_4,q_tmp_1_4); + q_acc_1_5 = _mm256_add_pd(q_acc_1_5,q_tmp_1_5); + q_acc_1_6 = _mm256_add_pd(q_acc_1_6,q_tmp_1_6); + q_acc_1_7 = _mm256_add_pd(q_acc_1_7,q_tmp_1_7); + q_acc_1_8 = _mm256_add_pd(q_acc_1_8,q_tmp_1_8); + q_acc_1_9 = _mm256_add_pd(q_acc_1_9,q_tmp_1_9); + q_acc_1_10 = _mm256_add_pd(q_acc_1_10,q_tmp_1_10); + q_acc_1_11 = _mm256_add_pd(q_acc_1_11,q_tmp_1_11); + q_acc_1_12 = _mm256_add_pd(q_acc_1_12,q_tmp_1_12); + q_acc_1_13 = _mm256_add_pd(q_acc_1_13,q_tmp_1_13); + q_acc_1_14 = _mm256_add_pd(q_acc_1_14,q_tmp_1_14); + q_acc_1_15 = _mm256_add_pd(q_acc_1_15,q_tmp_1_15); + q_acc_1_16 = _mm256_add_pd(q_acc_1_16,q_tmp_1_16); + q_acc_1_17 = _mm256_add_pd(q_acc_1_17,q_tmp_1_17); + q_acc_1_18 = _mm256_add_pd(q_acc_1_18,q_tmp_1_18); + q_acc_1_19 = _mm256_add_pd(q_acc_1_19,q_tmp_1_19); + q_acc_1_20 = _mm256_add_pd(q_acc_1_20,q_tmp_1_20); + q_acc_1_21 = _mm256_add_pd(q_acc_1_21,q_tmp_1_21); + q_acc_1_22 = _mm256_add_pd(q_acc_1_22,q_tmp_1_22); + q_acc_1_23 = _mm256_add_pd(q_acc_1_23,q_tmp_1_23); + q_acc_1_24 = _mm256_add_pd(q_acc_1_24,q_tmp_1_24); + q_acc_1_25 = _mm256_add_pd(q_acc_1_25,q_tmp_1_25); + q_acc_1_26 = _mm256_add_pd(q_acc_1_26,q_tmp_1_26); + q_acc_1_27 = _mm256_add_pd(q_acc_1_27,q_tmp_1_27); + q_acc_1_28 = _mm256_add_pd(q_acc_1_28,q_tmp_1_28); + q_acc_1_29 = _mm256_add_pd(q_acc_1_29,q_tmp_1_29); + q_acc_2_0 = _mm256_add_pd(q_acc_2_0,q_tmp_2_0); + q_acc_2_1 = _mm256_add_pd(q_acc_2_1,q_tmp_2_1); + q_acc_2_2 = _mm256_add_pd(q_acc_2_2,q_tmp_2_2); + q_acc_2_3 = _mm256_add_pd(q_acc_2_3,q_tmp_2_3); + q_acc_2_4 = _mm256_add_pd(q_acc_2_4,q_tmp_2_4); + q_acc_2_5 = _mm256_add_pd(q_acc_2_5,q_tmp_2_5); + q_acc_2_6 = _mm256_add_pd(q_acc_2_6,q_tmp_2_6); + q_acc_2_7 = _mm256_add_pd(q_acc_2_7,q_tmp_2_7); + q_acc_2_8 = _mm256_add_pd(q_acc_2_8,q_tmp_2_8); + q_acc_2_9 = _mm256_add_pd(q_acc_2_9,q_tmp_2_9); + q_acc_2_10 = _mm256_add_pd(q_acc_2_10,q_tmp_2_10); + q_acc_2_11 = _mm256_add_pd(q_acc_2_11,q_tmp_2_11); + q_acc_2_12 = _mm256_add_pd(q_acc_2_12,q_tmp_2_12); + q_acc_2_13 = _mm256_add_pd(q_acc_2_13,q_tmp_2_13); + q_acc_2_14 = _mm256_add_pd(q_acc_2_14,q_tmp_2_14); + q_acc_2_15 = _mm256_add_pd(q_acc_2_15,q_tmp_2_15); + q_acc_2_16 = _mm256_add_pd(q_acc_2_16,q_tmp_2_16); + q_acc_2_17 = _mm256_add_pd(q_acc_2_17,q_tmp_2_17); + q_acc_2_18 = _mm256_add_pd(q_acc_2_18,q_tmp_2_18); + q_acc_2_19 = _mm256_add_pd(q_acc_2_19,q_tmp_2_19); + q_acc_2_20 = _mm256_add_pd(q_acc_2_20,q_tmp_2_20); + q_acc_2_21 = _mm256_add_pd(q_acc_2_21,q_tmp_2_21); + q_acc_2_22 = _mm256_add_pd(q_acc_2_22,q_tmp_2_22); + q_acc_2_23 = _mm256_add_pd(q_acc_2_23,q_tmp_2_23); + q_acc_2_24 = _mm256_add_pd(q_acc_2_24,q_tmp_2_24); + q_acc_2_25 = _mm256_add_pd(q_acc_2_25,q_tmp_2_25); + q_acc_2_26 = _mm256_add_pd(q_acc_2_26,q_tmp_2_26); + q_acc_2_27 = _mm256_add_pd(q_acc_2_27,q_tmp_2_27); + q_acc_2_28 = _mm256_add_pd(q_acc_2_28,q_tmp_2_28); + q_acc_2_29 = _mm256_add_pd(q_acc_2_29,q_tmp_2_29); + q_acc_3_0 = _mm256_add_pd(q_acc_3_0,q_tmp_3_0); + q_acc_3_1 = _mm256_add_pd(q_acc_3_1,q_tmp_3_1); + q_acc_3_2 = _mm256_add_pd(q_acc_3_2,q_tmp_3_2); + q_acc_3_3 = _mm256_add_pd(q_acc_3_3,q_tmp_3_3); + q_acc_3_4 = _mm256_add_pd(q_acc_3_4,q_tmp_3_4); + q_acc_3_5 = _mm256_add_pd(q_acc_3_5,q_tmp_3_5); + q_acc_3_6 = _mm256_add_pd(q_acc_3_6,q_tmp_3_6); + q_acc_3_7 = _mm256_add_pd(q_acc_3_7,q_tmp_3_7); + q_acc_3_8 = _mm256_add_pd(q_acc_3_8,q_tmp_3_8); + q_acc_3_9 = _mm256_add_pd(q_acc_3_9,q_tmp_3_9); + q_acc_3_10 = _mm256_add_pd(q_acc_3_10,q_tmp_3_10); + q_acc_3_11 = _mm256_add_pd(q_acc_3_11,q_tmp_3_11); + q_acc_3_12 = _mm256_add_pd(q_acc_3_12,q_tmp_3_12); + q_acc_3_13 = _mm256_add_pd(q_acc_3_13,q_tmp_3_13); + q_acc_3_14 = _mm256_add_pd(q_acc_3_14,q_tmp_3_14); + q_acc_3_15 = _mm256_add_pd(q_acc_3_15,q_tmp_3_15); + q_acc_3_16 = _mm256_add_pd(q_acc_3_16,q_tmp_3_16); + q_acc_3_17 = _mm256_add_pd(q_acc_3_17,q_tmp_3_17); + q_acc_3_18 = _mm256_add_pd(q_acc_3_18,q_tmp_3_18); + q_acc_3_19 = _mm256_add_pd(q_acc_3_19,q_tmp_3_19); + q_acc_3_20 = _mm256_add_pd(q_acc_3_20,q_tmp_3_20); + q_acc_3_21 = _mm256_add_pd(q_acc_3_21,q_tmp_3_21); + q_acc_3_22 = _mm256_add_pd(q_acc_3_22,q_tmp_3_22); + q_acc_3_23 = _mm256_add_pd(q_acc_3_23,q_tmp_3_23); + q_acc_3_24 = _mm256_add_pd(q_acc_3_24,q_tmp_3_24); + q_acc_3_25 = _mm256_add_pd(q_acc_3_25,q_tmp_3_25); + q_acc_3_26 = _mm256_add_pd(q_acc_3_26,q_tmp_3_26); + q_acc_3_27 = _mm256_add_pd(q_acc_3_27,q_tmp_3_27); + q_acc_3_28 = _mm256_add_pd(q_acc_3_28,q_tmp_3_28); + q_acc_3_29 = _mm256_add_pd(q_acc_3_29,q_tmp_3_29); + } + const __m256d elMatVec_0 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(q_acc_0_0,src_dof_0),_mm256_mul_pd(q_acc_0_1,src_dof_1)),_mm256_mul_pd(q_acc_0_10,src_dof_10)),_mm256_mul_pd(q_acc_0_11,src_dof_11)),_mm256_mul_pd(q_acc_0_12,src_dof_12)),_mm256_mul_pd(q_acc_0_13,src_dof_13)),_mm256_mul_pd(q_acc_0_14,src_dof_14)),_mm256_mul_pd(q_acc_0_15,src_dof_15)),_mm256_mul_pd(q_acc_0_16,src_dof_16)),_mm256_mul_pd(q_acc_0_17,src_dof_17)),_mm256_mul_pd(q_acc_0_18,src_dof_18)),_mm256_mul_pd(q_acc_0_19,src_dof_19)),_mm256_mul_pd(q_acc_0_2,src_dof_2)),_mm256_mul_pd(q_acc_0_20,src_dof_20)),_mm256_mul_pd(q_acc_0_21,src_dof_21)),_mm256_mul_pd(q_acc_0_22,src_dof_22)),_mm256_mul_pd(q_acc_0_23,src_dof_23)),_mm256_mul_pd(q_acc_0_24,src_dof_24)),_mm256_mul_pd(q_acc_0_25,src_dof_25)),_mm256_mul_pd(q_acc_0_26,src_dof_26)),_mm256_mul_pd(q_acc_0_27,src_dof_27)),_mm256_mul_pd(q_acc_0_28,src_dof_28)),_mm256_mul_pd(q_acc_0_29,src_dof_29)),_mm256_mul_pd(q_acc_0_3,src_dof_3)),_mm256_mul_pd(q_acc_0_4,src_dof_4)),_mm256_mul_pd(q_acc_0_5,src_dof_5)),_mm256_mul_pd(q_acc_0_6,src_dof_6)),_mm256_mul_pd(q_acc_0_7,src_dof_7)),_mm256_mul_pd(q_acc_0_8,src_dof_8)),_mm256_mul_pd(q_acc_0_9,src_dof_9)); + const __m256d elMatVec_1 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(q_acc_1_0,src_dof_0),_mm256_mul_pd(q_acc_1_1,src_dof_1)),_mm256_mul_pd(q_acc_1_10,src_dof_10)),_mm256_mul_pd(q_acc_1_11,src_dof_11)),_mm256_mul_pd(q_acc_1_12,src_dof_12)),_mm256_mul_pd(q_acc_1_13,src_dof_13)),_mm256_mul_pd(q_acc_1_14,src_dof_14)),_mm256_mul_pd(q_acc_1_15,src_dof_15)),_mm256_mul_pd(q_acc_1_16,src_dof_16)),_mm256_mul_pd(q_acc_1_17,src_dof_17)),_mm256_mul_pd(q_acc_1_18,src_dof_18)),_mm256_mul_pd(q_acc_1_19,src_dof_19)),_mm256_mul_pd(q_acc_1_2,src_dof_2)),_mm256_mul_pd(q_acc_1_20,src_dof_20)),_mm256_mul_pd(q_acc_1_21,src_dof_21)),_mm256_mul_pd(q_acc_1_22,src_dof_22)),_mm256_mul_pd(q_acc_1_23,src_dof_23)),_mm256_mul_pd(q_acc_1_24,src_dof_24)),_mm256_mul_pd(q_acc_1_25,src_dof_25)),_mm256_mul_pd(q_acc_1_26,src_dof_26)),_mm256_mul_pd(q_acc_1_27,src_dof_27)),_mm256_mul_pd(q_acc_1_28,src_dof_28)),_mm256_mul_pd(q_acc_1_29,src_dof_29)),_mm256_mul_pd(q_acc_1_3,src_dof_3)),_mm256_mul_pd(q_acc_1_4,src_dof_4)),_mm256_mul_pd(q_acc_1_5,src_dof_5)),_mm256_mul_pd(q_acc_1_6,src_dof_6)),_mm256_mul_pd(q_acc_1_7,src_dof_7)),_mm256_mul_pd(q_acc_1_8,src_dof_8)),_mm256_mul_pd(q_acc_1_9,src_dof_9)); + const __m256d elMatVec_2 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(q_acc_2_0,src_dof_0),_mm256_mul_pd(q_acc_2_1,src_dof_1)),_mm256_mul_pd(q_acc_2_10,src_dof_10)),_mm256_mul_pd(q_acc_2_11,src_dof_11)),_mm256_mul_pd(q_acc_2_12,src_dof_12)),_mm256_mul_pd(q_acc_2_13,src_dof_13)),_mm256_mul_pd(q_acc_2_14,src_dof_14)),_mm256_mul_pd(q_acc_2_15,src_dof_15)),_mm256_mul_pd(q_acc_2_16,src_dof_16)),_mm256_mul_pd(q_acc_2_17,src_dof_17)),_mm256_mul_pd(q_acc_2_18,src_dof_18)),_mm256_mul_pd(q_acc_2_19,src_dof_19)),_mm256_mul_pd(q_acc_2_2,src_dof_2)),_mm256_mul_pd(q_acc_2_20,src_dof_20)),_mm256_mul_pd(q_acc_2_21,src_dof_21)),_mm256_mul_pd(q_acc_2_22,src_dof_22)),_mm256_mul_pd(q_acc_2_23,src_dof_23)),_mm256_mul_pd(q_acc_2_24,src_dof_24)),_mm256_mul_pd(q_acc_2_25,src_dof_25)),_mm256_mul_pd(q_acc_2_26,src_dof_26)),_mm256_mul_pd(q_acc_2_27,src_dof_27)),_mm256_mul_pd(q_acc_2_28,src_dof_28)),_mm256_mul_pd(q_acc_2_29,src_dof_29)),_mm256_mul_pd(q_acc_2_3,src_dof_3)),_mm256_mul_pd(q_acc_2_4,src_dof_4)),_mm256_mul_pd(q_acc_2_5,src_dof_5)),_mm256_mul_pd(q_acc_2_6,src_dof_6)),_mm256_mul_pd(q_acc_2_7,src_dof_7)),_mm256_mul_pd(q_acc_2_8,src_dof_8)),_mm256_mul_pd(q_acc_2_9,src_dof_9)); + const __m256d elMatVec_3 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(q_acc_3_0,src_dof_0),_mm256_mul_pd(q_acc_3_1,src_dof_1)),_mm256_mul_pd(q_acc_3_10,src_dof_10)),_mm256_mul_pd(q_acc_3_11,src_dof_11)),_mm256_mul_pd(q_acc_3_12,src_dof_12)),_mm256_mul_pd(q_acc_3_13,src_dof_13)),_mm256_mul_pd(q_acc_3_14,src_dof_14)),_mm256_mul_pd(q_acc_3_15,src_dof_15)),_mm256_mul_pd(q_acc_3_16,src_dof_16)),_mm256_mul_pd(q_acc_3_17,src_dof_17)),_mm256_mul_pd(q_acc_3_18,src_dof_18)),_mm256_mul_pd(q_acc_3_19,src_dof_19)),_mm256_mul_pd(q_acc_3_2,src_dof_2)),_mm256_mul_pd(q_acc_3_20,src_dof_20)),_mm256_mul_pd(q_acc_3_21,src_dof_21)),_mm256_mul_pd(q_acc_3_22,src_dof_22)),_mm256_mul_pd(q_acc_3_23,src_dof_23)),_mm256_mul_pd(q_acc_3_24,src_dof_24)),_mm256_mul_pd(q_acc_3_25,src_dof_25)),_mm256_mul_pd(q_acc_3_26,src_dof_26)),_mm256_mul_pd(q_acc_3_27,src_dof_27)),_mm256_mul_pd(q_acc_3_28,src_dof_28)),_mm256_mul_pd(q_acc_3_29,src_dof_29)),_mm256_mul_pd(q_acc_3_3,src_dof_3)),_mm256_mul_pd(q_acc_3_4,src_dof_4)),_mm256_mul_pd(q_acc_3_5,src_dof_5)),_mm256_mul_pd(q_acc_3_6,src_dof_6)),_mm256_mul_pd(q_acc_3_7,src_dof_7)),_mm256_mul_pd(q_acc_3_8,src_dof_8)),_mm256_mul_pd(q_acc_3_9,src_dof_9)); + _mm256_storeu_pd(&_data_dst[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1],_mm256_add_pd(elMatVec_0,_mm256_loadu_pd(& _data_dst[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1]))); + _mm256_storeu_pd(&_data_dst[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))],_mm256_add_pd(elMatVec_1,_mm256_loadu_pd(& _data_dst[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))]))); + _mm256_storeu_pd(&_data_dst[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))],_mm256_add_pd(elMatVec_2,_mm256_loadu_pd(& _data_dst[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))]))); + _mm256_storeu_pd(&_data_dst[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1],_mm256_add_pd(elMatVec_3,_mm256_loadu_pd(& _data_dst[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1]))); + } + for (int64_t ctr_0 = (int64_t)((-ctr_1 - ctr_2 + micro_edges_per_macro_edge - 1) / (4)) * (4); ctr_0 < -ctr_1 - ctr_2 + micro_edges_per_macro_edge - 1; ctr_0 += 1) + { + + const int64_t phantom_ctr_0 = ctr_0; + real_t _data_float_loop_ctr_array_dim_0[4]; + _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0; + _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1; + _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2; + _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3; + real_t _data_float_loop_ctr_array_dim_1[4]; + _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1; + real_t _data_float_loop_ctr_array_dim_2[4]; + _data_float_loop_ctr_array_dim_2[0] = (real_t) ctr_2; + _data_float_loop_ctr_array_dim_2[1] = (real_t) ctr_2; + _data_float_loop_ctr_array_dim_2[2] = (real_t) ctr_2; + _data_float_loop_ctr_array_dim_2[3] = (real_t) ctr_2; + + const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]; + const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]; + const real_t p_affine_0_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]; + const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]; + const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]; + const real_t p_affine_1_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]; + const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_2_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_3_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_3_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_3_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0; + const real_t src_dof_0 = _data_src_vertex_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1]; + const real_t src_dof_1 = _data_src_vertex_0[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))]; + const real_t src_dof_2 = _data_src_vertex_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))]; + const real_t src_dof_3 = _data_src_vertex_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1]; + const real_t src_dof_4 = _data_src_edge_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))]; + const real_t src_dof_5 = _data_src_edge_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge - 1)*(micro_edges_per_macro_edge + 1)) / (6)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))]; + const real_t src_dof_6 = _data_src_edge_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]; + const real_t src_dof_7 = _data_src_edge_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 3*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + 1]; + const real_t src_dof_8 = _data_src_edge_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 5*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]; + const real_t src_dof_9 = _data_src_edge_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 4*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]; + const real_t src_dof_10 = _data_src_vertex_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1]; + const real_t src_dof_11 = _data_src_vertex_1[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))]; + const real_t src_dof_12 = _data_src_vertex_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))]; + const real_t src_dof_13 = _data_src_vertex_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1]; + const real_t src_dof_14 = _data_src_edge_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))]; + const real_t src_dof_15 = _data_src_edge_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge - 1)*(micro_edges_per_macro_edge + 1)) / (6)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))]; + const real_t src_dof_16 = _data_src_edge_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]; + const real_t src_dof_17 = _data_src_edge_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 3*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + 1]; + const real_t src_dof_18 = _data_src_edge_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 5*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]; + const real_t src_dof_19 = _data_src_edge_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 4*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]; + const real_t src_dof_20 = _data_src_vertex_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1]; + const real_t src_dof_21 = _data_src_vertex_2[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))]; + const real_t src_dof_22 = _data_src_vertex_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))]; + const real_t src_dof_23 = _data_src_vertex_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1]; + const real_t src_dof_24 = _data_src_edge_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))]; + const real_t src_dof_25 = _data_src_edge_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge - 1)*(micro_edges_per_macro_edge + 1)) / (6)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))]; + const real_t src_dof_26 = _data_src_edge_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]; + const real_t src_dof_27 = _data_src_edge_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 3*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + 1]; + const real_t src_dof_28 = _data_src_edge_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 5*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]; + const real_t src_dof_29 = _data_src_edge_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 4*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]; + const real_t rho_dof_0 = _data_rhoVertex[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1]; + const real_t rho_dof_1 = _data_rhoVertex[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))]; + const real_t rho_dof_2 = _data_rhoVertex[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))]; + const real_t rho_dof_3 = _data_rhoVertex[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1]; + const real_t rho_dof_4 = _data_rhoEdge[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))]; + const real_t rho_dof_5 = _data_rhoEdge[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge - 1)*(micro_edges_per_macro_edge + 1)) / (6)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))]; + const real_t rho_dof_6 = _data_rhoEdge[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]; + const real_t rho_dof_7 = _data_rhoEdge[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 3*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + 1]; + const real_t rho_dof_8 = _data_rhoEdge[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 5*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]; + const real_t rho_dof_9 = _data_rhoEdge[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 4*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]; + real_t q_acc_0_0 = 0.0; + real_t q_acc_0_1 = 0.0; + real_t q_acc_0_2 = 0.0; + real_t q_acc_0_3 = 0.0; + real_t q_acc_0_4 = 0.0; + real_t q_acc_0_5 = 0.0; + real_t q_acc_0_6 = 0.0; + real_t q_acc_0_7 = 0.0; + real_t q_acc_0_8 = 0.0; + real_t q_acc_0_9 = 0.0; + real_t q_acc_0_10 = 0.0; + real_t q_acc_0_11 = 0.0; + real_t q_acc_0_12 = 0.0; + real_t q_acc_0_13 = 0.0; + real_t q_acc_0_14 = 0.0; + real_t q_acc_0_15 = 0.0; + real_t q_acc_0_16 = 0.0; + real_t q_acc_0_17 = 0.0; + real_t q_acc_0_18 = 0.0; + real_t q_acc_0_19 = 0.0; + real_t q_acc_0_20 = 0.0; + real_t q_acc_0_21 = 0.0; + real_t q_acc_0_22 = 0.0; + real_t q_acc_0_23 = 0.0; + real_t q_acc_0_24 = 0.0; + real_t q_acc_0_25 = 0.0; + real_t q_acc_0_26 = 0.0; + real_t q_acc_0_27 = 0.0; + real_t q_acc_0_28 = 0.0; + real_t q_acc_0_29 = 0.0; + real_t q_acc_1_0 = 0.0; + real_t q_acc_1_1 = 0.0; + real_t q_acc_1_2 = 0.0; + real_t q_acc_1_3 = 0.0; + real_t q_acc_1_4 = 0.0; + real_t q_acc_1_5 = 0.0; + real_t q_acc_1_6 = 0.0; + real_t q_acc_1_7 = 0.0; + real_t q_acc_1_8 = 0.0; + real_t q_acc_1_9 = 0.0; + real_t q_acc_1_10 = 0.0; + real_t q_acc_1_11 = 0.0; + real_t q_acc_1_12 = 0.0; + real_t q_acc_1_13 = 0.0; + real_t q_acc_1_14 = 0.0; + real_t q_acc_1_15 = 0.0; + real_t q_acc_1_16 = 0.0; + real_t q_acc_1_17 = 0.0; + real_t q_acc_1_18 = 0.0; + real_t q_acc_1_19 = 0.0; + real_t q_acc_1_20 = 0.0; + real_t q_acc_1_21 = 0.0; + real_t q_acc_1_22 = 0.0; + real_t q_acc_1_23 = 0.0; + real_t q_acc_1_24 = 0.0; + real_t q_acc_1_25 = 0.0; + real_t q_acc_1_26 = 0.0; + real_t q_acc_1_27 = 0.0; + real_t q_acc_1_28 = 0.0; + real_t q_acc_1_29 = 0.0; + real_t q_acc_2_0 = 0.0; + real_t q_acc_2_1 = 0.0; + real_t q_acc_2_2 = 0.0; + real_t q_acc_2_3 = 0.0; + real_t q_acc_2_4 = 0.0; + real_t q_acc_2_5 = 0.0; + real_t q_acc_2_6 = 0.0; + real_t q_acc_2_7 = 0.0; + real_t q_acc_2_8 = 0.0; + real_t q_acc_2_9 = 0.0; + real_t q_acc_2_10 = 0.0; + real_t q_acc_2_11 = 0.0; + real_t q_acc_2_12 = 0.0; + real_t q_acc_2_13 = 0.0; + real_t q_acc_2_14 = 0.0; + real_t q_acc_2_15 = 0.0; + real_t q_acc_2_16 = 0.0; + real_t q_acc_2_17 = 0.0; + real_t q_acc_2_18 = 0.0; + real_t q_acc_2_19 = 0.0; + real_t q_acc_2_20 = 0.0; + real_t q_acc_2_21 = 0.0; + real_t q_acc_2_22 = 0.0; + real_t q_acc_2_23 = 0.0; + real_t q_acc_2_24 = 0.0; + real_t q_acc_2_25 = 0.0; + real_t q_acc_2_26 = 0.0; + real_t q_acc_2_27 = 0.0; + real_t q_acc_2_28 = 0.0; + real_t q_acc_2_29 = 0.0; + real_t q_acc_3_0 = 0.0; + real_t q_acc_3_1 = 0.0; + real_t q_acc_3_2 = 0.0; + real_t q_acc_3_3 = 0.0; + real_t q_acc_3_4 = 0.0; + real_t q_acc_3_5 = 0.0; + real_t q_acc_3_6 = 0.0; + real_t q_acc_3_7 = 0.0; + real_t q_acc_3_8 = 0.0; + real_t q_acc_3_9 = 0.0; + real_t q_acc_3_10 = 0.0; + real_t q_acc_3_11 = 0.0; + real_t q_acc_3_12 = 0.0; + real_t q_acc_3_13 = 0.0; + real_t q_acc_3_14 = 0.0; + real_t q_acc_3_15 = 0.0; + real_t q_acc_3_16 = 0.0; + real_t q_acc_3_17 = 0.0; + real_t q_acc_3_18 = 0.0; + real_t q_acc_3_19 = 0.0; + real_t q_acc_3_20 = 0.0; + real_t q_acc_3_21 = 0.0; + real_t q_acc_3_22 = 0.0; + real_t q_acc_3_23 = 0.0; + real_t q_acc_3_24 = 0.0; + real_t q_acc_3_25 = 0.0; + real_t q_acc_3_26 = 0.0; + real_t q_acc_3_27 = 0.0; + real_t q_acc_3_28 = 0.0; + real_t q_acc_3_29 = 0.0; + for (int64_t q = 0; q < 5; q += 1) + { + const real_t tmp_qloop_0 = (_data_q_p_0[q]*_data_q_p_0[q]); + const real_t tmp_qloop_1 = tmp_qloop_0*2.0; + const real_t tmp_qloop_2 = (_data_q_p_1[q]*_data_q_p_1[q]); + const real_t tmp_qloop_3 = tmp_qloop_2*2.0; + const real_t tmp_qloop_4 = (_data_q_p_2[q]*_data_q_p_2[q]); + const real_t tmp_qloop_5 = tmp_qloop_4*2.0; + const real_t tmp_qloop_6 = 4.0*_data_q_p_1[q]; + const real_t tmp_qloop_7 = tmp_qloop_6*_data_q_p_2[q]; + const real_t tmp_qloop_8 = 4.0*_data_q_p_0[q]; + const real_t tmp_qloop_9 = tmp_qloop_8*_data_q_p_1[q]; + const real_t tmp_qloop_10 = tmp_qloop_8*_data_q_p_2[q]; + const real_t tmp_qloop_11 = tmp_qloop_10 + tmp_qloop_9; + const real_t tmp_qloop_12 = tmp_qloop_1 + tmp_qloop_11 + tmp_qloop_3 + tmp_qloop_5 + tmp_qloop_7 - 3.0*_data_q_p_0[q] - 3.0*_data_q_p_1[q] - 3.0*_data_q_p_2[q] + 1.0; + const real_t tmp_qloop_13 = 4.0*_data_q_p_2[q]; + const real_t tmp_qloop_14 = tmp_qloop_13 + tmp_qloop_6; + const real_t tmp_qloop_15 = -rho_dof_8*tmp_qloop_6; + const real_t tmp_qloop_16 = rho_dof_0*(tmp_qloop_14 + tmp_qloop_8 - 3.0); + const real_t tmp_qloop_17 = -rho_dof_7*tmp_qloop_13 + tmp_qloop_16; + const real_t tmp_qloop_18 = rho_dof_1*(tmp_qloop_8 - 1.0) + rho_dof_5*tmp_qloop_13 + rho_dof_6*tmp_qloop_6 + rho_dof_9*(-tmp_qloop_14 - 8.0*_data_q_p_0[q] + 4.0) + tmp_qloop_15 + tmp_qloop_17; + const real_t tmp_qloop_19 = tmp_qloop_8 - 4.0; + const real_t tmp_qloop_20 = -rho_dof_9*tmp_qloop_8; + const real_t tmp_qloop_21 = rho_dof_2*(tmp_qloop_6 - 1.0) + rho_dof_4*tmp_qloop_13 + rho_dof_6*tmp_qloop_8 + rho_dof_8*(-tmp_qloop_13 - tmp_qloop_19 - 8.0*_data_q_p_1[q]) + tmp_qloop_17 + tmp_qloop_20; + const real_t tmp_qloop_22 = rho_dof_3*(tmp_qloop_13 - 1.0) + rho_dof_4*tmp_qloop_6 + rho_dof_5*tmp_qloop_8 + rho_dof_7*(-tmp_qloop_19 - tmp_qloop_6 - 8.0*_data_q_p_2[q]) + tmp_qloop_15 + tmp_qloop_16 + tmp_qloop_20; + const real_t tmp_qloop_23 = jac_affine_inv_0_0_GREEN_UP*tmp_qloop_18 + jac_affine_inv_1_0_GREEN_UP*tmp_qloop_21 + jac_affine_inv_2_0_GREEN_UP*tmp_qloop_22; + const real_t tmp_qloop_24 = tmp_qloop_1 - _data_q_p_0[q]; + const real_t tmp_qloop_25 = tmp_qloop_3 - _data_q_p_1[q]; + const real_t tmp_qloop_26 = tmp_qloop_5 - _data_q_p_2[q]; + const real_t tmp_qloop_27 = -tmp_qloop_10 + tmp_qloop_13 + tmp_qloop_4*-4.0 - tmp_qloop_7; + const real_t tmp_qloop_28 = tmp_qloop_2*4.0; + const real_t tmp_qloop_29 = -tmp_qloop_28 + tmp_qloop_6 - tmp_qloop_7 - tmp_qloop_9; + const real_t tmp_qloop_30 = tmp_qloop_0*4.0; + const real_t tmp_qloop_31 = -tmp_qloop_11 - tmp_qloop_30 + tmp_qloop_8; + const real_t tmp_qloop_32 = abs_det_jac_affine_GREEN_UP*1.0 / (rho_dof_0*tmp_qloop_12 + rho_dof_1*tmp_qloop_24 + rho_dof_2*tmp_qloop_25 + rho_dof_3*tmp_qloop_26 + rho_dof_4*tmp_qloop_7 + rho_dof_5*tmp_qloop_10 + rho_dof_6*tmp_qloop_9 + rho_dof_7*tmp_qloop_27 + rho_dof_8*tmp_qloop_29 + rho_dof_9*tmp_qloop_31)*_data_q_w[q]; + const real_t tmp_qloop_33 = tmp_qloop_32*(1.0 - _data_q_p_0[q] - _data_q_p_1[q] - _data_q_p_2[q]); + const real_t tmp_qloop_34 = tmp_qloop_23*tmp_qloop_33; + const real_t tmp_qloop_35 = jac_affine_inv_0_1_GREEN_UP*tmp_qloop_18 + jac_affine_inv_1_1_GREEN_UP*tmp_qloop_21 + jac_affine_inv_2_1_GREEN_UP*tmp_qloop_22; + const real_t tmp_qloop_36 = tmp_qloop_33*tmp_qloop_35; + const real_t tmp_qloop_37 = jac_affine_inv_0_2_GREEN_UP*tmp_qloop_18 + jac_affine_inv_1_2_GREEN_UP*tmp_qloop_21 + jac_affine_inv_2_2_GREEN_UP*tmp_qloop_22; + const real_t tmp_qloop_38 = tmp_qloop_33*tmp_qloop_37; + const real_t tmp_qloop_39 = tmp_qloop_32*_data_q_p_0[q]; + const real_t tmp_qloop_40 = tmp_qloop_23*tmp_qloop_39; + const real_t tmp_qloop_41 = tmp_qloop_32*_data_q_p_2[q]; + const real_t tmp_qloop_42 = tmp_qloop_23*tmp_qloop_41; + const real_t tmp_qloop_43 = tmp_qloop_42*tmp_qloop_9; + const real_t tmp_qloop_44 = tmp_qloop_32*_data_q_p_1[q]; + const real_t tmp_qloop_45 = tmp_qloop_23*tmp_qloop_44; + const real_t tmp_qloop_46 = tmp_qloop_35*tmp_qloop_39; + const real_t tmp_qloop_47 = tmp_qloop_35*tmp_qloop_41; + const real_t tmp_qloop_48 = tmp_qloop_47*tmp_qloop_9; + const real_t tmp_qloop_49 = tmp_qloop_35*tmp_qloop_44; + const real_t tmp_qloop_50 = tmp_qloop_37*tmp_qloop_39; + const real_t tmp_qloop_51 = tmp_qloop_37*tmp_qloop_41; + const real_t tmp_qloop_52 = tmp_qloop_51*tmp_qloop_9; + const real_t tmp_qloop_53 = tmp_qloop_37*tmp_qloop_44; + const real_t tmp_qloop_54 = tmp_qloop_23*tmp_qloop_32; + const real_t tmp_qloop_55 = tmp_qloop_2*tmp_qloop_8; + const real_t tmp_qloop_56 = tmp_qloop_32*tmp_qloop_55; + const real_t tmp_qloop_57 = tmp_qloop_4*tmp_qloop_54; + const real_t tmp_qloop_58 = tmp_qloop_32*tmp_qloop_4; + const real_t tmp_qloop_59 = tmp_qloop_35*tmp_qloop_58; + const real_t tmp_qloop_60 = tmp_qloop_37*tmp_qloop_58; + const real_t q_tmp_0_0 = tmp_qloop_12*tmp_qloop_34; + const real_t q_tmp_0_1 = tmp_qloop_24*tmp_qloop_34; + const real_t q_tmp_0_2 = tmp_qloop_25*tmp_qloop_34; + const real_t q_tmp_0_3 = tmp_qloop_26*tmp_qloop_34; + const real_t q_tmp_0_4 = tmp_qloop_34*tmp_qloop_7; + const real_t q_tmp_0_5 = tmp_qloop_10*tmp_qloop_34; + const real_t q_tmp_0_6 = tmp_qloop_34*tmp_qloop_9; + const real_t q_tmp_0_7 = tmp_qloop_27*tmp_qloop_34; + const real_t q_tmp_0_8 = tmp_qloop_29*tmp_qloop_34; + const real_t q_tmp_0_9 = tmp_qloop_31*tmp_qloop_34; + const real_t q_tmp_0_10 = tmp_qloop_12*tmp_qloop_36; + const real_t q_tmp_0_11 = tmp_qloop_24*tmp_qloop_36; + const real_t q_tmp_0_12 = tmp_qloop_25*tmp_qloop_36; + const real_t q_tmp_0_13 = tmp_qloop_26*tmp_qloop_36; + const real_t q_tmp_0_14 = tmp_qloop_36*tmp_qloop_7; + const real_t q_tmp_0_15 = tmp_qloop_10*tmp_qloop_36; + const real_t q_tmp_0_16 = tmp_qloop_36*tmp_qloop_9; + const real_t q_tmp_0_17 = tmp_qloop_27*tmp_qloop_36; + const real_t q_tmp_0_18 = tmp_qloop_29*tmp_qloop_36; + const real_t q_tmp_0_19 = tmp_qloop_31*tmp_qloop_36; + const real_t q_tmp_0_20 = tmp_qloop_12*tmp_qloop_38; + const real_t q_tmp_0_21 = tmp_qloop_24*tmp_qloop_38; + const real_t q_tmp_0_22 = tmp_qloop_25*tmp_qloop_38; + const real_t q_tmp_0_23 = tmp_qloop_26*tmp_qloop_38; + const real_t q_tmp_0_24 = tmp_qloop_38*tmp_qloop_7; + const real_t q_tmp_0_25 = tmp_qloop_10*tmp_qloop_38; + const real_t q_tmp_0_26 = tmp_qloop_38*tmp_qloop_9; + const real_t q_tmp_0_27 = tmp_qloop_27*tmp_qloop_38; + const real_t q_tmp_0_28 = tmp_qloop_29*tmp_qloop_38; + const real_t q_tmp_0_29 = tmp_qloop_31*tmp_qloop_38; + const real_t q_tmp_1_0 = tmp_qloop_12*tmp_qloop_40; + const real_t q_tmp_1_1 = tmp_qloop_24*tmp_qloop_40; + const real_t q_tmp_1_2 = tmp_qloop_25*tmp_qloop_40; + const real_t q_tmp_1_3 = tmp_qloop_26*tmp_qloop_40; + const real_t q_tmp_1_4 = tmp_qloop_43; + const real_t q_tmp_1_5 = tmp_qloop_30*tmp_qloop_42; + const real_t q_tmp_1_6 = tmp_qloop_30*tmp_qloop_45; + const real_t q_tmp_1_7 = tmp_qloop_27*tmp_qloop_40; + const real_t q_tmp_1_8 = tmp_qloop_29*tmp_qloop_40; + const real_t q_tmp_1_9 = tmp_qloop_31*tmp_qloop_40; + const real_t q_tmp_1_10 = tmp_qloop_12*tmp_qloop_46; + const real_t q_tmp_1_11 = tmp_qloop_24*tmp_qloop_46; + const real_t q_tmp_1_12 = tmp_qloop_25*tmp_qloop_46; + const real_t q_tmp_1_13 = tmp_qloop_26*tmp_qloop_46; + const real_t q_tmp_1_14 = tmp_qloop_48; + const real_t q_tmp_1_15 = tmp_qloop_30*tmp_qloop_47; + const real_t q_tmp_1_16 = tmp_qloop_30*tmp_qloop_49; + const real_t q_tmp_1_17 = tmp_qloop_27*tmp_qloop_46; + const real_t q_tmp_1_18 = tmp_qloop_29*tmp_qloop_46; + const real_t q_tmp_1_19 = tmp_qloop_31*tmp_qloop_46; + const real_t q_tmp_1_20 = tmp_qloop_12*tmp_qloop_50; + const real_t q_tmp_1_21 = tmp_qloop_24*tmp_qloop_50; + const real_t q_tmp_1_22 = tmp_qloop_25*tmp_qloop_50; + const real_t q_tmp_1_23 = tmp_qloop_26*tmp_qloop_50; + const real_t q_tmp_1_24 = tmp_qloop_52; + const real_t q_tmp_1_25 = tmp_qloop_30*tmp_qloop_51; + const real_t q_tmp_1_26 = tmp_qloop_30*tmp_qloop_53; + const real_t q_tmp_1_27 = tmp_qloop_27*tmp_qloop_50; + const real_t q_tmp_1_28 = tmp_qloop_29*tmp_qloop_50; + const real_t q_tmp_1_29 = tmp_qloop_31*tmp_qloop_50; + const real_t q_tmp_2_0 = tmp_qloop_12*tmp_qloop_45; + const real_t q_tmp_2_1 = tmp_qloop_24*tmp_qloop_45; + const real_t q_tmp_2_2 = tmp_qloop_25*tmp_qloop_45; + const real_t q_tmp_2_3 = tmp_qloop_26*tmp_qloop_45; + const real_t q_tmp_2_4 = tmp_qloop_28*tmp_qloop_42; + const real_t q_tmp_2_5 = tmp_qloop_43; + const real_t q_tmp_2_6 = tmp_qloop_54*tmp_qloop_55; + const real_t q_tmp_2_7 = tmp_qloop_27*tmp_qloop_45; + const real_t q_tmp_2_8 = tmp_qloop_29*tmp_qloop_45; + const real_t q_tmp_2_9 = tmp_qloop_31*tmp_qloop_45; + const real_t q_tmp_2_10 = tmp_qloop_12*tmp_qloop_49; + const real_t q_tmp_2_11 = tmp_qloop_24*tmp_qloop_49; + const real_t q_tmp_2_12 = tmp_qloop_25*tmp_qloop_49; + const real_t q_tmp_2_13 = tmp_qloop_26*tmp_qloop_49; + const real_t q_tmp_2_14 = tmp_qloop_28*tmp_qloop_47; + const real_t q_tmp_2_15 = tmp_qloop_48; + const real_t q_tmp_2_16 = tmp_qloop_35*tmp_qloop_56; + const real_t q_tmp_2_17 = tmp_qloop_27*tmp_qloop_49; + const real_t q_tmp_2_18 = tmp_qloop_29*tmp_qloop_49; + const real_t q_tmp_2_19 = tmp_qloop_31*tmp_qloop_49; + const real_t q_tmp_2_20 = tmp_qloop_12*tmp_qloop_53; + const real_t q_tmp_2_21 = tmp_qloop_24*tmp_qloop_53; + const real_t q_tmp_2_22 = tmp_qloop_25*tmp_qloop_53; + const real_t q_tmp_2_23 = tmp_qloop_26*tmp_qloop_53; + const real_t q_tmp_2_24 = tmp_qloop_28*tmp_qloop_51; + const real_t q_tmp_2_25 = tmp_qloop_52; + const real_t q_tmp_2_26 = tmp_qloop_37*tmp_qloop_56; + const real_t q_tmp_2_27 = tmp_qloop_27*tmp_qloop_53; + const real_t q_tmp_2_28 = tmp_qloop_29*tmp_qloop_53; + const real_t q_tmp_2_29 = tmp_qloop_31*tmp_qloop_53; + const real_t q_tmp_3_0 = tmp_qloop_12*tmp_qloop_42; + const real_t q_tmp_3_1 = tmp_qloop_24*tmp_qloop_42; + const real_t q_tmp_3_2 = tmp_qloop_25*tmp_qloop_42; + const real_t q_tmp_3_3 = tmp_qloop_26*tmp_qloop_42; + const real_t q_tmp_3_4 = tmp_qloop_57*tmp_qloop_6; + const real_t q_tmp_3_5 = tmp_qloop_57*tmp_qloop_8; + const real_t q_tmp_3_6 = tmp_qloop_43; + const real_t q_tmp_3_7 = tmp_qloop_27*tmp_qloop_42; + const real_t q_tmp_3_8 = tmp_qloop_29*tmp_qloop_42; + const real_t q_tmp_3_9 = tmp_qloop_31*tmp_qloop_42; + const real_t q_tmp_3_10 = tmp_qloop_12*tmp_qloop_47; + const real_t q_tmp_3_11 = tmp_qloop_24*tmp_qloop_47; + const real_t q_tmp_3_12 = tmp_qloop_25*tmp_qloop_47; + const real_t q_tmp_3_13 = tmp_qloop_26*tmp_qloop_47; + const real_t q_tmp_3_14 = tmp_qloop_59*tmp_qloop_6; + const real_t q_tmp_3_15 = tmp_qloop_59*tmp_qloop_8; + const real_t q_tmp_3_16 = tmp_qloop_48; + const real_t q_tmp_3_17 = tmp_qloop_27*tmp_qloop_47; + const real_t q_tmp_3_18 = tmp_qloop_29*tmp_qloop_47; + const real_t q_tmp_3_19 = tmp_qloop_31*tmp_qloop_47; + const real_t q_tmp_3_20 = tmp_qloop_12*tmp_qloop_51; + const real_t q_tmp_3_21 = tmp_qloop_24*tmp_qloop_51; + const real_t q_tmp_3_22 = tmp_qloop_25*tmp_qloop_51; + const real_t q_tmp_3_23 = tmp_qloop_26*tmp_qloop_51; + const real_t q_tmp_3_24 = tmp_qloop_6*tmp_qloop_60; + const real_t q_tmp_3_25 = tmp_qloop_60*tmp_qloop_8; + const real_t q_tmp_3_26 = tmp_qloop_52; + const real_t q_tmp_3_27 = tmp_qloop_27*tmp_qloop_51; + const real_t q_tmp_3_28 = tmp_qloop_29*tmp_qloop_51; + const real_t q_tmp_3_29 = tmp_qloop_31*tmp_qloop_51; + q_acc_0_0 = q_acc_0_0 + q_tmp_0_0; + q_acc_0_1 = q_acc_0_1 + q_tmp_0_1; + q_acc_0_2 = q_acc_0_2 + q_tmp_0_2; + q_acc_0_3 = q_acc_0_3 + q_tmp_0_3; + q_acc_0_4 = q_acc_0_4 + q_tmp_0_4; + q_acc_0_5 = q_acc_0_5 + q_tmp_0_5; + q_acc_0_6 = q_acc_0_6 + q_tmp_0_6; + q_acc_0_7 = q_acc_0_7 + q_tmp_0_7; + q_acc_0_8 = q_acc_0_8 + q_tmp_0_8; + q_acc_0_9 = q_acc_0_9 + q_tmp_0_9; + q_acc_0_10 = q_acc_0_10 + q_tmp_0_10; + q_acc_0_11 = q_acc_0_11 + q_tmp_0_11; + q_acc_0_12 = q_acc_0_12 + q_tmp_0_12; + q_acc_0_13 = q_acc_0_13 + q_tmp_0_13; + q_acc_0_14 = q_acc_0_14 + q_tmp_0_14; + q_acc_0_15 = q_acc_0_15 + q_tmp_0_15; + q_acc_0_16 = q_acc_0_16 + q_tmp_0_16; + q_acc_0_17 = q_acc_0_17 + q_tmp_0_17; + q_acc_0_18 = q_acc_0_18 + q_tmp_0_18; + q_acc_0_19 = q_acc_0_19 + q_tmp_0_19; + q_acc_0_20 = q_acc_0_20 + q_tmp_0_20; + q_acc_0_21 = q_acc_0_21 + q_tmp_0_21; + q_acc_0_22 = q_acc_0_22 + q_tmp_0_22; + q_acc_0_23 = q_acc_0_23 + q_tmp_0_23; + q_acc_0_24 = q_acc_0_24 + q_tmp_0_24; + q_acc_0_25 = q_acc_0_25 + q_tmp_0_25; + q_acc_0_26 = q_acc_0_26 + q_tmp_0_26; + q_acc_0_27 = q_acc_0_27 + q_tmp_0_27; + q_acc_0_28 = q_acc_0_28 + q_tmp_0_28; + q_acc_0_29 = q_acc_0_29 + q_tmp_0_29; + q_acc_1_0 = q_acc_1_0 + q_tmp_1_0; + q_acc_1_1 = q_acc_1_1 + q_tmp_1_1; + q_acc_1_2 = q_acc_1_2 + q_tmp_1_2; + q_acc_1_3 = q_acc_1_3 + q_tmp_1_3; + q_acc_1_4 = q_acc_1_4 + q_tmp_1_4; + q_acc_1_5 = q_acc_1_5 + q_tmp_1_5; + q_acc_1_6 = q_acc_1_6 + q_tmp_1_6; + q_acc_1_7 = q_acc_1_7 + q_tmp_1_7; + q_acc_1_8 = q_acc_1_8 + q_tmp_1_8; + q_acc_1_9 = q_acc_1_9 + q_tmp_1_9; + q_acc_1_10 = q_acc_1_10 + q_tmp_1_10; + q_acc_1_11 = q_acc_1_11 + q_tmp_1_11; + q_acc_1_12 = q_acc_1_12 + q_tmp_1_12; + q_acc_1_13 = q_acc_1_13 + q_tmp_1_13; + q_acc_1_14 = q_acc_1_14 + q_tmp_1_14; + q_acc_1_15 = q_acc_1_15 + q_tmp_1_15; + q_acc_1_16 = q_acc_1_16 + q_tmp_1_16; + q_acc_1_17 = q_acc_1_17 + q_tmp_1_17; + q_acc_1_18 = q_acc_1_18 + q_tmp_1_18; + q_acc_1_19 = q_acc_1_19 + q_tmp_1_19; + q_acc_1_20 = q_acc_1_20 + q_tmp_1_20; + q_acc_1_21 = q_acc_1_21 + q_tmp_1_21; + q_acc_1_22 = q_acc_1_22 + q_tmp_1_22; + q_acc_1_23 = q_acc_1_23 + q_tmp_1_23; + q_acc_1_24 = q_acc_1_24 + q_tmp_1_24; + q_acc_1_25 = q_acc_1_25 + q_tmp_1_25; + q_acc_1_26 = q_acc_1_26 + q_tmp_1_26; + q_acc_1_27 = q_acc_1_27 + q_tmp_1_27; + q_acc_1_28 = q_acc_1_28 + q_tmp_1_28; + q_acc_1_29 = q_acc_1_29 + q_tmp_1_29; + q_acc_2_0 = q_acc_2_0 + q_tmp_2_0; + q_acc_2_1 = q_acc_2_1 + q_tmp_2_1; + q_acc_2_2 = q_acc_2_2 + q_tmp_2_2; + q_acc_2_3 = q_acc_2_3 + q_tmp_2_3; + q_acc_2_4 = q_acc_2_4 + q_tmp_2_4; + q_acc_2_5 = q_acc_2_5 + q_tmp_2_5; + q_acc_2_6 = q_acc_2_6 + q_tmp_2_6; + q_acc_2_7 = q_acc_2_7 + q_tmp_2_7; + q_acc_2_8 = q_acc_2_8 + q_tmp_2_8; + q_acc_2_9 = q_acc_2_9 + q_tmp_2_9; + q_acc_2_10 = q_acc_2_10 + q_tmp_2_10; + q_acc_2_11 = q_acc_2_11 + q_tmp_2_11; + q_acc_2_12 = q_acc_2_12 + q_tmp_2_12; + q_acc_2_13 = q_acc_2_13 + q_tmp_2_13; + q_acc_2_14 = q_acc_2_14 + q_tmp_2_14; + q_acc_2_15 = q_acc_2_15 + q_tmp_2_15; + q_acc_2_16 = q_acc_2_16 + q_tmp_2_16; + q_acc_2_17 = q_acc_2_17 + q_tmp_2_17; + q_acc_2_18 = q_acc_2_18 + q_tmp_2_18; + q_acc_2_19 = q_acc_2_19 + q_tmp_2_19; + q_acc_2_20 = q_acc_2_20 + q_tmp_2_20; + q_acc_2_21 = q_acc_2_21 + q_tmp_2_21; + q_acc_2_22 = q_acc_2_22 + q_tmp_2_22; + q_acc_2_23 = q_acc_2_23 + q_tmp_2_23; + q_acc_2_24 = q_acc_2_24 + q_tmp_2_24; + q_acc_2_25 = q_acc_2_25 + q_tmp_2_25; + q_acc_2_26 = q_acc_2_26 + q_tmp_2_26; + q_acc_2_27 = q_acc_2_27 + q_tmp_2_27; + q_acc_2_28 = q_acc_2_28 + q_tmp_2_28; + q_acc_2_29 = q_acc_2_29 + q_tmp_2_29; + q_acc_3_0 = q_acc_3_0 + q_tmp_3_0; + q_acc_3_1 = q_acc_3_1 + q_tmp_3_1; + q_acc_3_2 = q_acc_3_2 + q_tmp_3_2; + q_acc_3_3 = q_acc_3_3 + q_tmp_3_3; + q_acc_3_4 = q_acc_3_4 + q_tmp_3_4; + q_acc_3_5 = q_acc_3_5 + q_tmp_3_5; + q_acc_3_6 = q_acc_3_6 + q_tmp_3_6; + q_acc_3_7 = q_acc_3_7 + q_tmp_3_7; + q_acc_3_8 = q_acc_3_8 + q_tmp_3_8; + q_acc_3_9 = q_acc_3_9 + q_tmp_3_9; + q_acc_3_10 = q_acc_3_10 + q_tmp_3_10; + q_acc_3_11 = q_acc_3_11 + q_tmp_3_11; + q_acc_3_12 = q_acc_3_12 + q_tmp_3_12; + q_acc_3_13 = q_acc_3_13 + q_tmp_3_13; + q_acc_3_14 = q_acc_3_14 + q_tmp_3_14; + q_acc_3_15 = q_acc_3_15 + q_tmp_3_15; + q_acc_3_16 = q_acc_3_16 + q_tmp_3_16; + q_acc_3_17 = q_acc_3_17 + q_tmp_3_17; + q_acc_3_18 = q_acc_3_18 + q_tmp_3_18; + q_acc_3_19 = q_acc_3_19 + q_tmp_3_19; + q_acc_3_20 = q_acc_3_20 + q_tmp_3_20; + q_acc_3_21 = q_acc_3_21 + q_tmp_3_21; + q_acc_3_22 = q_acc_3_22 + q_tmp_3_22; + q_acc_3_23 = q_acc_3_23 + q_tmp_3_23; + q_acc_3_24 = q_acc_3_24 + q_tmp_3_24; + q_acc_3_25 = q_acc_3_25 + q_tmp_3_25; + q_acc_3_26 = q_acc_3_26 + q_tmp_3_26; + q_acc_3_27 = q_acc_3_27 + q_tmp_3_27; + q_acc_3_28 = q_acc_3_28 + q_tmp_3_28; + q_acc_3_29 = q_acc_3_29 + q_tmp_3_29; + } + const real_t elMatVec_0 = q_acc_0_0*src_dof_0 + q_acc_0_1*src_dof_1 + q_acc_0_10*src_dof_10 + q_acc_0_11*src_dof_11 + q_acc_0_12*src_dof_12 + q_acc_0_13*src_dof_13 + q_acc_0_14*src_dof_14 + q_acc_0_15*src_dof_15 + q_acc_0_16*src_dof_16 + q_acc_0_17*src_dof_17 + q_acc_0_18*src_dof_18 + q_acc_0_19*src_dof_19 + q_acc_0_2*src_dof_2 + q_acc_0_20*src_dof_20 + q_acc_0_21*src_dof_21 + q_acc_0_22*src_dof_22 + q_acc_0_23*src_dof_23 + q_acc_0_24*src_dof_24 + q_acc_0_25*src_dof_25 + q_acc_0_26*src_dof_26 + q_acc_0_27*src_dof_27 + q_acc_0_28*src_dof_28 + q_acc_0_29*src_dof_29 + q_acc_0_3*src_dof_3 + q_acc_0_4*src_dof_4 + q_acc_0_5*src_dof_5 + q_acc_0_6*src_dof_6 + q_acc_0_7*src_dof_7 + q_acc_0_8*src_dof_8 + q_acc_0_9*src_dof_9; + const real_t elMatVec_1 = q_acc_1_0*src_dof_0 + q_acc_1_1*src_dof_1 + q_acc_1_10*src_dof_10 + q_acc_1_11*src_dof_11 + q_acc_1_12*src_dof_12 + q_acc_1_13*src_dof_13 + q_acc_1_14*src_dof_14 + q_acc_1_15*src_dof_15 + q_acc_1_16*src_dof_16 + q_acc_1_17*src_dof_17 + q_acc_1_18*src_dof_18 + q_acc_1_19*src_dof_19 + q_acc_1_2*src_dof_2 + q_acc_1_20*src_dof_20 + q_acc_1_21*src_dof_21 + q_acc_1_22*src_dof_22 + q_acc_1_23*src_dof_23 + q_acc_1_24*src_dof_24 + q_acc_1_25*src_dof_25 + q_acc_1_26*src_dof_26 + q_acc_1_27*src_dof_27 + q_acc_1_28*src_dof_28 + q_acc_1_29*src_dof_29 + q_acc_1_3*src_dof_3 + q_acc_1_4*src_dof_4 + q_acc_1_5*src_dof_5 + q_acc_1_6*src_dof_6 + q_acc_1_7*src_dof_7 + q_acc_1_8*src_dof_8 + q_acc_1_9*src_dof_9; + const real_t elMatVec_2 = q_acc_2_0*src_dof_0 + q_acc_2_1*src_dof_1 + q_acc_2_10*src_dof_10 + q_acc_2_11*src_dof_11 + q_acc_2_12*src_dof_12 + q_acc_2_13*src_dof_13 + q_acc_2_14*src_dof_14 + q_acc_2_15*src_dof_15 + q_acc_2_16*src_dof_16 + q_acc_2_17*src_dof_17 + q_acc_2_18*src_dof_18 + q_acc_2_19*src_dof_19 + q_acc_2_2*src_dof_2 + q_acc_2_20*src_dof_20 + q_acc_2_21*src_dof_21 + q_acc_2_22*src_dof_22 + q_acc_2_23*src_dof_23 + q_acc_2_24*src_dof_24 + q_acc_2_25*src_dof_25 + q_acc_2_26*src_dof_26 + q_acc_2_27*src_dof_27 + q_acc_2_28*src_dof_28 + q_acc_2_29*src_dof_29 + q_acc_2_3*src_dof_3 + q_acc_2_4*src_dof_4 + q_acc_2_5*src_dof_5 + q_acc_2_6*src_dof_6 + q_acc_2_7*src_dof_7 + q_acc_2_8*src_dof_8 + q_acc_2_9*src_dof_9; + const real_t elMatVec_3 = q_acc_3_0*src_dof_0 + q_acc_3_1*src_dof_1 + q_acc_3_10*src_dof_10 + q_acc_3_11*src_dof_11 + q_acc_3_12*src_dof_12 + q_acc_3_13*src_dof_13 + q_acc_3_14*src_dof_14 + q_acc_3_15*src_dof_15 + q_acc_3_16*src_dof_16 + q_acc_3_17*src_dof_17 + q_acc_3_18*src_dof_18 + q_acc_3_19*src_dof_19 + q_acc_3_2*src_dof_2 + q_acc_3_20*src_dof_20 + q_acc_3_21*src_dof_21 + q_acc_3_22*src_dof_22 + q_acc_3_23*src_dof_23 + q_acc_3_24*src_dof_24 + q_acc_3_25*src_dof_25 + q_acc_3_26*src_dof_26 + q_acc_3_27*src_dof_27 + q_acc_3_28*src_dof_28 + q_acc_3_29*src_dof_29 + q_acc_3_3*src_dof_3 + q_acc_3_4*src_dof_4 + q_acc_3_5*src_dof_5 + q_acc_3_6*src_dof_6 + q_acc_3_7*src_dof_7 + q_acc_3_8*src_dof_8 + q_acc_3_9*src_dof_9; + _data_dst[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1] = elMatVec_0 + _data_dst[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1]; + _data_dst[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))] = elMatVec_1 + _data_dst[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))]; + _data_dst[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))] = elMatVec_2 + _data_dst[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))]; + _data_dst[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1] = elMatVec_3 + _data_dst[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1]; + } + } + } + const real_t tmp_coords_jac_0_GREEN_DOWN = 1.0 / (micro_edges_per_macro_edge_float)*1.0; + const real_t tmp_coords_jac_1_GREEN_DOWN = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_GREEN_DOWN*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0); + const real_t tmp_coords_jac_2_GREEN_DOWN = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_GREEN_DOWN*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1); + const real_t tmp_coords_jac_3_GREEN_DOWN = macro_vertex_coord_id_0comp2 + tmp_coords_jac_0_GREEN_DOWN*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2); + const real_t tmp_coords_jac_4_GREEN_DOWN = tmp_coords_jac_0_GREEN_DOWN*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0); + const real_t tmp_coords_jac_5_GREEN_DOWN = tmp_coords_jac_0_GREEN_DOWN*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1); + const real_t tmp_coords_jac_6_GREEN_DOWN = tmp_coords_jac_0_GREEN_DOWN*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2); + const real_t tmp_coords_jac_7_GREEN_DOWN = tmp_coords_jac_0_GREEN_DOWN*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0); + const real_t tmp_coords_jac_8_GREEN_DOWN = tmp_coords_jac_0_GREEN_DOWN*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1); + const real_t tmp_coords_jac_9_GREEN_DOWN = tmp_coords_jac_0_GREEN_DOWN*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2); + const real_t p_affine_const_0_0_GREEN_DOWN = tmp_coords_jac_1_GREEN_DOWN; + const real_t p_affine_const_0_1_GREEN_DOWN = tmp_coords_jac_2_GREEN_DOWN; + const real_t p_affine_const_0_2_GREEN_DOWN = tmp_coords_jac_3_GREEN_DOWN; + const real_t p_affine_const_1_0_GREEN_DOWN = tmp_coords_jac_1_GREEN_DOWN + tmp_coords_jac_4_GREEN_DOWN; + const real_t p_affine_const_1_1_GREEN_DOWN = tmp_coords_jac_2_GREEN_DOWN + tmp_coords_jac_5_GREEN_DOWN; + const real_t p_affine_const_1_2_GREEN_DOWN = tmp_coords_jac_3_GREEN_DOWN + tmp_coords_jac_6_GREEN_DOWN; + const real_t p_affine_const_2_0_GREEN_DOWN = macro_vertex_coord_id_0comp0 + tmp_coords_jac_4_GREEN_DOWN + tmp_coords_jac_7_GREEN_DOWN; + const real_t p_affine_const_2_1_GREEN_DOWN = macro_vertex_coord_id_0comp1 + tmp_coords_jac_5_GREEN_DOWN + tmp_coords_jac_8_GREEN_DOWN; + const real_t p_affine_const_2_2_GREEN_DOWN = macro_vertex_coord_id_0comp2 + tmp_coords_jac_6_GREEN_DOWN + tmp_coords_jac_9_GREEN_DOWN; + const real_t p_affine_const_3_0_GREEN_DOWN = tmp_coords_jac_1_GREEN_DOWN + tmp_coords_jac_7_GREEN_DOWN; + const real_t p_affine_const_3_1_GREEN_DOWN = tmp_coords_jac_2_GREEN_DOWN + tmp_coords_jac_8_GREEN_DOWN; + const real_t p_affine_const_3_2_GREEN_DOWN = tmp_coords_jac_3_GREEN_DOWN + tmp_coords_jac_9_GREEN_DOWN; + const real_t jac_affine_0_0_GREEN_DOWN = -p_affine_const_0_0_GREEN_DOWN + p_affine_const_1_0_GREEN_DOWN; + const real_t jac_affine_0_1_GREEN_DOWN = -p_affine_const_0_0_GREEN_DOWN + p_affine_const_2_0_GREEN_DOWN; + const real_t jac_affine_0_2_GREEN_DOWN = -p_affine_const_0_0_GREEN_DOWN + p_affine_const_3_0_GREEN_DOWN; + const real_t jac_affine_1_0_GREEN_DOWN = -p_affine_const_0_1_GREEN_DOWN + p_affine_const_1_1_GREEN_DOWN; + const real_t jac_affine_1_1_GREEN_DOWN = -p_affine_const_0_1_GREEN_DOWN + p_affine_const_2_1_GREEN_DOWN; + const real_t tmp_coords_jac_14_GREEN_DOWN = jac_affine_0_2_GREEN_DOWN*jac_affine_1_1_GREEN_DOWN; + const real_t jac_affine_1_2_GREEN_DOWN = -p_affine_const_0_1_GREEN_DOWN + p_affine_const_3_1_GREEN_DOWN; + const real_t tmp_coords_jac_12_GREEN_DOWN = jac_affine_0_1_GREEN_DOWN*jac_affine_1_2_GREEN_DOWN; + const real_t jac_affine_2_0_GREEN_DOWN = -p_affine_const_0_2_GREEN_DOWN + p_affine_const_1_2_GREEN_DOWN; + const real_t jac_affine_2_1_GREEN_DOWN = -p_affine_const_0_2_GREEN_DOWN + p_affine_const_2_2_GREEN_DOWN; + const real_t tmp_coords_jac_11_GREEN_DOWN = jac_affine_1_2_GREEN_DOWN*jac_affine_2_1_GREEN_DOWN; + const real_t jac_affine_2_2_GREEN_DOWN = -p_affine_const_0_2_GREEN_DOWN + p_affine_const_3_2_GREEN_DOWN; + const real_t tmp_coords_jac_10_GREEN_DOWN = jac_affine_1_1_GREEN_DOWN*jac_affine_2_2_GREEN_DOWN; + const real_t tmp_coords_jac_13_GREEN_DOWN = jac_affine_0_1_GREEN_DOWN*jac_affine_2_2_GREEN_DOWN; + const real_t tmp_coords_jac_15_GREEN_DOWN = jac_affine_0_0_GREEN_DOWN*tmp_coords_jac_10_GREEN_DOWN - jac_affine_0_0_GREEN_DOWN*tmp_coords_jac_11_GREEN_DOWN + jac_affine_0_2_GREEN_DOWN*jac_affine_1_0_GREEN_DOWN*jac_affine_2_1_GREEN_DOWN - jac_affine_1_0_GREEN_DOWN*tmp_coords_jac_13_GREEN_DOWN + jac_affine_2_0_GREEN_DOWN*tmp_coords_jac_12_GREEN_DOWN - jac_affine_2_0_GREEN_DOWN*tmp_coords_jac_14_GREEN_DOWN; + const real_t tmp_coords_jac_16_GREEN_DOWN = 1.0 / (tmp_coords_jac_15_GREEN_DOWN); + const real_t jac_affine_inv_0_0_GREEN_DOWN = tmp_coords_jac_16_GREEN_DOWN*(tmp_coords_jac_10_GREEN_DOWN - tmp_coords_jac_11_GREEN_DOWN); + const real_t jac_affine_inv_0_1_GREEN_DOWN = tmp_coords_jac_16_GREEN_DOWN*(jac_affine_0_2_GREEN_DOWN*jac_affine_2_1_GREEN_DOWN - tmp_coords_jac_13_GREEN_DOWN); + const real_t jac_affine_inv_0_2_GREEN_DOWN = tmp_coords_jac_16_GREEN_DOWN*(tmp_coords_jac_12_GREEN_DOWN - tmp_coords_jac_14_GREEN_DOWN); + const real_t jac_affine_inv_1_0_GREEN_DOWN = tmp_coords_jac_16_GREEN_DOWN*(-jac_affine_1_0_GREEN_DOWN*jac_affine_2_2_GREEN_DOWN + jac_affine_1_2_GREEN_DOWN*jac_affine_2_0_GREEN_DOWN); + const real_t jac_affine_inv_1_1_GREEN_DOWN = tmp_coords_jac_16_GREEN_DOWN*(jac_affine_0_0_GREEN_DOWN*jac_affine_2_2_GREEN_DOWN - jac_affine_0_2_GREEN_DOWN*jac_affine_2_0_GREEN_DOWN); + const real_t jac_affine_inv_1_2_GREEN_DOWN = tmp_coords_jac_16_GREEN_DOWN*(-jac_affine_0_0_GREEN_DOWN*jac_affine_1_2_GREEN_DOWN + jac_affine_0_2_GREEN_DOWN*jac_affine_1_0_GREEN_DOWN); + const real_t jac_affine_inv_2_0_GREEN_DOWN = tmp_coords_jac_16_GREEN_DOWN*(jac_affine_1_0_GREEN_DOWN*jac_affine_2_1_GREEN_DOWN - jac_affine_1_1_GREEN_DOWN*jac_affine_2_0_GREEN_DOWN); + const real_t jac_affine_inv_2_1_GREEN_DOWN = tmp_coords_jac_16_GREEN_DOWN*(-jac_affine_0_0_GREEN_DOWN*jac_affine_2_1_GREEN_DOWN + jac_affine_0_1_GREEN_DOWN*jac_affine_2_0_GREEN_DOWN); + const real_t jac_affine_inv_2_2_GREEN_DOWN = tmp_coords_jac_16_GREEN_DOWN*(jac_affine_0_0_GREEN_DOWN*jac_affine_1_1_GREEN_DOWN - jac_affine_0_1_GREEN_DOWN*jac_affine_1_0_GREEN_DOWN); + const real_t abs_det_jac_affine_GREEN_DOWN = abs(tmp_coords_jac_15_GREEN_DOWN); + { + /* CellType.GREEN_DOWN */ + for (int64_t ctr_2 = 0; ctr_2 < micro_edges_per_macro_edge; ctr_2 += 1) + for (int64_t ctr_1 = 0; ctr_1 < -ctr_2 + micro_edges_per_macro_edge; ctr_1 += 1) + { + for (int64_t ctr_0 = 0; ctr_0 < (int64_t)((-ctr_1 - ctr_2 + micro_edges_per_macro_edge - 1) / (4)) * (4); ctr_0 += 4) + { + + const int64_t phantom_ctr_0 = ctr_0; + real_t _data_float_loop_ctr_array_dim_0[4]; + _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0; + _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1; + _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2; + _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3; + real_t _data_float_loop_ctr_array_dim_1[4]; + _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1; + real_t _data_float_loop_ctr_array_dim_2[4]; + _data_float_loop_ctr_array_dim_2[0] = (real_t) ctr_2; + _data_float_loop_ctr_array_dim_2[1] = (real_t) ctr_2; + _data_float_loop_ctr_array_dim_2[2] = (real_t) ctr_2; + _data_float_loop_ctr_array_dim_2[3] = (real_t) ctr_2; + + const __m256d p_affine_0_0 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_3comp0,macro_vertex_coord_id_3comp0,macro_vertex_coord_id_3comp0,macro_vertex_coord_id_3comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)); + const __m256d p_affine_0_1 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_3comp1,macro_vertex_coord_id_3comp1,macro_vertex_coord_id_3comp1,macro_vertex_coord_id_3comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)); + const __m256d p_affine_0_2 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)),_mm256_set_pd(macro_vertex_coord_id_1comp2,macro_vertex_coord_id_1comp2,macro_vertex_coord_id_1comp2,macro_vertex_coord_id_1comp2)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)),_mm256_set_pd(macro_vertex_coord_id_2comp2,macro_vertex_coord_id_2comp2,macro_vertex_coord_id_2comp2,macro_vertex_coord_id_2comp2)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)),_mm256_set_pd(macro_vertex_coord_id_3comp2,macro_vertex_coord_id_3comp2,macro_vertex_coord_id_3comp2,macro_vertex_coord_id_3comp2)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)); + const __m256d p_affine_1_0 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_3comp0,macro_vertex_coord_id_3comp0,macro_vertex_coord_id_3comp0,macro_vertex_coord_id_3comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)); + const __m256d p_affine_1_1 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_3comp1,macro_vertex_coord_id_3comp1,macro_vertex_coord_id_3comp1,macro_vertex_coord_id_3comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)); + const __m256d p_affine_1_2 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)),_mm256_set_pd(macro_vertex_coord_id_1comp2,macro_vertex_coord_id_1comp2,macro_vertex_coord_id_1comp2,macro_vertex_coord_id_1comp2)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)),_mm256_set_pd(macro_vertex_coord_id_2comp2,macro_vertex_coord_id_2comp2,macro_vertex_coord_id_2comp2,macro_vertex_coord_id_2comp2)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)),_mm256_set_pd(macro_vertex_coord_id_3comp2,macro_vertex_coord_id_3comp2,macro_vertex_coord_id_3comp2,macro_vertex_coord_id_3comp2)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)); + const __m256d p_affine_2_0 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_3comp0,macro_vertex_coord_id_3comp0,macro_vertex_coord_id_3comp0,macro_vertex_coord_id_3comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)); + const __m256d p_affine_2_1 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_3comp1,macro_vertex_coord_id_3comp1,macro_vertex_coord_id_3comp1,macro_vertex_coord_id_3comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)); + const __m256d p_affine_2_2 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)),_mm256_set_pd(macro_vertex_coord_id_1comp2,macro_vertex_coord_id_1comp2,macro_vertex_coord_id_1comp2,macro_vertex_coord_id_1comp2)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)),_mm256_set_pd(macro_vertex_coord_id_2comp2,macro_vertex_coord_id_2comp2,macro_vertex_coord_id_2comp2,macro_vertex_coord_id_2comp2)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)),_mm256_set_pd(macro_vertex_coord_id_3comp2,macro_vertex_coord_id_3comp2,macro_vertex_coord_id_3comp2,macro_vertex_coord_id_3comp2)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)); + const __m256d p_affine_3_0 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_3comp0,macro_vertex_coord_id_3comp0,macro_vertex_coord_id_3comp0,macro_vertex_coord_id_3comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)); + const __m256d p_affine_3_1 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_3comp1,macro_vertex_coord_id_3comp1,macro_vertex_coord_id_3comp1,macro_vertex_coord_id_3comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)); + const __m256d p_affine_3_2 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)),_mm256_set_pd(macro_vertex_coord_id_1comp2,macro_vertex_coord_id_1comp2,macro_vertex_coord_id_1comp2,macro_vertex_coord_id_1comp2)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)),_mm256_set_pd(macro_vertex_coord_id_2comp2,macro_vertex_coord_id_2comp2,macro_vertex_coord_id_2comp2,macro_vertex_coord_id_2comp2)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)),_mm256_set_pd(macro_vertex_coord_id_3comp2,macro_vertex_coord_id_3comp2,macro_vertex_coord_id_3comp2,macro_vertex_coord_id_3comp2)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)); + const __m256d src_dof_0 = _mm256_loadu_pd(& _data_src_vertex_0[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))]); + const __m256d src_dof_1 = _mm256_loadu_pd(& _data_src_vertex_0[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1]); + const __m256d src_dof_2 = _mm256_loadu_pd(& _data_src_vertex_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1]); + const __m256d src_dof_3 = _mm256_loadu_pd(& _data_src_vertex_0[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))]); + const __m256d src_dof_4 = _mm256_loadu_pd(& _data_src_edge_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + 4*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))]); + const __m256d src_dof_5 = _mm256_loadu_pd(& _data_src_edge_0[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 5*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]); + const __m256d src_dof_6 = _mm256_loadu_pd(& _data_src_edge_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + 1]); + const __m256d src_dof_7 = _mm256_loadu_pd(& _data_src_edge_0[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 3*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]); + const __m256d src_dof_8 = _mm256_loadu_pd(& _data_src_edge_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge - 1)*(micro_edges_per_macro_edge + 1)) / (6)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))]); + const __m256d src_dof_9 = _mm256_loadu_pd(& _data_src_edge_0[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]); + const __m256d src_dof_10 = _mm256_loadu_pd(& _data_src_vertex_1[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))]); + const __m256d src_dof_11 = _mm256_loadu_pd(& _data_src_vertex_1[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1]); + const __m256d src_dof_12 = _mm256_loadu_pd(& _data_src_vertex_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1]); + const __m256d src_dof_13 = _mm256_loadu_pd(& _data_src_vertex_1[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))]); + const __m256d src_dof_14 = _mm256_loadu_pd(& _data_src_edge_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + 4*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))]); + const __m256d src_dof_15 = _mm256_loadu_pd(& _data_src_edge_1[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 5*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]); + const __m256d src_dof_16 = _mm256_loadu_pd(& _data_src_edge_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + 1]); + const __m256d src_dof_17 = _mm256_loadu_pd(& _data_src_edge_1[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 3*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]); + const __m256d src_dof_18 = _mm256_loadu_pd(& _data_src_edge_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge - 1)*(micro_edges_per_macro_edge + 1)) / (6)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))]); + const __m256d src_dof_19 = _mm256_loadu_pd(& _data_src_edge_1[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]); + const __m256d src_dof_20 = _mm256_loadu_pd(& _data_src_vertex_2[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))]); + const __m256d src_dof_21 = _mm256_loadu_pd(& _data_src_vertex_2[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1]); + const __m256d src_dof_22 = _mm256_loadu_pd(& _data_src_vertex_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1]); + const __m256d src_dof_23 = _mm256_loadu_pd(& _data_src_vertex_2[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))]); + const __m256d src_dof_24 = _mm256_loadu_pd(& _data_src_edge_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + 4*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))]); + const __m256d src_dof_25 = _mm256_loadu_pd(& _data_src_edge_2[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 5*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]); + const __m256d src_dof_26 = _mm256_loadu_pd(& _data_src_edge_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + 1]); + const __m256d src_dof_27 = _mm256_loadu_pd(& _data_src_edge_2[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 3*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]); + const __m256d src_dof_28 = _mm256_loadu_pd(& _data_src_edge_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge - 1)*(micro_edges_per_macro_edge + 1)) / (6)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))]); + const __m256d src_dof_29 = _mm256_loadu_pd(& _data_src_edge_2[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]); + const __m256d rho_dof_0 = _mm256_loadu_pd(& _data_rhoVertex[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))]); + const __m256d rho_dof_1 = _mm256_loadu_pd(& _data_rhoVertex[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1]); + const __m256d rho_dof_2 = _mm256_loadu_pd(& _data_rhoVertex[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1]); + const __m256d rho_dof_3 = _mm256_loadu_pd(& _data_rhoVertex[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))]); + const __m256d rho_dof_4 = _mm256_loadu_pd(& _data_rhoEdge[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + 4*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))]); + const __m256d rho_dof_5 = _mm256_loadu_pd(& _data_rhoEdge[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 5*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]); + const __m256d rho_dof_6 = _mm256_loadu_pd(& _data_rhoEdge[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + 1]); + const __m256d rho_dof_7 = _mm256_loadu_pd(& _data_rhoEdge[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 3*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]); + const __m256d rho_dof_8 = _mm256_loadu_pd(& _data_rhoEdge[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge - 1)*(micro_edges_per_macro_edge + 1)) / (6)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))]); + const __m256d rho_dof_9 = _mm256_loadu_pd(& _data_rhoEdge[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]); + __m256d q_acc_0_0 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_1 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_2 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_3 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_4 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_5 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_6 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_7 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_8 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_9 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_10 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_11 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_12 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_13 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_14 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_15 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_16 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_17 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_18 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_19 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_20 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_21 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_22 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_23 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_24 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_25 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_26 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_27 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_28 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_29 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_0 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_1 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_2 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_3 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_4 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_5 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_6 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_7 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_8 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_9 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_10 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_11 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_12 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_13 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_14 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_15 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_16 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_17 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_18 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_19 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_20 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_21 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_22 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_23 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_24 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_25 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_26 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_27 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_28 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_29 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_0 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_1 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_2 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_3 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_4 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_5 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_6 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_7 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_8 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_9 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_10 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_11 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_12 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_13 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_14 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_15 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_16 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_17 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_18 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_19 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_20 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_21 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_22 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_23 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_24 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_25 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_26 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_27 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_28 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_29 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_0 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_1 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_2 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_3 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_4 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_5 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_6 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_7 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_8 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_9 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_10 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_11 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_12 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_13 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_14 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_15 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_16 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_17 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_18 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_19 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_20 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_21 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_22 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_23 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_24 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_25 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_26 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_27 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_28 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_29 = _mm256_set_pd(0.0,0.0,0.0,0.0); + for (int64_t q = 0; q < 5; q += 1) + { + const __m256d tmp_qloop_0 = _mm256_mul_pd(_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q]),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])); + const __m256d tmp_qloop_1 = _mm256_mul_pd(tmp_qloop_0,_mm256_set_pd(2.0,2.0,2.0,2.0)); + const __m256d tmp_qloop_2 = _mm256_mul_pd(_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q])); + const __m256d tmp_qloop_3 = _mm256_mul_pd(tmp_qloop_2,_mm256_set_pd(2.0,2.0,2.0,2.0)); + const __m256d tmp_qloop_4 = _mm256_mul_pd(_mm256_set_pd(_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q]),_mm256_set_pd(_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q])); + const __m256d tmp_qloop_5 = _mm256_mul_pd(tmp_qloop_4,_mm256_set_pd(2.0,2.0,2.0,2.0)); + const __m256d tmp_qloop_6 = _mm256_mul_pd(_mm256_set_pd(4.0,4.0,4.0,4.0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q])); + const __m256d tmp_qloop_7 = _mm256_mul_pd(tmp_qloop_6,_mm256_set_pd(_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q])); + const __m256d tmp_qloop_8 = _mm256_mul_pd(_mm256_set_pd(4.0,4.0,4.0,4.0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])); + const __m256d tmp_qloop_9 = _mm256_mul_pd(tmp_qloop_8,_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q])); + const __m256d tmp_qloop_10 = _mm256_mul_pd(tmp_qloop_8,_mm256_set_pd(_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q])); + const __m256d tmp_qloop_11 = _mm256_add_pd(tmp_qloop_10,tmp_qloop_9); + const __m256d tmp_qloop_12 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-3.0,-3.0,-3.0,-3.0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),_mm256_mul_pd(_mm256_set_pd(-3.0,-3.0,-3.0,-3.0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]))),_mm256_mul_pd(_mm256_set_pd(-3.0,-3.0,-3.0,-3.0),_mm256_set_pd(_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),tmp_qloop_1),tmp_qloop_11),tmp_qloop_3),tmp_qloop_5),tmp_qloop_7); + const __m256d tmp_qloop_13 = _mm256_mul_pd(_mm256_set_pd(4.0,4.0,4.0,4.0),_mm256_set_pd(_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q])); + const __m256d tmp_qloop_14 = _mm256_add_pd(tmp_qloop_13,tmp_qloop_6); + const __m256d tmp_qloop_15 = _mm256_mul_pd(_mm256_mul_pd(rho_dof_8,tmp_qloop_6),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)); + const __m256d tmp_qloop_16 = _mm256_mul_pd(rho_dof_0,_mm256_add_pd(_mm256_add_pd(_mm256_set_pd(-3.0,-3.0,-3.0,-3.0),tmp_qloop_14),tmp_qloop_8)); + const __m256d tmp_qloop_17 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(rho_dof_7,tmp_qloop_13),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),tmp_qloop_16); + const __m256d tmp_qloop_18 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(rho_dof_1,_mm256_add_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),tmp_qloop_8)),_mm256_mul_pd(rho_dof_9,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_14,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_set_pd(-8.0,-8.0,-8.0,-8.0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q]))),_mm256_set_pd(4.0,4.0,4.0,4.0)))),_mm256_mul_pd(rho_dof_5,tmp_qloop_13)),_mm256_mul_pd(rho_dof_6,tmp_qloop_6)),tmp_qloop_15),tmp_qloop_17); + const __m256d tmp_qloop_19 = _mm256_add_pd(_mm256_set_pd(-4.0,-4.0,-4.0,-4.0),tmp_qloop_8); + const __m256d tmp_qloop_20 = _mm256_mul_pd(_mm256_mul_pd(rho_dof_9,tmp_qloop_8),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)); + const __m256d tmp_qloop_21 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(rho_dof_2,_mm256_add_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),tmp_qloop_6)),_mm256_mul_pd(rho_dof_8,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_13,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_19,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_mul_pd(_mm256_set_pd(-8.0,-8.0,-8.0,-8.0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]))))),_mm256_mul_pd(rho_dof_4,tmp_qloop_13)),_mm256_mul_pd(rho_dof_6,tmp_qloop_8)),tmp_qloop_17),tmp_qloop_20); + const __m256d tmp_qloop_22 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(rho_dof_3,_mm256_add_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),tmp_qloop_13)),_mm256_mul_pd(rho_dof_7,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_19,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_6,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_mul_pd(_mm256_set_pd(-8.0,-8.0,-8.0,-8.0),_mm256_set_pd(_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q]))))),_mm256_mul_pd(rho_dof_4,tmp_qloop_6)),_mm256_mul_pd(rho_dof_5,tmp_qloop_8)),tmp_qloop_15),tmp_qloop_16),tmp_qloop_20); + const __m256d tmp_qloop_23 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_18,_mm256_set_pd(jac_affine_inv_0_0_GREEN_DOWN,jac_affine_inv_0_0_GREEN_DOWN,jac_affine_inv_0_0_GREEN_DOWN,jac_affine_inv_0_0_GREEN_DOWN)),_mm256_mul_pd(tmp_qloop_21,_mm256_set_pd(jac_affine_inv_1_0_GREEN_DOWN,jac_affine_inv_1_0_GREEN_DOWN,jac_affine_inv_1_0_GREEN_DOWN,jac_affine_inv_1_0_GREEN_DOWN))),_mm256_mul_pd(tmp_qloop_22,_mm256_set_pd(jac_affine_inv_2_0_GREEN_DOWN,jac_affine_inv_2_0_GREEN_DOWN,jac_affine_inv_2_0_GREEN_DOWN,jac_affine_inv_2_0_GREEN_DOWN))); + const __m256d tmp_qloop_24 = _mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),tmp_qloop_1); + const __m256d tmp_qloop_25 = _mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q])),tmp_qloop_3); + const __m256d tmp_qloop_26 = _mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q])),tmp_qloop_5); + const __m256d tmp_qloop_27 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_10,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_7,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_mul_pd(tmp_qloop_4,_mm256_set_pd(-4.0,-4.0,-4.0,-4.0))),tmp_qloop_13); + const __m256d tmp_qloop_28 = _mm256_mul_pd(tmp_qloop_2,_mm256_set_pd(4.0,4.0,4.0,4.0)); + const __m256d tmp_qloop_29 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_28,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_7,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_mul_pd(tmp_qloop_9,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_6); + const __m256d tmp_qloop_30 = _mm256_mul_pd(tmp_qloop_0,_mm256_set_pd(4.0,4.0,4.0,4.0)); + const __m256d tmp_qloop_31 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_11,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_30,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_8); + const __m256d tmp_qloop_32 = _mm256_mul_pd(_mm256_mul_pd(_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(rho_dof_0,tmp_qloop_12),_mm256_mul_pd(rho_dof_1,tmp_qloop_24)),_mm256_mul_pd(rho_dof_2,tmp_qloop_25)),_mm256_mul_pd(rho_dof_3,tmp_qloop_26)),_mm256_mul_pd(rho_dof_4,tmp_qloop_7)),_mm256_mul_pd(rho_dof_5,tmp_qloop_10)),_mm256_mul_pd(rho_dof_6,tmp_qloop_9)),_mm256_mul_pd(rho_dof_7,tmp_qloop_27)),_mm256_mul_pd(rho_dof_8,tmp_qloop_29)),_mm256_mul_pd(rho_dof_9,tmp_qloop_31))),_mm256_set_pd(_data_q_w[q],_data_q_w[q],_data_q_w[q],_data_q_w[q])),_mm256_set_pd(abs_det_jac_affine_GREEN_DOWN,abs_det_jac_affine_GREEN_DOWN,abs_det_jac_affine_GREEN_DOWN,abs_det_jac_affine_GREEN_DOWN)); + const __m256d tmp_qloop_33 = _mm256_mul_pd(tmp_qloop_32,_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]))),_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q]))),_mm256_set_pd(1.0,1.0,1.0,1.0))); + const __m256d tmp_qloop_34 = _mm256_mul_pd(tmp_qloop_23,tmp_qloop_33); + const __m256d tmp_qloop_35 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_18,_mm256_set_pd(jac_affine_inv_0_1_GREEN_DOWN,jac_affine_inv_0_1_GREEN_DOWN,jac_affine_inv_0_1_GREEN_DOWN,jac_affine_inv_0_1_GREEN_DOWN)),_mm256_mul_pd(tmp_qloop_21,_mm256_set_pd(jac_affine_inv_1_1_GREEN_DOWN,jac_affine_inv_1_1_GREEN_DOWN,jac_affine_inv_1_1_GREEN_DOWN,jac_affine_inv_1_1_GREEN_DOWN))),_mm256_mul_pd(tmp_qloop_22,_mm256_set_pd(jac_affine_inv_2_1_GREEN_DOWN,jac_affine_inv_2_1_GREEN_DOWN,jac_affine_inv_2_1_GREEN_DOWN,jac_affine_inv_2_1_GREEN_DOWN))); + const __m256d tmp_qloop_36 = _mm256_mul_pd(tmp_qloop_33,tmp_qloop_35); + const __m256d tmp_qloop_37 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_18,_mm256_set_pd(jac_affine_inv_0_2_GREEN_DOWN,jac_affine_inv_0_2_GREEN_DOWN,jac_affine_inv_0_2_GREEN_DOWN,jac_affine_inv_0_2_GREEN_DOWN)),_mm256_mul_pd(tmp_qloop_21,_mm256_set_pd(jac_affine_inv_1_2_GREEN_DOWN,jac_affine_inv_1_2_GREEN_DOWN,jac_affine_inv_1_2_GREEN_DOWN,jac_affine_inv_1_2_GREEN_DOWN))),_mm256_mul_pd(tmp_qloop_22,_mm256_set_pd(jac_affine_inv_2_2_GREEN_DOWN,jac_affine_inv_2_2_GREEN_DOWN,jac_affine_inv_2_2_GREEN_DOWN,jac_affine_inv_2_2_GREEN_DOWN))); + const __m256d tmp_qloop_38 = _mm256_mul_pd(tmp_qloop_33,tmp_qloop_37); + const __m256d tmp_qloop_39 = _mm256_mul_pd(tmp_qloop_32,_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])); + const __m256d tmp_qloop_40 = _mm256_mul_pd(tmp_qloop_23,tmp_qloop_39); + const __m256d tmp_qloop_41 = _mm256_mul_pd(tmp_qloop_32,_mm256_set_pd(_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q])); + const __m256d tmp_qloop_42 = _mm256_mul_pd(tmp_qloop_23,tmp_qloop_41); + const __m256d tmp_qloop_43 = _mm256_mul_pd(tmp_qloop_42,tmp_qloop_9); + const __m256d tmp_qloop_44 = _mm256_mul_pd(tmp_qloop_32,_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q])); + const __m256d tmp_qloop_45 = _mm256_mul_pd(tmp_qloop_23,tmp_qloop_44); + const __m256d tmp_qloop_46 = _mm256_mul_pd(tmp_qloop_35,tmp_qloop_39); + const __m256d tmp_qloop_47 = _mm256_mul_pd(tmp_qloop_35,tmp_qloop_41); + const __m256d tmp_qloop_48 = _mm256_mul_pd(tmp_qloop_47,tmp_qloop_9); + const __m256d tmp_qloop_49 = _mm256_mul_pd(tmp_qloop_35,tmp_qloop_44); + const __m256d tmp_qloop_50 = _mm256_mul_pd(tmp_qloop_37,tmp_qloop_39); + const __m256d tmp_qloop_51 = _mm256_mul_pd(tmp_qloop_37,tmp_qloop_41); + const __m256d tmp_qloop_52 = _mm256_mul_pd(tmp_qloop_51,tmp_qloop_9); + const __m256d tmp_qloop_53 = _mm256_mul_pd(tmp_qloop_37,tmp_qloop_44); + const __m256d tmp_qloop_54 = _mm256_mul_pd(tmp_qloop_23,tmp_qloop_32); + const __m256d tmp_qloop_55 = _mm256_mul_pd(tmp_qloop_2,tmp_qloop_8); + const __m256d tmp_qloop_56 = _mm256_mul_pd(tmp_qloop_32,tmp_qloop_55); + const __m256d tmp_qloop_57 = _mm256_mul_pd(tmp_qloop_4,tmp_qloop_54); + const __m256d tmp_qloop_58 = _mm256_mul_pd(tmp_qloop_32,tmp_qloop_4); + const __m256d tmp_qloop_59 = _mm256_mul_pd(tmp_qloop_35,tmp_qloop_58); + const __m256d tmp_qloop_60 = _mm256_mul_pd(tmp_qloop_37,tmp_qloop_58); + const __m256d q_tmp_0_0 = _mm256_mul_pd(tmp_qloop_12,tmp_qloop_34); + const __m256d q_tmp_0_1 = _mm256_mul_pd(tmp_qloop_24,tmp_qloop_34); + const __m256d q_tmp_0_2 = _mm256_mul_pd(tmp_qloop_25,tmp_qloop_34); + const __m256d q_tmp_0_3 = _mm256_mul_pd(tmp_qloop_26,tmp_qloop_34); + const __m256d q_tmp_0_4 = _mm256_mul_pd(tmp_qloop_34,tmp_qloop_7); + const __m256d q_tmp_0_5 = _mm256_mul_pd(tmp_qloop_10,tmp_qloop_34); + const __m256d q_tmp_0_6 = _mm256_mul_pd(tmp_qloop_34,tmp_qloop_9); + const __m256d q_tmp_0_7 = _mm256_mul_pd(tmp_qloop_27,tmp_qloop_34); + const __m256d q_tmp_0_8 = _mm256_mul_pd(tmp_qloop_29,tmp_qloop_34); + const __m256d q_tmp_0_9 = _mm256_mul_pd(tmp_qloop_31,tmp_qloop_34); + const __m256d q_tmp_0_10 = _mm256_mul_pd(tmp_qloop_12,tmp_qloop_36); + const __m256d q_tmp_0_11 = _mm256_mul_pd(tmp_qloop_24,tmp_qloop_36); + const __m256d q_tmp_0_12 = _mm256_mul_pd(tmp_qloop_25,tmp_qloop_36); + const __m256d q_tmp_0_13 = _mm256_mul_pd(tmp_qloop_26,tmp_qloop_36); + const __m256d q_tmp_0_14 = _mm256_mul_pd(tmp_qloop_36,tmp_qloop_7); + const __m256d q_tmp_0_15 = _mm256_mul_pd(tmp_qloop_10,tmp_qloop_36); + const __m256d q_tmp_0_16 = _mm256_mul_pd(tmp_qloop_36,tmp_qloop_9); + const __m256d q_tmp_0_17 = _mm256_mul_pd(tmp_qloop_27,tmp_qloop_36); + const __m256d q_tmp_0_18 = _mm256_mul_pd(tmp_qloop_29,tmp_qloop_36); + const __m256d q_tmp_0_19 = _mm256_mul_pd(tmp_qloop_31,tmp_qloop_36); + const __m256d q_tmp_0_20 = _mm256_mul_pd(tmp_qloop_12,tmp_qloop_38); + const __m256d q_tmp_0_21 = _mm256_mul_pd(tmp_qloop_24,tmp_qloop_38); + const __m256d q_tmp_0_22 = _mm256_mul_pd(tmp_qloop_25,tmp_qloop_38); + const __m256d q_tmp_0_23 = _mm256_mul_pd(tmp_qloop_26,tmp_qloop_38); + const __m256d q_tmp_0_24 = _mm256_mul_pd(tmp_qloop_38,tmp_qloop_7); + const __m256d q_tmp_0_25 = _mm256_mul_pd(tmp_qloop_10,tmp_qloop_38); + const __m256d q_tmp_0_26 = _mm256_mul_pd(tmp_qloop_38,tmp_qloop_9); + const __m256d q_tmp_0_27 = _mm256_mul_pd(tmp_qloop_27,tmp_qloop_38); + const __m256d q_tmp_0_28 = _mm256_mul_pd(tmp_qloop_29,tmp_qloop_38); + const __m256d q_tmp_0_29 = _mm256_mul_pd(tmp_qloop_31,tmp_qloop_38); + const __m256d q_tmp_1_0 = _mm256_mul_pd(tmp_qloop_12,tmp_qloop_40); + const __m256d q_tmp_1_1 = _mm256_mul_pd(tmp_qloop_24,tmp_qloop_40); + const __m256d q_tmp_1_2 = _mm256_mul_pd(tmp_qloop_25,tmp_qloop_40); + const __m256d q_tmp_1_3 = _mm256_mul_pd(tmp_qloop_26,tmp_qloop_40); + const __m256d q_tmp_1_4 = tmp_qloop_43; + const __m256d q_tmp_1_5 = _mm256_mul_pd(tmp_qloop_30,tmp_qloop_42); + const __m256d q_tmp_1_6 = _mm256_mul_pd(tmp_qloop_30,tmp_qloop_45); + const __m256d q_tmp_1_7 = _mm256_mul_pd(tmp_qloop_27,tmp_qloop_40); + const __m256d q_tmp_1_8 = _mm256_mul_pd(tmp_qloop_29,tmp_qloop_40); + const __m256d q_tmp_1_9 = _mm256_mul_pd(tmp_qloop_31,tmp_qloop_40); + const __m256d q_tmp_1_10 = _mm256_mul_pd(tmp_qloop_12,tmp_qloop_46); + const __m256d q_tmp_1_11 = _mm256_mul_pd(tmp_qloop_24,tmp_qloop_46); + const __m256d q_tmp_1_12 = _mm256_mul_pd(tmp_qloop_25,tmp_qloop_46); + const __m256d q_tmp_1_13 = _mm256_mul_pd(tmp_qloop_26,tmp_qloop_46); + const __m256d q_tmp_1_14 = tmp_qloop_48; + const __m256d q_tmp_1_15 = _mm256_mul_pd(tmp_qloop_30,tmp_qloop_47); + const __m256d q_tmp_1_16 = _mm256_mul_pd(tmp_qloop_30,tmp_qloop_49); + const __m256d q_tmp_1_17 = _mm256_mul_pd(tmp_qloop_27,tmp_qloop_46); + const __m256d q_tmp_1_18 = _mm256_mul_pd(tmp_qloop_29,tmp_qloop_46); + const __m256d q_tmp_1_19 = _mm256_mul_pd(tmp_qloop_31,tmp_qloop_46); + const __m256d q_tmp_1_20 = _mm256_mul_pd(tmp_qloop_12,tmp_qloop_50); + const __m256d q_tmp_1_21 = _mm256_mul_pd(tmp_qloop_24,tmp_qloop_50); + const __m256d q_tmp_1_22 = _mm256_mul_pd(tmp_qloop_25,tmp_qloop_50); + const __m256d q_tmp_1_23 = _mm256_mul_pd(tmp_qloop_26,tmp_qloop_50); + const __m256d q_tmp_1_24 = tmp_qloop_52; + const __m256d q_tmp_1_25 = _mm256_mul_pd(tmp_qloop_30,tmp_qloop_51); + const __m256d q_tmp_1_26 = _mm256_mul_pd(tmp_qloop_30,tmp_qloop_53); + const __m256d q_tmp_1_27 = _mm256_mul_pd(tmp_qloop_27,tmp_qloop_50); + const __m256d q_tmp_1_28 = _mm256_mul_pd(tmp_qloop_29,tmp_qloop_50); + const __m256d q_tmp_1_29 = _mm256_mul_pd(tmp_qloop_31,tmp_qloop_50); + const __m256d q_tmp_2_0 = _mm256_mul_pd(tmp_qloop_12,tmp_qloop_45); + const __m256d q_tmp_2_1 = _mm256_mul_pd(tmp_qloop_24,tmp_qloop_45); + const __m256d q_tmp_2_2 = _mm256_mul_pd(tmp_qloop_25,tmp_qloop_45); + const __m256d q_tmp_2_3 = _mm256_mul_pd(tmp_qloop_26,tmp_qloop_45); + const __m256d q_tmp_2_4 = _mm256_mul_pd(tmp_qloop_28,tmp_qloop_42); + const __m256d q_tmp_2_5 = tmp_qloop_43; + const __m256d q_tmp_2_6 = _mm256_mul_pd(tmp_qloop_54,tmp_qloop_55); + const __m256d q_tmp_2_7 = _mm256_mul_pd(tmp_qloop_27,tmp_qloop_45); + const __m256d q_tmp_2_8 = _mm256_mul_pd(tmp_qloop_29,tmp_qloop_45); + const __m256d q_tmp_2_9 = _mm256_mul_pd(tmp_qloop_31,tmp_qloop_45); + const __m256d q_tmp_2_10 = _mm256_mul_pd(tmp_qloop_12,tmp_qloop_49); + const __m256d q_tmp_2_11 = _mm256_mul_pd(tmp_qloop_24,tmp_qloop_49); + const __m256d q_tmp_2_12 = _mm256_mul_pd(tmp_qloop_25,tmp_qloop_49); + const __m256d q_tmp_2_13 = _mm256_mul_pd(tmp_qloop_26,tmp_qloop_49); + const __m256d q_tmp_2_14 = _mm256_mul_pd(tmp_qloop_28,tmp_qloop_47); + const __m256d q_tmp_2_15 = tmp_qloop_48; + const __m256d q_tmp_2_16 = _mm256_mul_pd(tmp_qloop_35,tmp_qloop_56); + const __m256d q_tmp_2_17 = _mm256_mul_pd(tmp_qloop_27,tmp_qloop_49); + const __m256d q_tmp_2_18 = _mm256_mul_pd(tmp_qloop_29,tmp_qloop_49); + const __m256d q_tmp_2_19 = _mm256_mul_pd(tmp_qloop_31,tmp_qloop_49); + const __m256d q_tmp_2_20 = _mm256_mul_pd(tmp_qloop_12,tmp_qloop_53); + const __m256d q_tmp_2_21 = _mm256_mul_pd(tmp_qloop_24,tmp_qloop_53); + const __m256d q_tmp_2_22 = _mm256_mul_pd(tmp_qloop_25,tmp_qloop_53); + const __m256d q_tmp_2_23 = _mm256_mul_pd(tmp_qloop_26,tmp_qloop_53); + const __m256d q_tmp_2_24 = _mm256_mul_pd(tmp_qloop_28,tmp_qloop_51); + const __m256d q_tmp_2_25 = tmp_qloop_52; + const __m256d q_tmp_2_26 = _mm256_mul_pd(tmp_qloop_37,tmp_qloop_56); + const __m256d q_tmp_2_27 = _mm256_mul_pd(tmp_qloop_27,tmp_qloop_53); + const __m256d q_tmp_2_28 = _mm256_mul_pd(tmp_qloop_29,tmp_qloop_53); + const __m256d q_tmp_2_29 = _mm256_mul_pd(tmp_qloop_31,tmp_qloop_53); + const __m256d q_tmp_3_0 = _mm256_mul_pd(tmp_qloop_12,tmp_qloop_42); + const __m256d q_tmp_3_1 = _mm256_mul_pd(tmp_qloop_24,tmp_qloop_42); + const __m256d q_tmp_3_2 = _mm256_mul_pd(tmp_qloop_25,tmp_qloop_42); + const __m256d q_tmp_3_3 = _mm256_mul_pd(tmp_qloop_26,tmp_qloop_42); + const __m256d q_tmp_3_4 = _mm256_mul_pd(tmp_qloop_57,tmp_qloop_6); + const __m256d q_tmp_3_5 = _mm256_mul_pd(tmp_qloop_57,tmp_qloop_8); + const __m256d q_tmp_3_6 = tmp_qloop_43; + const __m256d q_tmp_3_7 = _mm256_mul_pd(tmp_qloop_27,tmp_qloop_42); + const __m256d q_tmp_3_8 = _mm256_mul_pd(tmp_qloop_29,tmp_qloop_42); + const __m256d q_tmp_3_9 = _mm256_mul_pd(tmp_qloop_31,tmp_qloop_42); + const __m256d q_tmp_3_10 = _mm256_mul_pd(tmp_qloop_12,tmp_qloop_47); + const __m256d q_tmp_3_11 = _mm256_mul_pd(tmp_qloop_24,tmp_qloop_47); + const __m256d q_tmp_3_12 = _mm256_mul_pd(tmp_qloop_25,tmp_qloop_47); + const __m256d q_tmp_3_13 = _mm256_mul_pd(tmp_qloop_26,tmp_qloop_47); + const __m256d q_tmp_3_14 = _mm256_mul_pd(tmp_qloop_59,tmp_qloop_6); + const __m256d q_tmp_3_15 = _mm256_mul_pd(tmp_qloop_59,tmp_qloop_8); + const __m256d q_tmp_3_16 = tmp_qloop_48; + const __m256d q_tmp_3_17 = _mm256_mul_pd(tmp_qloop_27,tmp_qloop_47); + const __m256d q_tmp_3_18 = _mm256_mul_pd(tmp_qloop_29,tmp_qloop_47); + const __m256d q_tmp_3_19 = _mm256_mul_pd(tmp_qloop_31,tmp_qloop_47); + const __m256d q_tmp_3_20 = _mm256_mul_pd(tmp_qloop_12,tmp_qloop_51); + const __m256d q_tmp_3_21 = _mm256_mul_pd(tmp_qloop_24,tmp_qloop_51); + const __m256d q_tmp_3_22 = _mm256_mul_pd(tmp_qloop_25,tmp_qloop_51); + const __m256d q_tmp_3_23 = _mm256_mul_pd(tmp_qloop_26,tmp_qloop_51); + const __m256d q_tmp_3_24 = _mm256_mul_pd(tmp_qloop_6,tmp_qloop_60); + const __m256d q_tmp_3_25 = _mm256_mul_pd(tmp_qloop_60,tmp_qloop_8); + const __m256d q_tmp_3_26 = tmp_qloop_52; + const __m256d q_tmp_3_27 = _mm256_mul_pd(tmp_qloop_27,tmp_qloop_51); + const __m256d q_tmp_3_28 = _mm256_mul_pd(tmp_qloop_29,tmp_qloop_51); + const __m256d q_tmp_3_29 = _mm256_mul_pd(tmp_qloop_31,tmp_qloop_51); + q_acc_0_0 = _mm256_add_pd(q_acc_0_0,q_tmp_0_0); + q_acc_0_1 = _mm256_add_pd(q_acc_0_1,q_tmp_0_1); + q_acc_0_2 = _mm256_add_pd(q_acc_0_2,q_tmp_0_2); + q_acc_0_3 = _mm256_add_pd(q_acc_0_3,q_tmp_0_3); + q_acc_0_4 = _mm256_add_pd(q_acc_0_4,q_tmp_0_4); + q_acc_0_5 = _mm256_add_pd(q_acc_0_5,q_tmp_0_5); + q_acc_0_6 = _mm256_add_pd(q_acc_0_6,q_tmp_0_6); + q_acc_0_7 = _mm256_add_pd(q_acc_0_7,q_tmp_0_7); + q_acc_0_8 = _mm256_add_pd(q_acc_0_8,q_tmp_0_8); + q_acc_0_9 = _mm256_add_pd(q_acc_0_9,q_tmp_0_9); + q_acc_0_10 = _mm256_add_pd(q_acc_0_10,q_tmp_0_10); + q_acc_0_11 = _mm256_add_pd(q_acc_0_11,q_tmp_0_11); + q_acc_0_12 = _mm256_add_pd(q_acc_0_12,q_tmp_0_12); + q_acc_0_13 = _mm256_add_pd(q_acc_0_13,q_tmp_0_13); + q_acc_0_14 = _mm256_add_pd(q_acc_0_14,q_tmp_0_14); + q_acc_0_15 = _mm256_add_pd(q_acc_0_15,q_tmp_0_15); + q_acc_0_16 = _mm256_add_pd(q_acc_0_16,q_tmp_0_16); + q_acc_0_17 = _mm256_add_pd(q_acc_0_17,q_tmp_0_17); + q_acc_0_18 = _mm256_add_pd(q_acc_0_18,q_tmp_0_18); + q_acc_0_19 = _mm256_add_pd(q_acc_0_19,q_tmp_0_19); + q_acc_0_20 = _mm256_add_pd(q_acc_0_20,q_tmp_0_20); + q_acc_0_21 = _mm256_add_pd(q_acc_0_21,q_tmp_0_21); + q_acc_0_22 = _mm256_add_pd(q_acc_0_22,q_tmp_0_22); + q_acc_0_23 = _mm256_add_pd(q_acc_0_23,q_tmp_0_23); + q_acc_0_24 = _mm256_add_pd(q_acc_0_24,q_tmp_0_24); + q_acc_0_25 = _mm256_add_pd(q_acc_0_25,q_tmp_0_25); + q_acc_0_26 = _mm256_add_pd(q_acc_0_26,q_tmp_0_26); + q_acc_0_27 = _mm256_add_pd(q_acc_0_27,q_tmp_0_27); + q_acc_0_28 = _mm256_add_pd(q_acc_0_28,q_tmp_0_28); + q_acc_0_29 = _mm256_add_pd(q_acc_0_29,q_tmp_0_29); + q_acc_1_0 = _mm256_add_pd(q_acc_1_0,q_tmp_1_0); + q_acc_1_1 = _mm256_add_pd(q_acc_1_1,q_tmp_1_1); + q_acc_1_2 = _mm256_add_pd(q_acc_1_2,q_tmp_1_2); + q_acc_1_3 = _mm256_add_pd(q_acc_1_3,q_tmp_1_3); + q_acc_1_4 = _mm256_add_pd(q_acc_1_4,q_tmp_1_4); + q_acc_1_5 = _mm256_add_pd(q_acc_1_5,q_tmp_1_5); + q_acc_1_6 = _mm256_add_pd(q_acc_1_6,q_tmp_1_6); + q_acc_1_7 = _mm256_add_pd(q_acc_1_7,q_tmp_1_7); + q_acc_1_8 = _mm256_add_pd(q_acc_1_8,q_tmp_1_8); + q_acc_1_9 = _mm256_add_pd(q_acc_1_9,q_tmp_1_9); + q_acc_1_10 = _mm256_add_pd(q_acc_1_10,q_tmp_1_10); + q_acc_1_11 = _mm256_add_pd(q_acc_1_11,q_tmp_1_11); + q_acc_1_12 = _mm256_add_pd(q_acc_1_12,q_tmp_1_12); + q_acc_1_13 = _mm256_add_pd(q_acc_1_13,q_tmp_1_13); + q_acc_1_14 = _mm256_add_pd(q_acc_1_14,q_tmp_1_14); + q_acc_1_15 = _mm256_add_pd(q_acc_1_15,q_tmp_1_15); + q_acc_1_16 = _mm256_add_pd(q_acc_1_16,q_tmp_1_16); + q_acc_1_17 = _mm256_add_pd(q_acc_1_17,q_tmp_1_17); + q_acc_1_18 = _mm256_add_pd(q_acc_1_18,q_tmp_1_18); + q_acc_1_19 = _mm256_add_pd(q_acc_1_19,q_tmp_1_19); + q_acc_1_20 = _mm256_add_pd(q_acc_1_20,q_tmp_1_20); + q_acc_1_21 = _mm256_add_pd(q_acc_1_21,q_tmp_1_21); + q_acc_1_22 = _mm256_add_pd(q_acc_1_22,q_tmp_1_22); + q_acc_1_23 = _mm256_add_pd(q_acc_1_23,q_tmp_1_23); + q_acc_1_24 = _mm256_add_pd(q_acc_1_24,q_tmp_1_24); + q_acc_1_25 = _mm256_add_pd(q_acc_1_25,q_tmp_1_25); + q_acc_1_26 = _mm256_add_pd(q_acc_1_26,q_tmp_1_26); + q_acc_1_27 = _mm256_add_pd(q_acc_1_27,q_tmp_1_27); + q_acc_1_28 = _mm256_add_pd(q_acc_1_28,q_tmp_1_28); + q_acc_1_29 = _mm256_add_pd(q_acc_1_29,q_tmp_1_29); + q_acc_2_0 = _mm256_add_pd(q_acc_2_0,q_tmp_2_0); + q_acc_2_1 = _mm256_add_pd(q_acc_2_1,q_tmp_2_1); + q_acc_2_2 = _mm256_add_pd(q_acc_2_2,q_tmp_2_2); + q_acc_2_3 = _mm256_add_pd(q_acc_2_3,q_tmp_2_3); + q_acc_2_4 = _mm256_add_pd(q_acc_2_4,q_tmp_2_4); + q_acc_2_5 = _mm256_add_pd(q_acc_2_5,q_tmp_2_5); + q_acc_2_6 = _mm256_add_pd(q_acc_2_6,q_tmp_2_6); + q_acc_2_7 = _mm256_add_pd(q_acc_2_7,q_tmp_2_7); + q_acc_2_8 = _mm256_add_pd(q_acc_2_8,q_tmp_2_8); + q_acc_2_9 = _mm256_add_pd(q_acc_2_9,q_tmp_2_9); + q_acc_2_10 = _mm256_add_pd(q_acc_2_10,q_tmp_2_10); + q_acc_2_11 = _mm256_add_pd(q_acc_2_11,q_tmp_2_11); + q_acc_2_12 = _mm256_add_pd(q_acc_2_12,q_tmp_2_12); + q_acc_2_13 = _mm256_add_pd(q_acc_2_13,q_tmp_2_13); + q_acc_2_14 = _mm256_add_pd(q_acc_2_14,q_tmp_2_14); + q_acc_2_15 = _mm256_add_pd(q_acc_2_15,q_tmp_2_15); + q_acc_2_16 = _mm256_add_pd(q_acc_2_16,q_tmp_2_16); + q_acc_2_17 = _mm256_add_pd(q_acc_2_17,q_tmp_2_17); + q_acc_2_18 = _mm256_add_pd(q_acc_2_18,q_tmp_2_18); + q_acc_2_19 = _mm256_add_pd(q_acc_2_19,q_tmp_2_19); + q_acc_2_20 = _mm256_add_pd(q_acc_2_20,q_tmp_2_20); + q_acc_2_21 = _mm256_add_pd(q_acc_2_21,q_tmp_2_21); + q_acc_2_22 = _mm256_add_pd(q_acc_2_22,q_tmp_2_22); + q_acc_2_23 = _mm256_add_pd(q_acc_2_23,q_tmp_2_23); + q_acc_2_24 = _mm256_add_pd(q_acc_2_24,q_tmp_2_24); + q_acc_2_25 = _mm256_add_pd(q_acc_2_25,q_tmp_2_25); + q_acc_2_26 = _mm256_add_pd(q_acc_2_26,q_tmp_2_26); + q_acc_2_27 = _mm256_add_pd(q_acc_2_27,q_tmp_2_27); + q_acc_2_28 = _mm256_add_pd(q_acc_2_28,q_tmp_2_28); + q_acc_2_29 = _mm256_add_pd(q_acc_2_29,q_tmp_2_29); + q_acc_3_0 = _mm256_add_pd(q_acc_3_0,q_tmp_3_0); + q_acc_3_1 = _mm256_add_pd(q_acc_3_1,q_tmp_3_1); + q_acc_3_2 = _mm256_add_pd(q_acc_3_2,q_tmp_3_2); + q_acc_3_3 = _mm256_add_pd(q_acc_3_3,q_tmp_3_3); + q_acc_3_4 = _mm256_add_pd(q_acc_3_4,q_tmp_3_4); + q_acc_3_5 = _mm256_add_pd(q_acc_3_5,q_tmp_3_5); + q_acc_3_6 = _mm256_add_pd(q_acc_3_6,q_tmp_3_6); + q_acc_3_7 = _mm256_add_pd(q_acc_3_7,q_tmp_3_7); + q_acc_3_8 = _mm256_add_pd(q_acc_3_8,q_tmp_3_8); + q_acc_3_9 = _mm256_add_pd(q_acc_3_9,q_tmp_3_9); + q_acc_3_10 = _mm256_add_pd(q_acc_3_10,q_tmp_3_10); + q_acc_3_11 = _mm256_add_pd(q_acc_3_11,q_tmp_3_11); + q_acc_3_12 = _mm256_add_pd(q_acc_3_12,q_tmp_3_12); + q_acc_3_13 = _mm256_add_pd(q_acc_3_13,q_tmp_3_13); + q_acc_3_14 = _mm256_add_pd(q_acc_3_14,q_tmp_3_14); + q_acc_3_15 = _mm256_add_pd(q_acc_3_15,q_tmp_3_15); + q_acc_3_16 = _mm256_add_pd(q_acc_3_16,q_tmp_3_16); + q_acc_3_17 = _mm256_add_pd(q_acc_3_17,q_tmp_3_17); + q_acc_3_18 = _mm256_add_pd(q_acc_3_18,q_tmp_3_18); + q_acc_3_19 = _mm256_add_pd(q_acc_3_19,q_tmp_3_19); + q_acc_3_20 = _mm256_add_pd(q_acc_3_20,q_tmp_3_20); + q_acc_3_21 = _mm256_add_pd(q_acc_3_21,q_tmp_3_21); + q_acc_3_22 = _mm256_add_pd(q_acc_3_22,q_tmp_3_22); + q_acc_3_23 = _mm256_add_pd(q_acc_3_23,q_tmp_3_23); + q_acc_3_24 = _mm256_add_pd(q_acc_3_24,q_tmp_3_24); + q_acc_3_25 = _mm256_add_pd(q_acc_3_25,q_tmp_3_25); + q_acc_3_26 = _mm256_add_pd(q_acc_3_26,q_tmp_3_26); + q_acc_3_27 = _mm256_add_pd(q_acc_3_27,q_tmp_3_27); + q_acc_3_28 = _mm256_add_pd(q_acc_3_28,q_tmp_3_28); + q_acc_3_29 = _mm256_add_pd(q_acc_3_29,q_tmp_3_29); + } + const __m256d elMatVec_0 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(q_acc_0_0,src_dof_0),_mm256_mul_pd(q_acc_0_1,src_dof_1)),_mm256_mul_pd(q_acc_0_10,src_dof_10)),_mm256_mul_pd(q_acc_0_11,src_dof_11)),_mm256_mul_pd(q_acc_0_12,src_dof_12)),_mm256_mul_pd(q_acc_0_13,src_dof_13)),_mm256_mul_pd(q_acc_0_14,src_dof_14)),_mm256_mul_pd(q_acc_0_15,src_dof_15)),_mm256_mul_pd(q_acc_0_16,src_dof_16)),_mm256_mul_pd(q_acc_0_17,src_dof_17)),_mm256_mul_pd(q_acc_0_18,src_dof_18)),_mm256_mul_pd(q_acc_0_19,src_dof_19)),_mm256_mul_pd(q_acc_0_2,src_dof_2)),_mm256_mul_pd(q_acc_0_20,src_dof_20)),_mm256_mul_pd(q_acc_0_21,src_dof_21)),_mm256_mul_pd(q_acc_0_22,src_dof_22)),_mm256_mul_pd(q_acc_0_23,src_dof_23)),_mm256_mul_pd(q_acc_0_24,src_dof_24)),_mm256_mul_pd(q_acc_0_25,src_dof_25)),_mm256_mul_pd(q_acc_0_26,src_dof_26)),_mm256_mul_pd(q_acc_0_27,src_dof_27)),_mm256_mul_pd(q_acc_0_28,src_dof_28)),_mm256_mul_pd(q_acc_0_29,src_dof_29)),_mm256_mul_pd(q_acc_0_3,src_dof_3)),_mm256_mul_pd(q_acc_0_4,src_dof_4)),_mm256_mul_pd(q_acc_0_5,src_dof_5)),_mm256_mul_pd(q_acc_0_6,src_dof_6)),_mm256_mul_pd(q_acc_0_7,src_dof_7)),_mm256_mul_pd(q_acc_0_8,src_dof_8)),_mm256_mul_pd(q_acc_0_9,src_dof_9)); + const __m256d elMatVec_1 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(q_acc_1_0,src_dof_0),_mm256_mul_pd(q_acc_1_1,src_dof_1)),_mm256_mul_pd(q_acc_1_10,src_dof_10)),_mm256_mul_pd(q_acc_1_11,src_dof_11)),_mm256_mul_pd(q_acc_1_12,src_dof_12)),_mm256_mul_pd(q_acc_1_13,src_dof_13)),_mm256_mul_pd(q_acc_1_14,src_dof_14)),_mm256_mul_pd(q_acc_1_15,src_dof_15)),_mm256_mul_pd(q_acc_1_16,src_dof_16)),_mm256_mul_pd(q_acc_1_17,src_dof_17)),_mm256_mul_pd(q_acc_1_18,src_dof_18)),_mm256_mul_pd(q_acc_1_19,src_dof_19)),_mm256_mul_pd(q_acc_1_2,src_dof_2)),_mm256_mul_pd(q_acc_1_20,src_dof_20)),_mm256_mul_pd(q_acc_1_21,src_dof_21)),_mm256_mul_pd(q_acc_1_22,src_dof_22)),_mm256_mul_pd(q_acc_1_23,src_dof_23)),_mm256_mul_pd(q_acc_1_24,src_dof_24)),_mm256_mul_pd(q_acc_1_25,src_dof_25)),_mm256_mul_pd(q_acc_1_26,src_dof_26)),_mm256_mul_pd(q_acc_1_27,src_dof_27)),_mm256_mul_pd(q_acc_1_28,src_dof_28)),_mm256_mul_pd(q_acc_1_29,src_dof_29)),_mm256_mul_pd(q_acc_1_3,src_dof_3)),_mm256_mul_pd(q_acc_1_4,src_dof_4)),_mm256_mul_pd(q_acc_1_5,src_dof_5)),_mm256_mul_pd(q_acc_1_6,src_dof_6)),_mm256_mul_pd(q_acc_1_7,src_dof_7)),_mm256_mul_pd(q_acc_1_8,src_dof_8)),_mm256_mul_pd(q_acc_1_9,src_dof_9)); + const __m256d elMatVec_2 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(q_acc_2_0,src_dof_0),_mm256_mul_pd(q_acc_2_1,src_dof_1)),_mm256_mul_pd(q_acc_2_10,src_dof_10)),_mm256_mul_pd(q_acc_2_11,src_dof_11)),_mm256_mul_pd(q_acc_2_12,src_dof_12)),_mm256_mul_pd(q_acc_2_13,src_dof_13)),_mm256_mul_pd(q_acc_2_14,src_dof_14)),_mm256_mul_pd(q_acc_2_15,src_dof_15)),_mm256_mul_pd(q_acc_2_16,src_dof_16)),_mm256_mul_pd(q_acc_2_17,src_dof_17)),_mm256_mul_pd(q_acc_2_18,src_dof_18)),_mm256_mul_pd(q_acc_2_19,src_dof_19)),_mm256_mul_pd(q_acc_2_2,src_dof_2)),_mm256_mul_pd(q_acc_2_20,src_dof_20)),_mm256_mul_pd(q_acc_2_21,src_dof_21)),_mm256_mul_pd(q_acc_2_22,src_dof_22)),_mm256_mul_pd(q_acc_2_23,src_dof_23)),_mm256_mul_pd(q_acc_2_24,src_dof_24)),_mm256_mul_pd(q_acc_2_25,src_dof_25)),_mm256_mul_pd(q_acc_2_26,src_dof_26)),_mm256_mul_pd(q_acc_2_27,src_dof_27)),_mm256_mul_pd(q_acc_2_28,src_dof_28)),_mm256_mul_pd(q_acc_2_29,src_dof_29)),_mm256_mul_pd(q_acc_2_3,src_dof_3)),_mm256_mul_pd(q_acc_2_4,src_dof_4)),_mm256_mul_pd(q_acc_2_5,src_dof_5)),_mm256_mul_pd(q_acc_2_6,src_dof_6)),_mm256_mul_pd(q_acc_2_7,src_dof_7)),_mm256_mul_pd(q_acc_2_8,src_dof_8)),_mm256_mul_pd(q_acc_2_9,src_dof_9)); + const __m256d elMatVec_3 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(q_acc_3_0,src_dof_0),_mm256_mul_pd(q_acc_3_1,src_dof_1)),_mm256_mul_pd(q_acc_3_10,src_dof_10)),_mm256_mul_pd(q_acc_3_11,src_dof_11)),_mm256_mul_pd(q_acc_3_12,src_dof_12)),_mm256_mul_pd(q_acc_3_13,src_dof_13)),_mm256_mul_pd(q_acc_3_14,src_dof_14)),_mm256_mul_pd(q_acc_3_15,src_dof_15)),_mm256_mul_pd(q_acc_3_16,src_dof_16)),_mm256_mul_pd(q_acc_3_17,src_dof_17)),_mm256_mul_pd(q_acc_3_18,src_dof_18)),_mm256_mul_pd(q_acc_3_19,src_dof_19)),_mm256_mul_pd(q_acc_3_2,src_dof_2)),_mm256_mul_pd(q_acc_3_20,src_dof_20)),_mm256_mul_pd(q_acc_3_21,src_dof_21)),_mm256_mul_pd(q_acc_3_22,src_dof_22)),_mm256_mul_pd(q_acc_3_23,src_dof_23)),_mm256_mul_pd(q_acc_3_24,src_dof_24)),_mm256_mul_pd(q_acc_3_25,src_dof_25)),_mm256_mul_pd(q_acc_3_26,src_dof_26)),_mm256_mul_pd(q_acc_3_27,src_dof_27)),_mm256_mul_pd(q_acc_3_28,src_dof_28)),_mm256_mul_pd(q_acc_3_29,src_dof_29)),_mm256_mul_pd(q_acc_3_3,src_dof_3)),_mm256_mul_pd(q_acc_3_4,src_dof_4)),_mm256_mul_pd(q_acc_3_5,src_dof_5)),_mm256_mul_pd(q_acc_3_6,src_dof_6)),_mm256_mul_pd(q_acc_3_7,src_dof_7)),_mm256_mul_pd(q_acc_3_8,src_dof_8)),_mm256_mul_pd(q_acc_3_9,src_dof_9)); + _mm256_storeu_pd(&_data_dst[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))],_mm256_add_pd(elMatVec_0,_mm256_loadu_pd(& _data_dst[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))]))); + _mm256_storeu_pd(&_data_dst[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1],_mm256_add_pd(elMatVec_1,_mm256_loadu_pd(& _data_dst[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1]))); + _mm256_storeu_pd(&_data_dst[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1],_mm256_add_pd(elMatVec_2,_mm256_loadu_pd(& _data_dst[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1]))); + _mm256_storeu_pd(&_data_dst[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))],_mm256_add_pd(elMatVec_3,_mm256_loadu_pd(& _data_dst[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))]))); + } + for (int64_t ctr_0 = (int64_t)((-ctr_1 - ctr_2 + micro_edges_per_macro_edge - 1) / (4)) * (4); ctr_0 < -ctr_1 - ctr_2 + micro_edges_per_macro_edge - 1; ctr_0 += 1) + { + + const int64_t phantom_ctr_0 = ctr_0; + real_t _data_float_loop_ctr_array_dim_0[4]; + _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0; + _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1; + _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2; + _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3; + real_t _data_float_loop_ctr_array_dim_1[4]; + _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1; + real_t _data_float_loop_ctr_array_dim_2[4]; + _data_float_loop_ctr_array_dim_2[0] = (real_t) ctr_2; + _data_float_loop_ctr_array_dim_2[1] = (real_t) ctr_2; + _data_float_loop_ctr_array_dim_2[2] = (real_t) ctr_2; + _data_float_loop_ctr_array_dim_2[3] = (real_t) ctr_2; + + const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]; + const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]; + const real_t p_affine_0_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]; + const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]; + const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]; + const real_t p_affine_1_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]; + const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_2_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_3_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_3_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_3_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0; + const real_t src_dof_0 = _data_src_vertex_0[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))]; + const real_t src_dof_1 = _data_src_vertex_0[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1]; + const real_t src_dof_2 = _data_src_vertex_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1]; + const real_t src_dof_3 = _data_src_vertex_0[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))]; + const real_t src_dof_4 = _data_src_edge_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + 4*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))]; + const real_t src_dof_5 = _data_src_edge_0[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 5*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]; + const real_t src_dof_6 = _data_src_edge_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + 1]; + const real_t src_dof_7 = _data_src_edge_0[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 3*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]; + const real_t src_dof_8 = _data_src_edge_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge - 1)*(micro_edges_per_macro_edge + 1)) / (6)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))]; + const real_t src_dof_9 = _data_src_edge_0[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]; + const real_t src_dof_10 = _data_src_vertex_1[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))]; + const real_t src_dof_11 = _data_src_vertex_1[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1]; + const real_t src_dof_12 = _data_src_vertex_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1]; + const real_t src_dof_13 = _data_src_vertex_1[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))]; + const real_t src_dof_14 = _data_src_edge_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + 4*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))]; + const real_t src_dof_15 = _data_src_edge_1[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 5*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]; + const real_t src_dof_16 = _data_src_edge_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + 1]; + const real_t src_dof_17 = _data_src_edge_1[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 3*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]; + const real_t src_dof_18 = _data_src_edge_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge - 1)*(micro_edges_per_macro_edge + 1)) / (6)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))]; + const real_t src_dof_19 = _data_src_edge_1[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]; + const real_t src_dof_20 = _data_src_vertex_2[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))]; + const real_t src_dof_21 = _data_src_vertex_2[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1]; + const real_t src_dof_22 = _data_src_vertex_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1]; + const real_t src_dof_23 = _data_src_vertex_2[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))]; + const real_t src_dof_24 = _data_src_edge_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + 4*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))]; + const real_t src_dof_25 = _data_src_edge_2[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 5*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]; + const real_t src_dof_26 = _data_src_edge_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + 1]; + const real_t src_dof_27 = _data_src_edge_2[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 3*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]; + const real_t src_dof_28 = _data_src_edge_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge - 1)*(micro_edges_per_macro_edge + 1)) / (6)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))]; + const real_t src_dof_29 = _data_src_edge_2[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]; + const real_t rho_dof_0 = _data_rhoVertex[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))]; + const real_t rho_dof_1 = _data_rhoVertex[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1]; + const real_t rho_dof_2 = _data_rhoVertex[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1]; + const real_t rho_dof_3 = _data_rhoVertex[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))]; + const real_t rho_dof_4 = _data_rhoEdge[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + 4*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))]; + const real_t rho_dof_5 = _data_rhoEdge[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 5*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]; + const real_t rho_dof_6 = _data_rhoEdge[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + 1]; + const real_t rho_dof_7 = _data_rhoEdge[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 3*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]; + const real_t rho_dof_8 = _data_rhoEdge[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge - 1)*(micro_edges_per_macro_edge + 1)) / (6)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))]; + const real_t rho_dof_9 = _data_rhoEdge[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]; + real_t q_acc_0_0 = 0.0; + real_t q_acc_0_1 = 0.0; + real_t q_acc_0_2 = 0.0; + real_t q_acc_0_3 = 0.0; + real_t q_acc_0_4 = 0.0; + real_t q_acc_0_5 = 0.0; + real_t q_acc_0_6 = 0.0; + real_t q_acc_0_7 = 0.0; + real_t q_acc_0_8 = 0.0; + real_t q_acc_0_9 = 0.0; + real_t q_acc_0_10 = 0.0; + real_t q_acc_0_11 = 0.0; + real_t q_acc_0_12 = 0.0; + real_t q_acc_0_13 = 0.0; + real_t q_acc_0_14 = 0.0; + real_t q_acc_0_15 = 0.0; + real_t q_acc_0_16 = 0.0; + real_t q_acc_0_17 = 0.0; + real_t q_acc_0_18 = 0.0; + real_t q_acc_0_19 = 0.0; + real_t q_acc_0_20 = 0.0; + real_t q_acc_0_21 = 0.0; + real_t q_acc_0_22 = 0.0; + real_t q_acc_0_23 = 0.0; + real_t q_acc_0_24 = 0.0; + real_t q_acc_0_25 = 0.0; + real_t q_acc_0_26 = 0.0; + real_t q_acc_0_27 = 0.0; + real_t q_acc_0_28 = 0.0; + real_t q_acc_0_29 = 0.0; + real_t q_acc_1_0 = 0.0; + real_t q_acc_1_1 = 0.0; + real_t q_acc_1_2 = 0.0; + real_t q_acc_1_3 = 0.0; + real_t q_acc_1_4 = 0.0; + real_t q_acc_1_5 = 0.0; + real_t q_acc_1_6 = 0.0; + real_t q_acc_1_7 = 0.0; + real_t q_acc_1_8 = 0.0; + real_t q_acc_1_9 = 0.0; + real_t q_acc_1_10 = 0.0; + real_t q_acc_1_11 = 0.0; + real_t q_acc_1_12 = 0.0; + real_t q_acc_1_13 = 0.0; + real_t q_acc_1_14 = 0.0; + real_t q_acc_1_15 = 0.0; + real_t q_acc_1_16 = 0.0; + real_t q_acc_1_17 = 0.0; + real_t q_acc_1_18 = 0.0; + real_t q_acc_1_19 = 0.0; + real_t q_acc_1_20 = 0.0; + real_t q_acc_1_21 = 0.0; + real_t q_acc_1_22 = 0.0; + real_t q_acc_1_23 = 0.0; + real_t q_acc_1_24 = 0.0; + real_t q_acc_1_25 = 0.0; + real_t q_acc_1_26 = 0.0; + real_t q_acc_1_27 = 0.0; + real_t q_acc_1_28 = 0.0; + real_t q_acc_1_29 = 0.0; + real_t q_acc_2_0 = 0.0; + real_t q_acc_2_1 = 0.0; + real_t q_acc_2_2 = 0.0; + real_t q_acc_2_3 = 0.0; + real_t q_acc_2_4 = 0.0; + real_t q_acc_2_5 = 0.0; + real_t q_acc_2_6 = 0.0; + real_t q_acc_2_7 = 0.0; + real_t q_acc_2_8 = 0.0; + real_t q_acc_2_9 = 0.0; + real_t q_acc_2_10 = 0.0; + real_t q_acc_2_11 = 0.0; + real_t q_acc_2_12 = 0.0; + real_t q_acc_2_13 = 0.0; + real_t q_acc_2_14 = 0.0; + real_t q_acc_2_15 = 0.0; + real_t q_acc_2_16 = 0.0; + real_t q_acc_2_17 = 0.0; + real_t q_acc_2_18 = 0.0; + real_t q_acc_2_19 = 0.0; + real_t q_acc_2_20 = 0.0; + real_t q_acc_2_21 = 0.0; + real_t q_acc_2_22 = 0.0; + real_t q_acc_2_23 = 0.0; + real_t q_acc_2_24 = 0.0; + real_t q_acc_2_25 = 0.0; + real_t q_acc_2_26 = 0.0; + real_t q_acc_2_27 = 0.0; + real_t q_acc_2_28 = 0.0; + real_t q_acc_2_29 = 0.0; + real_t q_acc_3_0 = 0.0; + real_t q_acc_3_1 = 0.0; + real_t q_acc_3_2 = 0.0; + real_t q_acc_3_3 = 0.0; + real_t q_acc_3_4 = 0.0; + real_t q_acc_3_5 = 0.0; + real_t q_acc_3_6 = 0.0; + real_t q_acc_3_7 = 0.0; + real_t q_acc_3_8 = 0.0; + real_t q_acc_3_9 = 0.0; + real_t q_acc_3_10 = 0.0; + real_t q_acc_3_11 = 0.0; + real_t q_acc_3_12 = 0.0; + real_t q_acc_3_13 = 0.0; + real_t q_acc_3_14 = 0.0; + real_t q_acc_3_15 = 0.0; + real_t q_acc_3_16 = 0.0; + real_t q_acc_3_17 = 0.0; + real_t q_acc_3_18 = 0.0; + real_t q_acc_3_19 = 0.0; + real_t q_acc_3_20 = 0.0; + real_t q_acc_3_21 = 0.0; + real_t q_acc_3_22 = 0.0; + real_t q_acc_3_23 = 0.0; + real_t q_acc_3_24 = 0.0; + real_t q_acc_3_25 = 0.0; + real_t q_acc_3_26 = 0.0; + real_t q_acc_3_27 = 0.0; + real_t q_acc_3_28 = 0.0; + real_t q_acc_3_29 = 0.0; + for (int64_t q = 0; q < 5; q += 1) + { + const real_t tmp_qloop_0 = (_data_q_p_0[q]*_data_q_p_0[q]); + const real_t tmp_qloop_1 = tmp_qloop_0*2.0; + const real_t tmp_qloop_2 = (_data_q_p_1[q]*_data_q_p_1[q]); + const real_t tmp_qloop_3 = tmp_qloop_2*2.0; + const real_t tmp_qloop_4 = (_data_q_p_2[q]*_data_q_p_2[q]); + const real_t tmp_qloop_5 = tmp_qloop_4*2.0; + const real_t tmp_qloop_6 = 4.0*_data_q_p_1[q]; + const real_t tmp_qloop_7 = tmp_qloop_6*_data_q_p_2[q]; + const real_t tmp_qloop_8 = 4.0*_data_q_p_0[q]; + const real_t tmp_qloop_9 = tmp_qloop_8*_data_q_p_1[q]; + const real_t tmp_qloop_10 = tmp_qloop_8*_data_q_p_2[q]; + const real_t tmp_qloop_11 = tmp_qloop_10 + tmp_qloop_9; + const real_t tmp_qloop_12 = tmp_qloop_1 + tmp_qloop_11 + tmp_qloop_3 + tmp_qloop_5 + tmp_qloop_7 - 3.0*_data_q_p_0[q] - 3.0*_data_q_p_1[q] - 3.0*_data_q_p_2[q] + 1.0; + const real_t tmp_qloop_13 = 4.0*_data_q_p_2[q]; + const real_t tmp_qloop_14 = tmp_qloop_13 + tmp_qloop_6; + const real_t tmp_qloop_15 = -rho_dof_8*tmp_qloop_6; + const real_t tmp_qloop_16 = rho_dof_0*(tmp_qloop_14 + tmp_qloop_8 - 3.0); + const real_t tmp_qloop_17 = -rho_dof_7*tmp_qloop_13 + tmp_qloop_16; + const real_t tmp_qloop_18 = rho_dof_1*(tmp_qloop_8 - 1.0) + rho_dof_5*tmp_qloop_13 + rho_dof_6*tmp_qloop_6 + rho_dof_9*(-tmp_qloop_14 - 8.0*_data_q_p_0[q] + 4.0) + tmp_qloop_15 + tmp_qloop_17; + const real_t tmp_qloop_19 = tmp_qloop_8 - 4.0; + const real_t tmp_qloop_20 = -rho_dof_9*tmp_qloop_8; + const real_t tmp_qloop_21 = rho_dof_2*(tmp_qloop_6 - 1.0) + rho_dof_4*tmp_qloop_13 + rho_dof_6*tmp_qloop_8 + rho_dof_8*(-tmp_qloop_13 - tmp_qloop_19 - 8.0*_data_q_p_1[q]) + tmp_qloop_17 + tmp_qloop_20; + const real_t tmp_qloop_22 = rho_dof_3*(tmp_qloop_13 - 1.0) + rho_dof_4*tmp_qloop_6 + rho_dof_5*tmp_qloop_8 + rho_dof_7*(-tmp_qloop_19 - tmp_qloop_6 - 8.0*_data_q_p_2[q]) + tmp_qloop_15 + tmp_qloop_16 + tmp_qloop_20; + const real_t tmp_qloop_23 = jac_affine_inv_0_0_GREEN_DOWN*tmp_qloop_18 + jac_affine_inv_1_0_GREEN_DOWN*tmp_qloop_21 + jac_affine_inv_2_0_GREEN_DOWN*tmp_qloop_22; + const real_t tmp_qloop_24 = tmp_qloop_1 - _data_q_p_0[q]; + const real_t tmp_qloop_25 = tmp_qloop_3 - _data_q_p_1[q]; + const real_t tmp_qloop_26 = tmp_qloop_5 - _data_q_p_2[q]; + const real_t tmp_qloop_27 = -tmp_qloop_10 + tmp_qloop_13 + tmp_qloop_4*-4.0 - tmp_qloop_7; + const real_t tmp_qloop_28 = tmp_qloop_2*4.0; + const real_t tmp_qloop_29 = -tmp_qloop_28 + tmp_qloop_6 - tmp_qloop_7 - tmp_qloop_9; + const real_t tmp_qloop_30 = tmp_qloop_0*4.0; + const real_t tmp_qloop_31 = -tmp_qloop_11 - tmp_qloop_30 + tmp_qloop_8; + const real_t tmp_qloop_32 = abs_det_jac_affine_GREEN_DOWN*1.0 / (rho_dof_0*tmp_qloop_12 + rho_dof_1*tmp_qloop_24 + rho_dof_2*tmp_qloop_25 + rho_dof_3*tmp_qloop_26 + rho_dof_4*tmp_qloop_7 + rho_dof_5*tmp_qloop_10 + rho_dof_6*tmp_qloop_9 + rho_dof_7*tmp_qloop_27 + rho_dof_8*tmp_qloop_29 + rho_dof_9*tmp_qloop_31)*_data_q_w[q]; + const real_t tmp_qloop_33 = tmp_qloop_32*(1.0 - _data_q_p_0[q] - _data_q_p_1[q] - _data_q_p_2[q]); + const real_t tmp_qloop_34 = tmp_qloop_23*tmp_qloop_33; + const real_t tmp_qloop_35 = jac_affine_inv_0_1_GREEN_DOWN*tmp_qloop_18 + jac_affine_inv_1_1_GREEN_DOWN*tmp_qloop_21 + jac_affine_inv_2_1_GREEN_DOWN*tmp_qloop_22; + const real_t tmp_qloop_36 = tmp_qloop_33*tmp_qloop_35; + const real_t tmp_qloop_37 = jac_affine_inv_0_2_GREEN_DOWN*tmp_qloop_18 + jac_affine_inv_1_2_GREEN_DOWN*tmp_qloop_21 + jac_affine_inv_2_2_GREEN_DOWN*tmp_qloop_22; + const real_t tmp_qloop_38 = tmp_qloop_33*tmp_qloop_37; + const real_t tmp_qloop_39 = tmp_qloop_32*_data_q_p_0[q]; + const real_t tmp_qloop_40 = tmp_qloop_23*tmp_qloop_39; + const real_t tmp_qloop_41 = tmp_qloop_32*_data_q_p_2[q]; + const real_t tmp_qloop_42 = tmp_qloop_23*tmp_qloop_41; + const real_t tmp_qloop_43 = tmp_qloop_42*tmp_qloop_9; + const real_t tmp_qloop_44 = tmp_qloop_32*_data_q_p_1[q]; + const real_t tmp_qloop_45 = tmp_qloop_23*tmp_qloop_44; + const real_t tmp_qloop_46 = tmp_qloop_35*tmp_qloop_39; + const real_t tmp_qloop_47 = tmp_qloop_35*tmp_qloop_41; + const real_t tmp_qloop_48 = tmp_qloop_47*tmp_qloop_9; + const real_t tmp_qloop_49 = tmp_qloop_35*tmp_qloop_44; + const real_t tmp_qloop_50 = tmp_qloop_37*tmp_qloop_39; + const real_t tmp_qloop_51 = tmp_qloop_37*tmp_qloop_41; + const real_t tmp_qloop_52 = tmp_qloop_51*tmp_qloop_9; + const real_t tmp_qloop_53 = tmp_qloop_37*tmp_qloop_44; + const real_t tmp_qloop_54 = tmp_qloop_23*tmp_qloop_32; + const real_t tmp_qloop_55 = tmp_qloop_2*tmp_qloop_8; + const real_t tmp_qloop_56 = tmp_qloop_32*tmp_qloop_55; + const real_t tmp_qloop_57 = tmp_qloop_4*tmp_qloop_54; + const real_t tmp_qloop_58 = tmp_qloop_32*tmp_qloop_4; + const real_t tmp_qloop_59 = tmp_qloop_35*tmp_qloop_58; + const real_t tmp_qloop_60 = tmp_qloop_37*tmp_qloop_58; + const real_t q_tmp_0_0 = tmp_qloop_12*tmp_qloop_34; + const real_t q_tmp_0_1 = tmp_qloop_24*tmp_qloop_34; + const real_t q_tmp_0_2 = tmp_qloop_25*tmp_qloop_34; + const real_t q_tmp_0_3 = tmp_qloop_26*tmp_qloop_34; + const real_t q_tmp_0_4 = tmp_qloop_34*tmp_qloop_7; + const real_t q_tmp_0_5 = tmp_qloop_10*tmp_qloop_34; + const real_t q_tmp_0_6 = tmp_qloop_34*tmp_qloop_9; + const real_t q_tmp_0_7 = tmp_qloop_27*tmp_qloop_34; + const real_t q_tmp_0_8 = tmp_qloop_29*tmp_qloop_34; + const real_t q_tmp_0_9 = tmp_qloop_31*tmp_qloop_34; + const real_t q_tmp_0_10 = tmp_qloop_12*tmp_qloop_36; + const real_t q_tmp_0_11 = tmp_qloop_24*tmp_qloop_36; + const real_t q_tmp_0_12 = tmp_qloop_25*tmp_qloop_36; + const real_t q_tmp_0_13 = tmp_qloop_26*tmp_qloop_36; + const real_t q_tmp_0_14 = tmp_qloop_36*tmp_qloop_7; + const real_t q_tmp_0_15 = tmp_qloop_10*tmp_qloop_36; + const real_t q_tmp_0_16 = tmp_qloop_36*tmp_qloop_9; + const real_t q_tmp_0_17 = tmp_qloop_27*tmp_qloop_36; + const real_t q_tmp_0_18 = tmp_qloop_29*tmp_qloop_36; + const real_t q_tmp_0_19 = tmp_qloop_31*tmp_qloop_36; + const real_t q_tmp_0_20 = tmp_qloop_12*tmp_qloop_38; + const real_t q_tmp_0_21 = tmp_qloop_24*tmp_qloop_38; + const real_t q_tmp_0_22 = tmp_qloop_25*tmp_qloop_38; + const real_t q_tmp_0_23 = tmp_qloop_26*tmp_qloop_38; + const real_t q_tmp_0_24 = tmp_qloop_38*tmp_qloop_7; + const real_t q_tmp_0_25 = tmp_qloop_10*tmp_qloop_38; + const real_t q_tmp_0_26 = tmp_qloop_38*tmp_qloop_9; + const real_t q_tmp_0_27 = tmp_qloop_27*tmp_qloop_38; + const real_t q_tmp_0_28 = tmp_qloop_29*tmp_qloop_38; + const real_t q_tmp_0_29 = tmp_qloop_31*tmp_qloop_38; + const real_t q_tmp_1_0 = tmp_qloop_12*tmp_qloop_40; + const real_t q_tmp_1_1 = tmp_qloop_24*tmp_qloop_40; + const real_t q_tmp_1_2 = tmp_qloop_25*tmp_qloop_40; + const real_t q_tmp_1_3 = tmp_qloop_26*tmp_qloop_40; + const real_t q_tmp_1_4 = tmp_qloop_43; + const real_t q_tmp_1_5 = tmp_qloop_30*tmp_qloop_42; + const real_t q_tmp_1_6 = tmp_qloop_30*tmp_qloop_45; + const real_t q_tmp_1_7 = tmp_qloop_27*tmp_qloop_40; + const real_t q_tmp_1_8 = tmp_qloop_29*tmp_qloop_40; + const real_t q_tmp_1_9 = tmp_qloop_31*tmp_qloop_40; + const real_t q_tmp_1_10 = tmp_qloop_12*tmp_qloop_46; + const real_t q_tmp_1_11 = tmp_qloop_24*tmp_qloop_46; + const real_t q_tmp_1_12 = tmp_qloop_25*tmp_qloop_46; + const real_t q_tmp_1_13 = tmp_qloop_26*tmp_qloop_46; + const real_t q_tmp_1_14 = tmp_qloop_48; + const real_t q_tmp_1_15 = tmp_qloop_30*tmp_qloop_47; + const real_t q_tmp_1_16 = tmp_qloop_30*tmp_qloop_49; + const real_t q_tmp_1_17 = tmp_qloop_27*tmp_qloop_46; + const real_t q_tmp_1_18 = tmp_qloop_29*tmp_qloop_46; + const real_t q_tmp_1_19 = tmp_qloop_31*tmp_qloop_46; + const real_t q_tmp_1_20 = tmp_qloop_12*tmp_qloop_50; + const real_t q_tmp_1_21 = tmp_qloop_24*tmp_qloop_50; + const real_t q_tmp_1_22 = tmp_qloop_25*tmp_qloop_50; + const real_t q_tmp_1_23 = tmp_qloop_26*tmp_qloop_50; + const real_t q_tmp_1_24 = tmp_qloop_52; + const real_t q_tmp_1_25 = tmp_qloop_30*tmp_qloop_51; + const real_t q_tmp_1_26 = tmp_qloop_30*tmp_qloop_53; + const real_t q_tmp_1_27 = tmp_qloop_27*tmp_qloop_50; + const real_t q_tmp_1_28 = tmp_qloop_29*tmp_qloop_50; + const real_t q_tmp_1_29 = tmp_qloop_31*tmp_qloop_50; + const real_t q_tmp_2_0 = tmp_qloop_12*tmp_qloop_45; + const real_t q_tmp_2_1 = tmp_qloop_24*tmp_qloop_45; + const real_t q_tmp_2_2 = tmp_qloop_25*tmp_qloop_45; + const real_t q_tmp_2_3 = tmp_qloop_26*tmp_qloop_45; + const real_t q_tmp_2_4 = tmp_qloop_28*tmp_qloop_42; + const real_t q_tmp_2_5 = tmp_qloop_43; + const real_t q_tmp_2_6 = tmp_qloop_54*tmp_qloop_55; + const real_t q_tmp_2_7 = tmp_qloop_27*tmp_qloop_45; + const real_t q_tmp_2_8 = tmp_qloop_29*tmp_qloop_45; + const real_t q_tmp_2_9 = tmp_qloop_31*tmp_qloop_45; + const real_t q_tmp_2_10 = tmp_qloop_12*tmp_qloop_49; + const real_t q_tmp_2_11 = tmp_qloop_24*tmp_qloop_49; + const real_t q_tmp_2_12 = tmp_qloop_25*tmp_qloop_49; + const real_t q_tmp_2_13 = tmp_qloop_26*tmp_qloop_49; + const real_t q_tmp_2_14 = tmp_qloop_28*tmp_qloop_47; + const real_t q_tmp_2_15 = tmp_qloop_48; + const real_t q_tmp_2_16 = tmp_qloop_35*tmp_qloop_56; + const real_t q_tmp_2_17 = tmp_qloop_27*tmp_qloop_49; + const real_t q_tmp_2_18 = tmp_qloop_29*tmp_qloop_49; + const real_t q_tmp_2_19 = tmp_qloop_31*tmp_qloop_49; + const real_t q_tmp_2_20 = tmp_qloop_12*tmp_qloop_53; + const real_t q_tmp_2_21 = tmp_qloop_24*tmp_qloop_53; + const real_t q_tmp_2_22 = tmp_qloop_25*tmp_qloop_53; + const real_t q_tmp_2_23 = tmp_qloop_26*tmp_qloop_53; + const real_t q_tmp_2_24 = tmp_qloop_28*tmp_qloop_51; + const real_t q_tmp_2_25 = tmp_qloop_52; + const real_t q_tmp_2_26 = tmp_qloop_37*tmp_qloop_56; + const real_t q_tmp_2_27 = tmp_qloop_27*tmp_qloop_53; + const real_t q_tmp_2_28 = tmp_qloop_29*tmp_qloop_53; + const real_t q_tmp_2_29 = tmp_qloop_31*tmp_qloop_53; + const real_t q_tmp_3_0 = tmp_qloop_12*tmp_qloop_42; + const real_t q_tmp_3_1 = tmp_qloop_24*tmp_qloop_42; + const real_t q_tmp_3_2 = tmp_qloop_25*tmp_qloop_42; + const real_t q_tmp_3_3 = tmp_qloop_26*tmp_qloop_42; + const real_t q_tmp_3_4 = tmp_qloop_57*tmp_qloop_6; + const real_t q_tmp_3_5 = tmp_qloop_57*tmp_qloop_8; + const real_t q_tmp_3_6 = tmp_qloop_43; + const real_t q_tmp_3_7 = tmp_qloop_27*tmp_qloop_42; + const real_t q_tmp_3_8 = tmp_qloop_29*tmp_qloop_42; + const real_t q_tmp_3_9 = tmp_qloop_31*tmp_qloop_42; + const real_t q_tmp_3_10 = tmp_qloop_12*tmp_qloop_47; + const real_t q_tmp_3_11 = tmp_qloop_24*tmp_qloop_47; + const real_t q_tmp_3_12 = tmp_qloop_25*tmp_qloop_47; + const real_t q_tmp_3_13 = tmp_qloop_26*tmp_qloop_47; + const real_t q_tmp_3_14 = tmp_qloop_59*tmp_qloop_6; + const real_t q_tmp_3_15 = tmp_qloop_59*tmp_qloop_8; + const real_t q_tmp_3_16 = tmp_qloop_48; + const real_t q_tmp_3_17 = tmp_qloop_27*tmp_qloop_47; + const real_t q_tmp_3_18 = tmp_qloop_29*tmp_qloop_47; + const real_t q_tmp_3_19 = tmp_qloop_31*tmp_qloop_47; + const real_t q_tmp_3_20 = tmp_qloop_12*tmp_qloop_51; + const real_t q_tmp_3_21 = tmp_qloop_24*tmp_qloop_51; + const real_t q_tmp_3_22 = tmp_qloop_25*tmp_qloop_51; + const real_t q_tmp_3_23 = tmp_qloop_26*tmp_qloop_51; + const real_t q_tmp_3_24 = tmp_qloop_6*tmp_qloop_60; + const real_t q_tmp_3_25 = tmp_qloop_60*tmp_qloop_8; + const real_t q_tmp_3_26 = tmp_qloop_52; + const real_t q_tmp_3_27 = tmp_qloop_27*tmp_qloop_51; + const real_t q_tmp_3_28 = tmp_qloop_29*tmp_qloop_51; + const real_t q_tmp_3_29 = tmp_qloop_31*tmp_qloop_51; + q_acc_0_0 = q_acc_0_0 + q_tmp_0_0; + q_acc_0_1 = q_acc_0_1 + q_tmp_0_1; + q_acc_0_2 = q_acc_0_2 + q_tmp_0_2; + q_acc_0_3 = q_acc_0_3 + q_tmp_0_3; + q_acc_0_4 = q_acc_0_4 + q_tmp_0_4; + q_acc_0_5 = q_acc_0_5 + q_tmp_0_5; + q_acc_0_6 = q_acc_0_6 + q_tmp_0_6; + q_acc_0_7 = q_acc_0_7 + q_tmp_0_7; + q_acc_0_8 = q_acc_0_8 + q_tmp_0_8; + q_acc_0_9 = q_acc_0_9 + q_tmp_0_9; + q_acc_0_10 = q_acc_0_10 + q_tmp_0_10; + q_acc_0_11 = q_acc_0_11 + q_tmp_0_11; + q_acc_0_12 = q_acc_0_12 + q_tmp_0_12; + q_acc_0_13 = q_acc_0_13 + q_tmp_0_13; + q_acc_0_14 = q_acc_0_14 + q_tmp_0_14; + q_acc_0_15 = q_acc_0_15 + q_tmp_0_15; + q_acc_0_16 = q_acc_0_16 + q_tmp_0_16; + q_acc_0_17 = q_acc_0_17 + q_tmp_0_17; + q_acc_0_18 = q_acc_0_18 + q_tmp_0_18; + q_acc_0_19 = q_acc_0_19 + q_tmp_0_19; + q_acc_0_20 = q_acc_0_20 + q_tmp_0_20; + q_acc_0_21 = q_acc_0_21 + q_tmp_0_21; + q_acc_0_22 = q_acc_0_22 + q_tmp_0_22; + q_acc_0_23 = q_acc_0_23 + q_tmp_0_23; + q_acc_0_24 = q_acc_0_24 + q_tmp_0_24; + q_acc_0_25 = q_acc_0_25 + q_tmp_0_25; + q_acc_0_26 = q_acc_0_26 + q_tmp_0_26; + q_acc_0_27 = q_acc_0_27 + q_tmp_0_27; + q_acc_0_28 = q_acc_0_28 + q_tmp_0_28; + q_acc_0_29 = q_acc_0_29 + q_tmp_0_29; + q_acc_1_0 = q_acc_1_0 + q_tmp_1_0; + q_acc_1_1 = q_acc_1_1 + q_tmp_1_1; + q_acc_1_2 = q_acc_1_2 + q_tmp_1_2; + q_acc_1_3 = q_acc_1_3 + q_tmp_1_3; + q_acc_1_4 = q_acc_1_4 + q_tmp_1_4; + q_acc_1_5 = q_acc_1_5 + q_tmp_1_5; + q_acc_1_6 = q_acc_1_6 + q_tmp_1_6; + q_acc_1_7 = q_acc_1_7 + q_tmp_1_7; + q_acc_1_8 = q_acc_1_8 + q_tmp_1_8; + q_acc_1_9 = q_acc_1_9 + q_tmp_1_9; + q_acc_1_10 = q_acc_1_10 + q_tmp_1_10; + q_acc_1_11 = q_acc_1_11 + q_tmp_1_11; + q_acc_1_12 = q_acc_1_12 + q_tmp_1_12; + q_acc_1_13 = q_acc_1_13 + q_tmp_1_13; + q_acc_1_14 = q_acc_1_14 + q_tmp_1_14; + q_acc_1_15 = q_acc_1_15 + q_tmp_1_15; + q_acc_1_16 = q_acc_1_16 + q_tmp_1_16; + q_acc_1_17 = q_acc_1_17 + q_tmp_1_17; + q_acc_1_18 = q_acc_1_18 + q_tmp_1_18; + q_acc_1_19 = q_acc_1_19 + q_tmp_1_19; + q_acc_1_20 = q_acc_1_20 + q_tmp_1_20; + q_acc_1_21 = q_acc_1_21 + q_tmp_1_21; + q_acc_1_22 = q_acc_1_22 + q_tmp_1_22; + q_acc_1_23 = q_acc_1_23 + q_tmp_1_23; + q_acc_1_24 = q_acc_1_24 + q_tmp_1_24; + q_acc_1_25 = q_acc_1_25 + q_tmp_1_25; + q_acc_1_26 = q_acc_1_26 + q_tmp_1_26; + q_acc_1_27 = q_acc_1_27 + q_tmp_1_27; + q_acc_1_28 = q_acc_1_28 + q_tmp_1_28; + q_acc_1_29 = q_acc_1_29 + q_tmp_1_29; + q_acc_2_0 = q_acc_2_0 + q_tmp_2_0; + q_acc_2_1 = q_acc_2_1 + q_tmp_2_1; + q_acc_2_2 = q_acc_2_2 + q_tmp_2_2; + q_acc_2_3 = q_acc_2_3 + q_tmp_2_3; + q_acc_2_4 = q_acc_2_4 + q_tmp_2_4; + q_acc_2_5 = q_acc_2_5 + q_tmp_2_5; + q_acc_2_6 = q_acc_2_6 + q_tmp_2_6; + q_acc_2_7 = q_acc_2_7 + q_tmp_2_7; + q_acc_2_8 = q_acc_2_8 + q_tmp_2_8; + q_acc_2_9 = q_acc_2_9 + q_tmp_2_9; + q_acc_2_10 = q_acc_2_10 + q_tmp_2_10; + q_acc_2_11 = q_acc_2_11 + q_tmp_2_11; + q_acc_2_12 = q_acc_2_12 + q_tmp_2_12; + q_acc_2_13 = q_acc_2_13 + q_tmp_2_13; + q_acc_2_14 = q_acc_2_14 + q_tmp_2_14; + q_acc_2_15 = q_acc_2_15 + q_tmp_2_15; + q_acc_2_16 = q_acc_2_16 + q_tmp_2_16; + q_acc_2_17 = q_acc_2_17 + q_tmp_2_17; + q_acc_2_18 = q_acc_2_18 + q_tmp_2_18; + q_acc_2_19 = q_acc_2_19 + q_tmp_2_19; + q_acc_2_20 = q_acc_2_20 + q_tmp_2_20; + q_acc_2_21 = q_acc_2_21 + q_tmp_2_21; + q_acc_2_22 = q_acc_2_22 + q_tmp_2_22; + q_acc_2_23 = q_acc_2_23 + q_tmp_2_23; + q_acc_2_24 = q_acc_2_24 + q_tmp_2_24; + q_acc_2_25 = q_acc_2_25 + q_tmp_2_25; + q_acc_2_26 = q_acc_2_26 + q_tmp_2_26; + q_acc_2_27 = q_acc_2_27 + q_tmp_2_27; + q_acc_2_28 = q_acc_2_28 + q_tmp_2_28; + q_acc_2_29 = q_acc_2_29 + q_tmp_2_29; + q_acc_3_0 = q_acc_3_0 + q_tmp_3_0; + q_acc_3_1 = q_acc_3_1 + q_tmp_3_1; + q_acc_3_2 = q_acc_3_2 + q_tmp_3_2; + q_acc_3_3 = q_acc_3_3 + q_tmp_3_3; + q_acc_3_4 = q_acc_3_4 + q_tmp_3_4; + q_acc_3_5 = q_acc_3_5 + q_tmp_3_5; + q_acc_3_6 = q_acc_3_6 + q_tmp_3_6; + q_acc_3_7 = q_acc_3_7 + q_tmp_3_7; + q_acc_3_8 = q_acc_3_8 + q_tmp_3_8; + q_acc_3_9 = q_acc_3_9 + q_tmp_3_9; + q_acc_3_10 = q_acc_3_10 + q_tmp_3_10; + q_acc_3_11 = q_acc_3_11 + q_tmp_3_11; + q_acc_3_12 = q_acc_3_12 + q_tmp_3_12; + q_acc_3_13 = q_acc_3_13 + q_tmp_3_13; + q_acc_3_14 = q_acc_3_14 + q_tmp_3_14; + q_acc_3_15 = q_acc_3_15 + q_tmp_3_15; + q_acc_3_16 = q_acc_3_16 + q_tmp_3_16; + q_acc_3_17 = q_acc_3_17 + q_tmp_3_17; + q_acc_3_18 = q_acc_3_18 + q_tmp_3_18; + q_acc_3_19 = q_acc_3_19 + q_tmp_3_19; + q_acc_3_20 = q_acc_3_20 + q_tmp_3_20; + q_acc_3_21 = q_acc_3_21 + q_tmp_3_21; + q_acc_3_22 = q_acc_3_22 + q_tmp_3_22; + q_acc_3_23 = q_acc_3_23 + q_tmp_3_23; + q_acc_3_24 = q_acc_3_24 + q_tmp_3_24; + q_acc_3_25 = q_acc_3_25 + q_tmp_3_25; + q_acc_3_26 = q_acc_3_26 + q_tmp_3_26; + q_acc_3_27 = q_acc_3_27 + q_tmp_3_27; + q_acc_3_28 = q_acc_3_28 + q_tmp_3_28; + q_acc_3_29 = q_acc_3_29 + q_tmp_3_29; + } + const real_t elMatVec_0 = q_acc_0_0*src_dof_0 + q_acc_0_1*src_dof_1 + q_acc_0_10*src_dof_10 + q_acc_0_11*src_dof_11 + q_acc_0_12*src_dof_12 + q_acc_0_13*src_dof_13 + q_acc_0_14*src_dof_14 + q_acc_0_15*src_dof_15 + q_acc_0_16*src_dof_16 + q_acc_0_17*src_dof_17 + q_acc_0_18*src_dof_18 + q_acc_0_19*src_dof_19 + q_acc_0_2*src_dof_2 + q_acc_0_20*src_dof_20 + q_acc_0_21*src_dof_21 + q_acc_0_22*src_dof_22 + q_acc_0_23*src_dof_23 + q_acc_0_24*src_dof_24 + q_acc_0_25*src_dof_25 + q_acc_0_26*src_dof_26 + q_acc_0_27*src_dof_27 + q_acc_0_28*src_dof_28 + q_acc_0_29*src_dof_29 + q_acc_0_3*src_dof_3 + q_acc_0_4*src_dof_4 + q_acc_0_5*src_dof_5 + q_acc_0_6*src_dof_6 + q_acc_0_7*src_dof_7 + q_acc_0_8*src_dof_8 + q_acc_0_9*src_dof_9; + const real_t elMatVec_1 = q_acc_1_0*src_dof_0 + q_acc_1_1*src_dof_1 + q_acc_1_10*src_dof_10 + q_acc_1_11*src_dof_11 + q_acc_1_12*src_dof_12 + q_acc_1_13*src_dof_13 + q_acc_1_14*src_dof_14 + q_acc_1_15*src_dof_15 + q_acc_1_16*src_dof_16 + q_acc_1_17*src_dof_17 + q_acc_1_18*src_dof_18 + q_acc_1_19*src_dof_19 + q_acc_1_2*src_dof_2 + q_acc_1_20*src_dof_20 + q_acc_1_21*src_dof_21 + q_acc_1_22*src_dof_22 + q_acc_1_23*src_dof_23 + q_acc_1_24*src_dof_24 + q_acc_1_25*src_dof_25 + q_acc_1_26*src_dof_26 + q_acc_1_27*src_dof_27 + q_acc_1_28*src_dof_28 + q_acc_1_29*src_dof_29 + q_acc_1_3*src_dof_3 + q_acc_1_4*src_dof_4 + q_acc_1_5*src_dof_5 + q_acc_1_6*src_dof_6 + q_acc_1_7*src_dof_7 + q_acc_1_8*src_dof_8 + q_acc_1_9*src_dof_9; + const real_t elMatVec_2 = q_acc_2_0*src_dof_0 + q_acc_2_1*src_dof_1 + q_acc_2_10*src_dof_10 + q_acc_2_11*src_dof_11 + q_acc_2_12*src_dof_12 + q_acc_2_13*src_dof_13 + q_acc_2_14*src_dof_14 + q_acc_2_15*src_dof_15 + q_acc_2_16*src_dof_16 + q_acc_2_17*src_dof_17 + q_acc_2_18*src_dof_18 + q_acc_2_19*src_dof_19 + q_acc_2_2*src_dof_2 + q_acc_2_20*src_dof_20 + q_acc_2_21*src_dof_21 + q_acc_2_22*src_dof_22 + q_acc_2_23*src_dof_23 + q_acc_2_24*src_dof_24 + q_acc_2_25*src_dof_25 + q_acc_2_26*src_dof_26 + q_acc_2_27*src_dof_27 + q_acc_2_28*src_dof_28 + q_acc_2_29*src_dof_29 + q_acc_2_3*src_dof_3 + q_acc_2_4*src_dof_4 + q_acc_2_5*src_dof_5 + q_acc_2_6*src_dof_6 + q_acc_2_7*src_dof_7 + q_acc_2_8*src_dof_8 + q_acc_2_9*src_dof_9; + const real_t elMatVec_3 = q_acc_3_0*src_dof_0 + q_acc_3_1*src_dof_1 + q_acc_3_10*src_dof_10 + q_acc_3_11*src_dof_11 + q_acc_3_12*src_dof_12 + q_acc_3_13*src_dof_13 + q_acc_3_14*src_dof_14 + q_acc_3_15*src_dof_15 + q_acc_3_16*src_dof_16 + q_acc_3_17*src_dof_17 + q_acc_3_18*src_dof_18 + q_acc_3_19*src_dof_19 + q_acc_3_2*src_dof_2 + q_acc_3_20*src_dof_20 + q_acc_3_21*src_dof_21 + q_acc_3_22*src_dof_22 + q_acc_3_23*src_dof_23 + q_acc_3_24*src_dof_24 + q_acc_3_25*src_dof_25 + q_acc_3_26*src_dof_26 + q_acc_3_27*src_dof_27 + q_acc_3_28*src_dof_28 + q_acc_3_29*src_dof_29 + q_acc_3_3*src_dof_3 + q_acc_3_4*src_dof_4 + q_acc_3_5*src_dof_5 + q_acc_3_6*src_dof_6 + q_acc_3_7*src_dof_7 + q_acc_3_8*src_dof_8 + q_acc_3_9*src_dof_9; + _data_dst[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))] = elMatVec_0 + _data_dst[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))]; + _data_dst[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1] = elMatVec_1 + _data_dst[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1]; + _data_dst[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1] = elMatVec_2 + _data_dst[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1]; + _data_dst[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))] = elMatVec_3 + _data_dst[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))]; + } + } + } + } +} +} // namespace operatorgeneration + +} // namespace hyteg diff --git a/operators/grad_rho_by_rho_dot_u/noarch/P2VectorToP1ElementwiseGradRhoByRhoDotUAnnulusMap_apply_macro_2D.cpp b/operators/grad_rho_by_rho_dot_u/noarch/P2VectorToP1ElementwiseGradRhoByRhoDotUAnnulusMap_apply_P2VectorToP1ElementwiseGradRhoByRhoDotUAnnulusMap_macro_2D.cpp similarity index 78% rename from operators/grad_rho_by_rho_dot_u/noarch/P2VectorToP1ElementwiseGradRhoByRhoDotUAnnulusMap_apply_macro_2D.cpp rename to operators/grad_rho_by_rho_dot_u/noarch/P2VectorToP1ElementwiseGradRhoByRhoDotUAnnulusMap_apply_P2VectorToP1ElementwiseGradRhoByRhoDotUAnnulusMap_macro_2D.cpp index 918e7cb31e3e7dbdea61e0a3598737d509ff177f..8e09db723d3b45631a27811bd09c56c7ad2418ed 100644 --- a/operators/grad_rho_by_rho_dot_u/noarch/P2VectorToP1ElementwiseGradRhoByRhoDotUAnnulusMap_apply_macro_2D.cpp +++ b/operators/grad_rho_by_rho_dot_u/noarch/P2VectorToP1ElementwiseGradRhoByRhoDotUAnnulusMap_apply_P2VectorToP1ElementwiseGradRhoByRhoDotUAnnulusMap_macro_2D.cpp @@ -47,6 +47,10 @@ + + + + @@ -60,7 +64,7 @@ namespace hyteg { namespace operatorgeneration { -void P2VectorToP1ElementwiseGradRhoByRhoDotUAnnulusMap::apply_macro_2D( real_t * RESTRICT _data_dst, real_t * RESTRICT _data_rhoEdge, real_t * RESTRICT _data_rhoVertex, real_t * RESTRICT _data_src_edge_0, real_t * RESTRICT _data_src_edge_1, real_t * RESTRICT _data_src_vertex_0, real_t * RESTRICT _data_src_vertex_1, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const +void P2VectorToP1ElementwiseGradRhoByRhoDotUAnnulusMap::apply_P2VectorToP1ElementwiseGradRhoByRhoDotUAnnulusMap_macro_2D( real_t * RESTRICT _data_dst, real_t * RESTRICT _data_rhoEdge, real_t * RESTRICT _data_rhoVertex, real_t * RESTRICT _data_src_edge_0, real_t * RESTRICT _data_src_edge_1, real_t * RESTRICT _data_src_vertex_0, real_t * RESTRICT _data_src_vertex_1, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const { { const real_t _data_q_w [] = {-0.28125, 0.26041666666666669, 0.26041666666666669, 0.26041666666666669}; @@ -69,33 +73,55 @@ void P2VectorToP1ElementwiseGradRhoByRhoDotUAnnulusMap::apply_macro_2D( real_t * const real_t _data_q_p_1 [] = {0.33333333333333331, 0.59999999999999998, 0.20000000000000001, 0.20000000000000001}; + const real_t tmp_coords_jac_0_GRAY = 1.0 / (micro_edges_per_macro_edge_float)*1.0; + const real_t p_affine_const_0_0_GRAY = macro_vertex_coord_id_0comp0; + const real_t p_affine_const_0_1_GRAY = macro_vertex_coord_id_0comp1; + const real_t p_affine_const_1_0_GRAY = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0); + const real_t p_affine_const_1_1_GRAY = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1); + const real_t p_affine_const_2_0_GRAY = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0); + const real_t p_affine_const_2_1_GRAY = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1); + const real_t jac_affine_0_0_GRAY = -p_affine_const_0_0_GRAY + p_affine_const_1_0_GRAY; + const real_t jac_affine_0_1_GRAY = -p_affine_const_0_0_GRAY + p_affine_const_2_0_GRAY; + const real_t jac_affine_1_0_GRAY = -p_affine_const_0_1_GRAY + p_affine_const_1_1_GRAY; + const real_t jac_affine_1_1_GRAY = -p_affine_const_0_1_GRAY + p_affine_const_2_1_GRAY; + const real_t tmp_coords_jac_1_GRAY = jac_affine_0_0_GRAY*jac_affine_1_1_GRAY - jac_affine_0_1_GRAY*jac_affine_1_0_GRAY; + const real_t tmp_coords_jac_2_GRAY = 1.0 / (tmp_coords_jac_1_GRAY); + const real_t jac_affine_inv_0_0_GRAY = jac_affine_1_1_GRAY*tmp_coords_jac_2_GRAY; + const real_t jac_affine_inv_0_1_GRAY = -jac_affine_0_1_GRAY*tmp_coords_jac_2_GRAY; + const real_t jac_affine_inv_1_0_GRAY = -jac_affine_1_0_GRAY*tmp_coords_jac_2_GRAY; + const real_t jac_affine_inv_1_1_GRAY = jac_affine_0_0_GRAY*tmp_coords_jac_2_GRAY; + const real_t abs_det_jac_affine_GRAY = abs(tmp_coords_jac_1_GRAY); + const real_t tmp_qloop_1 = -rayVertex_1 + thrVertex_1; + const real_t tmp_qloop_7 = rayVertex_0 - thrVertex_0; + const real_t tmp_qloop_8 = -tmp_qloop_7; + const real_t tmp_qloop_9 = 1.0 / (tmp_qloop_1*(-rayVertex_0 + refVertex_0) - tmp_qloop_8*(-rayVertex_1 + refVertex_1)); + const real_t tmp_qloop_10 = -radRayVertex + radRefVertex; + const real_t tmp_qloop_11 = tmp_qloop_10*tmp_qloop_9; + const real_t tmp_qloop_12 = tmp_qloop_11*1.0; { /* FaceType.GRAY */ - const real_t tmp_coords_jac_0_GRAY = 1.0 / (micro_edges_per_macro_edge_float)*1.0; - const real_t p_affine_const_0_0_GRAY = macro_vertex_coord_id_0comp0; - const real_t p_affine_const_0_1_GRAY = macro_vertex_coord_id_0comp1; - const real_t p_affine_const_1_0_GRAY = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0); - const real_t p_affine_const_1_1_GRAY = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1); - const real_t p_affine_const_2_0_GRAY = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0); - const real_t p_affine_const_2_1_GRAY = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1); - const real_t jac_affine_0_0_GRAY = -p_affine_const_0_0_GRAY + p_affine_const_1_0_GRAY; - const real_t jac_affine_0_1_GRAY = -p_affine_const_0_0_GRAY + p_affine_const_2_0_GRAY; - const real_t jac_affine_1_0_GRAY = -p_affine_const_0_1_GRAY + p_affine_const_1_1_GRAY; - const real_t jac_affine_1_1_GRAY = -p_affine_const_0_1_GRAY + p_affine_const_2_1_GRAY; - const real_t tmp_coords_jac_1_GRAY = 1.0 / (jac_affine_0_0_GRAY*jac_affine_1_1_GRAY - jac_affine_0_1_GRAY*jac_affine_1_0_GRAY); - const real_t jac_affine_inv_0_0_GRAY = jac_affine_1_1_GRAY*tmp_coords_jac_1_GRAY; - const real_t jac_affine_inv_0_1_GRAY = -jac_affine_0_1_GRAY*tmp_coords_jac_1_GRAY; - const real_t jac_affine_inv_1_0_GRAY = -jac_affine_1_0_GRAY*tmp_coords_jac_1_GRAY; - const real_t jac_affine_inv_1_1_GRAY = jac_affine_0_0_GRAY*tmp_coords_jac_1_GRAY; for (int64_t ctr_1 = 0; ctr_1 < micro_edges_per_macro_edge; ctr_1 += 1) for (int64_t ctr_0 = 0; ctr_0 < -ctr_1 + micro_edges_per_macro_edge; ctr_0 += 1) { - const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*((real_t)(ctr_1)); - const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*((real_t)(ctr_1)); - const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*((real_t)(ctr_1)); - const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*((real_t)(ctr_1)); - const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*((real_t)(ctr_1 + 1)); - const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*((real_t)(ctr_1 + 1)); + + const int64_t phantom_ctr_0 = ctr_0; + real_t _data_float_loop_ctr_array_dim_0[4]; + _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0; + _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1; + _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2; + _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3; + real_t _data_float_loop_ctr_array_dim_1[4]; + _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1; + + const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]; + const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]; + const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]; + const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]; + const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0; const real_t src_dof_0 = _data_src_vertex_0[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))]; const real_t src_dof_1 = _data_src_vertex_0[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]; const real_t src_dof_2 = _data_src_vertex_0[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]; @@ -153,18 +179,11 @@ void P2VectorToP1ElementwiseGradRhoByRhoDotUAnnulusMap::apply_macro_2D( real_t * for (int64_t q = 0; q < 4; q += 1) { const real_t tmp_qloop_0 = p_affine_0_0 + (-p_affine_0_0 + p_affine_1_0)*_data_q_p_0[q] + (-p_affine_0_0 + p_affine_2_0)*_data_q_p_1[q]; - const real_t tmp_qloop_1 = -rayVertex_1 + thrVertex_1; const real_t tmp_qloop_2 = (tmp_qloop_0*tmp_qloop_0); const real_t tmp_qloop_3 = p_affine_0_1 + (-p_affine_0_1 + p_affine_1_1)*_data_q_p_0[q] + (-p_affine_0_1 + p_affine_2_1)*_data_q_p_1[q]; const real_t tmp_qloop_4 = (tmp_qloop_3*tmp_qloop_3); const real_t tmp_qloop_5 = tmp_qloop_2 + tmp_qloop_4; const real_t tmp_qloop_6 = pow(tmp_qloop_5, -0.50000000000000000); - const real_t tmp_qloop_7 = rayVertex_0 - thrVertex_0; - const real_t tmp_qloop_8 = -tmp_qloop_7; - const real_t tmp_qloop_9 = 1.0 / (tmp_qloop_1*(-rayVertex_0 + refVertex_0) - tmp_qloop_8*(-rayVertex_1 + refVertex_1)); - const real_t tmp_qloop_10 = -radRayVertex + radRefVertex; - const real_t tmp_qloop_11 = tmp_qloop_10*tmp_qloop_9; - const real_t tmp_qloop_12 = tmp_qloop_11*1.0; const real_t tmp_qloop_13 = tmp_qloop_12*tmp_qloop_6; const real_t tmp_qloop_14 = tmp_qloop_1*tmp_qloop_13; const real_t tmp_qloop_15 = pow(tmp_qloop_5, -1.5000000000000000); @@ -203,11 +222,6 @@ void P2VectorToP1ElementwiseGradRhoByRhoDotUAnnulusMap::apply_macro_2D( real_t * const real_t tmp_qloop_51 = -tmp_qloop_38 + tmp_qloop_41*-4.0 + tmp_qloop_44; const real_t tmp_qloop_52 = tmp_qloop_39*4.0; const real_t tmp_qloop_53 = tmp_qloop_37 - tmp_qloop_38 - tmp_qloop_52; - const real_t tmp_qloop_54 = 1.0 / (rho_dof_0*tmp_qloop_43 + rho_dof_1*tmp_qloop_49 + rho_dof_2*tmp_qloop_50 + rho_dof_3*tmp_qloop_38 + rho_dof_4*tmp_qloop_51 + rho_dof_5*tmp_qloop_53)*_data_q_w[q]; - const real_t tmp_qloop_55 = tmp_qloop_54*(1.0 - _data_q_p_0[q] - _data_q_p_1[q]); - const real_t tmp_qloop_59 = tmp_qloop_54*_data_q_p_0[q]; - const real_t tmp_qloop_61 = tmp_qloop_54*_data_q_p_1[q]; - const real_t tmp_qloop_65 = tmp_qloop_37*tmp_qloop_41*tmp_qloop_54; const real_t jac_blending_0_0 = tmp_qloop_0*tmp_qloop_14 + tmp_qloop_18*tmp_qloop_4; const real_t jac_blending_0_1 = -tmp_qloop_0*tmp_qloop_15*tmp_qloop_16*tmp_qloop_3 - tmp_qloop_0*tmp_qloop_19; const real_t jac_blending_1_0 = tmp_qloop_14*tmp_qloop_3 - tmp_qloop_20*tmp_qloop_3; @@ -215,6 +229,11 @@ void P2VectorToP1ElementwiseGradRhoByRhoDotUAnnulusMap::apply_macro_2D( real_t * const real_t tmp_qloop_21 = jac_blending_0_0*jac_blending_1_1 - jac_blending_0_1*jac_blending_1_0; const real_t tmp_qloop_22 = 1.0 / (tmp_qloop_21); const real_t abs_det_jac_blending = tmp_qloop_21; + const real_t tmp_qloop_54 = abs_det_jac_affine_GRAY*abs_det_jac_blending*1.0 / (rho_dof_0*tmp_qloop_43 + rho_dof_1*tmp_qloop_49 + rho_dof_2*tmp_qloop_50 + rho_dof_3*tmp_qloop_38 + rho_dof_4*tmp_qloop_51 + rho_dof_5*tmp_qloop_53)*_data_q_w[q]; + const real_t tmp_qloop_55 = tmp_qloop_54*(1.0 - _data_q_p_0[q] - _data_q_p_1[q]); + const real_t tmp_qloop_59 = tmp_qloop_54*_data_q_p_0[q]; + const real_t tmp_qloop_61 = tmp_qloop_54*_data_q_p_1[q]; + const real_t tmp_qloop_65 = tmp_qloop_37*tmp_qloop_41*tmp_qloop_54; const real_t jac_blending_inv_0_0 = jac_blending_1_1*tmp_qloop_22; const real_t jac_blending_inv_0_1 = -jac_blending_0_1*tmp_qloop_22; const real_t jac_blending_inv_1_0 = -jac_blending_1_0*tmp_qloop_22; @@ -316,37 +335,52 @@ void P2VectorToP1ElementwiseGradRhoByRhoDotUAnnulusMap::apply_macro_2D( real_t * _data_dst[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))] = elMatVec_2 + _data_dst[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]; } } + const real_t tmp_coords_jac_0_BLUE = 1.0 / (micro_edges_per_macro_edge_float)*1.0; + const real_t tmp_coords_jac_1_BLUE = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0); + const real_t tmp_coords_jac_2_BLUE = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1); + const real_t tmp_coords_jac_3_BLUE = tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0); + const real_t tmp_coords_jac_4_BLUE = tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1); + const real_t p_affine_const_0_0_BLUE = tmp_coords_jac_1_BLUE; + const real_t p_affine_const_0_1_BLUE = tmp_coords_jac_2_BLUE; + const real_t p_affine_const_1_0_BLUE = macro_vertex_coord_id_0comp0 + tmp_coords_jac_3_BLUE; + const real_t p_affine_const_1_1_BLUE = macro_vertex_coord_id_0comp1 + tmp_coords_jac_4_BLUE; + const real_t p_affine_const_2_0_BLUE = tmp_coords_jac_1_BLUE + tmp_coords_jac_3_BLUE; + const real_t p_affine_const_2_1_BLUE = tmp_coords_jac_2_BLUE + tmp_coords_jac_4_BLUE; + const real_t jac_affine_0_0_BLUE = -p_affine_const_0_0_BLUE + p_affine_const_1_0_BLUE; + const real_t jac_affine_0_1_BLUE = -p_affine_const_0_0_BLUE + p_affine_const_2_0_BLUE; + const real_t jac_affine_1_0_BLUE = -p_affine_const_0_1_BLUE + p_affine_const_1_1_BLUE; + const real_t jac_affine_1_1_BLUE = -p_affine_const_0_1_BLUE + p_affine_const_2_1_BLUE; + const real_t tmp_coords_jac_5_BLUE = jac_affine_0_0_BLUE*jac_affine_1_1_BLUE - jac_affine_0_1_BLUE*jac_affine_1_0_BLUE; + const real_t tmp_coords_jac_6_BLUE = 1.0 / (tmp_coords_jac_5_BLUE); + const real_t jac_affine_inv_0_0_BLUE = jac_affine_1_1_BLUE*tmp_coords_jac_6_BLUE; + const real_t jac_affine_inv_0_1_BLUE = -jac_affine_0_1_BLUE*tmp_coords_jac_6_BLUE; + const real_t jac_affine_inv_1_0_BLUE = -jac_affine_1_0_BLUE*tmp_coords_jac_6_BLUE; + const real_t jac_affine_inv_1_1_BLUE = jac_affine_0_0_BLUE*tmp_coords_jac_6_BLUE; + const real_t abs_det_jac_affine_BLUE = abs(tmp_coords_jac_5_BLUE); { /* FaceType.BLUE */ - const real_t tmp_coords_jac_0_BLUE = 1.0 / (micro_edges_per_macro_edge_float)*1.0; - const real_t tmp_coords_jac_1_BLUE = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0); - const real_t tmp_coords_jac_2_BLUE = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1); - const real_t tmp_coords_jac_3_BLUE = tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0); - const real_t tmp_coords_jac_4_BLUE = tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1); - const real_t p_affine_const_0_0_BLUE = tmp_coords_jac_1_BLUE; - const real_t p_affine_const_0_1_BLUE = tmp_coords_jac_2_BLUE; - const real_t p_affine_const_1_0_BLUE = macro_vertex_coord_id_0comp0 + tmp_coords_jac_3_BLUE; - const real_t p_affine_const_1_1_BLUE = macro_vertex_coord_id_0comp1 + tmp_coords_jac_4_BLUE; - const real_t p_affine_const_2_0_BLUE = tmp_coords_jac_1_BLUE + tmp_coords_jac_3_BLUE; - const real_t p_affine_const_2_1_BLUE = tmp_coords_jac_2_BLUE + tmp_coords_jac_4_BLUE; - const real_t jac_affine_0_0_BLUE = -p_affine_const_0_0_BLUE + p_affine_const_1_0_BLUE; - const real_t jac_affine_0_1_BLUE = -p_affine_const_0_0_BLUE + p_affine_const_2_0_BLUE; - const real_t jac_affine_1_0_BLUE = -p_affine_const_0_1_BLUE + p_affine_const_1_1_BLUE; - const real_t jac_affine_1_1_BLUE = -p_affine_const_0_1_BLUE + p_affine_const_2_1_BLUE; - const real_t tmp_coords_jac_5_BLUE = 1.0 / (jac_affine_0_0_BLUE*jac_affine_1_1_BLUE - jac_affine_0_1_BLUE*jac_affine_1_0_BLUE); - const real_t jac_affine_inv_0_0_BLUE = jac_affine_1_1_BLUE*tmp_coords_jac_5_BLUE; - const real_t jac_affine_inv_0_1_BLUE = -jac_affine_0_1_BLUE*tmp_coords_jac_5_BLUE; - const real_t jac_affine_inv_1_0_BLUE = -jac_affine_1_0_BLUE*tmp_coords_jac_5_BLUE; - const real_t jac_affine_inv_1_1_BLUE = jac_affine_0_0_BLUE*tmp_coords_jac_5_BLUE; for (int64_t ctr_1 = 0; ctr_1 < micro_edges_per_macro_edge; ctr_1 += 1) for (int64_t ctr_0 = 0; ctr_0 < -ctr_1 + micro_edges_per_macro_edge - 1; ctr_0 += 1) { - const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*((real_t)(ctr_1)); - const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*((real_t)(ctr_1)); - const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*((real_t)(ctr_1 + 1)); - const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*((real_t)(ctr_1 + 1)); - const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*((real_t)(ctr_1 + 1)); - const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*((real_t)(ctr_1 + 1)); + + const int64_t phantom_ctr_0 = ctr_0; + real_t _data_float_loop_ctr_array_dim_0[4]; + _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0; + _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1; + _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2; + _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3; + real_t _data_float_loop_ctr_array_dim_1[4]; + _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1; + + const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]; + const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]; + const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0; const real_t src_dof_0 = _data_src_vertex_0[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]; const real_t src_dof_1 = _data_src_vertex_0[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]; const real_t src_dof_2 = _data_src_vertex_0[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1]; @@ -404,18 +438,11 @@ void P2VectorToP1ElementwiseGradRhoByRhoDotUAnnulusMap::apply_macro_2D( real_t * for (int64_t q = 0; q < 4; q += 1) { const real_t tmp_qloop_0 = p_affine_0_0 + (-p_affine_0_0 + p_affine_1_0)*_data_q_p_0[q] + (-p_affine_0_0 + p_affine_2_0)*_data_q_p_1[q]; - const real_t tmp_qloop_1 = -rayVertex_1 + thrVertex_1; const real_t tmp_qloop_2 = (tmp_qloop_0*tmp_qloop_0); const real_t tmp_qloop_3 = p_affine_0_1 + (-p_affine_0_1 + p_affine_1_1)*_data_q_p_0[q] + (-p_affine_0_1 + p_affine_2_1)*_data_q_p_1[q]; const real_t tmp_qloop_4 = (tmp_qloop_3*tmp_qloop_3); const real_t tmp_qloop_5 = tmp_qloop_2 + tmp_qloop_4; const real_t tmp_qloop_6 = pow(tmp_qloop_5, -0.50000000000000000); - const real_t tmp_qloop_7 = rayVertex_0 - thrVertex_0; - const real_t tmp_qloop_8 = -tmp_qloop_7; - const real_t tmp_qloop_9 = 1.0 / (tmp_qloop_1*(-rayVertex_0 + refVertex_0) - tmp_qloop_8*(-rayVertex_1 + refVertex_1)); - const real_t tmp_qloop_10 = -radRayVertex + radRefVertex; - const real_t tmp_qloop_11 = tmp_qloop_10*tmp_qloop_9; - const real_t tmp_qloop_12 = tmp_qloop_11*1.0; const real_t tmp_qloop_13 = tmp_qloop_12*tmp_qloop_6; const real_t tmp_qloop_14 = tmp_qloop_1*tmp_qloop_13; const real_t tmp_qloop_15 = pow(tmp_qloop_5, -1.5000000000000000); @@ -454,11 +481,6 @@ void P2VectorToP1ElementwiseGradRhoByRhoDotUAnnulusMap::apply_macro_2D( real_t * const real_t tmp_qloop_51 = -tmp_qloop_38 + tmp_qloop_41*-4.0 + tmp_qloop_44; const real_t tmp_qloop_52 = tmp_qloop_39*4.0; const real_t tmp_qloop_53 = tmp_qloop_37 - tmp_qloop_38 - tmp_qloop_52; - const real_t tmp_qloop_54 = 1.0 / (rho_dof_0*tmp_qloop_43 + rho_dof_1*tmp_qloop_49 + rho_dof_2*tmp_qloop_50 + rho_dof_3*tmp_qloop_38 + rho_dof_4*tmp_qloop_51 + rho_dof_5*tmp_qloop_53)*_data_q_w[q]; - const real_t tmp_qloop_55 = tmp_qloop_54*(1.0 - _data_q_p_0[q] - _data_q_p_1[q]); - const real_t tmp_qloop_59 = tmp_qloop_54*_data_q_p_0[q]; - const real_t tmp_qloop_61 = tmp_qloop_54*_data_q_p_1[q]; - const real_t tmp_qloop_65 = tmp_qloop_37*tmp_qloop_41*tmp_qloop_54; const real_t jac_blending_0_0 = tmp_qloop_0*tmp_qloop_14 + tmp_qloop_18*tmp_qloop_4; const real_t jac_blending_0_1 = -tmp_qloop_0*tmp_qloop_15*tmp_qloop_16*tmp_qloop_3 - tmp_qloop_0*tmp_qloop_19; const real_t jac_blending_1_0 = tmp_qloop_14*tmp_qloop_3 - tmp_qloop_20*tmp_qloop_3; @@ -466,6 +488,11 @@ void P2VectorToP1ElementwiseGradRhoByRhoDotUAnnulusMap::apply_macro_2D( real_t * const real_t tmp_qloop_21 = jac_blending_0_0*jac_blending_1_1 - jac_blending_0_1*jac_blending_1_0; const real_t tmp_qloop_22 = 1.0 / (tmp_qloop_21); const real_t abs_det_jac_blending = tmp_qloop_21; + const real_t tmp_qloop_54 = abs_det_jac_affine_BLUE*abs_det_jac_blending*1.0 / (rho_dof_0*tmp_qloop_43 + rho_dof_1*tmp_qloop_49 + rho_dof_2*tmp_qloop_50 + rho_dof_3*tmp_qloop_38 + rho_dof_4*tmp_qloop_51 + rho_dof_5*tmp_qloop_53)*_data_q_w[q]; + const real_t tmp_qloop_55 = tmp_qloop_54*(1.0 - _data_q_p_0[q] - _data_q_p_1[q]); + const real_t tmp_qloop_59 = tmp_qloop_54*_data_q_p_0[q]; + const real_t tmp_qloop_61 = tmp_qloop_54*_data_q_p_1[q]; + const real_t tmp_qloop_65 = tmp_qloop_37*tmp_qloop_41*tmp_qloop_54; const real_t jac_blending_inv_0_0 = jac_blending_1_1*tmp_qloop_22; const real_t jac_blending_inv_0_1 = -jac_blending_0_1*tmp_qloop_22; const real_t jac_blending_inv_1_0 = -jac_blending_1_0*tmp_qloop_22; diff --git a/operators/grad_rho_by_rho_dot_u/noarch/P2VectorToP1ElementwiseGradRhoByRhoDotUAnnulusMap_toMatrix_macro_2D.cpp b/operators/grad_rho_by_rho_dot_u/noarch/P2VectorToP1ElementwiseGradRhoByRhoDotUAnnulusMap_toMatrix_P2VectorToP1ElementwiseGradRhoByRhoDotUAnnulusMap_macro_2D.cpp similarity index 80% rename from operators/grad_rho_by_rho_dot_u/noarch/P2VectorToP1ElementwiseGradRhoByRhoDotUAnnulusMap_toMatrix_macro_2D.cpp rename to operators/grad_rho_by_rho_dot_u/noarch/P2VectorToP1ElementwiseGradRhoByRhoDotUAnnulusMap_toMatrix_P2VectorToP1ElementwiseGradRhoByRhoDotUAnnulusMap_macro_2D.cpp index 832f73059301307e4fbdb377338cce11c1565ac0..e9465ae26d2d5fc56eeb37fbdfadeb678947fad7 100644 --- a/operators/grad_rho_by_rho_dot_u/noarch/P2VectorToP1ElementwiseGradRhoByRhoDotUAnnulusMap_toMatrix_macro_2D.cpp +++ b/operators/grad_rho_by_rho_dot_u/noarch/P2VectorToP1ElementwiseGradRhoByRhoDotUAnnulusMap_toMatrix_P2VectorToP1ElementwiseGradRhoByRhoDotUAnnulusMap_macro_2D.cpp @@ -47,6 +47,10 @@ + + + + @@ -60,7 +64,7 @@ namespace hyteg { namespace operatorgeneration { -void P2VectorToP1ElementwiseGradRhoByRhoDotUAnnulusMap::toMatrix_macro_2D( idx_t * RESTRICT _data_dst, real_t * RESTRICT _data_rhoEdge, real_t * RESTRICT _data_rhoVertex, idx_t * RESTRICT _data_src_edge_0, idx_t * RESTRICT _data_src_edge_1, idx_t * RESTRICT _data_src_vertex_0, idx_t * RESTRICT _data_src_vertex_1, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const +void P2VectorToP1ElementwiseGradRhoByRhoDotUAnnulusMap::toMatrix_P2VectorToP1ElementwiseGradRhoByRhoDotUAnnulusMap_macro_2D( idx_t * RESTRICT _data_dst, real_t * RESTRICT _data_rhoEdge, real_t * RESTRICT _data_rhoVertex, idx_t * RESTRICT _data_src_edge_0, idx_t * RESTRICT _data_src_edge_1, idx_t * RESTRICT _data_src_vertex_0, idx_t * RESTRICT _data_src_vertex_1, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const { { const real_t _data_q_w [] = {-0.28125, 0.26041666666666669, 0.26041666666666669, 0.26041666666666669}; @@ -69,33 +73,55 @@ void P2VectorToP1ElementwiseGradRhoByRhoDotUAnnulusMap::toMatrix_macro_2D( idx_t const real_t _data_q_p_1 [] = {0.33333333333333331, 0.59999999999999998, 0.20000000000000001, 0.20000000000000001}; + const real_t tmp_coords_jac_0_GRAY = 1.0 / (micro_edges_per_macro_edge_float)*1.0; + const real_t p_affine_const_0_0_GRAY = macro_vertex_coord_id_0comp0; + const real_t p_affine_const_0_1_GRAY = macro_vertex_coord_id_0comp1; + const real_t p_affine_const_1_0_GRAY = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0); + const real_t p_affine_const_1_1_GRAY = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1); + const real_t p_affine_const_2_0_GRAY = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0); + const real_t p_affine_const_2_1_GRAY = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1); + const real_t jac_affine_0_0_GRAY = -p_affine_const_0_0_GRAY + p_affine_const_1_0_GRAY; + const real_t jac_affine_0_1_GRAY = -p_affine_const_0_0_GRAY + p_affine_const_2_0_GRAY; + const real_t jac_affine_1_0_GRAY = -p_affine_const_0_1_GRAY + p_affine_const_1_1_GRAY; + const real_t jac_affine_1_1_GRAY = -p_affine_const_0_1_GRAY + p_affine_const_2_1_GRAY; + const real_t tmp_coords_jac_1_GRAY = jac_affine_0_0_GRAY*jac_affine_1_1_GRAY - jac_affine_0_1_GRAY*jac_affine_1_0_GRAY; + const real_t tmp_coords_jac_2_GRAY = 1.0 / (tmp_coords_jac_1_GRAY); + const real_t jac_affine_inv_0_0_GRAY = jac_affine_1_1_GRAY*tmp_coords_jac_2_GRAY; + const real_t jac_affine_inv_0_1_GRAY = -jac_affine_0_1_GRAY*tmp_coords_jac_2_GRAY; + const real_t jac_affine_inv_1_0_GRAY = -jac_affine_1_0_GRAY*tmp_coords_jac_2_GRAY; + const real_t jac_affine_inv_1_1_GRAY = jac_affine_0_0_GRAY*tmp_coords_jac_2_GRAY; + const real_t abs_det_jac_affine_GRAY = abs(tmp_coords_jac_1_GRAY); + const real_t tmp_qloop_1 = -rayVertex_1 + thrVertex_1; + const real_t tmp_qloop_7 = rayVertex_0 - thrVertex_0; + const real_t tmp_qloop_8 = -tmp_qloop_7; + const real_t tmp_qloop_9 = 1.0 / (tmp_qloop_1*(-rayVertex_0 + refVertex_0) - tmp_qloop_8*(-rayVertex_1 + refVertex_1)); + const real_t tmp_qloop_10 = -radRayVertex + radRefVertex; + const real_t tmp_qloop_11 = tmp_qloop_10*tmp_qloop_9; + const real_t tmp_qloop_12 = tmp_qloop_11*1.0; { /* FaceType.GRAY */ - const real_t tmp_coords_jac_0_GRAY = 1.0 / (micro_edges_per_macro_edge_float)*1.0; - const real_t p_affine_const_0_0_GRAY = macro_vertex_coord_id_0comp0; - const real_t p_affine_const_0_1_GRAY = macro_vertex_coord_id_0comp1; - const real_t p_affine_const_1_0_GRAY = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0); - const real_t p_affine_const_1_1_GRAY = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1); - const real_t p_affine_const_2_0_GRAY = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0); - const real_t p_affine_const_2_1_GRAY = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1); - const real_t jac_affine_0_0_GRAY = -p_affine_const_0_0_GRAY + p_affine_const_1_0_GRAY; - const real_t jac_affine_0_1_GRAY = -p_affine_const_0_0_GRAY + p_affine_const_2_0_GRAY; - const real_t jac_affine_1_0_GRAY = -p_affine_const_0_1_GRAY + p_affine_const_1_1_GRAY; - const real_t jac_affine_1_1_GRAY = -p_affine_const_0_1_GRAY + p_affine_const_2_1_GRAY; - const real_t tmp_coords_jac_1_GRAY = 1.0 / (jac_affine_0_0_GRAY*jac_affine_1_1_GRAY - jac_affine_0_1_GRAY*jac_affine_1_0_GRAY); - const real_t jac_affine_inv_0_0_GRAY = jac_affine_1_1_GRAY*tmp_coords_jac_1_GRAY; - const real_t jac_affine_inv_0_1_GRAY = -jac_affine_0_1_GRAY*tmp_coords_jac_1_GRAY; - const real_t jac_affine_inv_1_0_GRAY = -jac_affine_1_0_GRAY*tmp_coords_jac_1_GRAY; - const real_t jac_affine_inv_1_1_GRAY = jac_affine_0_0_GRAY*tmp_coords_jac_1_GRAY; for (int64_t ctr_1 = 0; ctr_1 < micro_edges_per_macro_edge; ctr_1 += 1) for (int64_t ctr_0 = 0; ctr_0 < -ctr_1 + micro_edges_per_macro_edge; ctr_0 += 1) { - const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*((real_t)(ctr_1)); - const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*((real_t)(ctr_1)); - const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*((real_t)(ctr_1)); - const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*((real_t)(ctr_1)); - const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*((real_t)(ctr_1 + 1)); - const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*((real_t)(ctr_1 + 1)); + + const int64_t phantom_ctr_0 = ctr_0; + real_t _data_float_loop_ctr_array_dim_0[4]; + _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0; + _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1; + _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2; + _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3; + real_t _data_float_loop_ctr_array_dim_1[4]; + _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1; + + const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]; + const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]; + const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]; + const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]; + const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0; const real_t rho_dof_0 = _data_rhoVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))]; const real_t rho_dof_1 = _data_rhoVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]; const real_t rho_dof_2 = _data_rhoVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]; @@ -141,18 +167,11 @@ void P2VectorToP1ElementwiseGradRhoByRhoDotUAnnulusMap::toMatrix_macro_2D( idx_t for (int64_t q = 0; q < 4; q += 1) { const real_t tmp_qloop_0 = p_affine_0_0 + (-p_affine_0_0 + p_affine_1_0)*_data_q_p_0[q] + (-p_affine_0_0 + p_affine_2_0)*_data_q_p_1[q]; - const real_t tmp_qloop_1 = -rayVertex_1 + thrVertex_1; const real_t tmp_qloop_2 = (tmp_qloop_0*tmp_qloop_0); const real_t tmp_qloop_3 = p_affine_0_1 + (-p_affine_0_1 + p_affine_1_1)*_data_q_p_0[q] + (-p_affine_0_1 + p_affine_2_1)*_data_q_p_1[q]; const real_t tmp_qloop_4 = (tmp_qloop_3*tmp_qloop_3); const real_t tmp_qloop_5 = tmp_qloop_2 + tmp_qloop_4; const real_t tmp_qloop_6 = pow(tmp_qloop_5, -0.50000000000000000); - const real_t tmp_qloop_7 = rayVertex_0 - thrVertex_0; - const real_t tmp_qloop_8 = -tmp_qloop_7; - const real_t tmp_qloop_9 = 1.0 / (tmp_qloop_1*(-rayVertex_0 + refVertex_0) - tmp_qloop_8*(-rayVertex_1 + refVertex_1)); - const real_t tmp_qloop_10 = -radRayVertex + radRefVertex; - const real_t tmp_qloop_11 = tmp_qloop_10*tmp_qloop_9; - const real_t tmp_qloop_12 = tmp_qloop_11*1.0; const real_t tmp_qloop_13 = tmp_qloop_12*tmp_qloop_6; const real_t tmp_qloop_14 = tmp_qloop_1*tmp_qloop_13; const real_t tmp_qloop_15 = pow(tmp_qloop_5, -1.5000000000000000); @@ -191,11 +210,6 @@ void P2VectorToP1ElementwiseGradRhoByRhoDotUAnnulusMap::toMatrix_macro_2D( idx_t const real_t tmp_qloop_51 = -tmp_qloop_38 + tmp_qloop_41*-4.0 + tmp_qloop_44; const real_t tmp_qloop_52 = tmp_qloop_39*4.0; const real_t tmp_qloop_53 = tmp_qloop_37 - tmp_qloop_38 - tmp_qloop_52; - const real_t tmp_qloop_54 = 1.0 / (rho_dof_0*tmp_qloop_43 + rho_dof_1*tmp_qloop_49 + rho_dof_2*tmp_qloop_50 + rho_dof_3*tmp_qloop_38 + rho_dof_4*tmp_qloop_51 + rho_dof_5*tmp_qloop_53)*_data_q_w[q]; - const real_t tmp_qloop_55 = tmp_qloop_54*(1.0 - _data_q_p_0[q] - _data_q_p_1[q]); - const real_t tmp_qloop_59 = tmp_qloop_54*_data_q_p_0[q]; - const real_t tmp_qloop_61 = tmp_qloop_54*_data_q_p_1[q]; - const real_t tmp_qloop_65 = tmp_qloop_37*tmp_qloop_41*tmp_qloop_54; const real_t jac_blending_0_0 = tmp_qloop_0*tmp_qloop_14 + tmp_qloop_18*tmp_qloop_4; const real_t jac_blending_0_1 = -tmp_qloop_0*tmp_qloop_15*tmp_qloop_16*tmp_qloop_3 - tmp_qloop_0*tmp_qloop_19; const real_t jac_blending_1_0 = tmp_qloop_14*tmp_qloop_3 - tmp_qloop_20*tmp_qloop_3; @@ -203,6 +217,11 @@ void P2VectorToP1ElementwiseGradRhoByRhoDotUAnnulusMap::toMatrix_macro_2D( idx_t const real_t tmp_qloop_21 = jac_blending_0_0*jac_blending_1_1 - jac_blending_0_1*jac_blending_1_0; const real_t tmp_qloop_22 = 1.0 / (tmp_qloop_21); const real_t abs_det_jac_blending = tmp_qloop_21; + const real_t tmp_qloop_54 = abs_det_jac_affine_GRAY*abs_det_jac_blending*1.0 / (rho_dof_0*tmp_qloop_43 + rho_dof_1*tmp_qloop_49 + rho_dof_2*tmp_qloop_50 + rho_dof_3*tmp_qloop_38 + rho_dof_4*tmp_qloop_51 + rho_dof_5*tmp_qloop_53)*_data_q_w[q]; + const real_t tmp_qloop_55 = tmp_qloop_54*(1.0 - _data_q_p_0[q] - _data_q_p_1[q]); + const real_t tmp_qloop_59 = tmp_qloop_54*_data_q_p_0[q]; + const real_t tmp_qloop_61 = tmp_qloop_54*_data_q_p_1[q]; + const real_t tmp_qloop_65 = tmp_qloop_37*tmp_qloop_41*tmp_qloop_54; const real_t jac_blending_inv_0_0 = jac_blending_1_1*tmp_qloop_22; const real_t jac_blending_inv_0_1 = -jac_blending_0_1*tmp_qloop_22; const real_t jac_blending_inv_1_0 = -jac_blending_1_0*tmp_qloop_22; @@ -398,37 +417,52 @@ void P2VectorToP1ElementwiseGradRhoByRhoDotUAnnulusMap::toMatrix_macro_2D( idx_t mat->addValues( _data_rowIdx, _data_colIdx, _data_mat ); } } + const real_t tmp_coords_jac_0_BLUE = 1.0 / (micro_edges_per_macro_edge_float)*1.0; + const real_t tmp_coords_jac_1_BLUE = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0); + const real_t tmp_coords_jac_2_BLUE = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1); + const real_t tmp_coords_jac_3_BLUE = tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0); + const real_t tmp_coords_jac_4_BLUE = tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1); + const real_t p_affine_const_0_0_BLUE = tmp_coords_jac_1_BLUE; + const real_t p_affine_const_0_1_BLUE = tmp_coords_jac_2_BLUE; + const real_t p_affine_const_1_0_BLUE = macro_vertex_coord_id_0comp0 + tmp_coords_jac_3_BLUE; + const real_t p_affine_const_1_1_BLUE = macro_vertex_coord_id_0comp1 + tmp_coords_jac_4_BLUE; + const real_t p_affine_const_2_0_BLUE = tmp_coords_jac_1_BLUE + tmp_coords_jac_3_BLUE; + const real_t p_affine_const_2_1_BLUE = tmp_coords_jac_2_BLUE + tmp_coords_jac_4_BLUE; + const real_t jac_affine_0_0_BLUE = -p_affine_const_0_0_BLUE + p_affine_const_1_0_BLUE; + const real_t jac_affine_0_1_BLUE = -p_affine_const_0_0_BLUE + p_affine_const_2_0_BLUE; + const real_t jac_affine_1_0_BLUE = -p_affine_const_0_1_BLUE + p_affine_const_1_1_BLUE; + const real_t jac_affine_1_1_BLUE = -p_affine_const_0_1_BLUE + p_affine_const_2_1_BLUE; + const real_t tmp_coords_jac_5_BLUE = jac_affine_0_0_BLUE*jac_affine_1_1_BLUE - jac_affine_0_1_BLUE*jac_affine_1_0_BLUE; + const real_t tmp_coords_jac_6_BLUE = 1.0 / (tmp_coords_jac_5_BLUE); + const real_t jac_affine_inv_0_0_BLUE = jac_affine_1_1_BLUE*tmp_coords_jac_6_BLUE; + const real_t jac_affine_inv_0_1_BLUE = -jac_affine_0_1_BLUE*tmp_coords_jac_6_BLUE; + const real_t jac_affine_inv_1_0_BLUE = -jac_affine_1_0_BLUE*tmp_coords_jac_6_BLUE; + const real_t jac_affine_inv_1_1_BLUE = jac_affine_0_0_BLUE*tmp_coords_jac_6_BLUE; + const real_t abs_det_jac_affine_BLUE = abs(tmp_coords_jac_5_BLUE); { /* FaceType.BLUE */ - const real_t tmp_coords_jac_0_BLUE = 1.0 / (micro_edges_per_macro_edge_float)*1.0; - const real_t tmp_coords_jac_1_BLUE = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0); - const real_t tmp_coords_jac_2_BLUE = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1); - const real_t tmp_coords_jac_3_BLUE = tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0); - const real_t tmp_coords_jac_4_BLUE = tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1); - const real_t p_affine_const_0_0_BLUE = tmp_coords_jac_1_BLUE; - const real_t p_affine_const_0_1_BLUE = tmp_coords_jac_2_BLUE; - const real_t p_affine_const_1_0_BLUE = macro_vertex_coord_id_0comp0 + tmp_coords_jac_3_BLUE; - const real_t p_affine_const_1_1_BLUE = macro_vertex_coord_id_0comp1 + tmp_coords_jac_4_BLUE; - const real_t p_affine_const_2_0_BLUE = tmp_coords_jac_1_BLUE + tmp_coords_jac_3_BLUE; - const real_t p_affine_const_2_1_BLUE = tmp_coords_jac_2_BLUE + tmp_coords_jac_4_BLUE; - const real_t jac_affine_0_0_BLUE = -p_affine_const_0_0_BLUE + p_affine_const_1_0_BLUE; - const real_t jac_affine_0_1_BLUE = -p_affine_const_0_0_BLUE + p_affine_const_2_0_BLUE; - const real_t jac_affine_1_0_BLUE = -p_affine_const_0_1_BLUE + p_affine_const_1_1_BLUE; - const real_t jac_affine_1_1_BLUE = -p_affine_const_0_1_BLUE + p_affine_const_2_1_BLUE; - const real_t tmp_coords_jac_5_BLUE = 1.0 / (jac_affine_0_0_BLUE*jac_affine_1_1_BLUE - jac_affine_0_1_BLUE*jac_affine_1_0_BLUE); - const real_t jac_affine_inv_0_0_BLUE = jac_affine_1_1_BLUE*tmp_coords_jac_5_BLUE; - const real_t jac_affine_inv_0_1_BLUE = -jac_affine_0_1_BLUE*tmp_coords_jac_5_BLUE; - const real_t jac_affine_inv_1_0_BLUE = -jac_affine_1_0_BLUE*tmp_coords_jac_5_BLUE; - const real_t jac_affine_inv_1_1_BLUE = jac_affine_0_0_BLUE*tmp_coords_jac_5_BLUE; for (int64_t ctr_1 = 0; ctr_1 < micro_edges_per_macro_edge; ctr_1 += 1) for (int64_t ctr_0 = 0; ctr_0 < -ctr_1 + micro_edges_per_macro_edge - 1; ctr_0 += 1) { - const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*((real_t)(ctr_1)); - const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*((real_t)(ctr_1)); - const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*((real_t)(ctr_1 + 1)); - const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*((real_t)(ctr_1 + 1)); - const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*((real_t)(ctr_1 + 1)); - const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*((real_t)(ctr_1 + 1)); + + const int64_t phantom_ctr_0 = ctr_0; + real_t _data_float_loop_ctr_array_dim_0[4]; + _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0; + _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1; + _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2; + _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3; + real_t _data_float_loop_ctr_array_dim_1[4]; + _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1; + + const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]; + const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]; + const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0; const real_t rho_dof_0 = _data_rhoVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]; const real_t rho_dof_1 = _data_rhoVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]; const real_t rho_dof_2 = _data_rhoVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1]; @@ -474,18 +508,11 @@ void P2VectorToP1ElementwiseGradRhoByRhoDotUAnnulusMap::toMatrix_macro_2D( idx_t for (int64_t q = 0; q < 4; q += 1) { const real_t tmp_qloop_0 = p_affine_0_0 + (-p_affine_0_0 + p_affine_1_0)*_data_q_p_0[q] + (-p_affine_0_0 + p_affine_2_0)*_data_q_p_1[q]; - const real_t tmp_qloop_1 = -rayVertex_1 + thrVertex_1; const real_t tmp_qloop_2 = (tmp_qloop_0*tmp_qloop_0); const real_t tmp_qloop_3 = p_affine_0_1 + (-p_affine_0_1 + p_affine_1_1)*_data_q_p_0[q] + (-p_affine_0_1 + p_affine_2_1)*_data_q_p_1[q]; const real_t tmp_qloop_4 = (tmp_qloop_3*tmp_qloop_3); const real_t tmp_qloop_5 = tmp_qloop_2 + tmp_qloop_4; const real_t tmp_qloop_6 = pow(tmp_qloop_5, -0.50000000000000000); - const real_t tmp_qloop_7 = rayVertex_0 - thrVertex_0; - const real_t tmp_qloop_8 = -tmp_qloop_7; - const real_t tmp_qloop_9 = 1.0 / (tmp_qloop_1*(-rayVertex_0 + refVertex_0) - tmp_qloop_8*(-rayVertex_1 + refVertex_1)); - const real_t tmp_qloop_10 = -radRayVertex + radRefVertex; - const real_t tmp_qloop_11 = tmp_qloop_10*tmp_qloop_9; - const real_t tmp_qloop_12 = tmp_qloop_11*1.0; const real_t tmp_qloop_13 = tmp_qloop_12*tmp_qloop_6; const real_t tmp_qloop_14 = tmp_qloop_1*tmp_qloop_13; const real_t tmp_qloop_15 = pow(tmp_qloop_5, -1.5000000000000000); @@ -524,11 +551,6 @@ void P2VectorToP1ElementwiseGradRhoByRhoDotUAnnulusMap::toMatrix_macro_2D( idx_t const real_t tmp_qloop_51 = -tmp_qloop_38 + tmp_qloop_41*-4.0 + tmp_qloop_44; const real_t tmp_qloop_52 = tmp_qloop_39*4.0; const real_t tmp_qloop_53 = tmp_qloop_37 - tmp_qloop_38 - tmp_qloop_52; - const real_t tmp_qloop_54 = 1.0 / (rho_dof_0*tmp_qloop_43 + rho_dof_1*tmp_qloop_49 + rho_dof_2*tmp_qloop_50 + rho_dof_3*tmp_qloop_38 + rho_dof_4*tmp_qloop_51 + rho_dof_5*tmp_qloop_53)*_data_q_w[q]; - const real_t tmp_qloop_55 = tmp_qloop_54*(1.0 - _data_q_p_0[q] - _data_q_p_1[q]); - const real_t tmp_qloop_59 = tmp_qloop_54*_data_q_p_0[q]; - const real_t tmp_qloop_61 = tmp_qloop_54*_data_q_p_1[q]; - const real_t tmp_qloop_65 = tmp_qloop_37*tmp_qloop_41*tmp_qloop_54; const real_t jac_blending_0_0 = tmp_qloop_0*tmp_qloop_14 + tmp_qloop_18*tmp_qloop_4; const real_t jac_blending_0_1 = -tmp_qloop_0*tmp_qloop_15*tmp_qloop_16*tmp_qloop_3 - tmp_qloop_0*tmp_qloop_19; const real_t jac_blending_1_0 = tmp_qloop_14*tmp_qloop_3 - tmp_qloop_20*tmp_qloop_3; @@ -536,6 +558,11 @@ void P2VectorToP1ElementwiseGradRhoByRhoDotUAnnulusMap::toMatrix_macro_2D( idx_t const real_t tmp_qloop_21 = jac_blending_0_0*jac_blending_1_1 - jac_blending_0_1*jac_blending_1_0; const real_t tmp_qloop_22 = 1.0 / (tmp_qloop_21); const real_t abs_det_jac_blending = tmp_qloop_21; + const real_t tmp_qloop_54 = abs_det_jac_affine_BLUE*abs_det_jac_blending*1.0 / (rho_dof_0*tmp_qloop_43 + rho_dof_1*tmp_qloop_49 + rho_dof_2*tmp_qloop_50 + rho_dof_3*tmp_qloop_38 + rho_dof_4*tmp_qloop_51 + rho_dof_5*tmp_qloop_53)*_data_q_w[q]; + const real_t tmp_qloop_55 = tmp_qloop_54*(1.0 - _data_q_p_0[q] - _data_q_p_1[q]); + const real_t tmp_qloop_59 = tmp_qloop_54*_data_q_p_0[q]; + const real_t tmp_qloop_61 = tmp_qloop_54*_data_q_p_1[q]; + const real_t tmp_qloop_65 = tmp_qloop_37*tmp_qloop_41*tmp_qloop_54; const real_t jac_blending_inv_0_0 = jac_blending_1_1*tmp_qloop_22; const real_t jac_blending_inv_0_1 = -jac_blending_0_1*tmp_qloop_22; const real_t jac_blending_inv_1_0 = -jac_blending_1_0*tmp_qloop_22; diff --git a/operators/grad_rho_by_rho_dot_u/noarch/P2VectorToP1ElementwiseGradRhoByRhoDotUIcosahedralShellMap_apply_macro_3D.cpp b/operators/grad_rho_by_rho_dot_u/noarch/P2VectorToP1ElementwiseGradRhoByRhoDotUIcosahedralShellMap_apply_P2VectorToP1ElementwiseGradRhoByRhoDotUIcosahedralShellMap_macro_3D.cpp similarity index 82% rename from operators/grad_rho_by_rho_dot_u/noarch/P2VectorToP1ElementwiseGradRhoByRhoDotUIcosahedralShellMap_apply_macro_3D.cpp rename to operators/grad_rho_by_rho_dot_u/noarch/P2VectorToP1ElementwiseGradRhoByRhoDotUIcosahedralShellMap_apply_P2VectorToP1ElementwiseGradRhoByRhoDotUIcosahedralShellMap_macro_3D.cpp index 77f66b1a81cb4ba60ba7ca5e129e052a1f951eac..dc411f79a2c4bf009639ca53bc6fa405cd790316 100644 --- a/operators/grad_rho_by_rho_dot_u/noarch/P2VectorToP1ElementwiseGradRhoByRhoDotUIcosahedralShellMap_apply_macro_3D.cpp +++ b/operators/grad_rho_by_rho_dot_u/noarch/P2VectorToP1ElementwiseGradRhoByRhoDotUIcosahedralShellMap_apply_P2VectorToP1ElementwiseGradRhoByRhoDotUIcosahedralShellMap_macro_3D.cpp @@ -47,6 +47,10 @@ + + + + @@ -60,7 +64,7 @@ namespace hyteg { namespace operatorgeneration { -void P2VectorToP1ElementwiseGradRhoByRhoDotUIcosahedralShellMap::apply_macro_3D( real_t * RESTRICT _data_dst, real_t * RESTRICT _data_rhoEdge, real_t * RESTRICT _data_rhoVertex, real_t * RESTRICT _data_src_edge_0, real_t * RESTRICT _data_src_edge_1, real_t * RESTRICT _data_src_edge_2, real_t * RESTRICT _data_src_vertex_0, real_t * RESTRICT _data_src_vertex_1, real_t * RESTRICT _data_src_vertex_2, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const +void P2VectorToP1ElementwiseGradRhoByRhoDotUIcosahedralShellMap::apply_P2VectorToP1ElementwiseGradRhoByRhoDotUIcosahedralShellMap_macro_3D( real_t * RESTRICT _data_dst, real_t * RESTRICT _data_rhoEdge, real_t * RESTRICT _data_rhoVertex, real_t * RESTRICT _data_src_edge_0, real_t * RESTRICT _data_src_edge_1, real_t * RESTRICT _data_src_edge_2, real_t * RESTRICT _data_src_vertex_0, real_t * RESTRICT _data_src_vertex_1, real_t * RESTRICT _data_src_vertex_2, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const { { const real_t _data_q_w [] = {-0.1333333333333333, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983}; @@ -71,61 +75,110 @@ void P2VectorToP1ElementwiseGradRhoByRhoDotUIcosahedralShellMap::apply_macro_3D( const real_t _data_q_p_2 [] = {0.25, 0.5, 0.16666666666666666, 0.16666666666666666, 0.16666666666666666}; + const real_t tmp_coords_jac_0_WHITE_UP = 1.0 / (micro_edges_per_macro_edge_float)*1.0; + const real_t p_affine_const_0_0_WHITE_UP = macro_vertex_coord_id_0comp0; + const real_t p_affine_const_0_1_WHITE_UP = macro_vertex_coord_id_0comp1; + const real_t p_affine_const_0_2_WHITE_UP = macro_vertex_coord_id_0comp2; + const real_t p_affine_const_1_0_WHITE_UP = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_WHITE_UP*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0); + const real_t p_affine_const_1_1_WHITE_UP = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_WHITE_UP*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1); + const real_t p_affine_const_1_2_WHITE_UP = macro_vertex_coord_id_0comp2 + tmp_coords_jac_0_WHITE_UP*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2); + const real_t p_affine_const_2_0_WHITE_UP = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_WHITE_UP*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0); + const real_t p_affine_const_2_1_WHITE_UP = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_WHITE_UP*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1); + const real_t p_affine_const_2_2_WHITE_UP = macro_vertex_coord_id_0comp2 + tmp_coords_jac_0_WHITE_UP*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2); + const real_t p_affine_const_3_0_WHITE_UP = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_WHITE_UP*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0); + const real_t p_affine_const_3_1_WHITE_UP = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_WHITE_UP*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1); + const real_t p_affine_const_3_2_WHITE_UP = macro_vertex_coord_id_0comp2 + tmp_coords_jac_0_WHITE_UP*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2); + const real_t jac_affine_0_0_WHITE_UP = -p_affine_const_0_0_WHITE_UP + p_affine_const_1_0_WHITE_UP; + const real_t jac_affine_0_1_WHITE_UP = -p_affine_const_0_0_WHITE_UP + p_affine_const_2_0_WHITE_UP; + const real_t jac_affine_0_2_WHITE_UP = -p_affine_const_0_0_WHITE_UP + p_affine_const_3_0_WHITE_UP; + const real_t jac_affine_1_0_WHITE_UP = -p_affine_const_0_1_WHITE_UP + p_affine_const_1_1_WHITE_UP; + const real_t jac_affine_1_1_WHITE_UP = -p_affine_const_0_1_WHITE_UP + p_affine_const_2_1_WHITE_UP; + const real_t tmp_coords_jac_5_WHITE_UP = jac_affine_0_2_WHITE_UP*jac_affine_1_1_WHITE_UP; + const real_t jac_affine_1_2_WHITE_UP = -p_affine_const_0_1_WHITE_UP + p_affine_const_3_1_WHITE_UP; + const real_t tmp_coords_jac_3_WHITE_UP = jac_affine_0_1_WHITE_UP*jac_affine_1_2_WHITE_UP; + const real_t jac_affine_2_0_WHITE_UP = -p_affine_const_0_2_WHITE_UP + p_affine_const_1_2_WHITE_UP; + const real_t jac_affine_2_1_WHITE_UP = -p_affine_const_0_2_WHITE_UP + p_affine_const_2_2_WHITE_UP; + const real_t tmp_coords_jac_2_WHITE_UP = jac_affine_1_2_WHITE_UP*jac_affine_2_1_WHITE_UP; + const real_t jac_affine_2_2_WHITE_UP = -p_affine_const_0_2_WHITE_UP + p_affine_const_3_2_WHITE_UP; + const real_t tmp_coords_jac_1_WHITE_UP = jac_affine_1_1_WHITE_UP*jac_affine_2_2_WHITE_UP; + const real_t tmp_coords_jac_4_WHITE_UP = jac_affine_0_1_WHITE_UP*jac_affine_2_2_WHITE_UP; + const real_t tmp_coords_jac_6_WHITE_UP = jac_affine_0_0_WHITE_UP*tmp_coords_jac_1_WHITE_UP - jac_affine_0_0_WHITE_UP*tmp_coords_jac_2_WHITE_UP + jac_affine_0_2_WHITE_UP*jac_affine_1_0_WHITE_UP*jac_affine_2_1_WHITE_UP - jac_affine_1_0_WHITE_UP*tmp_coords_jac_4_WHITE_UP + jac_affine_2_0_WHITE_UP*tmp_coords_jac_3_WHITE_UP - jac_affine_2_0_WHITE_UP*tmp_coords_jac_5_WHITE_UP; + const real_t tmp_coords_jac_7_WHITE_UP = 1.0 / (tmp_coords_jac_6_WHITE_UP); + const real_t jac_affine_inv_0_0_WHITE_UP = tmp_coords_jac_7_WHITE_UP*(tmp_coords_jac_1_WHITE_UP - tmp_coords_jac_2_WHITE_UP); + const real_t jac_affine_inv_0_1_WHITE_UP = tmp_coords_jac_7_WHITE_UP*(jac_affine_0_2_WHITE_UP*jac_affine_2_1_WHITE_UP - tmp_coords_jac_4_WHITE_UP); + const real_t jac_affine_inv_0_2_WHITE_UP = tmp_coords_jac_7_WHITE_UP*(tmp_coords_jac_3_WHITE_UP - tmp_coords_jac_5_WHITE_UP); + const real_t jac_affine_inv_1_0_WHITE_UP = tmp_coords_jac_7_WHITE_UP*(-jac_affine_1_0_WHITE_UP*jac_affine_2_2_WHITE_UP + jac_affine_1_2_WHITE_UP*jac_affine_2_0_WHITE_UP); + const real_t jac_affine_inv_1_1_WHITE_UP = tmp_coords_jac_7_WHITE_UP*(jac_affine_0_0_WHITE_UP*jac_affine_2_2_WHITE_UP - jac_affine_0_2_WHITE_UP*jac_affine_2_0_WHITE_UP); + const real_t jac_affine_inv_1_2_WHITE_UP = tmp_coords_jac_7_WHITE_UP*(-jac_affine_0_0_WHITE_UP*jac_affine_1_2_WHITE_UP + jac_affine_0_2_WHITE_UP*jac_affine_1_0_WHITE_UP); + const real_t jac_affine_inv_2_0_WHITE_UP = tmp_coords_jac_7_WHITE_UP*(jac_affine_1_0_WHITE_UP*jac_affine_2_1_WHITE_UP - jac_affine_1_1_WHITE_UP*jac_affine_2_0_WHITE_UP); + const real_t jac_affine_inv_2_1_WHITE_UP = tmp_coords_jac_7_WHITE_UP*(-jac_affine_0_0_WHITE_UP*jac_affine_2_1_WHITE_UP + jac_affine_0_1_WHITE_UP*jac_affine_2_0_WHITE_UP); + const real_t jac_affine_inv_2_2_WHITE_UP = tmp_coords_jac_7_WHITE_UP*(jac_affine_0_0_WHITE_UP*jac_affine_1_1_WHITE_UP - jac_affine_0_1_WHITE_UP*jac_affine_1_0_WHITE_UP); + const real_t abs_det_jac_affine_WHITE_UP = abs(tmp_coords_jac_6_WHITE_UP); + const real_t tmp_qloop_0 = rayVertex_1 - refVertex_1; + const real_t tmp_qloop_1 = -rayVertex_0; + const real_t tmp_qloop_2 = -forVertex_0 - tmp_qloop_1; + const real_t tmp_qloop_3 = rayVertex_2 - thrVertex_2; + const real_t tmp_qloop_4 = tmp_qloop_2*tmp_qloop_3; + const real_t tmp_qloop_5 = rayVertex_2 - refVertex_2; + const real_t tmp_qloop_6 = rayVertex_1 - thrVertex_1; + const real_t tmp_qloop_7 = tmp_qloop_2*tmp_qloop_6; + const real_t tmp_qloop_8 = -rayVertex_1; + const real_t tmp_qloop_9 = -forVertex_1 - tmp_qloop_8; + const real_t tmp_qloop_10 = rayVertex_0 - thrVertex_0; + const real_t tmp_qloop_11 = rayVertex_0 - refVertex_0; + const real_t tmp_qloop_12 = -rayVertex_2; + const real_t tmp_qloop_13 = -forVertex_2 - tmp_qloop_12; + const real_t tmp_qloop_14 = tmp_qloop_3*tmp_qloop_9; + const real_t tmp_qloop_15 = tmp_qloop_10*tmp_qloop_13; + const real_t tmp_qloop_16 = -tmp_qloop_0*tmp_qloop_15 + tmp_qloop_0*tmp_qloop_4 + tmp_qloop_10*tmp_qloop_5*tmp_qloop_9 + tmp_qloop_11*tmp_qloop_13*tmp_qloop_6 - tmp_qloop_11*tmp_qloop_14 - tmp_qloop_5*tmp_qloop_7; + const real_t tmp_qloop_17 = 1.0 / (tmp_qloop_16); + const real_t tmp_qloop_20 = radRayVertex*tmp_qloop_16; + const real_t tmp_qloop_21 = radRayVertex - radRefVertex; + const real_t tmp_qloop_32 = -tmp_qloop_13*tmp_qloop_6 + tmp_qloop_14; + const real_t tmp_qloop_33 = -tmp_qloop_21*tmp_qloop_32; + const real_t tmp_qloop_39 = tmp_qloop_10*tmp_qloop_13 - tmp_qloop_4; + const real_t tmp_qloop_40 = tmp_qloop_21*tmp_qloop_39; + const real_t tmp_qloop_45 = -tmp_qloop_10*tmp_qloop_9 + tmp_qloop_7; + const real_t tmp_qloop_46 = -tmp_qloop_21*tmp_qloop_45; + const real_t tmp_qloop_53 = -tmp_qloop_21*tmp_qloop_39; + const real_t tmp_qloop_68 = tmp_qloop_21*tmp_qloop_32; + const real_t tmp_qloop_72 = tmp_qloop_21*tmp_qloop_45; { /* CellType.WHITE_UP */ - const real_t tmp_coords_jac_0_WHITE_UP = 1.0 / (micro_edges_per_macro_edge_float)*1.0; - const real_t p_affine_const_0_0_WHITE_UP = macro_vertex_coord_id_0comp0; - const real_t p_affine_const_0_1_WHITE_UP = macro_vertex_coord_id_0comp1; - const real_t p_affine_const_0_2_WHITE_UP = macro_vertex_coord_id_0comp2; - const real_t p_affine_const_1_0_WHITE_UP = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_WHITE_UP*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0); - const real_t p_affine_const_1_1_WHITE_UP = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_WHITE_UP*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1); - const real_t p_affine_const_1_2_WHITE_UP = macro_vertex_coord_id_0comp2 + tmp_coords_jac_0_WHITE_UP*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2); - const real_t p_affine_const_2_0_WHITE_UP = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_WHITE_UP*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0); - const real_t p_affine_const_2_1_WHITE_UP = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_WHITE_UP*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1); - const real_t p_affine_const_2_2_WHITE_UP = macro_vertex_coord_id_0comp2 + tmp_coords_jac_0_WHITE_UP*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2); - const real_t p_affine_const_3_0_WHITE_UP = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_WHITE_UP*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0); - const real_t p_affine_const_3_1_WHITE_UP = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_WHITE_UP*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1); - const real_t p_affine_const_3_2_WHITE_UP = macro_vertex_coord_id_0comp2 + tmp_coords_jac_0_WHITE_UP*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2); - const real_t jac_affine_0_0_WHITE_UP = -p_affine_const_0_0_WHITE_UP + p_affine_const_1_0_WHITE_UP; - const real_t jac_affine_0_1_WHITE_UP = -p_affine_const_0_0_WHITE_UP + p_affine_const_2_0_WHITE_UP; - const real_t jac_affine_0_2_WHITE_UP = -p_affine_const_0_0_WHITE_UP + p_affine_const_3_0_WHITE_UP; - const real_t jac_affine_1_0_WHITE_UP = -p_affine_const_0_1_WHITE_UP + p_affine_const_1_1_WHITE_UP; - const real_t jac_affine_1_1_WHITE_UP = -p_affine_const_0_1_WHITE_UP + p_affine_const_2_1_WHITE_UP; - const real_t tmp_coords_jac_5_WHITE_UP = jac_affine_0_2_WHITE_UP*jac_affine_1_1_WHITE_UP; - const real_t jac_affine_1_2_WHITE_UP = -p_affine_const_0_1_WHITE_UP + p_affine_const_3_1_WHITE_UP; - const real_t tmp_coords_jac_3_WHITE_UP = jac_affine_0_1_WHITE_UP*jac_affine_1_2_WHITE_UP; - const real_t jac_affine_2_0_WHITE_UP = -p_affine_const_0_2_WHITE_UP + p_affine_const_1_2_WHITE_UP; - const real_t jac_affine_2_1_WHITE_UP = -p_affine_const_0_2_WHITE_UP + p_affine_const_2_2_WHITE_UP; - const real_t tmp_coords_jac_2_WHITE_UP = jac_affine_1_2_WHITE_UP*jac_affine_2_1_WHITE_UP; - const real_t jac_affine_2_2_WHITE_UP = -p_affine_const_0_2_WHITE_UP + p_affine_const_3_2_WHITE_UP; - const real_t tmp_coords_jac_1_WHITE_UP = jac_affine_1_1_WHITE_UP*jac_affine_2_2_WHITE_UP; - const real_t tmp_coords_jac_4_WHITE_UP = jac_affine_0_1_WHITE_UP*jac_affine_2_2_WHITE_UP; - const real_t tmp_coords_jac_6_WHITE_UP = 1.0 / (jac_affine_0_0_WHITE_UP*tmp_coords_jac_1_WHITE_UP - jac_affine_0_0_WHITE_UP*tmp_coords_jac_2_WHITE_UP + jac_affine_0_2_WHITE_UP*jac_affine_1_0_WHITE_UP*jac_affine_2_1_WHITE_UP - jac_affine_1_0_WHITE_UP*tmp_coords_jac_4_WHITE_UP + jac_affine_2_0_WHITE_UP*tmp_coords_jac_3_WHITE_UP - jac_affine_2_0_WHITE_UP*tmp_coords_jac_5_WHITE_UP); - const real_t jac_affine_inv_0_0_WHITE_UP = tmp_coords_jac_6_WHITE_UP*(tmp_coords_jac_1_WHITE_UP - tmp_coords_jac_2_WHITE_UP); - const real_t jac_affine_inv_0_1_WHITE_UP = tmp_coords_jac_6_WHITE_UP*(jac_affine_0_2_WHITE_UP*jac_affine_2_1_WHITE_UP - tmp_coords_jac_4_WHITE_UP); - const real_t jac_affine_inv_0_2_WHITE_UP = tmp_coords_jac_6_WHITE_UP*(tmp_coords_jac_3_WHITE_UP - tmp_coords_jac_5_WHITE_UP); - const real_t jac_affine_inv_1_0_WHITE_UP = tmp_coords_jac_6_WHITE_UP*(-jac_affine_1_0_WHITE_UP*jac_affine_2_2_WHITE_UP + jac_affine_1_2_WHITE_UP*jac_affine_2_0_WHITE_UP); - const real_t jac_affine_inv_1_1_WHITE_UP = tmp_coords_jac_6_WHITE_UP*(jac_affine_0_0_WHITE_UP*jac_affine_2_2_WHITE_UP - jac_affine_0_2_WHITE_UP*jac_affine_2_0_WHITE_UP); - const real_t jac_affine_inv_1_2_WHITE_UP = tmp_coords_jac_6_WHITE_UP*(-jac_affine_0_0_WHITE_UP*jac_affine_1_2_WHITE_UP + jac_affine_0_2_WHITE_UP*jac_affine_1_0_WHITE_UP); - const real_t jac_affine_inv_2_0_WHITE_UP = tmp_coords_jac_6_WHITE_UP*(jac_affine_1_0_WHITE_UP*jac_affine_2_1_WHITE_UP - jac_affine_1_1_WHITE_UP*jac_affine_2_0_WHITE_UP); - const real_t jac_affine_inv_2_1_WHITE_UP = tmp_coords_jac_6_WHITE_UP*(-jac_affine_0_0_WHITE_UP*jac_affine_2_1_WHITE_UP + jac_affine_0_1_WHITE_UP*jac_affine_2_0_WHITE_UP); - const real_t jac_affine_inv_2_2_WHITE_UP = tmp_coords_jac_6_WHITE_UP*(jac_affine_0_0_WHITE_UP*jac_affine_1_1_WHITE_UP - jac_affine_0_1_WHITE_UP*jac_affine_1_0_WHITE_UP); for (int64_t ctr_2 = 0; ctr_2 < micro_edges_per_macro_edge; ctr_2 += 1) for (int64_t ctr_1 = 0; ctr_1 < -ctr_2 + micro_edges_per_macro_edge; ctr_1 += 1) for (int64_t ctr_0 = 0; ctr_0 < -ctr_1 - ctr_2 + micro_edges_per_macro_edge; ctr_0 += 1) { - const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*((real_t)(ctr_1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*((real_t)(ctr_2)); - const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*((real_t)(ctr_1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*((real_t)(ctr_2)); - const real_t p_affine_0_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*((real_t)(ctr_1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*((real_t)(ctr_2)); - const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*((real_t)(ctr_1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*((real_t)(ctr_2)); - const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*((real_t)(ctr_1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*((real_t)(ctr_2)); - const real_t p_affine_1_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*((real_t)(ctr_1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*((real_t)(ctr_2)); - const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*((real_t)(ctr_1 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*((real_t)(ctr_2)); - const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*((real_t)(ctr_1 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*((real_t)(ctr_2)); - const real_t p_affine_2_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*((real_t)(ctr_1 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*((real_t)(ctr_2)); - const real_t p_affine_3_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*((real_t)(ctr_1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*((real_t)(ctr_2 + 1)); - const real_t p_affine_3_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*((real_t)(ctr_1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*((real_t)(ctr_2 + 1)); - const real_t p_affine_3_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*((real_t)(ctr_1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*((real_t)(ctr_2 + 1)); + + const int64_t phantom_ctr_0 = ctr_0; + real_t _data_float_loop_ctr_array_dim_0[4]; + _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0; + _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1; + _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2; + _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3; + real_t _data_float_loop_ctr_array_dim_1[4]; + _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1; + real_t _data_float_loop_ctr_array_dim_2[4]; + _data_float_loop_ctr_array_dim_2[0] = (real_t) ctr_2; + _data_float_loop_ctr_array_dim_2[1] = (real_t) ctr_2; + _data_float_loop_ctr_array_dim_2[2] = (real_t) ctr_2; + _data_float_loop_ctr_array_dim_2[3] = (real_t) ctr_2; + + const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]; + const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]; + const real_t p_affine_0_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]; + const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]; + const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]; + const real_t p_affine_1_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]; + const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]; + const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]; + const real_t p_affine_2_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]; + const real_t p_affine_3_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_3_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_3_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0; const real_t src_dof_0 = _data_src_vertex_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))]; const real_t src_dof_1 = _data_src_vertex_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1]; const real_t src_dof_2 = _data_src_vertex_0[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))]; @@ -288,28 +341,8 @@ void P2VectorToP1ElementwiseGradRhoByRhoDotUIcosahedralShellMap::apply_macro_3D( real_t q_acc_3_29 = 0.0; for (int64_t q = 0; q < 5; q += 1) { - const real_t tmp_qloop_0 = rayVertex_1 - refVertex_1; - const real_t tmp_qloop_1 = -rayVertex_0; - const real_t tmp_qloop_2 = -forVertex_0 - tmp_qloop_1; - const real_t tmp_qloop_3 = rayVertex_2 - thrVertex_2; - const real_t tmp_qloop_4 = tmp_qloop_2*tmp_qloop_3; - const real_t tmp_qloop_5 = rayVertex_2 - refVertex_2; - const real_t tmp_qloop_6 = rayVertex_1 - thrVertex_1; - const real_t tmp_qloop_7 = tmp_qloop_2*tmp_qloop_6; - const real_t tmp_qloop_8 = -rayVertex_1; - const real_t tmp_qloop_9 = -forVertex_1 - tmp_qloop_8; - const real_t tmp_qloop_10 = rayVertex_0 - thrVertex_0; - const real_t tmp_qloop_11 = rayVertex_0 - refVertex_0; - const real_t tmp_qloop_12 = -rayVertex_2; - const real_t tmp_qloop_13 = -forVertex_2 - tmp_qloop_12; - const real_t tmp_qloop_14 = tmp_qloop_3*tmp_qloop_9; - const real_t tmp_qloop_15 = tmp_qloop_10*tmp_qloop_13; - const real_t tmp_qloop_16 = -tmp_qloop_0*tmp_qloop_15 + tmp_qloop_0*tmp_qloop_4 + tmp_qloop_10*tmp_qloop_5*tmp_qloop_9 + tmp_qloop_11*tmp_qloop_13*tmp_qloop_6 - tmp_qloop_11*tmp_qloop_14 - tmp_qloop_5*tmp_qloop_7; - const real_t tmp_qloop_17 = 1.0 / (tmp_qloop_16); const real_t tmp_qloop_18 = p_affine_0_0 + (-p_affine_0_0 + p_affine_1_0)*_data_q_p_0[q] + (-p_affine_0_0 + p_affine_2_0)*_data_q_p_1[q] + (-p_affine_0_0 + p_affine_3_0)*_data_q_p_2[q]; const real_t tmp_qloop_19 = (tmp_qloop_18*tmp_qloop_18); - const real_t tmp_qloop_20 = radRayVertex*tmp_qloop_16; - const real_t tmp_qloop_21 = radRayVertex - radRefVertex; const real_t tmp_qloop_22 = p_affine_0_2 + (-p_affine_0_2 + p_affine_1_2)*_data_q_p_0[q] + (-p_affine_0_2 + p_affine_2_2)*_data_q_p_1[q] + (-p_affine_0_2 + p_affine_3_2)*_data_q_p_2[q]; const real_t tmp_qloop_23 = -tmp_qloop_12 - tmp_qloop_22; const real_t tmp_qloop_24 = -tmp_qloop_1 - tmp_qloop_18; @@ -320,28 +353,21 @@ void P2VectorToP1ElementwiseGradRhoByRhoDotUIcosahedralShellMap::apply_macro_3D( const real_t tmp_qloop_29 = (tmp_qloop_25*tmp_qloop_25); const real_t tmp_qloop_30 = (tmp_qloop_22*tmp_qloop_22); const real_t tmp_qloop_31 = tmp_qloop_19 + tmp_qloop_29 + tmp_qloop_30; - const real_t tmp_qloop_32 = -tmp_qloop_13*tmp_qloop_6 + tmp_qloop_14; - const real_t tmp_qloop_33 = -tmp_qloop_21*tmp_qloop_32; const real_t tmp_qloop_34 = tmp_qloop_18*tmp_qloop_33; const real_t tmp_qloop_35 = tmp_qloop_28 + tmp_qloop_34; const real_t tmp_qloop_36 = tmp_qloop_17*(-tmp_qloop_19*tmp_qloop_28 + tmp_qloop_31*tmp_qloop_35); const real_t tmp_qloop_37 = pow(tmp_qloop_31, -1.5000000000000000); const real_t tmp_qloop_38 = tmp_qloop_37*1.0; - const real_t tmp_qloop_39 = tmp_qloop_10*tmp_qloop_13 - tmp_qloop_4; - const real_t tmp_qloop_40 = tmp_qloop_21*tmp_qloop_39; const real_t tmp_qloop_41 = tmp_qloop_31*tmp_qloop_40; const real_t tmp_qloop_42 = -tmp_qloop_25*tmp_qloop_28 - tmp_qloop_41; const real_t tmp_qloop_43 = tmp_qloop_17*tmp_qloop_38; const real_t tmp_qloop_44 = tmp_qloop_42*tmp_qloop_43; - const real_t tmp_qloop_45 = -tmp_qloop_10*tmp_qloop_9 + tmp_qloop_7; - const real_t tmp_qloop_46 = -tmp_qloop_21*tmp_qloop_45; const real_t tmp_qloop_47 = -tmp_qloop_28; const real_t tmp_qloop_48 = tmp_qloop_22*tmp_qloop_47; const real_t tmp_qloop_49 = tmp_qloop_31*tmp_qloop_46 + tmp_qloop_48; const real_t tmp_qloop_50 = tmp_qloop_43*tmp_qloop_49; const real_t tmp_qloop_51 = tmp_qloop_18*tmp_qloop_47 + tmp_qloop_31*tmp_qloop_33; const real_t tmp_qloop_52 = tmp_qloop_43*tmp_qloop_51; - const real_t tmp_qloop_53 = -tmp_qloop_21*tmp_qloop_39; const real_t tmp_qloop_54 = tmp_qloop_25*tmp_qloop_53 + tmp_qloop_28; const real_t tmp_qloop_55 = tmp_qloop_29*tmp_qloop_47 + tmp_qloop_31*tmp_qloop_54; const real_t tmp_qloop_56 = tmp_qloop_22*tmp_qloop_46; @@ -349,11 +375,9 @@ void P2VectorToP1ElementwiseGradRhoByRhoDotUIcosahedralShellMap::apply_macro_3D( const real_t tmp_qloop_58 = tmp_qloop_30*tmp_qloop_47 + tmp_qloop_31*tmp_qloop_57; const real_t tmp_qloop_66 = pow(tmp_qloop_31, -2.5000000000000000)*3.0; const real_t tmp_qloop_67 = tmp_qloop_36*tmp_qloop_66; - const real_t tmp_qloop_68 = tmp_qloop_21*tmp_qloop_32; const real_t tmp_qloop_69 = tmp_qloop_18*2.0; const real_t tmp_qloop_70 = -tmp_qloop_41; const real_t tmp_qloop_71 = tmp_qloop_35*2.0; - const real_t tmp_qloop_72 = tmp_qloop_21*tmp_qloop_45; const real_t tmp_qloop_73 = -tmp_qloop_31*tmp_qloop_72; const real_t tmp_qloop_74 = -tmp_qloop_20 + tmp_qloop_27; const real_t tmp_qloop_75 = tmp_qloop_18*tmp_qloop_68 + tmp_qloop_34*2.0 + tmp_qloop_74; @@ -415,14 +439,7 @@ void P2VectorToP1ElementwiseGradRhoByRhoDotUIcosahedralShellMap::apply_macro_3D( const real_t tmp_qloop_132 = tmp_qloop_109 - tmp_qloop_110 - tmp_qloop_112 - tmp_qloop_131; const real_t tmp_qloop_133 = tmp_qloop_103*4.0; const real_t tmp_qloop_134 = tmp_qloop_111 - tmp_qloop_114 - tmp_qloop_133; - const real_t tmp_qloop_135 = 1.0 / (rho_dof_0*tmp_qloop_115 + rho_dof_1*tmp_qloop_127 + rho_dof_2*tmp_qloop_128 + rho_dof_3*tmp_qloop_129 + rho_dof_4*tmp_qloop_110 + rho_dof_5*tmp_qloop_113 + rho_dof_6*tmp_qloop_112 + rho_dof_7*tmp_qloop_130 + rho_dof_8*tmp_qloop_132 + rho_dof_9*tmp_qloop_134)*_data_q_w[q]; - const real_t tmp_qloop_136 = tmp_qloop_135*(1.0 - _data_q_p_0[q] - _data_q_p_1[q] - _data_q_p_2[q]); - const real_t tmp_qloop_142 = tmp_qloop_135*_data_q_p_0[q]; - const real_t tmp_qloop_144 = tmp_qloop_135*_data_q_p_2[q]; - const real_t tmp_qloop_147 = tmp_qloop_135*_data_q_p_1[q]; const real_t tmp_qloop_158 = tmp_qloop_105*tmp_qloop_111; - const real_t tmp_qloop_159 = tmp_qloop_135*tmp_qloop_158; - const real_t tmp_qloop_161 = tmp_qloop_107*tmp_qloop_135; const real_t jac_blending_0_0 = tmp_qloop_36*tmp_qloop_38; const real_t jac_blending_0_1 = tmp_qloop_18*tmp_qloop_44; const real_t jac_blending_0_2 = tmp_qloop_18*tmp_qloop_50; @@ -440,6 +457,13 @@ void P2VectorToP1ElementwiseGradRhoByRhoDotUIcosahedralShellMap::apply_macro_3D( const real_t tmp_qloop_64 = jac_blending_0_0*tmp_qloop_59 - jac_blending_0_0*tmp_qloop_61 + jac_blending_0_2*jac_blending_1_0*jac_blending_2_1 - jac_blending_1_0*tmp_qloop_62 + jac_blending_2_0*tmp_qloop_60 - jac_blending_2_0*tmp_qloop_63; const real_t tmp_qloop_65 = 1.0 / (tmp_qloop_64); const real_t abs_det_jac_blending = tmp_qloop_64; + const real_t tmp_qloop_135 = abs_det_jac_affine_WHITE_UP*abs_det_jac_blending*1.0 / (rho_dof_0*tmp_qloop_115 + rho_dof_1*tmp_qloop_127 + rho_dof_2*tmp_qloop_128 + rho_dof_3*tmp_qloop_129 + rho_dof_4*tmp_qloop_110 + rho_dof_5*tmp_qloop_113 + rho_dof_6*tmp_qloop_112 + rho_dof_7*tmp_qloop_130 + rho_dof_8*tmp_qloop_132 + rho_dof_9*tmp_qloop_134)*_data_q_w[q]; + const real_t tmp_qloop_136 = tmp_qloop_135*(1.0 - _data_q_p_0[q] - _data_q_p_1[q] - _data_q_p_2[q]); + const real_t tmp_qloop_142 = tmp_qloop_135*_data_q_p_0[q]; + const real_t tmp_qloop_144 = tmp_qloop_135*_data_q_p_2[q]; + const real_t tmp_qloop_147 = tmp_qloop_135*_data_q_p_1[q]; + const real_t tmp_qloop_159 = tmp_qloop_135*tmp_qloop_158; + const real_t tmp_qloop_161 = tmp_qloop_107*tmp_qloop_135; const real_t jac_blending_inv_0_0 = tmp_qloop_65*(tmp_qloop_59 - tmp_qloop_61); const real_t jac_blending_inv_0_1 = tmp_qloop_65*(jac_blending_0_2*jac_blending_2_1 - tmp_qloop_62); const real_t jac_blending_inv_0_2 = tmp_qloop_65*(tmp_qloop_60 - tmp_qloop_63); @@ -749,73 +773,93 @@ void P2VectorToP1ElementwiseGradRhoByRhoDotUIcosahedralShellMap::apply_macro_3D( _data_dst[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))] = elMatVec_3 + _data_dst[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))]; } } + const real_t tmp_coords_jac_0_WHITE_DOWN = 1.0 / (micro_edges_per_macro_edge_float)*1.0; + const real_t tmp_coords_jac_1_WHITE_DOWN = tmp_coords_jac_0_WHITE_DOWN*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0); + const real_t tmp_coords_jac_2_WHITE_DOWN = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_WHITE_DOWN*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0); + const real_t tmp_coords_jac_3_WHITE_DOWN = tmp_coords_jac_1_WHITE_DOWN + tmp_coords_jac_2_WHITE_DOWN; + const real_t tmp_coords_jac_4_WHITE_DOWN = tmp_coords_jac_0_WHITE_DOWN*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1); + const real_t tmp_coords_jac_5_WHITE_DOWN = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_WHITE_DOWN*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1); + const real_t tmp_coords_jac_6_WHITE_DOWN = tmp_coords_jac_4_WHITE_DOWN + tmp_coords_jac_5_WHITE_DOWN; + const real_t tmp_coords_jac_7_WHITE_DOWN = tmp_coords_jac_0_WHITE_DOWN*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2); + const real_t tmp_coords_jac_8_WHITE_DOWN = macro_vertex_coord_id_0comp2 + tmp_coords_jac_0_WHITE_DOWN*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2); + const real_t tmp_coords_jac_9_WHITE_DOWN = tmp_coords_jac_7_WHITE_DOWN + tmp_coords_jac_8_WHITE_DOWN; + const real_t tmp_coords_jac_10_WHITE_DOWN = tmp_coords_jac_0_WHITE_DOWN*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0); + const real_t tmp_coords_jac_11_WHITE_DOWN = tmp_coords_jac_0_WHITE_DOWN*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1); + const real_t tmp_coords_jac_12_WHITE_DOWN = tmp_coords_jac_0_WHITE_DOWN*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2); + const real_t p_affine_const_0_0_WHITE_DOWN = tmp_coords_jac_3_WHITE_DOWN; + const real_t p_affine_const_0_1_WHITE_DOWN = tmp_coords_jac_6_WHITE_DOWN; + const real_t p_affine_const_0_2_WHITE_DOWN = tmp_coords_jac_9_WHITE_DOWN; + const real_t p_affine_const_1_0_WHITE_DOWN = tmp_coords_jac_10_WHITE_DOWN + tmp_coords_jac_2_WHITE_DOWN; + const real_t p_affine_const_1_1_WHITE_DOWN = tmp_coords_jac_11_WHITE_DOWN + tmp_coords_jac_5_WHITE_DOWN; + const real_t p_affine_const_1_2_WHITE_DOWN = tmp_coords_jac_12_WHITE_DOWN + tmp_coords_jac_8_WHITE_DOWN; + const real_t p_affine_const_2_0_WHITE_DOWN = macro_vertex_coord_id_0comp0 + tmp_coords_jac_10_WHITE_DOWN + tmp_coords_jac_1_WHITE_DOWN; + const real_t p_affine_const_2_1_WHITE_DOWN = macro_vertex_coord_id_0comp1 + tmp_coords_jac_11_WHITE_DOWN + tmp_coords_jac_4_WHITE_DOWN; + const real_t p_affine_const_2_2_WHITE_DOWN = macro_vertex_coord_id_0comp2 + tmp_coords_jac_12_WHITE_DOWN + tmp_coords_jac_7_WHITE_DOWN; + const real_t p_affine_const_3_0_WHITE_DOWN = tmp_coords_jac_10_WHITE_DOWN + tmp_coords_jac_3_WHITE_DOWN; + const real_t p_affine_const_3_1_WHITE_DOWN = tmp_coords_jac_11_WHITE_DOWN + tmp_coords_jac_6_WHITE_DOWN; + const real_t p_affine_const_3_2_WHITE_DOWN = tmp_coords_jac_12_WHITE_DOWN + tmp_coords_jac_9_WHITE_DOWN; + const real_t jac_affine_0_0_WHITE_DOWN = -p_affine_const_0_0_WHITE_DOWN + p_affine_const_1_0_WHITE_DOWN; + const real_t jac_affine_0_1_WHITE_DOWN = -p_affine_const_0_0_WHITE_DOWN + p_affine_const_2_0_WHITE_DOWN; + const real_t jac_affine_0_2_WHITE_DOWN = -p_affine_const_0_0_WHITE_DOWN + p_affine_const_3_0_WHITE_DOWN; + const real_t jac_affine_1_0_WHITE_DOWN = -p_affine_const_0_1_WHITE_DOWN + p_affine_const_1_1_WHITE_DOWN; + const real_t jac_affine_1_1_WHITE_DOWN = -p_affine_const_0_1_WHITE_DOWN + p_affine_const_2_1_WHITE_DOWN; + const real_t tmp_coords_jac_17_WHITE_DOWN = jac_affine_0_2_WHITE_DOWN*jac_affine_1_1_WHITE_DOWN; + const real_t jac_affine_1_2_WHITE_DOWN = -p_affine_const_0_1_WHITE_DOWN + p_affine_const_3_1_WHITE_DOWN; + const real_t tmp_coords_jac_15_WHITE_DOWN = jac_affine_0_1_WHITE_DOWN*jac_affine_1_2_WHITE_DOWN; + const real_t jac_affine_2_0_WHITE_DOWN = -p_affine_const_0_2_WHITE_DOWN + p_affine_const_1_2_WHITE_DOWN; + const real_t jac_affine_2_1_WHITE_DOWN = -p_affine_const_0_2_WHITE_DOWN + p_affine_const_2_2_WHITE_DOWN; + const real_t tmp_coords_jac_14_WHITE_DOWN = jac_affine_1_2_WHITE_DOWN*jac_affine_2_1_WHITE_DOWN; + const real_t jac_affine_2_2_WHITE_DOWN = -p_affine_const_0_2_WHITE_DOWN + p_affine_const_3_2_WHITE_DOWN; + const real_t tmp_coords_jac_13_WHITE_DOWN = jac_affine_1_1_WHITE_DOWN*jac_affine_2_2_WHITE_DOWN; + const real_t tmp_coords_jac_16_WHITE_DOWN = jac_affine_0_1_WHITE_DOWN*jac_affine_2_2_WHITE_DOWN; + const real_t tmp_coords_jac_18_WHITE_DOWN = jac_affine_0_0_WHITE_DOWN*tmp_coords_jac_13_WHITE_DOWN - jac_affine_0_0_WHITE_DOWN*tmp_coords_jac_14_WHITE_DOWN + jac_affine_0_2_WHITE_DOWN*jac_affine_1_0_WHITE_DOWN*jac_affine_2_1_WHITE_DOWN - jac_affine_1_0_WHITE_DOWN*tmp_coords_jac_16_WHITE_DOWN + jac_affine_2_0_WHITE_DOWN*tmp_coords_jac_15_WHITE_DOWN - jac_affine_2_0_WHITE_DOWN*tmp_coords_jac_17_WHITE_DOWN; + const real_t tmp_coords_jac_19_WHITE_DOWN = 1.0 / (tmp_coords_jac_18_WHITE_DOWN); + const real_t jac_affine_inv_0_0_WHITE_DOWN = tmp_coords_jac_19_WHITE_DOWN*(tmp_coords_jac_13_WHITE_DOWN - tmp_coords_jac_14_WHITE_DOWN); + const real_t jac_affine_inv_0_1_WHITE_DOWN = tmp_coords_jac_19_WHITE_DOWN*(jac_affine_0_2_WHITE_DOWN*jac_affine_2_1_WHITE_DOWN - tmp_coords_jac_16_WHITE_DOWN); + const real_t jac_affine_inv_0_2_WHITE_DOWN = tmp_coords_jac_19_WHITE_DOWN*(tmp_coords_jac_15_WHITE_DOWN - tmp_coords_jac_17_WHITE_DOWN); + const real_t jac_affine_inv_1_0_WHITE_DOWN = tmp_coords_jac_19_WHITE_DOWN*(-jac_affine_1_0_WHITE_DOWN*jac_affine_2_2_WHITE_DOWN + jac_affine_1_2_WHITE_DOWN*jac_affine_2_0_WHITE_DOWN); + const real_t jac_affine_inv_1_1_WHITE_DOWN = tmp_coords_jac_19_WHITE_DOWN*(jac_affine_0_0_WHITE_DOWN*jac_affine_2_2_WHITE_DOWN - jac_affine_0_2_WHITE_DOWN*jac_affine_2_0_WHITE_DOWN); + const real_t jac_affine_inv_1_2_WHITE_DOWN = tmp_coords_jac_19_WHITE_DOWN*(-jac_affine_0_0_WHITE_DOWN*jac_affine_1_2_WHITE_DOWN + jac_affine_0_2_WHITE_DOWN*jac_affine_1_0_WHITE_DOWN); + const real_t jac_affine_inv_2_0_WHITE_DOWN = tmp_coords_jac_19_WHITE_DOWN*(jac_affine_1_0_WHITE_DOWN*jac_affine_2_1_WHITE_DOWN - jac_affine_1_1_WHITE_DOWN*jac_affine_2_0_WHITE_DOWN); + const real_t jac_affine_inv_2_1_WHITE_DOWN = tmp_coords_jac_19_WHITE_DOWN*(-jac_affine_0_0_WHITE_DOWN*jac_affine_2_1_WHITE_DOWN + jac_affine_0_1_WHITE_DOWN*jac_affine_2_0_WHITE_DOWN); + const real_t jac_affine_inv_2_2_WHITE_DOWN = tmp_coords_jac_19_WHITE_DOWN*(jac_affine_0_0_WHITE_DOWN*jac_affine_1_1_WHITE_DOWN - jac_affine_0_1_WHITE_DOWN*jac_affine_1_0_WHITE_DOWN); + const real_t abs_det_jac_affine_WHITE_DOWN = abs(tmp_coords_jac_18_WHITE_DOWN); { /* CellType.WHITE_DOWN */ - const real_t tmp_coords_jac_0_WHITE_DOWN = 1.0 / (micro_edges_per_macro_edge_float)*1.0; - const real_t tmp_coords_jac_1_WHITE_DOWN = tmp_coords_jac_0_WHITE_DOWN*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0); - const real_t tmp_coords_jac_2_WHITE_DOWN = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_WHITE_DOWN*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0); - const real_t tmp_coords_jac_3_WHITE_DOWN = tmp_coords_jac_1_WHITE_DOWN + tmp_coords_jac_2_WHITE_DOWN; - const real_t tmp_coords_jac_4_WHITE_DOWN = tmp_coords_jac_0_WHITE_DOWN*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1); - const real_t tmp_coords_jac_5_WHITE_DOWN = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_WHITE_DOWN*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1); - const real_t tmp_coords_jac_6_WHITE_DOWN = tmp_coords_jac_4_WHITE_DOWN + tmp_coords_jac_5_WHITE_DOWN; - const real_t tmp_coords_jac_7_WHITE_DOWN = tmp_coords_jac_0_WHITE_DOWN*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2); - const real_t tmp_coords_jac_8_WHITE_DOWN = macro_vertex_coord_id_0comp2 + tmp_coords_jac_0_WHITE_DOWN*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2); - const real_t tmp_coords_jac_9_WHITE_DOWN = tmp_coords_jac_7_WHITE_DOWN + tmp_coords_jac_8_WHITE_DOWN; - const real_t tmp_coords_jac_10_WHITE_DOWN = tmp_coords_jac_0_WHITE_DOWN*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0); - const real_t tmp_coords_jac_11_WHITE_DOWN = tmp_coords_jac_0_WHITE_DOWN*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1); - const real_t tmp_coords_jac_12_WHITE_DOWN = tmp_coords_jac_0_WHITE_DOWN*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2); - const real_t p_affine_const_0_0_WHITE_DOWN = tmp_coords_jac_3_WHITE_DOWN; - const real_t p_affine_const_0_1_WHITE_DOWN = tmp_coords_jac_6_WHITE_DOWN; - const real_t p_affine_const_0_2_WHITE_DOWN = tmp_coords_jac_9_WHITE_DOWN; - const real_t p_affine_const_1_0_WHITE_DOWN = tmp_coords_jac_10_WHITE_DOWN + tmp_coords_jac_2_WHITE_DOWN; - const real_t p_affine_const_1_1_WHITE_DOWN = tmp_coords_jac_11_WHITE_DOWN + tmp_coords_jac_5_WHITE_DOWN; - const real_t p_affine_const_1_2_WHITE_DOWN = tmp_coords_jac_12_WHITE_DOWN + tmp_coords_jac_8_WHITE_DOWN; - const real_t p_affine_const_2_0_WHITE_DOWN = macro_vertex_coord_id_0comp0 + tmp_coords_jac_10_WHITE_DOWN + tmp_coords_jac_1_WHITE_DOWN; - const real_t p_affine_const_2_1_WHITE_DOWN = macro_vertex_coord_id_0comp1 + tmp_coords_jac_11_WHITE_DOWN + tmp_coords_jac_4_WHITE_DOWN; - const real_t p_affine_const_2_2_WHITE_DOWN = macro_vertex_coord_id_0comp2 + tmp_coords_jac_12_WHITE_DOWN + tmp_coords_jac_7_WHITE_DOWN; - const real_t p_affine_const_3_0_WHITE_DOWN = tmp_coords_jac_10_WHITE_DOWN + tmp_coords_jac_3_WHITE_DOWN; - const real_t p_affine_const_3_1_WHITE_DOWN = tmp_coords_jac_11_WHITE_DOWN + tmp_coords_jac_6_WHITE_DOWN; - const real_t p_affine_const_3_2_WHITE_DOWN = tmp_coords_jac_12_WHITE_DOWN + tmp_coords_jac_9_WHITE_DOWN; - const real_t jac_affine_0_0_WHITE_DOWN = -p_affine_const_0_0_WHITE_DOWN + p_affine_const_1_0_WHITE_DOWN; - const real_t jac_affine_0_1_WHITE_DOWN = -p_affine_const_0_0_WHITE_DOWN + p_affine_const_2_0_WHITE_DOWN; - const real_t jac_affine_0_2_WHITE_DOWN = -p_affine_const_0_0_WHITE_DOWN + p_affine_const_3_0_WHITE_DOWN; - const real_t jac_affine_1_0_WHITE_DOWN = -p_affine_const_0_1_WHITE_DOWN + p_affine_const_1_1_WHITE_DOWN; - const real_t jac_affine_1_1_WHITE_DOWN = -p_affine_const_0_1_WHITE_DOWN + p_affine_const_2_1_WHITE_DOWN; - const real_t tmp_coords_jac_17_WHITE_DOWN = jac_affine_0_2_WHITE_DOWN*jac_affine_1_1_WHITE_DOWN; - const real_t jac_affine_1_2_WHITE_DOWN = -p_affine_const_0_1_WHITE_DOWN + p_affine_const_3_1_WHITE_DOWN; - const real_t tmp_coords_jac_15_WHITE_DOWN = jac_affine_0_1_WHITE_DOWN*jac_affine_1_2_WHITE_DOWN; - const real_t jac_affine_2_0_WHITE_DOWN = -p_affine_const_0_2_WHITE_DOWN + p_affine_const_1_2_WHITE_DOWN; - const real_t jac_affine_2_1_WHITE_DOWN = -p_affine_const_0_2_WHITE_DOWN + p_affine_const_2_2_WHITE_DOWN; - const real_t tmp_coords_jac_14_WHITE_DOWN = jac_affine_1_2_WHITE_DOWN*jac_affine_2_1_WHITE_DOWN; - const real_t jac_affine_2_2_WHITE_DOWN = -p_affine_const_0_2_WHITE_DOWN + p_affine_const_3_2_WHITE_DOWN; - const real_t tmp_coords_jac_13_WHITE_DOWN = jac_affine_1_1_WHITE_DOWN*jac_affine_2_2_WHITE_DOWN; - const real_t tmp_coords_jac_16_WHITE_DOWN = jac_affine_0_1_WHITE_DOWN*jac_affine_2_2_WHITE_DOWN; - const real_t tmp_coords_jac_18_WHITE_DOWN = 1.0 / (jac_affine_0_0_WHITE_DOWN*tmp_coords_jac_13_WHITE_DOWN - jac_affine_0_0_WHITE_DOWN*tmp_coords_jac_14_WHITE_DOWN + jac_affine_0_2_WHITE_DOWN*jac_affine_1_0_WHITE_DOWN*jac_affine_2_1_WHITE_DOWN - jac_affine_1_0_WHITE_DOWN*tmp_coords_jac_16_WHITE_DOWN + jac_affine_2_0_WHITE_DOWN*tmp_coords_jac_15_WHITE_DOWN - jac_affine_2_0_WHITE_DOWN*tmp_coords_jac_17_WHITE_DOWN); - const real_t jac_affine_inv_0_0_WHITE_DOWN = tmp_coords_jac_18_WHITE_DOWN*(tmp_coords_jac_13_WHITE_DOWN - tmp_coords_jac_14_WHITE_DOWN); - const real_t jac_affine_inv_0_1_WHITE_DOWN = tmp_coords_jac_18_WHITE_DOWN*(jac_affine_0_2_WHITE_DOWN*jac_affine_2_1_WHITE_DOWN - tmp_coords_jac_16_WHITE_DOWN); - const real_t jac_affine_inv_0_2_WHITE_DOWN = tmp_coords_jac_18_WHITE_DOWN*(tmp_coords_jac_15_WHITE_DOWN - tmp_coords_jac_17_WHITE_DOWN); - const real_t jac_affine_inv_1_0_WHITE_DOWN = tmp_coords_jac_18_WHITE_DOWN*(-jac_affine_1_0_WHITE_DOWN*jac_affine_2_2_WHITE_DOWN + jac_affine_1_2_WHITE_DOWN*jac_affine_2_0_WHITE_DOWN); - const real_t jac_affine_inv_1_1_WHITE_DOWN = tmp_coords_jac_18_WHITE_DOWN*(jac_affine_0_0_WHITE_DOWN*jac_affine_2_2_WHITE_DOWN - jac_affine_0_2_WHITE_DOWN*jac_affine_2_0_WHITE_DOWN); - const real_t jac_affine_inv_1_2_WHITE_DOWN = tmp_coords_jac_18_WHITE_DOWN*(-jac_affine_0_0_WHITE_DOWN*jac_affine_1_2_WHITE_DOWN + jac_affine_0_2_WHITE_DOWN*jac_affine_1_0_WHITE_DOWN); - const real_t jac_affine_inv_2_0_WHITE_DOWN = tmp_coords_jac_18_WHITE_DOWN*(jac_affine_1_0_WHITE_DOWN*jac_affine_2_1_WHITE_DOWN - jac_affine_1_1_WHITE_DOWN*jac_affine_2_0_WHITE_DOWN); - const real_t jac_affine_inv_2_1_WHITE_DOWN = tmp_coords_jac_18_WHITE_DOWN*(-jac_affine_0_0_WHITE_DOWN*jac_affine_2_1_WHITE_DOWN + jac_affine_0_1_WHITE_DOWN*jac_affine_2_0_WHITE_DOWN); - const real_t jac_affine_inv_2_2_WHITE_DOWN = tmp_coords_jac_18_WHITE_DOWN*(jac_affine_0_0_WHITE_DOWN*jac_affine_1_1_WHITE_DOWN - jac_affine_0_1_WHITE_DOWN*jac_affine_1_0_WHITE_DOWN); for (int64_t ctr_2 = 0; ctr_2 < micro_edges_per_macro_edge; ctr_2 += 1) for (int64_t ctr_1 = 0; ctr_1 < -ctr_2 + micro_edges_per_macro_edge; ctr_1 += 1) for (int64_t ctr_0 = 0; ctr_0 < -ctr_1 - ctr_2 + micro_edges_per_macro_edge - 2; ctr_0 += 1) { - const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*((real_t)(ctr_1 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*((real_t)(ctr_2)); - const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*((real_t)(ctr_1 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*((real_t)(ctr_2)); - const real_t p_affine_0_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*((real_t)(ctr_1 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*((real_t)(ctr_2)); - const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*((real_t)(ctr_1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*((real_t)(ctr_2 + 1)); - const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*((real_t)(ctr_1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*((real_t)(ctr_2 + 1)); - const real_t p_affine_1_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*((real_t)(ctr_1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*((real_t)(ctr_2 + 1)); - const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*((real_t)(ctr_1 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*((real_t)(ctr_2 + 1)); - const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*((real_t)(ctr_1 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*((real_t)(ctr_2 + 1)); - const real_t p_affine_2_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*((real_t)(ctr_1 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*((real_t)(ctr_2 + 1)); - const real_t p_affine_3_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*((real_t)(ctr_1 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*((real_t)(ctr_2 + 1)); - const real_t p_affine_3_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*((real_t)(ctr_1 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*((real_t)(ctr_2 + 1)); - const real_t p_affine_3_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*((real_t)(ctr_1 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*((real_t)(ctr_2 + 1)); + + const int64_t phantom_ctr_0 = ctr_0; + real_t _data_float_loop_ctr_array_dim_0[4]; + _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0; + _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1; + _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2; + _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3; + real_t _data_float_loop_ctr_array_dim_1[4]; + _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1; + real_t _data_float_loop_ctr_array_dim_2[4]; + _data_float_loop_ctr_array_dim_2[0] = (real_t) ctr_2; + _data_float_loop_ctr_array_dim_2[1] = (real_t) ctr_2; + _data_float_loop_ctr_array_dim_2[2] = (real_t) ctr_2; + _data_float_loop_ctr_array_dim_2[3] = (real_t) ctr_2; + + const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]; + const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]; + const real_t p_affine_0_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]; + const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_1_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_2_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_3_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_3_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_3_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0; const real_t src_dof_0 = _data_src_vertex_0[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1]; const real_t src_dof_1 = _data_src_vertex_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1]; const real_t src_dof_2 = _data_src_vertex_0[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))]; @@ -978,28 +1022,8 @@ void P2VectorToP1ElementwiseGradRhoByRhoDotUIcosahedralShellMap::apply_macro_3D( real_t q_acc_3_29 = 0.0; for (int64_t q = 0; q < 5; q += 1) { - const real_t tmp_qloop_0 = rayVertex_1 - refVertex_1; - const real_t tmp_qloop_1 = -rayVertex_0; - const real_t tmp_qloop_2 = -forVertex_0 - tmp_qloop_1; - const real_t tmp_qloop_3 = rayVertex_2 - thrVertex_2; - const real_t tmp_qloop_4 = tmp_qloop_2*tmp_qloop_3; - const real_t tmp_qloop_5 = rayVertex_2 - refVertex_2; - const real_t tmp_qloop_6 = rayVertex_1 - thrVertex_1; - const real_t tmp_qloop_7 = tmp_qloop_2*tmp_qloop_6; - const real_t tmp_qloop_8 = -rayVertex_1; - const real_t tmp_qloop_9 = -forVertex_1 - tmp_qloop_8; - const real_t tmp_qloop_10 = rayVertex_0 - thrVertex_0; - const real_t tmp_qloop_11 = rayVertex_0 - refVertex_0; - const real_t tmp_qloop_12 = -rayVertex_2; - const real_t tmp_qloop_13 = -forVertex_2 - tmp_qloop_12; - const real_t tmp_qloop_14 = tmp_qloop_3*tmp_qloop_9; - const real_t tmp_qloop_15 = tmp_qloop_10*tmp_qloop_13; - const real_t tmp_qloop_16 = -tmp_qloop_0*tmp_qloop_15 + tmp_qloop_0*tmp_qloop_4 + tmp_qloop_10*tmp_qloop_5*tmp_qloop_9 + tmp_qloop_11*tmp_qloop_13*tmp_qloop_6 - tmp_qloop_11*tmp_qloop_14 - tmp_qloop_5*tmp_qloop_7; - const real_t tmp_qloop_17 = 1.0 / (tmp_qloop_16); const real_t tmp_qloop_18 = p_affine_0_0 + (-p_affine_0_0 + p_affine_1_0)*_data_q_p_0[q] + (-p_affine_0_0 + p_affine_2_0)*_data_q_p_1[q] + (-p_affine_0_0 + p_affine_3_0)*_data_q_p_2[q]; const real_t tmp_qloop_19 = (tmp_qloop_18*tmp_qloop_18); - const real_t tmp_qloop_20 = radRayVertex*tmp_qloop_16; - const real_t tmp_qloop_21 = radRayVertex - radRefVertex; const real_t tmp_qloop_22 = p_affine_0_2 + (-p_affine_0_2 + p_affine_1_2)*_data_q_p_0[q] + (-p_affine_0_2 + p_affine_2_2)*_data_q_p_1[q] + (-p_affine_0_2 + p_affine_3_2)*_data_q_p_2[q]; const real_t tmp_qloop_23 = -tmp_qloop_12 - tmp_qloop_22; const real_t tmp_qloop_24 = -tmp_qloop_1 - tmp_qloop_18; @@ -1010,28 +1034,21 @@ void P2VectorToP1ElementwiseGradRhoByRhoDotUIcosahedralShellMap::apply_macro_3D( const real_t tmp_qloop_29 = (tmp_qloop_25*tmp_qloop_25); const real_t tmp_qloop_30 = (tmp_qloop_22*tmp_qloop_22); const real_t tmp_qloop_31 = tmp_qloop_19 + tmp_qloop_29 + tmp_qloop_30; - const real_t tmp_qloop_32 = -tmp_qloop_13*tmp_qloop_6 + tmp_qloop_14; - const real_t tmp_qloop_33 = -tmp_qloop_21*tmp_qloop_32; const real_t tmp_qloop_34 = tmp_qloop_18*tmp_qloop_33; const real_t tmp_qloop_35 = tmp_qloop_28 + tmp_qloop_34; const real_t tmp_qloop_36 = tmp_qloop_17*(-tmp_qloop_19*tmp_qloop_28 + tmp_qloop_31*tmp_qloop_35); const real_t tmp_qloop_37 = pow(tmp_qloop_31, -1.5000000000000000); const real_t tmp_qloop_38 = tmp_qloop_37*1.0; - const real_t tmp_qloop_39 = tmp_qloop_10*tmp_qloop_13 - tmp_qloop_4; - const real_t tmp_qloop_40 = tmp_qloop_21*tmp_qloop_39; const real_t tmp_qloop_41 = tmp_qloop_31*tmp_qloop_40; const real_t tmp_qloop_42 = -tmp_qloop_25*tmp_qloop_28 - tmp_qloop_41; const real_t tmp_qloop_43 = tmp_qloop_17*tmp_qloop_38; const real_t tmp_qloop_44 = tmp_qloop_42*tmp_qloop_43; - const real_t tmp_qloop_45 = -tmp_qloop_10*tmp_qloop_9 + tmp_qloop_7; - const real_t tmp_qloop_46 = -tmp_qloop_21*tmp_qloop_45; const real_t tmp_qloop_47 = -tmp_qloop_28; const real_t tmp_qloop_48 = tmp_qloop_22*tmp_qloop_47; const real_t tmp_qloop_49 = tmp_qloop_31*tmp_qloop_46 + tmp_qloop_48; const real_t tmp_qloop_50 = tmp_qloop_43*tmp_qloop_49; const real_t tmp_qloop_51 = tmp_qloop_18*tmp_qloop_47 + tmp_qloop_31*tmp_qloop_33; const real_t tmp_qloop_52 = tmp_qloop_43*tmp_qloop_51; - const real_t tmp_qloop_53 = -tmp_qloop_21*tmp_qloop_39; const real_t tmp_qloop_54 = tmp_qloop_25*tmp_qloop_53 + tmp_qloop_28; const real_t tmp_qloop_55 = tmp_qloop_29*tmp_qloop_47 + tmp_qloop_31*tmp_qloop_54; const real_t tmp_qloop_56 = tmp_qloop_22*tmp_qloop_46; @@ -1039,11 +1056,9 @@ void P2VectorToP1ElementwiseGradRhoByRhoDotUIcosahedralShellMap::apply_macro_3D( const real_t tmp_qloop_58 = tmp_qloop_30*tmp_qloop_47 + tmp_qloop_31*tmp_qloop_57; const real_t tmp_qloop_66 = pow(tmp_qloop_31, -2.5000000000000000)*3.0; const real_t tmp_qloop_67 = tmp_qloop_36*tmp_qloop_66; - const real_t tmp_qloop_68 = tmp_qloop_21*tmp_qloop_32; const real_t tmp_qloop_69 = tmp_qloop_18*2.0; const real_t tmp_qloop_70 = -tmp_qloop_41; const real_t tmp_qloop_71 = tmp_qloop_35*2.0; - const real_t tmp_qloop_72 = tmp_qloop_21*tmp_qloop_45; const real_t tmp_qloop_73 = -tmp_qloop_31*tmp_qloop_72; const real_t tmp_qloop_74 = -tmp_qloop_20 + tmp_qloop_27; const real_t tmp_qloop_75 = tmp_qloop_18*tmp_qloop_68 + tmp_qloop_34*2.0 + tmp_qloop_74; @@ -1105,14 +1120,7 @@ void P2VectorToP1ElementwiseGradRhoByRhoDotUIcosahedralShellMap::apply_macro_3D( const real_t tmp_qloop_132 = tmp_qloop_109 - tmp_qloop_110 - tmp_qloop_112 - tmp_qloop_131; const real_t tmp_qloop_133 = tmp_qloop_103*4.0; const real_t tmp_qloop_134 = tmp_qloop_111 - tmp_qloop_114 - tmp_qloop_133; - const real_t tmp_qloop_135 = 1.0 / (rho_dof_0*tmp_qloop_115 + rho_dof_1*tmp_qloop_127 + rho_dof_2*tmp_qloop_128 + rho_dof_3*tmp_qloop_129 + rho_dof_4*tmp_qloop_110 + rho_dof_5*tmp_qloop_113 + rho_dof_6*tmp_qloop_112 + rho_dof_7*tmp_qloop_130 + rho_dof_8*tmp_qloop_132 + rho_dof_9*tmp_qloop_134)*_data_q_w[q]; - const real_t tmp_qloop_136 = tmp_qloop_135*(1.0 - _data_q_p_0[q] - _data_q_p_1[q] - _data_q_p_2[q]); - const real_t tmp_qloop_142 = tmp_qloop_135*_data_q_p_0[q]; - const real_t tmp_qloop_144 = tmp_qloop_135*_data_q_p_2[q]; - const real_t tmp_qloop_147 = tmp_qloop_135*_data_q_p_1[q]; const real_t tmp_qloop_158 = tmp_qloop_105*tmp_qloop_111; - const real_t tmp_qloop_159 = tmp_qloop_135*tmp_qloop_158; - const real_t tmp_qloop_161 = tmp_qloop_107*tmp_qloop_135; const real_t jac_blending_0_0 = tmp_qloop_36*tmp_qloop_38; const real_t jac_blending_0_1 = tmp_qloop_18*tmp_qloop_44; const real_t jac_blending_0_2 = tmp_qloop_18*tmp_qloop_50; @@ -1130,6 +1138,13 @@ void P2VectorToP1ElementwiseGradRhoByRhoDotUIcosahedralShellMap::apply_macro_3D( const real_t tmp_qloop_64 = jac_blending_0_0*tmp_qloop_59 - jac_blending_0_0*tmp_qloop_61 + jac_blending_0_2*jac_blending_1_0*jac_blending_2_1 - jac_blending_1_0*tmp_qloop_62 + jac_blending_2_0*tmp_qloop_60 - jac_blending_2_0*tmp_qloop_63; const real_t tmp_qloop_65 = 1.0 / (tmp_qloop_64); const real_t abs_det_jac_blending = tmp_qloop_64; + const real_t tmp_qloop_135 = abs_det_jac_affine_WHITE_DOWN*abs_det_jac_blending*1.0 / (rho_dof_0*tmp_qloop_115 + rho_dof_1*tmp_qloop_127 + rho_dof_2*tmp_qloop_128 + rho_dof_3*tmp_qloop_129 + rho_dof_4*tmp_qloop_110 + rho_dof_5*tmp_qloop_113 + rho_dof_6*tmp_qloop_112 + rho_dof_7*tmp_qloop_130 + rho_dof_8*tmp_qloop_132 + rho_dof_9*tmp_qloop_134)*_data_q_w[q]; + const real_t tmp_qloop_136 = tmp_qloop_135*(1.0 - _data_q_p_0[q] - _data_q_p_1[q] - _data_q_p_2[q]); + const real_t tmp_qloop_142 = tmp_qloop_135*_data_q_p_0[q]; + const real_t tmp_qloop_144 = tmp_qloop_135*_data_q_p_2[q]; + const real_t tmp_qloop_147 = tmp_qloop_135*_data_q_p_1[q]; + const real_t tmp_qloop_159 = tmp_qloop_135*tmp_qloop_158; + const real_t tmp_qloop_161 = tmp_qloop_107*tmp_qloop_135; const real_t jac_blending_inv_0_0 = tmp_qloop_65*(tmp_qloop_59 - tmp_qloop_61); const real_t jac_blending_inv_0_1 = tmp_qloop_65*(jac_blending_0_2*jac_blending_2_1 - tmp_qloop_62); const real_t jac_blending_inv_0_2 = tmp_qloop_65*(tmp_qloop_60 - tmp_qloop_63); @@ -1439,67 +1454,87 @@ void P2VectorToP1ElementwiseGradRhoByRhoDotUIcosahedralShellMap::apply_macro_3D( _data_dst[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1] = elMatVec_3 + _data_dst[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1]; } } + const real_t tmp_coords_jac_0_BLUE_UP = 1.0 / (micro_edges_per_macro_edge_float)*1.0; + const real_t tmp_coords_jac_1_BLUE_UP = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_BLUE_UP*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0); + const real_t tmp_coords_jac_2_BLUE_UP = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_BLUE_UP*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1); + const real_t tmp_coords_jac_3_BLUE_UP = macro_vertex_coord_id_0comp2 + tmp_coords_jac_0_BLUE_UP*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2); + const real_t tmp_coords_jac_4_BLUE_UP = tmp_coords_jac_0_BLUE_UP*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0); + const real_t tmp_coords_jac_5_BLUE_UP = tmp_coords_jac_0_BLUE_UP*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1); + const real_t tmp_coords_jac_6_BLUE_UP = tmp_coords_jac_0_BLUE_UP*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2); + const real_t p_affine_const_0_0_BLUE_UP = tmp_coords_jac_1_BLUE_UP; + const real_t p_affine_const_0_1_BLUE_UP = tmp_coords_jac_2_BLUE_UP; + const real_t p_affine_const_0_2_BLUE_UP = tmp_coords_jac_3_BLUE_UP; + const real_t p_affine_const_1_0_BLUE_UP = macro_vertex_coord_id_0comp0 + tmp_coords_jac_4_BLUE_UP; + const real_t p_affine_const_1_1_BLUE_UP = macro_vertex_coord_id_0comp1 + tmp_coords_jac_5_BLUE_UP; + const real_t p_affine_const_1_2_BLUE_UP = macro_vertex_coord_id_0comp2 + tmp_coords_jac_6_BLUE_UP; + const real_t p_affine_const_2_0_BLUE_UP = tmp_coords_jac_1_BLUE_UP + tmp_coords_jac_4_BLUE_UP; + const real_t p_affine_const_2_1_BLUE_UP = tmp_coords_jac_2_BLUE_UP + tmp_coords_jac_5_BLUE_UP; + const real_t p_affine_const_2_2_BLUE_UP = tmp_coords_jac_3_BLUE_UP + tmp_coords_jac_6_BLUE_UP; + const real_t p_affine_const_3_0_BLUE_UP = tmp_coords_jac_0_BLUE_UP*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0) + tmp_coords_jac_1_BLUE_UP; + const real_t p_affine_const_3_1_BLUE_UP = tmp_coords_jac_0_BLUE_UP*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1) + tmp_coords_jac_2_BLUE_UP; + const real_t p_affine_const_3_2_BLUE_UP = tmp_coords_jac_0_BLUE_UP*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2) + tmp_coords_jac_3_BLUE_UP; + const real_t jac_affine_0_0_BLUE_UP = -p_affine_const_0_0_BLUE_UP + p_affine_const_1_0_BLUE_UP; + const real_t jac_affine_0_1_BLUE_UP = -p_affine_const_0_0_BLUE_UP + p_affine_const_2_0_BLUE_UP; + const real_t jac_affine_0_2_BLUE_UP = -p_affine_const_0_0_BLUE_UP + p_affine_const_3_0_BLUE_UP; + const real_t jac_affine_1_0_BLUE_UP = -p_affine_const_0_1_BLUE_UP + p_affine_const_1_1_BLUE_UP; + const real_t jac_affine_1_1_BLUE_UP = -p_affine_const_0_1_BLUE_UP + p_affine_const_2_1_BLUE_UP; + const real_t tmp_coords_jac_11_BLUE_UP = jac_affine_0_2_BLUE_UP*jac_affine_1_1_BLUE_UP; + const real_t jac_affine_1_2_BLUE_UP = -p_affine_const_0_1_BLUE_UP + p_affine_const_3_1_BLUE_UP; + const real_t tmp_coords_jac_9_BLUE_UP = jac_affine_0_1_BLUE_UP*jac_affine_1_2_BLUE_UP; + const real_t jac_affine_2_0_BLUE_UP = -p_affine_const_0_2_BLUE_UP + p_affine_const_1_2_BLUE_UP; + const real_t jac_affine_2_1_BLUE_UP = -p_affine_const_0_2_BLUE_UP + p_affine_const_2_2_BLUE_UP; + const real_t tmp_coords_jac_8_BLUE_UP = jac_affine_1_2_BLUE_UP*jac_affine_2_1_BLUE_UP; + const real_t jac_affine_2_2_BLUE_UP = -p_affine_const_0_2_BLUE_UP + p_affine_const_3_2_BLUE_UP; + const real_t tmp_coords_jac_7_BLUE_UP = jac_affine_1_1_BLUE_UP*jac_affine_2_2_BLUE_UP; + const real_t tmp_coords_jac_10_BLUE_UP = jac_affine_0_1_BLUE_UP*jac_affine_2_2_BLUE_UP; + const real_t tmp_coords_jac_12_BLUE_UP = jac_affine_0_0_BLUE_UP*tmp_coords_jac_7_BLUE_UP - jac_affine_0_0_BLUE_UP*tmp_coords_jac_8_BLUE_UP + jac_affine_0_2_BLUE_UP*jac_affine_1_0_BLUE_UP*jac_affine_2_1_BLUE_UP - jac_affine_1_0_BLUE_UP*tmp_coords_jac_10_BLUE_UP - jac_affine_2_0_BLUE_UP*tmp_coords_jac_11_BLUE_UP + jac_affine_2_0_BLUE_UP*tmp_coords_jac_9_BLUE_UP; + const real_t tmp_coords_jac_13_BLUE_UP = 1.0 / (tmp_coords_jac_12_BLUE_UP); + const real_t jac_affine_inv_0_0_BLUE_UP = tmp_coords_jac_13_BLUE_UP*(tmp_coords_jac_7_BLUE_UP - tmp_coords_jac_8_BLUE_UP); + const real_t jac_affine_inv_0_1_BLUE_UP = tmp_coords_jac_13_BLUE_UP*(jac_affine_0_2_BLUE_UP*jac_affine_2_1_BLUE_UP - tmp_coords_jac_10_BLUE_UP); + const real_t jac_affine_inv_0_2_BLUE_UP = tmp_coords_jac_13_BLUE_UP*(-tmp_coords_jac_11_BLUE_UP + tmp_coords_jac_9_BLUE_UP); + const real_t jac_affine_inv_1_0_BLUE_UP = tmp_coords_jac_13_BLUE_UP*(-jac_affine_1_0_BLUE_UP*jac_affine_2_2_BLUE_UP + jac_affine_1_2_BLUE_UP*jac_affine_2_0_BLUE_UP); + const real_t jac_affine_inv_1_1_BLUE_UP = tmp_coords_jac_13_BLUE_UP*(jac_affine_0_0_BLUE_UP*jac_affine_2_2_BLUE_UP - jac_affine_0_2_BLUE_UP*jac_affine_2_0_BLUE_UP); + const real_t jac_affine_inv_1_2_BLUE_UP = tmp_coords_jac_13_BLUE_UP*(-jac_affine_0_0_BLUE_UP*jac_affine_1_2_BLUE_UP + jac_affine_0_2_BLUE_UP*jac_affine_1_0_BLUE_UP); + const real_t jac_affine_inv_2_0_BLUE_UP = tmp_coords_jac_13_BLUE_UP*(jac_affine_1_0_BLUE_UP*jac_affine_2_1_BLUE_UP - jac_affine_1_1_BLUE_UP*jac_affine_2_0_BLUE_UP); + const real_t jac_affine_inv_2_1_BLUE_UP = tmp_coords_jac_13_BLUE_UP*(-jac_affine_0_0_BLUE_UP*jac_affine_2_1_BLUE_UP + jac_affine_0_1_BLUE_UP*jac_affine_2_0_BLUE_UP); + const real_t jac_affine_inv_2_2_BLUE_UP = tmp_coords_jac_13_BLUE_UP*(jac_affine_0_0_BLUE_UP*jac_affine_1_1_BLUE_UP - jac_affine_0_1_BLUE_UP*jac_affine_1_0_BLUE_UP); + const real_t abs_det_jac_affine_BLUE_UP = abs(tmp_coords_jac_12_BLUE_UP); { /* CellType.BLUE_UP */ - const real_t tmp_coords_jac_0_BLUE_UP = 1.0 / (micro_edges_per_macro_edge_float)*1.0; - const real_t tmp_coords_jac_1_BLUE_UP = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_BLUE_UP*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0); - const real_t tmp_coords_jac_2_BLUE_UP = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_BLUE_UP*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1); - const real_t tmp_coords_jac_3_BLUE_UP = macro_vertex_coord_id_0comp2 + tmp_coords_jac_0_BLUE_UP*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2); - const real_t tmp_coords_jac_4_BLUE_UP = tmp_coords_jac_0_BLUE_UP*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0); - const real_t tmp_coords_jac_5_BLUE_UP = tmp_coords_jac_0_BLUE_UP*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1); - const real_t tmp_coords_jac_6_BLUE_UP = tmp_coords_jac_0_BLUE_UP*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2); - const real_t p_affine_const_0_0_BLUE_UP = tmp_coords_jac_1_BLUE_UP; - const real_t p_affine_const_0_1_BLUE_UP = tmp_coords_jac_2_BLUE_UP; - const real_t p_affine_const_0_2_BLUE_UP = tmp_coords_jac_3_BLUE_UP; - const real_t p_affine_const_1_0_BLUE_UP = macro_vertex_coord_id_0comp0 + tmp_coords_jac_4_BLUE_UP; - const real_t p_affine_const_1_1_BLUE_UP = macro_vertex_coord_id_0comp1 + tmp_coords_jac_5_BLUE_UP; - const real_t p_affine_const_1_2_BLUE_UP = macro_vertex_coord_id_0comp2 + tmp_coords_jac_6_BLUE_UP; - const real_t p_affine_const_2_0_BLUE_UP = tmp_coords_jac_1_BLUE_UP + tmp_coords_jac_4_BLUE_UP; - const real_t p_affine_const_2_1_BLUE_UP = tmp_coords_jac_2_BLUE_UP + tmp_coords_jac_5_BLUE_UP; - const real_t p_affine_const_2_2_BLUE_UP = tmp_coords_jac_3_BLUE_UP + tmp_coords_jac_6_BLUE_UP; - const real_t p_affine_const_3_0_BLUE_UP = tmp_coords_jac_0_BLUE_UP*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0) + tmp_coords_jac_1_BLUE_UP; - const real_t p_affine_const_3_1_BLUE_UP = tmp_coords_jac_0_BLUE_UP*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1) + tmp_coords_jac_2_BLUE_UP; - const real_t p_affine_const_3_2_BLUE_UP = tmp_coords_jac_0_BLUE_UP*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2) + tmp_coords_jac_3_BLUE_UP; - const real_t jac_affine_0_0_BLUE_UP = -p_affine_const_0_0_BLUE_UP + p_affine_const_1_0_BLUE_UP; - const real_t jac_affine_0_1_BLUE_UP = -p_affine_const_0_0_BLUE_UP + p_affine_const_2_0_BLUE_UP; - const real_t jac_affine_0_2_BLUE_UP = -p_affine_const_0_0_BLUE_UP + p_affine_const_3_0_BLUE_UP; - const real_t jac_affine_1_0_BLUE_UP = -p_affine_const_0_1_BLUE_UP + p_affine_const_1_1_BLUE_UP; - const real_t jac_affine_1_1_BLUE_UP = -p_affine_const_0_1_BLUE_UP + p_affine_const_2_1_BLUE_UP; - const real_t tmp_coords_jac_11_BLUE_UP = jac_affine_0_2_BLUE_UP*jac_affine_1_1_BLUE_UP; - const real_t jac_affine_1_2_BLUE_UP = -p_affine_const_0_1_BLUE_UP + p_affine_const_3_1_BLUE_UP; - const real_t tmp_coords_jac_9_BLUE_UP = jac_affine_0_1_BLUE_UP*jac_affine_1_2_BLUE_UP; - const real_t jac_affine_2_0_BLUE_UP = -p_affine_const_0_2_BLUE_UP + p_affine_const_1_2_BLUE_UP; - const real_t jac_affine_2_1_BLUE_UP = -p_affine_const_0_2_BLUE_UP + p_affine_const_2_2_BLUE_UP; - const real_t tmp_coords_jac_8_BLUE_UP = jac_affine_1_2_BLUE_UP*jac_affine_2_1_BLUE_UP; - const real_t jac_affine_2_2_BLUE_UP = -p_affine_const_0_2_BLUE_UP + p_affine_const_3_2_BLUE_UP; - const real_t tmp_coords_jac_7_BLUE_UP = jac_affine_1_1_BLUE_UP*jac_affine_2_2_BLUE_UP; - const real_t tmp_coords_jac_10_BLUE_UP = jac_affine_0_1_BLUE_UP*jac_affine_2_2_BLUE_UP; - const real_t tmp_coords_jac_12_BLUE_UP = 1.0 / (jac_affine_0_0_BLUE_UP*tmp_coords_jac_7_BLUE_UP - jac_affine_0_0_BLUE_UP*tmp_coords_jac_8_BLUE_UP + jac_affine_0_2_BLUE_UP*jac_affine_1_0_BLUE_UP*jac_affine_2_1_BLUE_UP - jac_affine_1_0_BLUE_UP*tmp_coords_jac_10_BLUE_UP - jac_affine_2_0_BLUE_UP*tmp_coords_jac_11_BLUE_UP + jac_affine_2_0_BLUE_UP*tmp_coords_jac_9_BLUE_UP); - const real_t jac_affine_inv_0_0_BLUE_UP = tmp_coords_jac_12_BLUE_UP*(tmp_coords_jac_7_BLUE_UP - tmp_coords_jac_8_BLUE_UP); - const real_t jac_affine_inv_0_1_BLUE_UP = tmp_coords_jac_12_BLUE_UP*(jac_affine_0_2_BLUE_UP*jac_affine_2_1_BLUE_UP - tmp_coords_jac_10_BLUE_UP); - const real_t jac_affine_inv_0_2_BLUE_UP = tmp_coords_jac_12_BLUE_UP*(-tmp_coords_jac_11_BLUE_UP + tmp_coords_jac_9_BLUE_UP); - const real_t jac_affine_inv_1_0_BLUE_UP = tmp_coords_jac_12_BLUE_UP*(-jac_affine_1_0_BLUE_UP*jac_affine_2_2_BLUE_UP + jac_affine_1_2_BLUE_UP*jac_affine_2_0_BLUE_UP); - const real_t jac_affine_inv_1_1_BLUE_UP = tmp_coords_jac_12_BLUE_UP*(jac_affine_0_0_BLUE_UP*jac_affine_2_2_BLUE_UP - jac_affine_0_2_BLUE_UP*jac_affine_2_0_BLUE_UP); - const real_t jac_affine_inv_1_2_BLUE_UP = tmp_coords_jac_12_BLUE_UP*(-jac_affine_0_0_BLUE_UP*jac_affine_1_2_BLUE_UP + jac_affine_0_2_BLUE_UP*jac_affine_1_0_BLUE_UP); - const real_t jac_affine_inv_2_0_BLUE_UP = tmp_coords_jac_12_BLUE_UP*(jac_affine_1_0_BLUE_UP*jac_affine_2_1_BLUE_UP - jac_affine_1_1_BLUE_UP*jac_affine_2_0_BLUE_UP); - const real_t jac_affine_inv_2_1_BLUE_UP = tmp_coords_jac_12_BLUE_UP*(-jac_affine_0_0_BLUE_UP*jac_affine_2_1_BLUE_UP + jac_affine_0_1_BLUE_UP*jac_affine_2_0_BLUE_UP); - const real_t jac_affine_inv_2_2_BLUE_UP = tmp_coords_jac_12_BLUE_UP*(jac_affine_0_0_BLUE_UP*jac_affine_1_1_BLUE_UP - jac_affine_0_1_BLUE_UP*jac_affine_1_0_BLUE_UP); for (int64_t ctr_2 = 0; ctr_2 < micro_edges_per_macro_edge; ctr_2 += 1) for (int64_t ctr_1 = 0; ctr_1 < -ctr_2 + micro_edges_per_macro_edge; ctr_1 += 1) for (int64_t ctr_0 = 0; ctr_0 < -ctr_1 - ctr_2 + micro_edges_per_macro_edge - 1; ctr_0 += 1) { - const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*((real_t)(ctr_1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*((real_t)(ctr_2)); - const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*((real_t)(ctr_1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*((real_t)(ctr_2)); - const real_t p_affine_0_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*((real_t)(ctr_1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*((real_t)(ctr_2)); - const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*((real_t)(ctr_1 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*((real_t)(ctr_2)); - const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*((real_t)(ctr_1 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*((real_t)(ctr_2)); - const real_t p_affine_1_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*((real_t)(ctr_1 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*((real_t)(ctr_2)); - const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*((real_t)(ctr_1 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*((real_t)(ctr_2)); - const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*((real_t)(ctr_1 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*((real_t)(ctr_2)); - const real_t p_affine_2_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*((real_t)(ctr_1 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*((real_t)(ctr_2)); - const real_t p_affine_3_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*((real_t)(ctr_1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*((real_t)(ctr_2 + 1)); - const real_t p_affine_3_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*((real_t)(ctr_1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*((real_t)(ctr_2 + 1)); - const real_t p_affine_3_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*((real_t)(ctr_1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*((real_t)(ctr_2 + 1)); + + const int64_t phantom_ctr_0 = ctr_0; + real_t _data_float_loop_ctr_array_dim_0[4]; + _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0; + _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1; + _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2; + _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3; + real_t _data_float_loop_ctr_array_dim_1[4]; + _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1; + real_t _data_float_loop_ctr_array_dim_2[4]; + _data_float_loop_ctr_array_dim_2[0] = (real_t) ctr_2; + _data_float_loop_ctr_array_dim_2[1] = (real_t) ctr_2; + _data_float_loop_ctr_array_dim_2[2] = (real_t) ctr_2; + _data_float_loop_ctr_array_dim_2[3] = (real_t) ctr_2; + + const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]; + const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]; + const real_t p_affine_0_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]; + const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]; + const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]; + const real_t p_affine_1_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]; + const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]; + const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]; + const real_t p_affine_2_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]; + const real_t p_affine_3_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_3_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_3_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0; const real_t src_dof_0 = _data_src_vertex_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1]; const real_t src_dof_1 = _data_src_vertex_0[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))]; const real_t src_dof_2 = _data_src_vertex_0[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1]; @@ -1662,28 +1697,8 @@ void P2VectorToP1ElementwiseGradRhoByRhoDotUIcosahedralShellMap::apply_macro_3D( real_t q_acc_3_29 = 0.0; for (int64_t q = 0; q < 5; q += 1) { - const real_t tmp_qloop_0 = rayVertex_1 - refVertex_1; - const real_t tmp_qloop_1 = -rayVertex_0; - const real_t tmp_qloop_2 = -forVertex_0 - tmp_qloop_1; - const real_t tmp_qloop_3 = rayVertex_2 - thrVertex_2; - const real_t tmp_qloop_4 = tmp_qloop_2*tmp_qloop_3; - const real_t tmp_qloop_5 = rayVertex_2 - refVertex_2; - const real_t tmp_qloop_6 = rayVertex_1 - thrVertex_1; - const real_t tmp_qloop_7 = tmp_qloop_2*tmp_qloop_6; - const real_t tmp_qloop_8 = -rayVertex_1; - const real_t tmp_qloop_9 = -forVertex_1 - tmp_qloop_8; - const real_t tmp_qloop_10 = rayVertex_0 - thrVertex_0; - const real_t tmp_qloop_11 = rayVertex_0 - refVertex_0; - const real_t tmp_qloop_12 = -rayVertex_2; - const real_t tmp_qloop_13 = -forVertex_2 - tmp_qloop_12; - const real_t tmp_qloop_14 = tmp_qloop_3*tmp_qloop_9; - const real_t tmp_qloop_15 = tmp_qloop_10*tmp_qloop_13; - const real_t tmp_qloop_16 = -tmp_qloop_0*tmp_qloop_15 + tmp_qloop_0*tmp_qloop_4 + tmp_qloop_10*tmp_qloop_5*tmp_qloop_9 + tmp_qloop_11*tmp_qloop_13*tmp_qloop_6 - tmp_qloop_11*tmp_qloop_14 - tmp_qloop_5*tmp_qloop_7; - const real_t tmp_qloop_17 = 1.0 / (tmp_qloop_16); const real_t tmp_qloop_18 = p_affine_0_0 + (-p_affine_0_0 + p_affine_1_0)*_data_q_p_0[q] + (-p_affine_0_0 + p_affine_2_0)*_data_q_p_1[q] + (-p_affine_0_0 + p_affine_3_0)*_data_q_p_2[q]; const real_t tmp_qloop_19 = (tmp_qloop_18*tmp_qloop_18); - const real_t tmp_qloop_20 = radRayVertex*tmp_qloop_16; - const real_t tmp_qloop_21 = radRayVertex - radRefVertex; const real_t tmp_qloop_22 = p_affine_0_2 + (-p_affine_0_2 + p_affine_1_2)*_data_q_p_0[q] + (-p_affine_0_2 + p_affine_2_2)*_data_q_p_1[q] + (-p_affine_0_2 + p_affine_3_2)*_data_q_p_2[q]; const real_t tmp_qloop_23 = -tmp_qloop_12 - tmp_qloop_22; const real_t tmp_qloop_24 = -tmp_qloop_1 - tmp_qloop_18; @@ -1694,28 +1709,21 @@ void P2VectorToP1ElementwiseGradRhoByRhoDotUIcosahedralShellMap::apply_macro_3D( const real_t tmp_qloop_29 = (tmp_qloop_25*tmp_qloop_25); const real_t tmp_qloop_30 = (tmp_qloop_22*tmp_qloop_22); const real_t tmp_qloop_31 = tmp_qloop_19 + tmp_qloop_29 + tmp_qloop_30; - const real_t tmp_qloop_32 = -tmp_qloop_13*tmp_qloop_6 + tmp_qloop_14; - const real_t tmp_qloop_33 = -tmp_qloop_21*tmp_qloop_32; const real_t tmp_qloop_34 = tmp_qloop_18*tmp_qloop_33; const real_t tmp_qloop_35 = tmp_qloop_28 + tmp_qloop_34; const real_t tmp_qloop_36 = tmp_qloop_17*(-tmp_qloop_19*tmp_qloop_28 + tmp_qloop_31*tmp_qloop_35); const real_t tmp_qloop_37 = pow(tmp_qloop_31, -1.5000000000000000); const real_t tmp_qloop_38 = tmp_qloop_37*1.0; - const real_t tmp_qloop_39 = tmp_qloop_10*tmp_qloop_13 - tmp_qloop_4; - const real_t tmp_qloop_40 = tmp_qloop_21*tmp_qloop_39; const real_t tmp_qloop_41 = tmp_qloop_31*tmp_qloop_40; const real_t tmp_qloop_42 = -tmp_qloop_25*tmp_qloop_28 - tmp_qloop_41; const real_t tmp_qloop_43 = tmp_qloop_17*tmp_qloop_38; const real_t tmp_qloop_44 = tmp_qloop_42*tmp_qloop_43; - const real_t tmp_qloop_45 = -tmp_qloop_10*tmp_qloop_9 + tmp_qloop_7; - const real_t tmp_qloop_46 = -tmp_qloop_21*tmp_qloop_45; const real_t tmp_qloop_47 = -tmp_qloop_28; const real_t tmp_qloop_48 = tmp_qloop_22*tmp_qloop_47; const real_t tmp_qloop_49 = tmp_qloop_31*tmp_qloop_46 + tmp_qloop_48; const real_t tmp_qloop_50 = tmp_qloop_43*tmp_qloop_49; const real_t tmp_qloop_51 = tmp_qloop_18*tmp_qloop_47 + tmp_qloop_31*tmp_qloop_33; const real_t tmp_qloop_52 = tmp_qloop_43*tmp_qloop_51; - const real_t tmp_qloop_53 = -tmp_qloop_21*tmp_qloop_39; const real_t tmp_qloop_54 = tmp_qloop_25*tmp_qloop_53 + tmp_qloop_28; const real_t tmp_qloop_55 = tmp_qloop_29*tmp_qloop_47 + tmp_qloop_31*tmp_qloop_54; const real_t tmp_qloop_56 = tmp_qloop_22*tmp_qloop_46; @@ -1723,11 +1731,9 @@ void P2VectorToP1ElementwiseGradRhoByRhoDotUIcosahedralShellMap::apply_macro_3D( const real_t tmp_qloop_58 = tmp_qloop_30*tmp_qloop_47 + tmp_qloop_31*tmp_qloop_57; const real_t tmp_qloop_66 = pow(tmp_qloop_31, -2.5000000000000000)*3.0; const real_t tmp_qloop_67 = tmp_qloop_36*tmp_qloop_66; - const real_t tmp_qloop_68 = tmp_qloop_21*tmp_qloop_32; const real_t tmp_qloop_69 = tmp_qloop_18*2.0; const real_t tmp_qloop_70 = -tmp_qloop_41; const real_t tmp_qloop_71 = tmp_qloop_35*2.0; - const real_t tmp_qloop_72 = tmp_qloop_21*tmp_qloop_45; const real_t tmp_qloop_73 = -tmp_qloop_31*tmp_qloop_72; const real_t tmp_qloop_74 = -tmp_qloop_20 + tmp_qloop_27; const real_t tmp_qloop_75 = tmp_qloop_18*tmp_qloop_68 + tmp_qloop_34*2.0 + tmp_qloop_74; @@ -1789,14 +1795,7 @@ void P2VectorToP1ElementwiseGradRhoByRhoDotUIcosahedralShellMap::apply_macro_3D( const real_t tmp_qloop_132 = tmp_qloop_109 - tmp_qloop_110 - tmp_qloop_112 - tmp_qloop_131; const real_t tmp_qloop_133 = tmp_qloop_103*4.0; const real_t tmp_qloop_134 = tmp_qloop_111 - tmp_qloop_114 - tmp_qloop_133; - const real_t tmp_qloop_135 = 1.0 / (rho_dof_0*tmp_qloop_115 + rho_dof_1*tmp_qloop_127 + rho_dof_2*tmp_qloop_128 + rho_dof_3*tmp_qloop_129 + rho_dof_4*tmp_qloop_110 + rho_dof_5*tmp_qloop_113 + rho_dof_6*tmp_qloop_112 + rho_dof_7*tmp_qloop_130 + rho_dof_8*tmp_qloop_132 + rho_dof_9*tmp_qloop_134)*_data_q_w[q]; - const real_t tmp_qloop_136 = tmp_qloop_135*(1.0 - _data_q_p_0[q] - _data_q_p_1[q] - _data_q_p_2[q]); - const real_t tmp_qloop_142 = tmp_qloop_135*_data_q_p_0[q]; - const real_t tmp_qloop_144 = tmp_qloop_135*_data_q_p_2[q]; - const real_t tmp_qloop_147 = tmp_qloop_135*_data_q_p_1[q]; const real_t tmp_qloop_158 = tmp_qloop_105*tmp_qloop_111; - const real_t tmp_qloop_159 = tmp_qloop_135*tmp_qloop_158; - const real_t tmp_qloop_161 = tmp_qloop_107*tmp_qloop_135; const real_t jac_blending_0_0 = tmp_qloop_36*tmp_qloop_38; const real_t jac_blending_0_1 = tmp_qloop_18*tmp_qloop_44; const real_t jac_blending_0_2 = tmp_qloop_18*tmp_qloop_50; @@ -1814,6 +1813,13 @@ void P2VectorToP1ElementwiseGradRhoByRhoDotUIcosahedralShellMap::apply_macro_3D( const real_t tmp_qloop_64 = jac_blending_0_0*tmp_qloop_59 - jac_blending_0_0*tmp_qloop_61 + jac_blending_0_2*jac_blending_1_0*jac_blending_2_1 - jac_blending_1_0*tmp_qloop_62 + jac_blending_2_0*tmp_qloop_60 - jac_blending_2_0*tmp_qloop_63; const real_t tmp_qloop_65 = 1.0 / (tmp_qloop_64); const real_t abs_det_jac_blending = tmp_qloop_64; + const real_t tmp_qloop_135 = abs_det_jac_affine_BLUE_UP*abs_det_jac_blending*1.0 / (rho_dof_0*tmp_qloop_115 + rho_dof_1*tmp_qloop_127 + rho_dof_2*tmp_qloop_128 + rho_dof_3*tmp_qloop_129 + rho_dof_4*tmp_qloop_110 + rho_dof_5*tmp_qloop_113 + rho_dof_6*tmp_qloop_112 + rho_dof_7*tmp_qloop_130 + rho_dof_8*tmp_qloop_132 + rho_dof_9*tmp_qloop_134)*_data_q_w[q]; + const real_t tmp_qloop_136 = tmp_qloop_135*(1.0 - _data_q_p_0[q] - _data_q_p_1[q] - _data_q_p_2[q]); + const real_t tmp_qloop_142 = tmp_qloop_135*_data_q_p_0[q]; + const real_t tmp_qloop_144 = tmp_qloop_135*_data_q_p_2[q]; + const real_t tmp_qloop_147 = tmp_qloop_135*_data_q_p_1[q]; + const real_t tmp_qloop_159 = tmp_qloop_135*tmp_qloop_158; + const real_t tmp_qloop_161 = tmp_qloop_107*tmp_qloop_135; const real_t jac_blending_inv_0_0 = tmp_qloop_65*(tmp_qloop_59 - tmp_qloop_61); const real_t jac_blending_inv_0_1 = tmp_qloop_65*(jac_blending_0_2*jac_blending_2_1 - tmp_qloop_62); const real_t jac_blending_inv_0_2 = tmp_qloop_65*(tmp_qloop_60 - tmp_qloop_63); @@ -2123,70 +2129,90 @@ void P2VectorToP1ElementwiseGradRhoByRhoDotUIcosahedralShellMap::apply_macro_3D( _data_dst[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1] = elMatVec_3 + _data_dst[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1]; } } + const real_t tmp_coords_jac_0_BLUE_DOWN = 1.0 / (micro_edges_per_macro_edge_float)*1.0; + const real_t tmp_coords_jac_1_BLUE_DOWN = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_BLUE_DOWN*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0); + const real_t tmp_coords_jac_2_BLUE_DOWN = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_BLUE_DOWN*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1); + const real_t tmp_coords_jac_3_BLUE_DOWN = macro_vertex_coord_id_0comp2 + tmp_coords_jac_0_BLUE_DOWN*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2); + const real_t tmp_coords_jac_4_BLUE_DOWN = tmp_coords_jac_0_BLUE_DOWN*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0); + const real_t tmp_coords_jac_5_BLUE_DOWN = macro_vertex_coord_id_0comp0 + tmp_coords_jac_4_BLUE_DOWN; + const real_t tmp_coords_jac_6_BLUE_DOWN = tmp_coords_jac_0_BLUE_DOWN*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1); + const real_t tmp_coords_jac_7_BLUE_DOWN = macro_vertex_coord_id_0comp1 + tmp_coords_jac_6_BLUE_DOWN; + const real_t tmp_coords_jac_8_BLUE_DOWN = tmp_coords_jac_0_BLUE_DOWN*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2); + const real_t tmp_coords_jac_9_BLUE_DOWN = macro_vertex_coord_id_0comp2 + tmp_coords_jac_8_BLUE_DOWN; + const real_t p_affine_const_0_0_BLUE_DOWN = tmp_coords_jac_1_BLUE_DOWN; + const real_t p_affine_const_0_1_BLUE_DOWN = tmp_coords_jac_2_BLUE_DOWN; + const real_t p_affine_const_0_2_BLUE_DOWN = tmp_coords_jac_3_BLUE_DOWN; + const real_t p_affine_const_1_0_BLUE_DOWN = tmp_coords_jac_5_BLUE_DOWN; + const real_t p_affine_const_1_1_BLUE_DOWN = tmp_coords_jac_7_BLUE_DOWN; + const real_t p_affine_const_1_2_BLUE_DOWN = tmp_coords_jac_9_BLUE_DOWN; + const real_t p_affine_const_2_0_BLUE_DOWN = tmp_coords_jac_0_BLUE_DOWN*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0) + tmp_coords_jac_5_BLUE_DOWN; + const real_t p_affine_const_2_1_BLUE_DOWN = tmp_coords_jac_0_BLUE_DOWN*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1) + tmp_coords_jac_7_BLUE_DOWN; + const real_t p_affine_const_2_2_BLUE_DOWN = tmp_coords_jac_0_BLUE_DOWN*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2) + tmp_coords_jac_9_BLUE_DOWN; + const real_t p_affine_const_3_0_BLUE_DOWN = tmp_coords_jac_1_BLUE_DOWN + tmp_coords_jac_4_BLUE_DOWN; + const real_t p_affine_const_3_1_BLUE_DOWN = tmp_coords_jac_2_BLUE_DOWN + tmp_coords_jac_6_BLUE_DOWN; + const real_t p_affine_const_3_2_BLUE_DOWN = tmp_coords_jac_3_BLUE_DOWN + tmp_coords_jac_8_BLUE_DOWN; + const real_t jac_affine_0_0_BLUE_DOWN = -p_affine_const_0_0_BLUE_DOWN + p_affine_const_1_0_BLUE_DOWN; + const real_t jac_affine_0_1_BLUE_DOWN = -p_affine_const_0_0_BLUE_DOWN + p_affine_const_2_0_BLUE_DOWN; + const real_t jac_affine_0_2_BLUE_DOWN = -p_affine_const_0_0_BLUE_DOWN + p_affine_const_3_0_BLUE_DOWN; + const real_t jac_affine_1_0_BLUE_DOWN = -p_affine_const_0_1_BLUE_DOWN + p_affine_const_1_1_BLUE_DOWN; + const real_t jac_affine_1_1_BLUE_DOWN = -p_affine_const_0_1_BLUE_DOWN + p_affine_const_2_1_BLUE_DOWN; + const real_t tmp_coords_jac_14_BLUE_DOWN = jac_affine_0_2_BLUE_DOWN*jac_affine_1_1_BLUE_DOWN; + const real_t jac_affine_1_2_BLUE_DOWN = -p_affine_const_0_1_BLUE_DOWN + p_affine_const_3_1_BLUE_DOWN; + const real_t tmp_coords_jac_12_BLUE_DOWN = jac_affine_0_1_BLUE_DOWN*jac_affine_1_2_BLUE_DOWN; + const real_t jac_affine_2_0_BLUE_DOWN = -p_affine_const_0_2_BLUE_DOWN + p_affine_const_1_2_BLUE_DOWN; + const real_t jac_affine_2_1_BLUE_DOWN = -p_affine_const_0_2_BLUE_DOWN + p_affine_const_2_2_BLUE_DOWN; + const real_t tmp_coords_jac_11_BLUE_DOWN = jac_affine_1_2_BLUE_DOWN*jac_affine_2_1_BLUE_DOWN; + const real_t jac_affine_2_2_BLUE_DOWN = -p_affine_const_0_2_BLUE_DOWN + p_affine_const_3_2_BLUE_DOWN; + const real_t tmp_coords_jac_10_BLUE_DOWN = jac_affine_1_1_BLUE_DOWN*jac_affine_2_2_BLUE_DOWN; + const real_t tmp_coords_jac_13_BLUE_DOWN = jac_affine_0_1_BLUE_DOWN*jac_affine_2_2_BLUE_DOWN; + const real_t tmp_coords_jac_15_BLUE_DOWN = jac_affine_0_0_BLUE_DOWN*tmp_coords_jac_10_BLUE_DOWN - jac_affine_0_0_BLUE_DOWN*tmp_coords_jac_11_BLUE_DOWN + jac_affine_0_2_BLUE_DOWN*jac_affine_1_0_BLUE_DOWN*jac_affine_2_1_BLUE_DOWN - jac_affine_1_0_BLUE_DOWN*tmp_coords_jac_13_BLUE_DOWN + jac_affine_2_0_BLUE_DOWN*tmp_coords_jac_12_BLUE_DOWN - jac_affine_2_0_BLUE_DOWN*tmp_coords_jac_14_BLUE_DOWN; + const real_t tmp_coords_jac_16_BLUE_DOWN = 1.0 / (tmp_coords_jac_15_BLUE_DOWN); + const real_t jac_affine_inv_0_0_BLUE_DOWN = tmp_coords_jac_16_BLUE_DOWN*(tmp_coords_jac_10_BLUE_DOWN - tmp_coords_jac_11_BLUE_DOWN); + const real_t jac_affine_inv_0_1_BLUE_DOWN = tmp_coords_jac_16_BLUE_DOWN*(jac_affine_0_2_BLUE_DOWN*jac_affine_2_1_BLUE_DOWN - tmp_coords_jac_13_BLUE_DOWN); + const real_t jac_affine_inv_0_2_BLUE_DOWN = tmp_coords_jac_16_BLUE_DOWN*(tmp_coords_jac_12_BLUE_DOWN - tmp_coords_jac_14_BLUE_DOWN); + const real_t jac_affine_inv_1_0_BLUE_DOWN = tmp_coords_jac_16_BLUE_DOWN*(-jac_affine_1_0_BLUE_DOWN*jac_affine_2_2_BLUE_DOWN + jac_affine_1_2_BLUE_DOWN*jac_affine_2_0_BLUE_DOWN); + const real_t jac_affine_inv_1_1_BLUE_DOWN = tmp_coords_jac_16_BLUE_DOWN*(jac_affine_0_0_BLUE_DOWN*jac_affine_2_2_BLUE_DOWN - jac_affine_0_2_BLUE_DOWN*jac_affine_2_0_BLUE_DOWN); + const real_t jac_affine_inv_1_2_BLUE_DOWN = tmp_coords_jac_16_BLUE_DOWN*(-jac_affine_0_0_BLUE_DOWN*jac_affine_1_2_BLUE_DOWN + jac_affine_0_2_BLUE_DOWN*jac_affine_1_0_BLUE_DOWN); + const real_t jac_affine_inv_2_0_BLUE_DOWN = tmp_coords_jac_16_BLUE_DOWN*(jac_affine_1_0_BLUE_DOWN*jac_affine_2_1_BLUE_DOWN - jac_affine_1_1_BLUE_DOWN*jac_affine_2_0_BLUE_DOWN); + const real_t jac_affine_inv_2_1_BLUE_DOWN = tmp_coords_jac_16_BLUE_DOWN*(-jac_affine_0_0_BLUE_DOWN*jac_affine_2_1_BLUE_DOWN + jac_affine_0_1_BLUE_DOWN*jac_affine_2_0_BLUE_DOWN); + const real_t jac_affine_inv_2_2_BLUE_DOWN = tmp_coords_jac_16_BLUE_DOWN*(jac_affine_0_0_BLUE_DOWN*jac_affine_1_1_BLUE_DOWN - jac_affine_0_1_BLUE_DOWN*jac_affine_1_0_BLUE_DOWN); + const real_t abs_det_jac_affine_BLUE_DOWN = abs(tmp_coords_jac_15_BLUE_DOWN); { /* CellType.BLUE_DOWN */ - const real_t tmp_coords_jac_0_BLUE_DOWN = 1.0 / (micro_edges_per_macro_edge_float)*1.0; - const real_t tmp_coords_jac_1_BLUE_DOWN = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_BLUE_DOWN*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0); - const real_t tmp_coords_jac_2_BLUE_DOWN = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_BLUE_DOWN*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1); - const real_t tmp_coords_jac_3_BLUE_DOWN = macro_vertex_coord_id_0comp2 + tmp_coords_jac_0_BLUE_DOWN*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2); - const real_t tmp_coords_jac_4_BLUE_DOWN = tmp_coords_jac_0_BLUE_DOWN*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0); - const real_t tmp_coords_jac_5_BLUE_DOWN = macro_vertex_coord_id_0comp0 + tmp_coords_jac_4_BLUE_DOWN; - const real_t tmp_coords_jac_6_BLUE_DOWN = tmp_coords_jac_0_BLUE_DOWN*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1); - const real_t tmp_coords_jac_7_BLUE_DOWN = macro_vertex_coord_id_0comp1 + tmp_coords_jac_6_BLUE_DOWN; - const real_t tmp_coords_jac_8_BLUE_DOWN = tmp_coords_jac_0_BLUE_DOWN*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2); - const real_t tmp_coords_jac_9_BLUE_DOWN = macro_vertex_coord_id_0comp2 + tmp_coords_jac_8_BLUE_DOWN; - const real_t p_affine_const_0_0_BLUE_DOWN = tmp_coords_jac_1_BLUE_DOWN; - const real_t p_affine_const_0_1_BLUE_DOWN = tmp_coords_jac_2_BLUE_DOWN; - const real_t p_affine_const_0_2_BLUE_DOWN = tmp_coords_jac_3_BLUE_DOWN; - const real_t p_affine_const_1_0_BLUE_DOWN = tmp_coords_jac_5_BLUE_DOWN; - const real_t p_affine_const_1_1_BLUE_DOWN = tmp_coords_jac_7_BLUE_DOWN; - const real_t p_affine_const_1_2_BLUE_DOWN = tmp_coords_jac_9_BLUE_DOWN; - const real_t p_affine_const_2_0_BLUE_DOWN = tmp_coords_jac_0_BLUE_DOWN*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0) + tmp_coords_jac_5_BLUE_DOWN; - const real_t p_affine_const_2_1_BLUE_DOWN = tmp_coords_jac_0_BLUE_DOWN*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1) + tmp_coords_jac_7_BLUE_DOWN; - const real_t p_affine_const_2_2_BLUE_DOWN = tmp_coords_jac_0_BLUE_DOWN*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2) + tmp_coords_jac_9_BLUE_DOWN; - const real_t p_affine_const_3_0_BLUE_DOWN = tmp_coords_jac_1_BLUE_DOWN + tmp_coords_jac_4_BLUE_DOWN; - const real_t p_affine_const_3_1_BLUE_DOWN = tmp_coords_jac_2_BLUE_DOWN + tmp_coords_jac_6_BLUE_DOWN; - const real_t p_affine_const_3_2_BLUE_DOWN = tmp_coords_jac_3_BLUE_DOWN + tmp_coords_jac_8_BLUE_DOWN; - const real_t jac_affine_0_0_BLUE_DOWN = -p_affine_const_0_0_BLUE_DOWN + p_affine_const_1_0_BLUE_DOWN; - const real_t jac_affine_0_1_BLUE_DOWN = -p_affine_const_0_0_BLUE_DOWN + p_affine_const_2_0_BLUE_DOWN; - const real_t jac_affine_0_2_BLUE_DOWN = -p_affine_const_0_0_BLUE_DOWN + p_affine_const_3_0_BLUE_DOWN; - const real_t jac_affine_1_0_BLUE_DOWN = -p_affine_const_0_1_BLUE_DOWN + p_affine_const_1_1_BLUE_DOWN; - const real_t jac_affine_1_1_BLUE_DOWN = -p_affine_const_0_1_BLUE_DOWN + p_affine_const_2_1_BLUE_DOWN; - const real_t tmp_coords_jac_14_BLUE_DOWN = jac_affine_0_2_BLUE_DOWN*jac_affine_1_1_BLUE_DOWN; - const real_t jac_affine_1_2_BLUE_DOWN = -p_affine_const_0_1_BLUE_DOWN + p_affine_const_3_1_BLUE_DOWN; - const real_t tmp_coords_jac_12_BLUE_DOWN = jac_affine_0_1_BLUE_DOWN*jac_affine_1_2_BLUE_DOWN; - const real_t jac_affine_2_0_BLUE_DOWN = -p_affine_const_0_2_BLUE_DOWN + p_affine_const_1_2_BLUE_DOWN; - const real_t jac_affine_2_1_BLUE_DOWN = -p_affine_const_0_2_BLUE_DOWN + p_affine_const_2_2_BLUE_DOWN; - const real_t tmp_coords_jac_11_BLUE_DOWN = jac_affine_1_2_BLUE_DOWN*jac_affine_2_1_BLUE_DOWN; - const real_t jac_affine_2_2_BLUE_DOWN = -p_affine_const_0_2_BLUE_DOWN + p_affine_const_3_2_BLUE_DOWN; - const real_t tmp_coords_jac_10_BLUE_DOWN = jac_affine_1_1_BLUE_DOWN*jac_affine_2_2_BLUE_DOWN; - const real_t tmp_coords_jac_13_BLUE_DOWN = jac_affine_0_1_BLUE_DOWN*jac_affine_2_2_BLUE_DOWN; - const real_t tmp_coords_jac_15_BLUE_DOWN = 1.0 / (jac_affine_0_0_BLUE_DOWN*tmp_coords_jac_10_BLUE_DOWN - jac_affine_0_0_BLUE_DOWN*tmp_coords_jac_11_BLUE_DOWN + jac_affine_0_2_BLUE_DOWN*jac_affine_1_0_BLUE_DOWN*jac_affine_2_1_BLUE_DOWN - jac_affine_1_0_BLUE_DOWN*tmp_coords_jac_13_BLUE_DOWN + jac_affine_2_0_BLUE_DOWN*tmp_coords_jac_12_BLUE_DOWN - jac_affine_2_0_BLUE_DOWN*tmp_coords_jac_14_BLUE_DOWN); - const real_t jac_affine_inv_0_0_BLUE_DOWN = tmp_coords_jac_15_BLUE_DOWN*(tmp_coords_jac_10_BLUE_DOWN - tmp_coords_jac_11_BLUE_DOWN); - const real_t jac_affine_inv_0_1_BLUE_DOWN = tmp_coords_jac_15_BLUE_DOWN*(jac_affine_0_2_BLUE_DOWN*jac_affine_2_1_BLUE_DOWN - tmp_coords_jac_13_BLUE_DOWN); - const real_t jac_affine_inv_0_2_BLUE_DOWN = tmp_coords_jac_15_BLUE_DOWN*(tmp_coords_jac_12_BLUE_DOWN - tmp_coords_jac_14_BLUE_DOWN); - const real_t jac_affine_inv_1_0_BLUE_DOWN = tmp_coords_jac_15_BLUE_DOWN*(-jac_affine_1_0_BLUE_DOWN*jac_affine_2_2_BLUE_DOWN + jac_affine_1_2_BLUE_DOWN*jac_affine_2_0_BLUE_DOWN); - const real_t jac_affine_inv_1_1_BLUE_DOWN = tmp_coords_jac_15_BLUE_DOWN*(jac_affine_0_0_BLUE_DOWN*jac_affine_2_2_BLUE_DOWN - jac_affine_0_2_BLUE_DOWN*jac_affine_2_0_BLUE_DOWN); - const real_t jac_affine_inv_1_2_BLUE_DOWN = tmp_coords_jac_15_BLUE_DOWN*(-jac_affine_0_0_BLUE_DOWN*jac_affine_1_2_BLUE_DOWN + jac_affine_0_2_BLUE_DOWN*jac_affine_1_0_BLUE_DOWN); - const real_t jac_affine_inv_2_0_BLUE_DOWN = tmp_coords_jac_15_BLUE_DOWN*(jac_affine_1_0_BLUE_DOWN*jac_affine_2_1_BLUE_DOWN - jac_affine_1_1_BLUE_DOWN*jac_affine_2_0_BLUE_DOWN); - const real_t jac_affine_inv_2_1_BLUE_DOWN = tmp_coords_jac_15_BLUE_DOWN*(-jac_affine_0_0_BLUE_DOWN*jac_affine_2_1_BLUE_DOWN + jac_affine_0_1_BLUE_DOWN*jac_affine_2_0_BLUE_DOWN); - const real_t jac_affine_inv_2_2_BLUE_DOWN = tmp_coords_jac_15_BLUE_DOWN*(jac_affine_0_0_BLUE_DOWN*jac_affine_1_1_BLUE_DOWN - jac_affine_0_1_BLUE_DOWN*jac_affine_1_0_BLUE_DOWN); for (int64_t ctr_2 = 0; ctr_2 < micro_edges_per_macro_edge; ctr_2 += 1) for (int64_t ctr_1 = 0; ctr_1 < -ctr_2 + micro_edges_per_macro_edge; ctr_1 += 1) for (int64_t ctr_0 = 0; ctr_0 < -ctr_1 - ctr_2 + micro_edges_per_macro_edge - 1; ctr_0 += 1) { - const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*((real_t)(ctr_1 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*((real_t)(ctr_2)); - const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*((real_t)(ctr_1 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*((real_t)(ctr_2)); - const real_t p_affine_0_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*((real_t)(ctr_1 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*((real_t)(ctr_2)); - const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*((real_t)(ctr_1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*((real_t)(ctr_2 + 1)); - const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*((real_t)(ctr_1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*((real_t)(ctr_2 + 1)); - const real_t p_affine_1_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*((real_t)(ctr_1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*((real_t)(ctr_2 + 1)); - const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*((real_t)(ctr_1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*((real_t)(ctr_2 + 1)); - const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*((real_t)(ctr_1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*((real_t)(ctr_2 + 1)); - const real_t p_affine_2_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*((real_t)(ctr_1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*((real_t)(ctr_2 + 1)); - const real_t p_affine_3_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*((real_t)(ctr_1 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*((real_t)(ctr_2 + 1)); - const real_t p_affine_3_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*((real_t)(ctr_1 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*((real_t)(ctr_2 + 1)); - const real_t p_affine_3_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*((real_t)(ctr_1 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*((real_t)(ctr_2 + 1)); + + const int64_t phantom_ctr_0 = ctr_0; + real_t _data_float_loop_ctr_array_dim_0[4]; + _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0; + _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1; + _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2; + _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3; + real_t _data_float_loop_ctr_array_dim_1[4]; + _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1; + real_t _data_float_loop_ctr_array_dim_2[4]; + _data_float_loop_ctr_array_dim_2[0] = (real_t) ctr_2; + _data_float_loop_ctr_array_dim_2[1] = (real_t) ctr_2; + _data_float_loop_ctr_array_dim_2[2] = (real_t) ctr_2; + _data_float_loop_ctr_array_dim_2[3] = (real_t) ctr_2; + + const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]; + const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]; + const real_t p_affine_0_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]; + const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_1_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_2_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_3_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_3_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_3_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0; const real_t src_dof_0 = _data_src_vertex_0[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))]; const real_t src_dof_1 = _data_src_vertex_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))]; const real_t src_dof_2 = _data_src_vertex_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1]; @@ -2349,28 +2375,8 @@ void P2VectorToP1ElementwiseGradRhoByRhoDotUIcosahedralShellMap::apply_macro_3D( real_t q_acc_3_29 = 0.0; for (int64_t q = 0; q < 5; q += 1) { - const real_t tmp_qloop_0 = rayVertex_1 - refVertex_1; - const real_t tmp_qloop_1 = -rayVertex_0; - const real_t tmp_qloop_2 = -forVertex_0 - tmp_qloop_1; - const real_t tmp_qloop_3 = rayVertex_2 - thrVertex_2; - const real_t tmp_qloop_4 = tmp_qloop_2*tmp_qloop_3; - const real_t tmp_qloop_5 = rayVertex_2 - refVertex_2; - const real_t tmp_qloop_6 = rayVertex_1 - thrVertex_1; - const real_t tmp_qloop_7 = tmp_qloop_2*tmp_qloop_6; - const real_t tmp_qloop_8 = -rayVertex_1; - const real_t tmp_qloop_9 = -forVertex_1 - tmp_qloop_8; - const real_t tmp_qloop_10 = rayVertex_0 - thrVertex_0; - const real_t tmp_qloop_11 = rayVertex_0 - refVertex_0; - const real_t tmp_qloop_12 = -rayVertex_2; - const real_t tmp_qloop_13 = -forVertex_2 - tmp_qloop_12; - const real_t tmp_qloop_14 = tmp_qloop_3*tmp_qloop_9; - const real_t tmp_qloop_15 = tmp_qloop_10*tmp_qloop_13; - const real_t tmp_qloop_16 = -tmp_qloop_0*tmp_qloop_15 + tmp_qloop_0*tmp_qloop_4 + tmp_qloop_10*tmp_qloop_5*tmp_qloop_9 + tmp_qloop_11*tmp_qloop_13*tmp_qloop_6 - tmp_qloop_11*tmp_qloop_14 - tmp_qloop_5*tmp_qloop_7; - const real_t tmp_qloop_17 = 1.0 / (tmp_qloop_16); const real_t tmp_qloop_18 = p_affine_0_0 + (-p_affine_0_0 + p_affine_1_0)*_data_q_p_0[q] + (-p_affine_0_0 + p_affine_2_0)*_data_q_p_1[q] + (-p_affine_0_0 + p_affine_3_0)*_data_q_p_2[q]; const real_t tmp_qloop_19 = (tmp_qloop_18*tmp_qloop_18); - const real_t tmp_qloop_20 = radRayVertex*tmp_qloop_16; - const real_t tmp_qloop_21 = radRayVertex - radRefVertex; const real_t tmp_qloop_22 = p_affine_0_2 + (-p_affine_0_2 + p_affine_1_2)*_data_q_p_0[q] + (-p_affine_0_2 + p_affine_2_2)*_data_q_p_1[q] + (-p_affine_0_2 + p_affine_3_2)*_data_q_p_2[q]; const real_t tmp_qloop_23 = -tmp_qloop_12 - tmp_qloop_22; const real_t tmp_qloop_24 = -tmp_qloop_1 - tmp_qloop_18; @@ -2381,28 +2387,21 @@ void P2VectorToP1ElementwiseGradRhoByRhoDotUIcosahedralShellMap::apply_macro_3D( const real_t tmp_qloop_29 = (tmp_qloop_25*tmp_qloop_25); const real_t tmp_qloop_30 = (tmp_qloop_22*tmp_qloop_22); const real_t tmp_qloop_31 = tmp_qloop_19 + tmp_qloop_29 + tmp_qloop_30; - const real_t tmp_qloop_32 = -tmp_qloop_13*tmp_qloop_6 + tmp_qloop_14; - const real_t tmp_qloop_33 = -tmp_qloop_21*tmp_qloop_32; const real_t tmp_qloop_34 = tmp_qloop_18*tmp_qloop_33; const real_t tmp_qloop_35 = tmp_qloop_28 + tmp_qloop_34; const real_t tmp_qloop_36 = tmp_qloop_17*(-tmp_qloop_19*tmp_qloop_28 + tmp_qloop_31*tmp_qloop_35); const real_t tmp_qloop_37 = pow(tmp_qloop_31, -1.5000000000000000); const real_t tmp_qloop_38 = tmp_qloop_37*1.0; - const real_t tmp_qloop_39 = tmp_qloop_10*tmp_qloop_13 - tmp_qloop_4; - const real_t tmp_qloop_40 = tmp_qloop_21*tmp_qloop_39; const real_t tmp_qloop_41 = tmp_qloop_31*tmp_qloop_40; const real_t tmp_qloop_42 = -tmp_qloop_25*tmp_qloop_28 - tmp_qloop_41; const real_t tmp_qloop_43 = tmp_qloop_17*tmp_qloop_38; const real_t tmp_qloop_44 = tmp_qloop_42*tmp_qloop_43; - const real_t tmp_qloop_45 = -tmp_qloop_10*tmp_qloop_9 + tmp_qloop_7; - const real_t tmp_qloop_46 = -tmp_qloop_21*tmp_qloop_45; const real_t tmp_qloop_47 = -tmp_qloop_28; const real_t tmp_qloop_48 = tmp_qloop_22*tmp_qloop_47; const real_t tmp_qloop_49 = tmp_qloop_31*tmp_qloop_46 + tmp_qloop_48; const real_t tmp_qloop_50 = tmp_qloop_43*tmp_qloop_49; const real_t tmp_qloop_51 = tmp_qloop_18*tmp_qloop_47 + tmp_qloop_31*tmp_qloop_33; const real_t tmp_qloop_52 = tmp_qloop_43*tmp_qloop_51; - const real_t tmp_qloop_53 = -tmp_qloop_21*tmp_qloop_39; const real_t tmp_qloop_54 = tmp_qloop_25*tmp_qloop_53 + tmp_qloop_28; const real_t tmp_qloop_55 = tmp_qloop_29*tmp_qloop_47 + tmp_qloop_31*tmp_qloop_54; const real_t tmp_qloop_56 = tmp_qloop_22*tmp_qloop_46; @@ -2410,11 +2409,9 @@ void P2VectorToP1ElementwiseGradRhoByRhoDotUIcosahedralShellMap::apply_macro_3D( const real_t tmp_qloop_58 = tmp_qloop_30*tmp_qloop_47 + tmp_qloop_31*tmp_qloop_57; const real_t tmp_qloop_66 = pow(tmp_qloop_31, -2.5000000000000000)*3.0; const real_t tmp_qloop_67 = tmp_qloop_36*tmp_qloop_66; - const real_t tmp_qloop_68 = tmp_qloop_21*tmp_qloop_32; const real_t tmp_qloop_69 = tmp_qloop_18*2.0; const real_t tmp_qloop_70 = -tmp_qloop_41; const real_t tmp_qloop_71 = tmp_qloop_35*2.0; - const real_t tmp_qloop_72 = tmp_qloop_21*tmp_qloop_45; const real_t tmp_qloop_73 = -tmp_qloop_31*tmp_qloop_72; const real_t tmp_qloop_74 = -tmp_qloop_20 + tmp_qloop_27; const real_t tmp_qloop_75 = tmp_qloop_18*tmp_qloop_68 + tmp_qloop_34*2.0 + tmp_qloop_74; @@ -2476,14 +2473,7 @@ void P2VectorToP1ElementwiseGradRhoByRhoDotUIcosahedralShellMap::apply_macro_3D( const real_t tmp_qloop_132 = tmp_qloop_109 - tmp_qloop_110 - tmp_qloop_112 - tmp_qloop_131; const real_t tmp_qloop_133 = tmp_qloop_103*4.0; const real_t tmp_qloop_134 = tmp_qloop_111 - tmp_qloop_114 - tmp_qloop_133; - const real_t tmp_qloop_135 = 1.0 / (rho_dof_0*tmp_qloop_115 + rho_dof_1*tmp_qloop_127 + rho_dof_2*tmp_qloop_128 + rho_dof_3*tmp_qloop_129 + rho_dof_4*tmp_qloop_110 + rho_dof_5*tmp_qloop_113 + rho_dof_6*tmp_qloop_112 + rho_dof_7*tmp_qloop_130 + rho_dof_8*tmp_qloop_132 + rho_dof_9*tmp_qloop_134)*_data_q_w[q]; - const real_t tmp_qloop_136 = tmp_qloop_135*(1.0 - _data_q_p_0[q] - _data_q_p_1[q] - _data_q_p_2[q]); - const real_t tmp_qloop_142 = tmp_qloop_135*_data_q_p_0[q]; - const real_t tmp_qloop_144 = tmp_qloop_135*_data_q_p_2[q]; - const real_t tmp_qloop_147 = tmp_qloop_135*_data_q_p_1[q]; const real_t tmp_qloop_158 = tmp_qloop_105*tmp_qloop_111; - const real_t tmp_qloop_159 = tmp_qloop_135*tmp_qloop_158; - const real_t tmp_qloop_161 = tmp_qloop_107*tmp_qloop_135; const real_t jac_blending_0_0 = tmp_qloop_36*tmp_qloop_38; const real_t jac_blending_0_1 = tmp_qloop_18*tmp_qloop_44; const real_t jac_blending_0_2 = tmp_qloop_18*tmp_qloop_50; @@ -2501,6 +2491,13 @@ void P2VectorToP1ElementwiseGradRhoByRhoDotUIcosahedralShellMap::apply_macro_3D( const real_t tmp_qloop_64 = jac_blending_0_0*tmp_qloop_59 - jac_blending_0_0*tmp_qloop_61 + jac_blending_0_2*jac_blending_1_0*jac_blending_2_1 - jac_blending_1_0*tmp_qloop_62 + jac_blending_2_0*tmp_qloop_60 - jac_blending_2_0*tmp_qloop_63; const real_t tmp_qloop_65 = 1.0 / (tmp_qloop_64); const real_t abs_det_jac_blending = tmp_qloop_64; + const real_t tmp_qloop_135 = abs_det_jac_affine_BLUE_DOWN*abs_det_jac_blending*1.0 / (rho_dof_0*tmp_qloop_115 + rho_dof_1*tmp_qloop_127 + rho_dof_2*tmp_qloop_128 + rho_dof_3*tmp_qloop_129 + rho_dof_4*tmp_qloop_110 + rho_dof_5*tmp_qloop_113 + rho_dof_6*tmp_qloop_112 + rho_dof_7*tmp_qloop_130 + rho_dof_8*tmp_qloop_132 + rho_dof_9*tmp_qloop_134)*_data_q_w[q]; + const real_t tmp_qloop_136 = tmp_qloop_135*(1.0 - _data_q_p_0[q] - _data_q_p_1[q] - _data_q_p_2[q]); + const real_t tmp_qloop_142 = tmp_qloop_135*_data_q_p_0[q]; + const real_t tmp_qloop_144 = tmp_qloop_135*_data_q_p_2[q]; + const real_t tmp_qloop_147 = tmp_qloop_135*_data_q_p_1[q]; + const real_t tmp_qloop_159 = tmp_qloop_135*tmp_qloop_158; + const real_t tmp_qloop_161 = tmp_qloop_107*tmp_qloop_135; const real_t jac_blending_inv_0_0 = tmp_qloop_65*(tmp_qloop_59 - tmp_qloop_61); const real_t jac_blending_inv_0_1 = tmp_qloop_65*(jac_blending_0_2*jac_blending_2_1 - tmp_qloop_62); const real_t jac_blending_inv_0_2 = tmp_qloop_65*(tmp_qloop_60 - tmp_qloop_63); @@ -2810,67 +2807,87 @@ void P2VectorToP1ElementwiseGradRhoByRhoDotUIcosahedralShellMap::apply_macro_3D( _data_dst[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))] = elMatVec_3 + _data_dst[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))]; } } + const real_t tmp_coords_jac_0_GREEN_UP = 1.0 / (micro_edges_per_macro_edge_float)*1.0; + const real_t tmp_coords_jac_1_GREEN_UP = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_GREEN_UP*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0); + const real_t tmp_coords_jac_2_GREEN_UP = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_GREEN_UP*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1); + const real_t tmp_coords_jac_3_GREEN_UP = macro_vertex_coord_id_0comp2 + tmp_coords_jac_0_GREEN_UP*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2); + const real_t tmp_coords_jac_4_GREEN_UP = tmp_coords_jac_0_GREEN_UP*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0); + const real_t tmp_coords_jac_5_GREEN_UP = tmp_coords_jac_0_GREEN_UP*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1); + const real_t tmp_coords_jac_6_GREEN_UP = tmp_coords_jac_0_GREEN_UP*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2); + const real_t p_affine_const_0_0_GREEN_UP = tmp_coords_jac_1_GREEN_UP; + const real_t p_affine_const_0_1_GREEN_UP = tmp_coords_jac_2_GREEN_UP; + const real_t p_affine_const_0_2_GREEN_UP = tmp_coords_jac_3_GREEN_UP; + const real_t p_affine_const_1_0_GREEN_UP = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_GREEN_UP*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0); + const real_t p_affine_const_1_1_GREEN_UP = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_GREEN_UP*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1); + const real_t p_affine_const_1_2_GREEN_UP = macro_vertex_coord_id_0comp2 + tmp_coords_jac_0_GREEN_UP*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2); + const real_t p_affine_const_2_0_GREEN_UP = macro_vertex_coord_id_0comp0 + tmp_coords_jac_4_GREEN_UP; + const real_t p_affine_const_2_1_GREEN_UP = macro_vertex_coord_id_0comp1 + tmp_coords_jac_5_GREEN_UP; + const real_t p_affine_const_2_2_GREEN_UP = macro_vertex_coord_id_0comp2 + tmp_coords_jac_6_GREEN_UP; + const real_t p_affine_const_3_0_GREEN_UP = tmp_coords_jac_1_GREEN_UP + tmp_coords_jac_4_GREEN_UP; + const real_t p_affine_const_3_1_GREEN_UP = tmp_coords_jac_2_GREEN_UP + tmp_coords_jac_5_GREEN_UP; + const real_t p_affine_const_3_2_GREEN_UP = tmp_coords_jac_3_GREEN_UP + tmp_coords_jac_6_GREEN_UP; + const real_t jac_affine_0_0_GREEN_UP = -p_affine_const_0_0_GREEN_UP + p_affine_const_1_0_GREEN_UP; + const real_t jac_affine_0_1_GREEN_UP = -p_affine_const_0_0_GREEN_UP + p_affine_const_2_0_GREEN_UP; + const real_t jac_affine_0_2_GREEN_UP = -p_affine_const_0_0_GREEN_UP + p_affine_const_3_0_GREEN_UP; + const real_t jac_affine_1_0_GREEN_UP = -p_affine_const_0_1_GREEN_UP + p_affine_const_1_1_GREEN_UP; + const real_t jac_affine_1_1_GREEN_UP = -p_affine_const_0_1_GREEN_UP + p_affine_const_2_1_GREEN_UP; + const real_t tmp_coords_jac_11_GREEN_UP = jac_affine_0_2_GREEN_UP*jac_affine_1_1_GREEN_UP; + const real_t jac_affine_1_2_GREEN_UP = -p_affine_const_0_1_GREEN_UP + p_affine_const_3_1_GREEN_UP; + const real_t tmp_coords_jac_9_GREEN_UP = jac_affine_0_1_GREEN_UP*jac_affine_1_2_GREEN_UP; + const real_t jac_affine_2_0_GREEN_UP = -p_affine_const_0_2_GREEN_UP + p_affine_const_1_2_GREEN_UP; + const real_t jac_affine_2_1_GREEN_UP = -p_affine_const_0_2_GREEN_UP + p_affine_const_2_2_GREEN_UP; + const real_t tmp_coords_jac_8_GREEN_UP = jac_affine_1_2_GREEN_UP*jac_affine_2_1_GREEN_UP; + const real_t jac_affine_2_2_GREEN_UP = -p_affine_const_0_2_GREEN_UP + p_affine_const_3_2_GREEN_UP; + const real_t tmp_coords_jac_7_GREEN_UP = jac_affine_1_1_GREEN_UP*jac_affine_2_2_GREEN_UP; + const real_t tmp_coords_jac_10_GREEN_UP = jac_affine_0_1_GREEN_UP*jac_affine_2_2_GREEN_UP; + const real_t tmp_coords_jac_12_GREEN_UP = jac_affine_0_0_GREEN_UP*tmp_coords_jac_7_GREEN_UP - jac_affine_0_0_GREEN_UP*tmp_coords_jac_8_GREEN_UP + jac_affine_0_2_GREEN_UP*jac_affine_1_0_GREEN_UP*jac_affine_2_1_GREEN_UP - jac_affine_1_0_GREEN_UP*tmp_coords_jac_10_GREEN_UP - jac_affine_2_0_GREEN_UP*tmp_coords_jac_11_GREEN_UP + jac_affine_2_0_GREEN_UP*tmp_coords_jac_9_GREEN_UP; + const real_t tmp_coords_jac_13_GREEN_UP = 1.0 / (tmp_coords_jac_12_GREEN_UP); + const real_t jac_affine_inv_0_0_GREEN_UP = tmp_coords_jac_13_GREEN_UP*(tmp_coords_jac_7_GREEN_UP - tmp_coords_jac_8_GREEN_UP); + const real_t jac_affine_inv_0_1_GREEN_UP = tmp_coords_jac_13_GREEN_UP*(jac_affine_0_2_GREEN_UP*jac_affine_2_1_GREEN_UP - tmp_coords_jac_10_GREEN_UP); + const real_t jac_affine_inv_0_2_GREEN_UP = tmp_coords_jac_13_GREEN_UP*(-tmp_coords_jac_11_GREEN_UP + tmp_coords_jac_9_GREEN_UP); + const real_t jac_affine_inv_1_0_GREEN_UP = tmp_coords_jac_13_GREEN_UP*(-jac_affine_1_0_GREEN_UP*jac_affine_2_2_GREEN_UP + jac_affine_1_2_GREEN_UP*jac_affine_2_0_GREEN_UP); + const real_t jac_affine_inv_1_1_GREEN_UP = tmp_coords_jac_13_GREEN_UP*(jac_affine_0_0_GREEN_UP*jac_affine_2_2_GREEN_UP - jac_affine_0_2_GREEN_UP*jac_affine_2_0_GREEN_UP); + const real_t jac_affine_inv_1_2_GREEN_UP = tmp_coords_jac_13_GREEN_UP*(-jac_affine_0_0_GREEN_UP*jac_affine_1_2_GREEN_UP + jac_affine_0_2_GREEN_UP*jac_affine_1_0_GREEN_UP); + const real_t jac_affine_inv_2_0_GREEN_UP = tmp_coords_jac_13_GREEN_UP*(jac_affine_1_0_GREEN_UP*jac_affine_2_1_GREEN_UP - jac_affine_1_1_GREEN_UP*jac_affine_2_0_GREEN_UP); + const real_t jac_affine_inv_2_1_GREEN_UP = tmp_coords_jac_13_GREEN_UP*(-jac_affine_0_0_GREEN_UP*jac_affine_2_1_GREEN_UP + jac_affine_0_1_GREEN_UP*jac_affine_2_0_GREEN_UP); + const real_t jac_affine_inv_2_2_GREEN_UP = tmp_coords_jac_13_GREEN_UP*(jac_affine_0_0_GREEN_UP*jac_affine_1_1_GREEN_UP - jac_affine_0_1_GREEN_UP*jac_affine_1_0_GREEN_UP); + const real_t abs_det_jac_affine_GREEN_UP = abs(tmp_coords_jac_12_GREEN_UP); { /* CellType.GREEN_UP */ - const real_t tmp_coords_jac_0_GREEN_UP = 1.0 / (micro_edges_per_macro_edge_float)*1.0; - const real_t tmp_coords_jac_1_GREEN_UP = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_GREEN_UP*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0); - const real_t tmp_coords_jac_2_GREEN_UP = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_GREEN_UP*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1); - const real_t tmp_coords_jac_3_GREEN_UP = macro_vertex_coord_id_0comp2 + tmp_coords_jac_0_GREEN_UP*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2); - const real_t tmp_coords_jac_4_GREEN_UP = tmp_coords_jac_0_GREEN_UP*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0); - const real_t tmp_coords_jac_5_GREEN_UP = tmp_coords_jac_0_GREEN_UP*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1); - const real_t tmp_coords_jac_6_GREEN_UP = tmp_coords_jac_0_GREEN_UP*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2); - const real_t p_affine_const_0_0_GREEN_UP = tmp_coords_jac_1_GREEN_UP; - const real_t p_affine_const_0_1_GREEN_UP = tmp_coords_jac_2_GREEN_UP; - const real_t p_affine_const_0_2_GREEN_UP = tmp_coords_jac_3_GREEN_UP; - const real_t p_affine_const_1_0_GREEN_UP = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_GREEN_UP*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0); - const real_t p_affine_const_1_1_GREEN_UP = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_GREEN_UP*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1); - const real_t p_affine_const_1_2_GREEN_UP = macro_vertex_coord_id_0comp2 + tmp_coords_jac_0_GREEN_UP*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2); - const real_t p_affine_const_2_0_GREEN_UP = macro_vertex_coord_id_0comp0 + tmp_coords_jac_4_GREEN_UP; - const real_t p_affine_const_2_1_GREEN_UP = macro_vertex_coord_id_0comp1 + tmp_coords_jac_5_GREEN_UP; - const real_t p_affine_const_2_2_GREEN_UP = macro_vertex_coord_id_0comp2 + tmp_coords_jac_6_GREEN_UP; - const real_t p_affine_const_3_0_GREEN_UP = tmp_coords_jac_1_GREEN_UP + tmp_coords_jac_4_GREEN_UP; - const real_t p_affine_const_3_1_GREEN_UP = tmp_coords_jac_2_GREEN_UP + tmp_coords_jac_5_GREEN_UP; - const real_t p_affine_const_3_2_GREEN_UP = tmp_coords_jac_3_GREEN_UP + tmp_coords_jac_6_GREEN_UP; - const real_t jac_affine_0_0_GREEN_UP = -p_affine_const_0_0_GREEN_UP + p_affine_const_1_0_GREEN_UP; - const real_t jac_affine_0_1_GREEN_UP = -p_affine_const_0_0_GREEN_UP + p_affine_const_2_0_GREEN_UP; - const real_t jac_affine_0_2_GREEN_UP = -p_affine_const_0_0_GREEN_UP + p_affine_const_3_0_GREEN_UP; - const real_t jac_affine_1_0_GREEN_UP = -p_affine_const_0_1_GREEN_UP + p_affine_const_1_1_GREEN_UP; - const real_t jac_affine_1_1_GREEN_UP = -p_affine_const_0_1_GREEN_UP + p_affine_const_2_1_GREEN_UP; - const real_t tmp_coords_jac_11_GREEN_UP = jac_affine_0_2_GREEN_UP*jac_affine_1_1_GREEN_UP; - const real_t jac_affine_1_2_GREEN_UP = -p_affine_const_0_1_GREEN_UP + p_affine_const_3_1_GREEN_UP; - const real_t tmp_coords_jac_9_GREEN_UP = jac_affine_0_1_GREEN_UP*jac_affine_1_2_GREEN_UP; - const real_t jac_affine_2_0_GREEN_UP = -p_affine_const_0_2_GREEN_UP + p_affine_const_1_2_GREEN_UP; - const real_t jac_affine_2_1_GREEN_UP = -p_affine_const_0_2_GREEN_UP + p_affine_const_2_2_GREEN_UP; - const real_t tmp_coords_jac_8_GREEN_UP = jac_affine_1_2_GREEN_UP*jac_affine_2_1_GREEN_UP; - const real_t jac_affine_2_2_GREEN_UP = -p_affine_const_0_2_GREEN_UP + p_affine_const_3_2_GREEN_UP; - const real_t tmp_coords_jac_7_GREEN_UP = jac_affine_1_1_GREEN_UP*jac_affine_2_2_GREEN_UP; - const real_t tmp_coords_jac_10_GREEN_UP = jac_affine_0_1_GREEN_UP*jac_affine_2_2_GREEN_UP; - const real_t tmp_coords_jac_12_GREEN_UP = 1.0 / (jac_affine_0_0_GREEN_UP*tmp_coords_jac_7_GREEN_UP - jac_affine_0_0_GREEN_UP*tmp_coords_jac_8_GREEN_UP + jac_affine_0_2_GREEN_UP*jac_affine_1_0_GREEN_UP*jac_affine_2_1_GREEN_UP - jac_affine_1_0_GREEN_UP*tmp_coords_jac_10_GREEN_UP - jac_affine_2_0_GREEN_UP*tmp_coords_jac_11_GREEN_UP + jac_affine_2_0_GREEN_UP*tmp_coords_jac_9_GREEN_UP); - const real_t jac_affine_inv_0_0_GREEN_UP = tmp_coords_jac_12_GREEN_UP*(tmp_coords_jac_7_GREEN_UP - tmp_coords_jac_8_GREEN_UP); - const real_t jac_affine_inv_0_1_GREEN_UP = tmp_coords_jac_12_GREEN_UP*(jac_affine_0_2_GREEN_UP*jac_affine_2_1_GREEN_UP - tmp_coords_jac_10_GREEN_UP); - const real_t jac_affine_inv_0_2_GREEN_UP = tmp_coords_jac_12_GREEN_UP*(-tmp_coords_jac_11_GREEN_UP + tmp_coords_jac_9_GREEN_UP); - const real_t jac_affine_inv_1_0_GREEN_UP = tmp_coords_jac_12_GREEN_UP*(-jac_affine_1_0_GREEN_UP*jac_affine_2_2_GREEN_UP + jac_affine_1_2_GREEN_UP*jac_affine_2_0_GREEN_UP); - const real_t jac_affine_inv_1_1_GREEN_UP = tmp_coords_jac_12_GREEN_UP*(jac_affine_0_0_GREEN_UP*jac_affine_2_2_GREEN_UP - jac_affine_0_2_GREEN_UP*jac_affine_2_0_GREEN_UP); - const real_t jac_affine_inv_1_2_GREEN_UP = tmp_coords_jac_12_GREEN_UP*(-jac_affine_0_0_GREEN_UP*jac_affine_1_2_GREEN_UP + jac_affine_0_2_GREEN_UP*jac_affine_1_0_GREEN_UP); - const real_t jac_affine_inv_2_0_GREEN_UP = tmp_coords_jac_12_GREEN_UP*(jac_affine_1_0_GREEN_UP*jac_affine_2_1_GREEN_UP - jac_affine_1_1_GREEN_UP*jac_affine_2_0_GREEN_UP); - const real_t jac_affine_inv_2_1_GREEN_UP = tmp_coords_jac_12_GREEN_UP*(-jac_affine_0_0_GREEN_UP*jac_affine_2_1_GREEN_UP + jac_affine_0_1_GREEN_UP*jac_affine_2_0_GREEN_UP); - const real_t jac_affine_inv_2_2_GREEN_UP = tmp_coords_jac_12_GREEN_UP*(jac_affine_0_0_GREEN_UP*jac_affine_1_1_GREEN_UP - jac_affine_0_1_GREEN_UP*jac_affine_1_0_GREEN_UP); for (int64_t ctr_2 = 0; ctr_2 < micro_edges_per_macro_edge; ctr_2 += 1) for (int64_t ctr_1 = 0; ctr_1 < -ctr_2 + micro_edges_per_macro_edge; ctr_1 += 1) for (int64_t ctr_0 = 0; ctr_0 < -ctr_1 - ctr_2 + micro_edges_per_macro_edge - 1; ctr_0 += 1) { - const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*((real_t)(ctr_1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*((real_t)(ctr_2)); - const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*((real_t)(ctr_1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*((real_t)(ctr_2)); - const real_t p_affine_0_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*((real_t)(ctr_1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*((real_t)(ctr_2)); - const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*((real_t)(ctr_1 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*((real_t)(ctr_2)); - const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*((real_t)(ctr_1 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*((real_t)(ctr_2)); - const real_t p_affine_1_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*((real_t)(ctr_1 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*((real_t)(ctr_2)); - const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*((real_t)(ctr_1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*((real_t)(ctr_2 + 1)); - const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*((real_t)(ctr_1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*((real_t)(ctr_2 + 1)); - const real_t p_affine_2_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*((real_t)(ctr_1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*((real_t)(ctr_2 + 1)); - const real_t p_affine_3_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*((real_t)(ctr_1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*((real_t)(ctr_2 + 1)); - const real_t p_affine_3_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*((real_t)(ctr_1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*((real_t)(ctr_2 + 1)); - const real_t p_affine_3_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*((real_t)(ctr_1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*((real_t)(ctr_2 + 1)); + + const int64_t phantom_ctr_0 = ctr_0; + real_t _data_float_loop_ctr_array_dim_0[4]; + _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0; + _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1; + _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2; + _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3; + real_t _data_float_loop_ctr_array_dim_1[4]; + _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1; + real_t _data_float_loop_ctr_array_dim_2[4]; + _data_float_loop_ctr_array_dim_2[0] = (real_t) ctr_2; + _data_float_loop_ctr_array_dim_2[1] = (real_t) ctr_2; + _data_float_loop_ctr_array_dim_2[2] = (real_t) ctr_2; + _data_float_loop_ctr_array_dim_2[3] = (real_t) ctr_2; + + const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]; + const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]; + const real_t p_affine_0_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]; + const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]; + const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]; + const real_t p_affine_1_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]; + const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_2_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_3_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_3_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_3_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0; const real_t src_dof_0 = _data_src_vertex_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1]; const real_t src_dof_1 = _data_src_vertex_0[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))]; const real_t src_dof_2 = _data_src_vertex_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))]; @@ -3033,28 +3050,8 @@ void P2VectorToP1ElementwiseGradRhoByRhoDotUIcosahedralShellMap::apply_macro_3D( real_t q_acc_3_29 = 0.0; for (int64_t q = 0; q < 5; q += 1) { - const real_t tmp_qloop_0 = rayVertex_1 - refVertex_1; - const real_t tmp_qloop_1 = -rayVertex_0; - const real_t tmp_qloop_2 = -forVertex_0 - tmp_qloop_1; - const real_t tmp_qloop_3 = rayVertex_2 - thrVertex_2; - const real_t tmp_qloop_4 = tmp_qloop_2*tmp_qloop_3; - const real_t tmp_qloop_5 = rayVertex_2 - refVertex_2; - const real_t tmp_qloop_6 = rayVertex_1 - thrVertex_1; - const real_t tmp_qloop_7 = tmp_qloop_2*tmp_qloop_6; - const real_t tmp_qloop_8 = -rayVertex_1; - const real_t tmp_qloop_9 = -forVertex_1 - tmp_qloop_8; - const real_t tmp_qloop_10 = rayVertex_0 - thrVertex_0; - const real_t tmp_qloop_11 = rayVertex_0 - refVertex_0; - const real_t tmp_qloop_12 = -rayVertex_2; - const real_t tmp_qloop_13 = -forVertex_2 - tmp_qloop_12; - const real_t tmp_qloop_14 = tmp_qloop_3*tmp_qloop_9; - const real_t tmp_qloop_15 = tmp_qloop_10*tmp_qloop_13; - const real_t tmp_qloop_16 = -tmp_qloop_0*tmp_qloop_15 + tmp_qloop_0*tmp_qloop_4 + tmp_qloop_10*tmp_qloop_5*tmp_qloop_9 + tmp_qloop_11*tmp_qloop_13*tmp_qloop_6 - tmp_qloop_11*tmp_qloop_14 - tmp_qloop_5*tmp_qloop_7; - const real_t tmp_qloop_17 = 1.0 / (tmp_qloop_16); const real_t tmp_qloop_18 = p_affine_0_0 + (-p_affine_0_0 + p_affine_1_0)*_data_q_p_0[q] + (-p_affine_0_0 + p_affine_2_0)*_data_q_p_1[q] + (-p_affine_0_0 + p_affine_3_0)*_data_q_p_2[q]; const real_t tmp_qloop_19 = (tmp_qloop_18*tmp_qloop_18); - const real_t tmp_qloop_20 = radRayVertex*tmp_qloop_16; - const real_t tmp_qloop_21 = radRayVertex - radRefVertex; const real_t tmp_qloop_22 = p_affine_0_2 + (-p_affine_0_2 + p_affine_1_2)*_data_q_p_0[q] + (-p_affine_0_2 + p_affine_2_2)*_data_q_p_1[q] + (-p_affine_0_2 + p_affine_3_2)*_data_q_p_2[q]; const real_t tmp_qloop_23 = -tmp_qloop_12 - tmp_qloop_22; const real_t tmp_qloop_24 = -tmp_qloop_1 - tmp_qloop_18; @@ -3065,28 +3062,21 @@ void P2VectorToP1ElementwiseGradRhoByRhoDotUIcosahedralShellMap::apply_macro_3D( const real_t tmp_qloop_29 = (tmp_qloop_25*tmp_qloop_25); const real_t tmp_qloop_30 = (tmp_qloop_22*tmp_qloop_22); const real_t tmp_qloop_31 = tmp_qloop_19 + tmp_qloop_29 + tmp_qloop_30; - const real_t tmp_qloop_32 = -tmp_qloop_13*tmp_qloop_6 + tmp_qloop_14; - const real_t tmp_qloop_33 = -tmp_qloop_21*tmp_qloop_32; const real_t tmp_qloop_34 = tmp_qloop_18*tmp_qloop_33; const real_t tmp_qloop_35 = tmp_qloop_28 + tmp_qloop_34; const real_t tmp_qloop_36 = tmp_qloop_17*(-tmp_qloop_19*tmp_qloop_28 + tmp_qloop_31*tmp_qloop_35); const real_t tmp_qloop_37 = pow(tmp_qloop_31, -1.5000000000000000); const real_t tmp_qloop_38 = tmp_qloop_37*1.0; - const real_t tmp_qloop_39 = tmp_qloop_10*tmp_qloop_13 - tmp_qloop_4; - const real_t tmp_qloop_40 = tmp_qloop_21*tmp_qloop_39; const real_t tmp_qloop_41 = tmp_qloop_31*tmp_qloop_40; const real_t tmp_qloop_42 = -tmp_qloop_25*tmp_qloop_28 - tmp_qloop_41; const real_t tmp_qloop_43 = tmp_qloop_17*tmp_qloop_38; const real_t tmp_qloop_44 = tmp_qloop_42*tmp_qloop_43; - const real_t tmp_qloop_45 = -tmp_qloop_10*tmp_qloop_9 + tmp_qloop_7; - const real_t tmp_qloop_46 = -tmp_qloop_21*tmp_qloop_45; const real_t tmp_qloop_47 = -tmp_qloop_28; const real_t tmp_qloop_48 = tmp_qloop_22*tmp_qloop_47; const real_t tmp_qloop_49 = tmp_qloop_31*tmp_qloop_46 + tmp_qloop_48; const real_t tmp_qloop_50 = tmp_qloop_43*tmp_qloop_49; const real_t tmp_qloop_51 = tmp_qloop_18*tmp_qloop_47 + tmp_qloop_31*tmp_qloop_33; const real_t tmp_qloop_52 = tmp_qloop_43*tmp_qloop_51; - const real_t tmp_qloop_53 = -tmp_qloop_21*tmp_qloop_39; const real_t tmp_qloop_54 = tmp_qloop_25*tmp_qloop_53 + tmp_qloop_28; const real_t tmp_qloop_55 = tmp_qloop_29*tmp_qloop_47 + tmp_qloop_31*tmp_qloop_54; const real_t tmp_qloop_56 = tmp_qloop_22*tmp_qloop_46; @@ -3094,11 +3084,9 @@ void P2VectorToP1ElementwiseGradRhoByRhoDotUIcosahedralShellMap::apply_macro_3D( const real_t tmp_qloop_58 = tmp_qloop_30*tmp_qloop_47 + tmp_qloop_31*tmp_qloop_57; const real_t tmp_qloop_66 = pow(tmp_qloop_31, -2.5000000000000000)*3.0; const real_t tmp_qloop_67 = tmp_qloop_36*tmp_qloop_66; - const real_t tmp_qloop_68 = tmp_qloop_21*tmp_qloop_32; const real_t tmp_qloop_69 = tmp_qloop_18*2.0; const real_t tmp_qloop_70 = -tmp_qloop_41; const real_t tmp_qloop_71 = tmp_qloop_35*2.0; - const real_t tmp_qloop_72 = tmp_qloop_21*tmp_qloop_45; const real_t tmp_qloop_73 = -tmp_qloop_31*tmp_qloop_72; const real_t tmp_qloop_74 = -tmp_qloop_20 + tmp_qloop_27; const real_t tmp_qloop_75 = tmp_qloop_18*tmp_qloop_68 + tmp_qloop_34*2.0 + tmp_qloop_74; @@ -3160,14 +3148,7 @@ void P2VectorToP1ElementwiseGradRhoByRhoDotUIcosahedralShellMap::apply_macro_3D( const real_t tmp_qloop_132 = tmp_qloop_109 - tmp_qloop_110 - tmp_qloop_112 - tmp_qloop_131; const real_t tmp_qloop_133 = tmp_qloop_103*4.0; const real_t tmp_qloop_134 = tmp_qloop_111 - tmp_qloop_114 - tmp_qloop_133; - const real_t tmp_qloop_135 = 1.0 / (rho_dof_0*tmp_qloop_115 + rho_dof_1*tmp_qloop_127 + rho_dof_2*tmp_qloop_128 + rho_dof_3*tmp_qloop_129 + rho_dof_4*tmp_qloop_110 + rho_dof_5*tmp_qloop_113 + rho_dof_6*tmp_qloop_112 + rho_dof_7*tmp_qloop_130 + rho_dof_8*tmp_qloop_132 + rho_dof_9*tmp_qloop_134)*_data_q_w[q]; - const real_t tmp_qloop_136 = tmp_qloop_135*(1.0 - _data_q_p_0[q] - _data_q_p_1[q] - _data_q_p_2[q]); - const real_t tmp_qloop_142 = tmp_qloop_135*_data_q_p_0[q]; - const real_t tmp_qloop_144 = tmp_qloop_135*_data_q_p_2[q]; - const real_t tmp_qloop_147 = tmp_qloop_135*_data_q_p_1[q]; const real_t tmp_qloop_158 = tmp_qloop_105*tmp_qloop_111; - const real_t tmp_qloop_159 = tmp_qloop_135*tmp_qloop_158; - const real_t tmp_qloop_161 = tmp_qloop_107*tmp_qloop_135; const real_t jac_blending_0_0 = tmp_qloop_36*tmp_qloop_38; const real_t jac_blending_0_1 = tmp_qloop_18*tmp_qloop_44; const real_t jac_blending_0_2 = tmp_qloop_18*tmp_qloop_50; @@ -3185,6 +3166,13 @@ void P2VectorToP1ElementwiseGradRhoByRhoDotUIcosahedralShellMap::apply_macro_3D( const real_t tmp_qloop_64 = jac_blending_0_0*tmp_qloop_59 - jac_blending_0_0*tmp_qloop_61 + jac_blending_0_2*jac_blending_1_0*jac_blending_2_1 - jac_blending_1_0*tmp_qloop_62 + jac_blending_2_0*tmp_qloop_60 - jac_blending_2_0*tmp_qloop_63; const real_t tmp_qloop_65 = 1.0 / (tmp_qloop_64); const real_t abs_det_jac_blending = tmp_qloop_64; + const real_t tmp_qloop_135 = abs_det_jac_affine_GREEN_UP*abs_det_jac_blending*1.0 / (rho_dof_0*tmp_qloop_115 + rho_dof_1*tmp_qloop_127 + rho_dof_2*tmp_qloop_128 + rho_dof_3*tmp_qloop_129 + rho_dof_4*tmp_qloop_110 + rho_dof_5*tmp_qloop_113 + rho_dof_6*tmp_qloop_112 + rho_dof_7*tmp_qloop_130 + rho_dof_8*tmp_qloop_132 + rho_dof_9*tmp_qloop_134)*_data_q_w[q]; + const real_t tmp_qloop_136 = tmp_qloop_135*(1.0 - _data_q_p_0[q] - _data_q_p_1[q] - _data_q_p_2[q]); + const real_t tmp_qloop_142 = tmp_qloop_135*_data_q_p_0[q]; + const real_t tmp_qloop_144 = tmp_qloop_135*_data_q_p_2[q]; + const real_t tmp_qloop_147 = tmp_qloop_135*_data_q_p_1[q]; + const real_t tmp_qloop_159 = tmp_qloop_135*tmp_qloop_158; + const real_t tmp_qloop_161 = tmp_qloop_107*tmp_qloop_135; const real_t jac_blending_inv_0_0 = tmp_qloop_65*(tmp_qloop_59 - tmp_qloop_61); const real_t jac_blending_inv_0_1 = tmp_qloop_65*(jac_blending_0_2*jac_blending_2_1 - tmp_qloop_62); const real_t jac_blending_inv_0_2 = tmp_qloop_65*(tmp_qloop_60 - tmp_qloop_63); @@ -3494,70 +3482,90 @@ void P2VectorToP1ElementwiseGradRhoByRhoDotUIcosahedralShellMap::apply_macro_3D( _data_dst[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1] = elMatVec_3 + _data_dst[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1]; } } + const real_t tmp_coords_jac_0_GREEN_DOWN = 1.0 / (micro_edges_per_macro_edge_float)*1.0; + const real_t tmp_coords_jac_1_GREEN_DOWN = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_GREEN_DOWN*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0); + const real_t tmp_coords_jac_2_GREEN_DOWN = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_GREEN_DOWN*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1); + const real_t tmp_coords_jac_3_GREEN_DOWN = macro_vertex_coord_id_0comp2 + tmp_coords_jac_0_GREEN_DOWN*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2); + const real_t tmp_coords_jac_4_GREEN_DOWN = tmp_coords_jac_0_GREEN_DOWN*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0); + const real_t tmp_coords_jac_5_GREEN_DOWN = tmp_coords_jac_0_GREEN_DOWN*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1); + const real_t tmp_coords_jac_6_GREEN_DOWN = tmp_coords_jac_0_GREEN_DOWN*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2); + const real_t tmp_coords_jac_7_GREEN_DOWN = tmp_coords_jac_0_GREEN_DOWN*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0); + const real_t tmp_coords_jac_8_GREEN_DOWN = tmp_coords_jac_0_GREEN_DOWN*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1); + const real_t tmp_coords_jac_9_GREEN_DOWN = tmp_coords_jac_0_GREEN_DOWN*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2); + const real_t p_affine_const_0_0_GREEN_DOWN = tmp_coords_jac_1_GREEN_DOWN; + const real_t p_affine_const_0_1_GREEN_DOWN = tmp_coords_jac_2_GREEN_DOWN; + const real_t p_affine_const_0_2_GREEN_DOWN = tmp_coords_jac_3_GREEN_DOWN; + const real_t p_affine_const_1_0_GREEN_DOWN = tmp_coords_jac_1_GREEN_DOWN + tmp_coords_jac_4_GREEN_DOWN; + const real_t p_affine_const_1_1_GREEN_DOWN = tmp_coords_jac_2_GREEN_DOWN + tmp_coords_jac_5_GREEN_DOWN; + const real_t p_affine_const_1_2_GREEN_DOWN = tmp_coords_jac_3_GREEN_DOWN + tmp_coords_jac_6_GREEN_DOWN; + const real_t p_affine_const_2_0_GREEN_DOWN = macro_vertex_coord_id_0comp0 + tmp_coords_jac_4_GREEN_DOWN + tmp_coords_jac_7_GREEN_DOWN; + const real_t p_affine_const_2_1_GREEN_DOWN = macro_vertex_coord_id_0comp1 + tmp_coords_jac_5_GREEN_DOWN + tmp_coords_jac_8_GREEN_DOWN; + const real_t p_affine_const_2_2_GREEN_DOWN = macro_vertex_coord_id_0comp2 + tmp_coords_jac_6_GREEN_DOWN + tmp_coords_jac_9_GREEN_DOWN; + const real_t p_affine_const_3_0_GREEN_DOWN = tmp_coords_jac_1_GREEN_DOWN + tmp_coords_jac_7_GREEN_DOWN; + const real_t p_affine_const_3_1_GREEN_DOWN = tmp_coords_jac_2_GREEN_DOWN + tmp_coords_jac_8_GREEN_DOWN; + const real_t p_affine_const_3_2_GREEN_DOWN = tmp_coords_jac_3_GREEN_DOWN + tmp_coords_jac_9_GREEN_DOWN; + const real_t jac_affine_0_0_GREEN_DOWN = -p_affine_const_0_0_GREEN_DOWN + p_affine_const_1_0_GREEN_DOWN; + const real_t jac_affine_0_1_GREEN_DOWN = -p_affine_const_0_0_GREEN_DOWN + p_affine_const_2_0_GREEN_DOWN; + const real_t jac_affine_0_2_GREEN_DOWN = -p_affine_const_0_0_GREEN_DOWN + p_affine_const_3_0_GREEN_DOWN; + const real_t jac_affine_1_0_GREEN_DOWN = -p_affine_const_0_1_GREEN_DOWN + p_affine_const_1_1_GREEN_DOWN; + const real_t jac_affine_1_1_GREEN_DOWN = -p_affine_const_0_1_GREEN_DOWN + p_affine_const_2_1_GREEN_DOWN; + const real_t tmp_coords_jac_14_GREEN_DOWN = jac_affine_0_2_GREEN_DOWN*jac_affine_1_1_GREEN_DOWN; + const real_t jac_affine_1_2_GREEN_DOWN = -p_affine_const_0_1_GREEN_DOWN + p_affine_const_3_1_GREEN_DOWN; + const real_t tmp_coords_jac_12_GREEN_DOWN = jac_affine_0_1_GREEN_DOWN*jac_affine_1_2_GREEN_DOWN; + const real_t jac_affine_2_0_GREEN_DOWN = -p_affine_const_0_2_GREEN_DOWN + p_affine_const_1_2_GREEN_DOWN; + const real_t jac_affine_2_1_GREEN_DOWN = -p_affine_const_0_2_GREEN_DOWN + p_affine_const_2_2_GREEN_DOWN; + const real_t tmp_coords_jac_11_GREEN_DOWN = jac_affine_1_2_GREEN_DOWN*jac_affine_2_1_GREEN_DOWN; + const real_t jac_affine_2_2_GREEN_DOWN = -p_affine_const_0_2_GREEN_DOWN + p_affine_const_3_2_GREEN_DOWN; + const real_t tmp_coords_jac_10_GREEN_DOWN = jac_affine_1_1_GREEN_DOWN*jac_affine_2_2_GREEN_DOWN; + const real_t tmp_coords_jac_13_GREEN_DOWN = jac_affine_0_1_GREEN_DOWN*jac_affine_2_2_GREEN_DOWN; + const real_t tmp_coords_jac_15_GREEN_DOWN = jac_affine_0_0_GREEN_DOWN*tmp_coords_jac_10_GREEN_DOWN - jac_affine_0_0_GREEN_DOWN*tmp_coords_jac_11_GREEN_DOWN + jac_affine_0_2_GREEN_DOWN*jac_affine_1_0_GREEN_DOWN*jac_affine_2_1_GREEN_DOWN - jac_affine_1_0_GREEN_DOWN*tmp_coords_jac_13_GREEN_DOWN + jac_affine_2_0_GREEN_DOWN*tmp_coords_jac_12_GREEN_DOWN - jac_affine_2_0_GREEN_DOWN*tmp_coords_jac_14_GREEN_DOWN; + const real_t tmp_coords_jac_16_GREEN_DOWN = 1.0 / (tmp_coords_jac_15_GREEN_DOWN); + const real_t jac_affine_inv_0_0_GREEN_DOWN = tmp_coords_jac_16_GREEN_DOWN*(tmp_coords_jac_10_GREEN_DOWN - tmp_coords_jac_11_GREEN_DOWN); + const real_t jac_affine_inv_0_1_GREEN_DOWN = tmp_coords_jac_16_GREEN_DOWN*(jac_affine_0_2_GREEN_DOWN*jac_affine_2_1_GREEN_DOWN - tmp_coords_jac_13_GREEN_DOWN); + const real_t jac_affine_inv_0_2_GREEN_DOWN = tmp_coords_jac_16_GREEN_DOWN*(tmp_coords_jac_12_GREEN_DOWN - tmp_coords_jac_14_GREEN_DOWN); + const real_t jac_affine_inv_1_0_GREEN_DOWN = tmp_coords_jac_16_GREEN_DOWN*(-jac_affine_1_0_GREEN_DOWN*jac_affine_2_2_GREEN_DOWN + jac_affine_1_2_GREEN_DOWN*jac_affine_2_0_GREEN_DOWN); + const real_t jac_affine_inv_1_1_GREEN_DOWN = tmp_coords_jac_16_GREEN_DOWN*(jac_affine_0_0_GREEN_DOWN*jac_affine_2_2_GREEN_DOWN - jac_affine_0_2_GREEN_DOWN*jac_affine_2_0_GREEN_DOWN); + const real_t jac_affine_inv_1_2_GREEN_DOWN = tmp_coords_jac_16_GREEN_DOWN*(-jac_affine_0_0_GREEN_DOWN*jac_affine_1_2_GREEN_DOWN + jac_affine_0_2_GREEN_DOWN*jac_affine_1_0_GREEN_DOWN); + const real_t jac_affine_inv_2_0_GREEN_DOWN = tmp_coords_jac_16_GREEN_DOWN*(jac_affine_1_0_GREEN_DOWN*jac_affine_2_1_GREEN_DOWN - jac_affine_1_1_GREEN_DOWN*jac_affine_2_0_GREEN_DOWN); + const real_t jac_affine_inv_2_1_GREEN_DOWN = tmp_coords_jac_16_GREEN_DOWN*(-jac_affine_0_0_GREEN_DOWN*jac_affine_2_1_GREEN_DOWN + jac_affine_0_1_GREEN_DOWN*jac_affine_2_0_GREEN_DOWN); + const real_t jac_affine_inv_2_2_GREEN_DOWN = tmp_coords_jac_16_GREEN_DOWN*(jac_affine_0_0_GREEN_DOWN*jac_affine_1_1_GREEN_DOWN - jac_affine_0_1_GREEN_DOWN*jac_affine_1_0_GREEN_DOWN); + const real_t abs_det_jac_affine_GREEN_DOWN = abs(tmp_coords_jac_15_GREEN_DOWN); { /* CellType.GREEN_DOWN */ - const real_t tmp_coords_jac_0_GREEN_DOWN = 1.0 / (micro_edges_per_macro_edge_float)*1.0; - const real_t tmp_coords_jac_1_GREEN_DOWN = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_GREEN_DOWN*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0); - const real_t tmp_coords_jac_2_GREEN_DOWN = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_GREEN_DOWN*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1); - const real_t tmp_coords_jac_3_GREEN_DOWN = macro_vertex_coord_id_0comp2 + tmp_coords_jac_0_GREEN_DOWN*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2); - const real_t tmp_coords_jac_4_GREEN_DOWN = tmp_coords_jac_0_GREEN_DOWN*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0); - const real_t tmp_coords_jac_5_GREEN_DOWN = tmp_coords_jac_0_GREEN_DOWN*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1); - const real_t tmp_coords_jac_6_GREEN_DOWN = tmp_coords_jac_0_GREEN_DOWN*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2); - const real_t tmp_coords_jac_7_GREEN_DOWN = tmp_coords_jac_0_GREEN_DOWN*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0); - const real_t tmp_coords_jac_8_GREEN_DOWN = tmp_coords_jac_0_GREEN_DOWN*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1); - const real_t tmp_coords_jac_9_GREEN_DOWN = tmp_coords_jac_0_GREEN_DOWN*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2); - const real_t p_affine_const_0_0_GREEN_DOWN = tmp_coords_jac_1_GREEN_DOWN; - const real_t p_affine_const_0_1_GREEN_DOWN = tmp_coords_jac_2_GREEN_DOWN; - const real_t p_affine_const_0_2_GREEN_DOWN = tmp_coords_jac_3_GREEN_DOWN; - const real_t p_affine_const_1_0_GREEN_DOWN = tmp_coords_jac_1_GREEN_DOWN + tmp_coords_jac_4_GREEN_DOWN; - const real_t p_affine_const_1_1_GREEN_DOWN = tmp_coords_jac_2_GREEN_DOWN + tmp_coords_jac_5_GREEN_DOWN; - const real_t p_affine_const_1_2_GREEN_DOWN = tmp_coords_jac_3_GREEN_DOWN + tmp_coords_jac_6_GREEN_DOWN; - const real_t p_affine_const_2_0_GREEN_DOWN = macro_vertex_coord_id_0comp0 + tmp_coords_jac_4_GREEN_DOWN + tmp_coords_jac_7_GREEN_DOWN; - const real_t p_affine_const_2_1_GREEN_DOWN = macro_vertex_coord_id_0comp1 + tmp_coords_jac_5_GREEN_DOWN + tmp_coords_jac_8_GREEN_DOWN; - const real_t p_affine_const_2_2_GREEN_DOWN = macro_vertex_coord_id_0comp2 + tmp_coords_jac_6_GREEN_DOWN + tmp_coords_jac_9_GREEN_DOWN; - const real_t p_affine_const_3_0_GREEN_DOWN = tmp_coords_jac_1_GREEN_DOWN + tmp_coords_jac_7_GREEN_DOWN; - const real_t p_affine_const_3_1_GREEN_DOWN = tmp_coords_jac_2_GREEN_DOWN + tmp_coords_jac_8_GREEN_DOWN; - const real_t p_affine_const_3_2_GREEN_DOWN = tmp_coords_jac_3_GREEN_DOWN + tmp_coords_jac_9_GREEN_DOWN; - const real_t jac_affine_0_0_GREEN_DOWN = -p_affine_const_0_0_GREEN_DOWN + p_affine_const_1_0_GREEN_DOWN; - const real_t jac_affine_0_1_GREEN_DOWN = -p_affine_const_0_0_GREEN_DOWN + p_affine_const_2_0_GREEN_DOWN; - const real_t jac_affine_0_2_GREEN_DOWN = -p_affine_const_0_0_GREEN_DOWN + p_affine_const_3_0_GREEN_DOWN; - const real_t jac_affine_1_0_GREEN_DOWN = -p_affine_const_0_1_GREEN_DOWN + p_affine_const_1_1_GREEN_DOWN; - const real_t jac_affine_1_1_GREEN_DOWN = -p_affine_const_0_1_GREEN_DOWN + p_affine_const_2_1_GREEN_DOWN; - const real_t tmp_coords_jac_14_GREEN_DOWN = jac_affine_0_2_GREEN_DOWN*jac_affine_1_1_GREEN_DOWN; - const real_t jac_affine_1_2_GREEN_DOWN = -p_affine_const_0_1_GREEN_DOWN + p_affine_const_3_1_GREEN_DOWN; - const real_t tmp_coords_jac_12_GREEN_DOWN = jac_affine_0_1_GREEN_DOWN*jac_affine_1_2_GREEN_DOWN; - const real_t jac_affine_2_0_GREEN_DOWN = -p_affine_const_0_2_GREEN_DOWN + p_affine_const_1_2_GREEN_DOWN; - const real_t jac_affine_2_1_GREEN_DOWN = -p_affine_const_0_2_GREEN_DOWN + p_affine_const_2_2_GREEN_DOWN; - const real_t tmp_coords_jac_11_GREEN_DOWN = jac_affine_1_2_GREEN_DOWN*jac_affine_2_1_GREEN_DOWN; - const real_t jac_affine_2_2_GREEN_DOWN = -p_affine_const_0_2_GREEN_DOWN + p_affine_const_3_2_GREEN_DOWN; - const real_t tmp_coords_jac_10_GREEN_DOWN = jac_affine_1_1_GREEN_DOWN*jac_affine_2_2_GREEN_DOWN; - const real_t tmp_coords_jac_13_GREEN_DOWN = jac_affine_0_1_GREEN_DOWN*jac_affine_2_2_GREEN_DOWN; - const real_t tmp_coords_jac_15_GREEN_DOWN = 1.0 / (jac_affine_0_0_GREEN_DOWN*tmp_coords_jac_10_GREEN_DOWN - jac_affine_0_0_GREEN_DOWN*tmp_coords_jac_11_GREEN_DOWN + jac_affine_0_2_GREEN_DOWN*jac_affine_1_0_GREEN_DOWN*jac_affine_2_1_GREEN_DOWN - jac_affine_1_0_GREEN_DOWN*tmp_coords_jac_13_GREEN_DOWN + jac_affine_2_0_GREEN_DOWN*tmp_coords_jac_12_GREEN_DOWN - jac_affine_2_0_GREEN_DOWN*tmp_coords_jac_14_GREEN_DOWN); - const real_t jac_affine_inv_0_0_GREEN_DOWN = tmp_coords_jac_15_GREEN_DOWN*(tmp_coords_jac_10_GREEN_DOWN - tmp_coords_jac_11_GREEN_DOWN); - const real_t jac_affine_inv_0_1_GREEN_DOWN = tmp_coords_jac_15_GREEN_DOWN*(jac_affine_0_2_GREEN_DOWN*jac_affine_2_1_GREEN_DOWN - tmp_coords_jac_13_GREEN_DOWN); - const real_t jac_affine_inv_0_2_GREEN_DOWN = tmp_coords_jac_15_GREEN_DOWN*(tmp_coords_jac_12_GREEN_DOWN - tmp_coords_jac_14_GREEN_DOWN); - const real_t jac_affine_inv_1_0_GREEN_DOWN = tmp_coords_jac_15_GREEN_DOWN*(-jac_affine_1_0_GREEN_DOWN*jac_affine_2_2_GREEN_DOWN + jac_affine_1_2_GREEN_DOWN*jac_affine_2_0_GREEN_DOWN); - const real_t jac_affine_inv_1_1_GREEN_DOWN = tmp_coords_jac_15_GREEN_DOWN*(jac_affine_0_0_GREEN_DOWN*jac_affine_2_2_GREEN_DOWN - jac_affine_0_2_GREEN_DOWN*jac_affine_2_0_GREEN_DOWN); - const real_t jac_affine_inv_1_2_GREEN_DOWN = tmp_coords_jac_15_GREEN_DOWN*(-jac_affine_0_0_GREEN_DOWN*jac_affine_1_2_GREEN_DOWN + jac_affine_0_2_GREEN_DOWN*jac_affine_1_0_GREEN_DOWN); - const real_t jac_affine_inv_2_0_GREEN_DOWN = tmp_coords_jac_15_GREEN_DOWN*(jac_affine_1_0_GREEN_DOWN*jac_affine_2_1_GREEN_DOWN - jac_affine_1_1_GREEN_DOWN*jac_affine_2_0_GREEN_DOWN); - const real_t jac_affine_inv_2_1_GREEN_DOWN = tmp_coords_jac_15_GREEN_DOWN*(-jac_affine_0_0_GREEN_DOWN*jac_affine_2_1_GREEN_DOWN + jac_affine_0_1_GREEN_DOWN*jac_affine_2_0_GREEN_DOWN); - const real_t jac_affine_inv_2_2_GREEN_DOWN = tmp_coords_jac_15_GREEN_DOWN*(jac_affine_0_0_GREEN_DOWN*jac_affine_1_1_GREEN_DOWN - jac_affine_0_1_GREEN_DOWN*jac_affine_1_0_GREEN_DOWN); for (int64_t ctr_2 = 0; ctr_2 < micro_edges_per_macro_edge; ctr_2 += 1) for (int64_t ctr_1 = 0; ctr_1 < -ctr_2 + micro_edges_per_macro_edge; ctr_1 += 1) for (int64_t ctr_0 = 0; ctr_0 < -ctr_1 - ctr_2 + micro_edges_per_macro_edge - 1; ctr_0 += 1) { - const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*((real_t)(ctr_1 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*((real_t)(ctr_2)); - const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*((real_t)(ctr_1 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*((real_t)(ctr_2)); - const real_t p_affine_0_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*((real_t)(ctr_1 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*((real_t)(ctr_2)); - const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*((real_t)(ctr_1 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*((real_t)(ctr_2)); - const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*((real_t)(ctr_1 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*((real_t)(ctr_2)); - const real_t p_affine_1_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*((real_t)(ctr_1 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*((real_t)(ctr_2)); - const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*((real_t)(ctr_1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*((real_t)(ctr_2 + 1)); - const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*((real_t)(ctr_1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*((real_t)(ctr_2 + 1)); - const real_t p_affine_2_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*((real_t)(ctr_1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*((real_t)(ctr_2 + 1)); - const real_t p_affine_3_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*((real_t)(ctr_1 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*((real_t)(ctr_2 + 1)); - const real_t p_affine_3_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*((real_t)(ctr_1 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*((real_t)(ctr_2 + 1)); - const real_t p_affine_3_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*((real_t)(ctr_1 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*((real_t)(ctr_2 + 1)); + + const int64_t phantom_ctr_0 = ctr_0; + real_t _data_float_loop_ctr_array_dim_0[4]; + _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0; + _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1; + _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2; + _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3; + real_t _data_float_loop_ctr_array_dim_1[4]; + _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1; + real_t _data_float_loop_ctr_array_dim_2[4]; + _data_float_loop_ctr_array_dim_2[0] = (real_t) ctr_2; + _data_float_loop_ctr_array_dim_2[1] = (real_t) ctr_2; + _data_float_loop_ctr_array_dim_2[2] = (real_t) ctr_2; + _data_float_loop_ctr_array_dim_2[3] = (real_t) ctr_2; + + const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]; + const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]; + const real_t p_affine_0_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]; + const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]; + const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]; + const real_t p_affine_1_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]; + const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_2_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_3_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_3_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_3_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0; const real_t src_dof_0 = _data_src_vertex_0[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))]; const real_t src_dof_1 = _data_src_vertex_0[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1]; const real_t src_dof_2 = _data_src_vertex_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1]; @@ -3720,28 +3728,8 @@ void P2VectorToP1ElementwiseGradRhoByRhoDotUIcosahedralShellMap::apply_macro_3D( real_t q_acc_3_29 = 0.0; for (int64_t q = 0; q < 5; q += 1) { - const real_t tmp_qloop_0 = rayVertex_1 - refVertex_1; - const real_t tmp_qloop_1 = -rayVertex_0; - const real_t tmp_qloop_2 = -forVertex_0 - tmp_qloop_1; - const real_t tmp_qloop_3 = rayVertex_2 - thrVertex_2; - const real_t tmp_qloop_4 = tmp_qloop_2*tmp_qloop_3; - const real_t tmp_qloop_5 = rayVertex_2 - refVertex_2; - const real_t tmp_qloop_6 = rayVertex_1 - thrVertex_1; - const real_t tmp_qloop_7 = tmp_qloop_2*tmp_qloop_6; - const real_t tmp_qloop_8 = -rayVertex_1; - const real_t tmp_qloop_9 = -forVertex_1 - tmp_qloop_8; - const real_t tmp_qloop_10 = rayVertex_0 - thrVertex_0; - const real_t tmp_qloop_11 = rayVertex_0 - refVertex_0; - const real_t tmp_qloop_12 = -rayVertex_2; - const real_t tmp_qloop_13 = -forVertex_2 - tmp_qloop_12; - const real_t tmp_qloop_14 = tmp_qloop_3*tmp_qloop_9; - const real_t tmp_qloop_15 = tmp_qloop_10*tmp_qloop_13; - const real_t tmp_qloop_16 = -tmp_qloop_0*tmp_qloop_15 + tmp_qloop_0*tmp_qloop_4 + tmp_qloop_10*tmp_qloop_5*tmp_qloop_9 + tmp_qloop_11*tmp_qloop_13*tmp_qloop_6 - tmp_qloop_11*tmp_qloop_14 - tmp_qloop_5*tmp_qloop_7; - const real_t tmp_qloop_17 = 1.0 / (tmp_qloop_16); const real_t tmp_qloop_18 = p_affine_0_0 + (-p_affine_0_0 + p_affine_1_0)*_data_q_p_0[q] + (-p_affine_0_0 + p_affine_2_0)*_data_q_p_1[q] + (-p_affine_0_0 + p_affine_3_0)*_data_q_p_2[q]; const real_t tmp_qloop_19 = (tmp_qloop_18*tmp_qloop_18); - const real_t tmp_qloop_20 = radRayVertex*tmp_qloop_16; - const real_t tmp_qloop_21 = radRayVertex - radRefVertex; const real_t tmp_qloop_22 = p_affine_0_2 + (-p_affine_0_2 + p_affine_1_2)*_data_q_p_0[q] + (-p_affine_0_2 + p_affine_2_2)*_data_q_p_1[q] + (-p_affine_0_2 + p_affine_3_2)*_data_q_p_2[q]; const real_t tmp_qloop_23 = -tmp_qloop_12 - tmp_qloop_22; const real_t tmp_qloop_24 = -tmp_qloop_1 - tmp_qloop_18; @@ -3752,28 +3740,21 @@ void P2VectorToP1ElementwiseGradRhoByRhoDotUIcosahedralShellMap::apply_macro_3D( const real_t tmp_qloop_29 = (tmp_qloop_25*tmp_qloop_25); const real_t tmp_qloop_30 = (tmp_qloop_22*tmp_qloop_22); const real_t tmp_qloop_31 = tmp_qloop_19 + tmp_qloop_29 + tmp_qloop_30; - const real_t tmp_qloop_32 = -tmp_qloop_13*tmp_qloop_6 + tmp_qloop_14; - const real_t tmp_qloop_33 = -tmp_qloop_21*tmp_qloop_32; const real_t tmp_qloop_34 = tmp_qloop_18*tmp_qloop_33; const real_t tmp_qloop_35 = tmp_qloop_28 + tmp_qloop_34; const real_t tmp_qloop_36 = tmp_qloop_17*(-tmp_qloop_19*tmp_qloop_28 + tmp_qloop_31*tmp_qloop_35); const real_t tmp_qloop_37 = pow(tmp_qloop_31, -1.5000000000000000); const real_t tmp_qloop_38 = tmp_qloop_37*1.0; - const real_t tmp_qloop_39 = tmp_qloop_10*tmp_qloop_13 - tmp_qloop_4; - const real_t tmp_qloop_40 = tmp_qloop_21*tmp_qloop_39; const real_t tmp_qloop_41 = tmp_qloop_31*tmp_qloop_40; const real_t tmp_qloop_42 = -tmp_qloop_25*tmp_qloop_28 - tmp_qloop_41; const real_t tmp_qloop_43 = tmp_qloop_17*tmp_qloop_38; const real_t tmp_qloop_44 = tmp_qloop_42*tmp_qloop_43; - const real_t tmp_qloop_45 = -tmp_qloop_10*tmp_qloop_9 + tmp_qloop_7; - const real_t tmp_qloop_46 = -tmp_qloop_21*tmp_qloop_45; const real_t tmp_qloop_47 = -tmp_qloop_28; const real_t tmp_qloop_48 = tmp_qloop_22*tmp_qloop_47; const real_t tmp_qloop_49 = tmp_qloop_31*tmp_qloop_46 + tmp_qloop_48; const real_t tmp_qloop_50 = tmp_qloop_43*tmp_qloop_49; const real_t tmp_qloop_51 = tmp_qloop_18*tmp_qloop_47 + tmp_qloop_31*tmp_qloop_33; const real_t tmp_qloop_52 = tmp_qloop_43*tmp_qloop_51; - const real_t tmp_qloop_53 = -tmp_qloop_21*tmp_qloop_39; const real_t tmp_qloop_54 = tmp_qloop_25*tmp_qloop_53 + tmp_qloop_28; const real_t tmp_qloop_55 = tmp_qloop_29*tmp_qloop_47 + tmp_qloop_31*tmp_qloop_54; const real_t tmp_qloop_56 = tmp_qloop_22*tmp_qloop_46; @@ -3781,11 +3762,9 @@ void P2VectorToP1ElementwiseGradRhoByRhoDotUIcosahedralShellMap::apply_macro_3D( const real_t tmp_qloop_58 = tmp_qloop_30*tmp_qloop_47 + tmp_qloop_31*tmp_qloop_57; const real_t tmp_qloop_66 = pow(tmp_qloop_31, -2.5000000000000000)*3.0; const real_t tmp_qloop_67 = tmp_qloop_36*tmp_qloop_66; - const real_t tmp_qloop_68 = tmp_qloop_21*tmp_qloop_32; const real_t tmp_qloop_69 = tmp_qloop_18*2.0; const real_t tmp_qloop_70 = -tmp_qloop_41; const real_t tmp_qloop_71 = tmp_qloop_35*2.0; - const real_t tmp_qloop_72 = tmp_qloop_21*tmp_qloop_45; const real_t tmp_qloop_73 = -tmp_qloop_31*tmp_qloop_72; const real_t tmp_qloop_74 = -tmp_qloop_20 + tmp_qloop_27; const real_t tmp_qloop_75 = tmp_qloop_18*tmp_qloop_68 + tmp_qloop_34*2.0 + tmp_qloop_74; @@ -3847,14 +3826,7 @@ void P2VectorToP1ElementwiseGradRhoByRhoDotUIcosahedralShellMap::apply_macro_3D( const real_t tmp_qloop_132 = tmp_qloop_109 - tmp_qloop_110 - tmp_qloop_112 - tmp_qloop_131; const real_t tmp_qloop_133 = tmp_qloop_103*4.0; const real_t tmp_qloop_134 = tmp_qloop_111 - tmp_qloop_114 - tmp_qloop_133; - const real_t tmp_qloop_135 = 1.0 / (rho_dof_0*tmp_qloop_115 + rho_dof_1*tmp_qloop_127 + rho_dof_2*tmp_qloop_128 + rho_dof_3*tmp_qloop_129 + rho_dof_4*tmp_qloop_110 + rho_dof_5*tmp_qloop_113 + rho_dof_6*tmp_qloop_112 + rho_dof_7*tmp_qloop_130 + rho_dof_8*tmp_qloop_132 + rho_dof_9*tmp_qloop_134)*_data_q_w[q]; - const real_t tmp_qloop_136 = tmp_qloop_135*(1.0 - _data_q_p_0[q] - _data_q_p_1[q] - _data_q_p_2[q]); - const real_t tmp_qloop_142 = tmp_qloop_135*_data_q_p_0[q]; - const real_t tmp_qloop_144 = tmp_qloop_135*_data_q_p_2[q]; - const real_t tmp_qloop_147 = tmp_qloop_135*_data_q_p_1[q]; const real_t tmp_qloop_158 = tmp_qloop_105*tmp_qloop_111; - const real_t tmp_qloop_159 = tmp_qloop_135*tmp_qloop_158; - const real_t tmp_qloop_161 = tmp_qloop_107*tmp_qloop_135; const real_t jac_blending_0_0 = tmp_qloop_36*tmp_qloop_38; const real_t jac_blending_0_1 = tmp_qloop_18*tmp_qloop_44; const real_t jac_blending_0_2 = tmp_qloop_18*tmp_qloop_50; @@ -3872,6 +3844,13 @@ void P2VectorToP1ElementwiseGradRhoByRhoDotUIcosahedralShellMap::apply_macro_3D( const real_t tmp_qloop_64 = jac_blending_0_0*tmp_qloop_59 - jac_blending_0_0*tmp_qloop_61 + jac_blending_0_2*jac_blending_1_0*jac_blending_2_1 - jac_blending_1_0*tmp_qloop_62 + jac_blending_2_0*tmp_qloop_60 - jac_blending_2_0*tmp_qloop_63; const real_t tmp_qloop_65 = 1.0 / (tmp_qloop_64); const real_t abs_det_jac_blending = tmp_qloop_64; + const real_t tmp_qloop_135 = abs_det_jac_affine_GREEN_DOWN*abs_det_jac_blending*1.0 / (rho_dof_0*tmp_qloop_115 + rho_dof_1*tmp_qloop_127 + rho_dof_2*tmp_qloop_128 + rho_dof_3*tmp_qloop_129 + rho_dof_4*tmp_qloop_110 + rho_dof_5*tmp_qloop_113 + rho_dof_6*tmp_qloop_112 + rho_dof_7*tmp_qloop_130 + rho_dof_8*tmp_qloop_132 + rho_dof_9*tmp_qloop_134)*_data_q_w[q]; + const real_t tmp_qloop_136 = tmp_qloop_135*(1.0 - _data_q_p_0[q] - _data_q_p_1[q] - _data_q_p_2[q]); + const real_t tmp_qloop_142 = tmp_qloop_135*_data_q_p_0[q]; + const real_t tmp_qloop_144 = tmp_qloop_135*_data_q_p_2[q]; + const real_t tmp_qloop_147 = tmp_qloop_135*_data_q_p_1[q]; + const real_t tmp_qloop_159 = tmp_qloop_135*tmp_qloop_158; + const real_t tmp_qloop_161 = tmp_qloop_107*tmp_qloop_135; const real_t jac_blending_inv_0_0 = tmp_qloop_65*(tmp_qloop_59 - tmp_qloop_61); const real_t jac_blending_inv_0_1 = tmp_qloop_65*(jac_blending_0_2*jac_blending_2_1 - tmp_qloop_62); const real_t jac_blending_inv_0_2 = tmp_qloop_65*(tmp_qloop_60 - tmp_qloop_63); diff --git a/operators/grad_rho_by_rho_dot_u/noarch/P2VectorToP1ElementwiseGradRhoByRhoDotUIcosahedralShellMap_toMatrix_macro_3D.cpp b/operators/grad_rho_by_rho_dot_u/noarch/P2VectorToP1ElementwiseGradRhoByRhoDotUIcosahedralShellMap_toMatrix_P2VectorToP1ElementwiseGradRhoByRhoDotUIcosahedralShellMap_macro_3D.cpp similarity index 84% rename from operators/grad_rho_by_rho_dot_u/noarch/P2VectorToP1ElementwiseGradRhoByRhoDotUIcosahedralShellMap_toMatrix_macro_3D.cpp rename to operators/grad_rho_by_rho_dot_u/noarch/P2VectorToP1ElementwiseGradRhoByRhoDotUIcosahedralShellMap_toMatrix_P2VectorToP1ElementwiseGradRhoByRhoDotUIcosahedralShellMap_macro_3D.cpp index 05c564b414a539295d5dd4cd7d048ea03afb7944..06750a93599ab531722dd66974cc79d02902fcda 100644 --- a/operators/grad_rho_by_rho_dot_u/noarch/P2VectorToP1ElementwiseGradRhoByRhoDotUIcosahedralShellMap_toMatrix_macro_3D.cpp +++ b/operators/grad_rho_by_rho_dot_u/noarch/P2VectorToP1ElementwiseGradRhoByRhoDotUIcosahedralShellMap_toMatrix_P2VectorToP1ElementwiseGradRhoByRhoDotUIcosahedralShellMap_macro_3D.cpp @@ -47,6 +47,10 @@ + + + + @@ -60,7 +64,7 @@ namespace hyteg { namespace operatorgeneration { -void P2VectorToP1ElementwiseGradRhoByRhoDotUIcosahedralShellMap::toMatrix_macro_3D( idx_t * RESTRICT _data_dst, real_t * RESTRICT _data_rhoEdge, real_t * RESTRICT _data_rhoVertex, idx_t * RESTRICT _data_src_edge_0, idx_t * RESTRICT _data_src_edge_1, idx_t * RESTRICT _data_src_edge_2, idx_t * RESTRICT _data_src_vertex_0, idx_t * RESTRICT _data_src_vertex_1, idx_t * RESTRICT _data_src_vertex_2, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const +void P2VectorToP1ElementwiseGradRhoByRhoDotUIcosahedralShellMap::toMatrix_P2VectorToP1ElementwiseGradRhoByRhoDotUIcosahedralShellMap_macro_3D( idx_t * RESTRICT _data_dst, real_t * RESTRICT _data_rhoEdge, real_t * RESTRICT _data_rhoVertex, idx_t * RESTRICT _data_src_edge_0, idx_t * RESTRICT _data_src_edge_1, idx_t * RESTRICT _data_src_edge_2, idx_t * RESTRICT _data_src_vertex_0, idx_t * RESTRICT _data_src_vertex_1, idx_t * RESTRICT _data_src_vertex_2, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const { { const real_t _data_q_w [] = {-0.1333333333333333, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983}; @@ -71,61 +75,110 @@ void P2VectorToP1ElementwiseGradRhoByRhoDotUIcosahedralShellMap::toMatrix_macro_ const real_t _data_q_p_2 [] = {0.25, 0.5, 0.16666666666666666, 0.16666666666666666, 0.16666666666666666}; + const real_t tmp_coords_jac_0_WHITE_UP = 1.0 / (micro_edges_per_macro_edge_float)*1.0; + const real_t p_affine_const_0_0_WHITE_UP = macro_vertex_coord_id_0comp0; + const real_t p_affine_const_0_1_WHITE_UP = macro_vertex_coord_id_0comp1; + const real_t p_affine_const_0_2_WHITE_UP = macro_vertex_coord_id_0comp2; + const real_t p_affine_const_1_0_WHITE_UP = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_WHITE_UP*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0); + const real_t p_affine_const_1_1_WHITE_UP = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_WHITE_UP*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1); + const real_t p_affine_const_1_2_WHITE_UP = macro_vertex_coord_id_0comp2 + tmp_coords_jac_0_WHITE_UP*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2); + const real_t p_affine_const_2_0_WHITE_UP = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_WHITE_UP*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0); + const real_t p_affine_const_2_1_WHITE_UP = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_WHITE_UP*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1); + const real_t p_affine_const_2_2_WHITE_UP = macro_vertex_coord_id_0comp2 + tmp_coords_jac_0_WHITE_UP*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2); + const real_t p_affine_const_3_0_WHITE_UP = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_WHITE_UP*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0); + const real_t p_affine_const_3_1_WHITE_UP = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_WHITE_UP*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1); + const real_t p_affine_const_3_2_WHITE_UP = macro_vertex_coord_id_0comp2 + tmp_coords_jac_0_WHITE_UP*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2); + const real_t jac_affine_0_0_WHITE_UP = -p_affine_const_0_0_WHITE_UP + p_affine_const_1_0_WHITE_UP; + const real_t jac_affine_0_1_WHITE_UP = -p_affine_const_0_0_WHITE_UP + p_affine_const_2_0_WHITE_UP; + const real_t jac_affine_0_2_WHITE_UP = -p_affine_const_0_0_WHITE_UP + p_affine_const_3_0_WHITE_UP; + const real_t jac_affine_1_0_WHITE_UP = -p_affine_const_0_1_WHITE_UP + p_affine_const_1_1_WHITE_UP; + const real_t jac_affine_1_1_WHITE_UP = -p_affine_const_0_1_WHITE_UP + p_affine_const_2_1_WHITE_UP; + const real_t tmp_coords_jac_5_WHITE_UP = jac_affine_0_2_WHITE_UP*jac_affine_1_1_WHITE_UP; + const real_t jac_affine_1_2_WHITE_UP = -p_affine_const_0_1_WHITE_UP + p_affine_const_3_1_WHITE_UP; + const real_t tmp_coords_jac_3_WHITE_UP = jac_affine_0_1_WHITE_UP*jac_affine_1_2_WHITE_UP; + const real_t jac_affine_2_0_WHITE_UP = -p_affine_const_0_2_WHITE_UP + p_affine_const_1_2_WHITE_UP; + const real_t jac_affine_2_1_WHITE_UP = -p_affine_const_0_2_WHITE_UP + p_affine_const_2_2_WHITE_UP; + const real_t tmp_coords_jac_2_WHITE_UP = jac_affine_1_2_WHITE_UP*jac_affine_2_1_WHITE_UP; + const real_t jac_affine_2_2_WHITE_UP = -p_affine_const_0_2_WHITE_UP + p_affine_const_3_2_WHITE_UP; + const real_t tmp_coords_jac_1_WHITE_UP = jac_affine_1_1_WHITE_UP*jac_affine_2_2_WHITE_UP; + const real_t tmp_coords_jac_4_WHITE_UP = jac_affine_0_1_WHITE_UP*jac_affine_2_2_WHITE_UP; + const real_t tmp_coords_jac_6_WHITE_UP = jac_affine_0_0_WHITE_UP*tmp_coords_jac_1_WHITE_UP - jac_affine_0_0_WHITE_UP*tmp_coords_jac_2_WHITE_UP + jac_affine_0_2_WHITE_UP*jac_affine_1_0_WHITE_UP*jac_affine_2_1_WHITE_UP - jac_affine_1_0_WHITE_UP*tmp_coords_jac_4_WHITE_UP + jac_affine_2_0_WHITE_UP*tmp_coords_jac_3_WHITE_UP - jac_affine_2_0_WHITE_UP*tmp_coords_jac_5_WHITE_UP; + const real_t tmp_coords_jac_7_WHITE_UP = 1.0 / (tmp_coords_jac_6_WHITE_UP); + const real_t jac_affine_inv_0_0_WHITE_UP = tmp_coords_jac_7_WHITE_UP*(tmp_coords_jac_1_WHITE_UP - tmp_coords_jac_2_WHITE_UP); + const real_t jac_affine_inv_0_1_WHITE_UP = tmp_coords_jac_7_WHITE_UP*(jac_affine_0_2_WHITE_UP*jac_affine_2_1_WHITE_UP - tmp_coords_jac_4_WHITE_UP); + const real_t jac_affine_inv_0_2_WHITE_UP = tmp_coords_jac_7_WHITE_UP*(tmp_coords_jac_3_WHITE_UP - tmp_coords_jac_5_WHITE_UP); + const real_t jac_affine_inv_1_0_WHITE_UP = tmp_coords_jac_7_WHITE_UP*(-jac_affine_1_0_WHITE_UP*jac_affine_2_2_WHITE_UP + jac_affine_1_2_WHITE_UP*jac_affine_2_0_WHITE_UP); + const real_t jac_affine_inv_1_1_WHITE_UP = tmp_coords_jac_7_WHITE_UP*(jac_affine_0_0_WHITE_UP*jac_affine_2_2_WHITE_UP - jac_affine_0_2_WHITE_UP*jac_affine_2_0_WHITE_UP); + const real_t jac_affine_inv_1_2_WHITE_UP = tmp_coords_jac_7_WHITE_UP*(-jac_affine_0_0_WHITE_UP*jac_affine_1_2_WHITE_UP + jac_affine_0_2_WHITE_UP*jac_affine_1_0_WHITE_UP); + const real_t jac_affine_inv_2_0_WHITE_UP = tmp_coords_jac_7_WHITE_UP*(jac_affine_1_0_WHITE_UP*jac_affine_2_1_WHITE_UP - jac_affine_1_1_WHITE_UP*jac_affine_2_0_WHITE_UP); + const real_t jac_affine_inv_2_1_WHITE_UP = tmp_coords_jac_7_WHITE_UP*(-jac_affine_0_0_WHITE_UP*jac_affine_2_1_WHITE_UP + jac_affine_0_1_WHITE_UP*jac_affine_2_0_WHITE_UP); + const real_t jac_affine_inv_2_2_WHITE_UP = tmp_coords_jac_7_WHITE_UP*(jac_affine_0_0_WHITE_UP*jac_affine_1_1_WHITE_UP - jac_affine_0_1_WHITE_UP*jac_affine_1_0_WHITE_UP); + const real_t abs_det_jac_affine_WHITE_UP = abs(tmp_coords_jac_6_WHITE_UP); + const real_t tmp_qloop_0 = rayVertex_1 - refVertex_1; + const real_t tmp_qloop_1 = -rayVertex_0; + const real_t tmp_qloop_2 = -forVertex_0 - tmp_qloop_1; + const real_t tmp_qloop_3 = rayVertex_2 - thrVertex_2; + const real_t tmp_qloop_4 = tmp_qloop_2*tmp_qloop_3; + const real_t tmp_qloop_5 = rayVertex_2 - refVertex_2; + const real_t tmp_qloop_6 = rayVertex_1 - thrVertex_1; + const real_t tmp_qloop_7 = tmp_qloop_2*tmp_qloop_6; + const real_t tmp_qloop_8 = -rayVertex_1; + const real_t tmp_qloop_9 = -forVertex_1 - tmp_qloop_8; + const real_t tmp_qloop_10 = rayVertex_0 - thrVertex_0; + const real_t tmp_qloop_11 = rayVertex_0 - refVertex_0; + const real_t tmp_qloop_12 = -rayVertex_2; + const real_t tmp_qloop_13 = -forVertex_2 - tmp_qloop_12; + const real_t tmp_qloop_14 = tmp_qloop_3*tmp_qloop_9; + const real_t tmp_qloop_15 = tmp_qloop_10*tmp_qloop_13; + const real_t tmp_qloop_16 = -tmp_qloop_0*tmp_qloop_15 + tmp_qloop_0*tmp_qloop_4 + tmp_qloop_10*tmp_qloop_5*tmp_qloop_9 + tmp_qloop_11*tmp_qloop_13*tmp_qloop_6 - tmp_qloop_11*tmp_qloop_14 - tmp_qloop_5*tmp_qloop_7; + const real_t tmp_qloop_17 = 1.0 / (tmp_qloop_16); + const real_t tmp_qloop_20 = radRayVertex*tmp_qloop_16; + const real_t tmp_qloop_21 = radRayVertex - radRefVertex; + const real_t tmp_qloop_32 = -tmp_qloop_13*tmp_qloop_6 + tmp_qloop_14; + const real_t tmp_qloop_33 = -tmp_qloop_21*tmp_qloop_32; + const real_t tmp_qloop_39 = tmp_qloop_10*tmp_qloop_13 - tmp_qloop_4; + const real_t tmp_qloop_40 = tmp_qloop_21*tmp_qloop_39; + const real_t tmp_qloop_45 = -tmp_qloop_10*tmp_qloop_9 + tmp_qloop_7; + const real_t tmp_qloop_46 = -tmp_qloop_21*tmp_qloop_45; + const real_t tmp_qloop_53 = -tmp_qloop_21*tmp_qloop_39; + const real_t tmp_qloop_68 = tmp_qloop_21*tmp_qloop_32; + const real_t tmp_qloop_72 = tmp_qloop_21*tmp_qloop_45; { /* CellType.WHITE_UP */ - const real_t tmp_coords_jac_0_WHITE_UP = 1.0 / (micro_edges_per_macro_edge_float)*1.0; - const real_t p_affine_const_0_0_WHITE_UP = macro_vertex_coord_id_0comp0; - const real_t p_affine_const_0_1_WHITE_UP = macro_vertex_coord_id_0comp1; - const real_t p_affine_const_0_2_WHITE_UP = macro_vertex_coord_id_0comp2; - const real_t p_affine_const_1_0_WHITE_UP = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_WHITE_UP*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0); - const real_t p_affine_const_1_1_WHITE_UP = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_WHITE_UP*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1); - const real_t p_affine_const_1_2_WHITE_UP = macro_vertex_coord_id_0comp2 + tmp_coords_jac_0_WHITE_UP*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2); - const real_t p_affine_const_2_0_WHITE_UP = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_WHITE_UP*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0); - const real_t p_affine_const_2_1_WHITE_UP = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_WHITE_UP*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1); - const real_t p_affine_const_2_2_WHITE_UP = macro_vertex_coord_id_0comp2 + tmp_coords_jac_0_WHITE_UP*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2); - const real_t p_affine_const_3_0_WHITE_UP = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_WHITE_UP*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0); - const real_t p_affine_const_3_1_WHITE_UP = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_WHITE_UP*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1); - const real_t p_affine_const_3_2_WHITE_UP = macro_vertex_coord_id_0comp2 + tmp_coords_jac_0_WHITE_UP*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2); - const real_t jac_affine_0_0_WHITE_UP = -p_affine_const_0_0_WHITE_UP + p_affine_const_1_0_WHITE_UP; - const real_t jac_affine_0_1_WHITE_UP = -p_affine_const_0_0_WHITE_UP + p_affine_const_2_0_WHITE_UP; - const real_t jac_affine_0_2_WHITE_UP = -p_affine_const_0_0_WHITE_UP + p_affine_const_3_0_WHITE_UP; - const real_t jac_affine_1_0_WHITE_UP = -p_affine_const_0_1_WHITE_UP + p_affine_const_1_1_WHITE_UP; - const real_t jac_affine_1_1_WHITE_UP = -p_affine_const_0_1_WHITE_UP + p_affine_const_2_1_WHITE_UP; - const real_t tmp_coords_jac_5_WHITE_UP = jac_affine_0_2_WHITE_UP*jac_affine_1_1_WHITE_UP; - const real_t jac_affine_1_2_WHITE_UP = -p_affine_const_0_1_WHITE_UP + p_affine_const_3_1_WHITE_UP; - const real_t tmp_coords_jac_3_WHITE_UP = jac_affine_0_1_WHITE_UP*jac_affine_1_2_WHITE_UP; - const real_t jac_affine_2_0_WHITE_UP = -p_affine_const_0_2_WHITE_UP + p_affine_const_1_2_WHITE_UP; - const real_t jac_affine_2_1_WHITE_UP = -p_affine_const_0_2_WHITE_UP + p_affine_const_2_2_WHITE_UP; - const real_t tmp_coords_jac_2_WHITE_UP = jac_affine_1_2_WHITE_UP*jac_affine_2_1_WHITE_UP; - const real_t jac_affine_2_2_WHITE_UP = -p_affine_const_0_2_WHITE_UP + p_affine_const_3_2_WHITE_UP; - const real_t tmp_coords_jac_1_WHITE_UP = jac_affine_1_1_WHITE_UP*jac_affine_2_2_WHITE_UP; - const real_t tmp_coords_jac_4_WHITE_UP = jac_affine_0_1_WHITE_UP*jac_affine_2_2_WHITE_UP; - const real_t tmp_coords_jac_6_WHITE_UP = 1.0 / (jac_affine_0_0_WHITE_UP*tmp_coords_jac_1_WHITE_UP - jac_affine_0_0_WHITE_UP*tmp_coords_jac_2_WHITE_UP + jac_affine_0_2_WHITE_UP*jac_affine_1_0_WHITE_UP*jac_affine_2_1_WHITE_UP - jac_affine_1_0_WHITE_UP*tmp_coords_jac_4_WHITE_UP + jac_affine_2_0_WHITE_UP*tmp_coords_jac_3_WHITE_UP - jac_affine_2_0_WHITE_UP*tmp_coords_jac_5_WHITE_UP); - const real_t jac_affine_inv_0_0_WHITE_UP = tmp_coords_jac_6_WHITE_UP*(tmp_coords_jac_1_WHITE_UP - tmp_coords_jac_2_WHITE_UP); - const real_t jac_affine_inv_0_1_WHITE_UP = tmp_coords_jac_6_WHITE_UP*(jac_affine_0_2_WHITE_UP*jac_affine_2_1_WHITE_UP - tmp_coords_jac_4_WHITE_UP); - const real_t jac_affine_inv_0_2_WHITE_UP = tmp_coords_jac_6_WHITE_UP*(tmp_coords_jac_3_WHITE_UP - tmp_coords_jac_5_WHITE_UP); - const real_t jac_affine_inv_1_0_WHITE_UP = tmp_coords_jac_6_WHITE_UP*(-jac_affine_1_0_WHITE_UP*jac_affine_2_2_WHITE_UP + jac_affine_1_2_WHITE_UP*jac_affine_2_0_WHITE_UP); - const real_t jac_affine_inv_1_1_WHITE_UP = tmp_coords_jac_6_WHITE_UP*(jac_affine_0_0_WHITE_UP*jac_affine_2_2_WHITE_UP - jac_affine_0_2_WHITE_UP*jac_affine_2_0_WHITE_UP); - const real_t jac_affine_inv_1_2_WHITE_UP = tmp_coords_jac_6_WHITE_UP*(-jac_affine_0_0_WHITE_UP*jac_affine_1_2_WHITE_UP + jac_affine_0_2_WHITE_UP*jac_affine_1_0_WHITE_UP); - const real_t jac_affine_inv_2_0_WHITE_UP = tmp_coords_jac_6_WHITE_UP*(jac_affine_1_0_WHITE_UP*jac_affine_2_1_WHITE_UP - jac_affine_1_1_WHITE_UP*jac_affine_2_0_WHITE_UP); - const real_t jac_affine_inv_2_1_WHITE_UP = tmp_coords_jac_6_WHITE_UP*(-jac_affine_0_0_WHITE_UP*jac_affine_2_1_WHITE_UP + jac_affine_0_1_WHITE_UP*jac_affine_2_0_WHITE_UP); - const real_t jac_affine_inv_2_2_WHITE_UP = tmp_coords_jac_6_WHITE_UP*(jac_affine_0_0_WHITE_UP*jac_affine_1_1_WHITE_UP - jac_affine_0_1_WHITE_UP*jac_affine_1_0_WHITE_UP); for (int64_t ctr_2 = 0; ctr_2 < micro_edges_per_macro_edge; ctr_2 += 1) for (int64_t ctr_1 = 0; ctr_1 < -ctr_2 + micro_edges_per_macro_edge; ctr_1 += 1) for (int64_t ctr_0 = 0; ctr_0 < -ctr_1 - ctr_2 + micro_edges_per_macro_edge; ctr_0 += 1) { - const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*((real_t)(ctr_1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*((real_t)(ctr_2)); - const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*((real_t)(ctr_1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*((real_t)(ctr_2)); - const real_t p_affine_0_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*((real_t)(ctr_1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*((real_t)(ctr_2)); - const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*((real_t)(ctr_1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*((real_t)(ctr_2)); - const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*((real_t)(ctr_1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*((real_t)(ctr_2)); - const real_t p_affine_1_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*((real_t)(ctr_1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*((real_t)(ctr_2)); - const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*((real_t)(ctr_1 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*((real_t)(ctr_2)); - const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*((real_t)(ctr_1 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*((real_t)(ctr_2)); - const real_t p_affine_2_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*((real_t)(ctr_1 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*((real_t)(ctr_2)); - const real_t p_affine_3_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*((real_t)(ctr_1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*((real_t)(ctr_2 + 1)); - const real_t p_affine_3_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*((real_t)(ctr_1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*((real_t)(ctr_2 + 1)); - const real_t p_affine_3_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*((real_t)(ctr_1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*((real_t)(ctr_2 + 1)); + + const int64_t phantom_ctr_0 = ctr_0; + real_t _data_float_loop_ctr_array_dim_0[4]; + _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0; + _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1; + _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2; + _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3; + real_t _data_float_loop_ctr_array_dim_1[4]; + _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1; + real_t _data_float_loop_ctr_array_dim_2[4]; + _data_float_loop_ctr_array_dim_2[0] = (real_t) ctr_2; + _data_float_loop_ctr_array_dim_2[1] = (real_t) ctr_2; + _data_float_loop_ctr_array_dim_2[2] = (real_t) ctr_2; + _data_float_loop_ctr_array_dim_2[3] = (real_t) ctr_2; + + const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]; + const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]; + const real_t p_affine_0_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]; + const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]; + const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]; + const real_t p_affine_1_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]; + const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]; + const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]; + const real_t p_affine_2_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]; + const real_t p_affine_3_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_3_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_3_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0; const real_t rho_dof_0 = _data_rhoVertex[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))]; const real_t rho_dof_1 = _data_rhoVertex[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1]; const real_t rho_dof_2 = _data_rhoVertex[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))]; @@ -258,28 +311,8 @@ void P2VectorToP1ElementwiseGradRhoByRhoDotUIcosahedralShellMap::toMatrix_macro_ real_t q_acc_3_29 = 0.0; for (int64_t q = 0; q < 5; q += 1) { - const real_t tmp_qloop_0 = rayVertex_1 - refVertex_1; - const real_t tmp_qloop_1 = -rayVertex_0; - const real_t tmp_qloop_2 = -forVertex_0 - tmp_qloop_1; - const real_t tmp_qloop_3 = rayVertex_2 - thrVertex_2; - const real_t tmp_qloop_4 = tmp_qloop_2*tmp_qloop_3; - const real_t tmp_qloop_5 = rayVertex_2 - refVertex_2; - const real_t tmp_qloop_6 = rayVertex_1 - thrVertex_1; - const real_t tmp_qloop_7 = tmp_qloop_2*tmp_qloop_6; - const real_t tmp_qloop_8 = -rayVertex_1; - const real_t tmp_qloop_9 = -forVertex_1 - tmp_qloop_8; - const real_t tmp_qloop_10 = rayVertex_0 - thrVertex_0; - const real_t tmp_qloop_11 = rayVertex_0 - refVertex_0; - const real_t tmp_qloop_12 = -rayVertex_2; - const real_t tmp_qloop_13 = -forVertex_2 - tmp_qloop_12; - const real_t tmp_qloop_14 = tmp_qloop_3*tmp_qloop_9; - const real_t tmp_qloop_15 = tmp_qloop_10*tmp_qloop_13; - const real_t tmp_qloop_16 = -tmp_qloop_0*tmp_qloop_15 + tmp_qloop_0*tmp_qloop_4 + tmp_qloop_10*tmp_qloop_5*tmp_qloop_9 + tmp_qloop_11*tmp_qloop_13*tmp_qloop_6 - tmp_qloop_11*tmp_qloop_14 - tmp_qloop_5*tmp_qloop_7; - const real_t tmp_qloop_17 = 1.0 / (tmp_qloop_16); const real_t tmp_qloop_18 = p_affine_0_0 + (-p_affine_0_0 + p_affine_1_0)*_data_q_p_0[q] + (-p_affine_0_0 + p_affine_2_0)*_data_q_p_1[q] + (-p_affine_0_0 + p_affine_3_0)*_data_q_p_2[q]; const real_t tmp_qloop_19 = (tmp_qloop_18*tmp_qloop_18); - const real_t tmp_qloop_20 = radRayVertex*tmp_qloop_16; - const real_t tmp_qloop_21 = radRayVertex - radRefVertex; const real_t tmp_qloop_22 = p_affine_0_2 + (-p_affine_0_2 + p_affine_1_2)*_data_q_p_0[q] + (-p_affine_0_2 + p_affine_2_2)*_data_q_p_1[q] + (-p_affine_0_2 + p_affine_3_2)*_data_q_p_2[q]; const real_t tmp_qloop_23 = -tmp_qloop_12 - tmp_qloop_22; const real_t tmp_qloop_24 = -tmp_qloop_1 - tmp_qloop_18; @@ -290,28 +323,21 @@ void P2VectorToP1ElementwiseGradRhoByRhoDotUIcosahedralShellMap::toMatrix_macro_ const real_t tmp_qloop_29 = (tmp_qloop_25*tmp_qloop_25); const real_t tmp_qloop_30 = (tmp_qloop_22*tmp_qloop_22); const real_t tmp_qloop_31 = tmp_qloop_19 + tmp_qloop_29 + tmp_qloop_30; - const real_t tmp_qloop_32 = -tmp_qloop_13*tmp_qloop_6 + tmp_qloop_14; - const real_t tmp_qloop_33 = -tmp_qloop_21*tmp_qloop_32; const real_t tmp_qloop_34 = tmp_qloop_18*tmp_qloop_33; const real_t tmp_qloop_35 = tmp_qloop_28 + tmp_qloop_34; const real_t tmp_qloop_36 = tmp_qloop_17*(-tmp_qloop_19*tmp_qloop_28 + tmp_qloop_31*tmp_qloop_35); const real_t tmp_qloop_37 = pow(tmp_qloop_31, -1.5000000000000000); const real_t tmp_qloop_38 = tmp_qloop_37*1.0; - const real_t tmp_qloop_39 = tmp_qloop_10*tmp_qloop_13 - tmp_qloop_4; - const real_t tmp_qloop_40 = tmp_qloop_21*tmp_qloop_39; const real_t tmp_qloop_41 = tmp_qloop_31*tmp_qloop_40; const real_t tmp_qloop_42 = -tmp_qloop_25*tmp_qloop_28 - tmp_qloop_41; const real_t tmp_qloop_43 = tmp_qloop_17*tmp_qloop_38; const real_t tmp_qloop_44 = tmp_qloop_42*tmp_qloop_43; - const real_t tmp_qloop_45 = -tmp_qloop_10*tmp_qloop_9 + tmp_qloop_7; - const real_t tmp_qloop_46 = -tmp_qloop_21*tmp_qloop_45; const real_t tmp_qloop_47 = -tmp_qloop_28; const real_t tmp_qloop_48 = tmp_qloop_22*tmp_qloop_47; const real_t tmp_qloop_49 = tmp_qloop_31*tmp_qloop_46 + tmp_qloop_48; const real_t tmp_qloop_50 = tmp_qloop_43*tmp_qloop_49; const real_t tmp_qloop_51 = tmp_qloop_18*tmp_qloop_47 + tmp_qloop_31*tmp_qloop_33; const real_t tmp_qloop_52 = tmp_qloop_43*tmp_qloop_51; - const real_t tmp_qloop_53 = -tmp_qloop_21*tmp_qloop_39; const real_t tmp_qloop_54 = tmp_qloop_25*tmp_qloop_53 + tmp_qloop_28; const real_t tmp_qloop_55 = tmp_qloop_29*tmp_qloop_47 + tmp_qloop_31*tmp_qloop_54; const real_t tmp_qloop_56 = tmp_qloop_22*tmp_qloop_46; @@ -319,11 +345,9 @@ void P2VectorToP1ElementwiseGradRhoByRhoDotUIcosahedralShellMap::toMatrix_macro_ const real_t tmp_qloop_58 = tmp_qloop_30*tmp_qloop_47 + tmp_qloop_31*tmp_qloop_57; const real_t tmp_qloop_66 = pow(tmp_qloop_31, -2.5000000000000000)*3.0; const real_t tmp_qloop_67 = tmp_qloop_36*tmp_qloop_66; - const real_t tmp_qloop_68 = tmp_qloop_21*tmp_qloop_32; const real_t tmp_qloop_69 = tmp_qloop_18*2.0; const real_t tmp_qloop_70 = -tmp_qloop_41; const real_t tmp_qloop_71 = tmp_qloop_35*2.0; - const real_t tmp_qloop_72 = tmp_qloop_21*tmp_qloop_45; const real_t tmp_qloop_73 = -tmp_qloop_31*tmp_qloop_72; const real_t tmp_qloop_74 = -tmp_qloop_20 + tmp_qloop_27; const real_t tmp_qloop_75 = tmp_qloop_18*tmp_qloop_68 + tmp_qloop_34*2.0 + tmp_qloop_74; @@ -385,14 +409,7 @@ void P2VectorToP1ElementwiseGradRhoByRhoDotUIcosahedralShellMap::toMatrix_macro_ const real_t tmp_qloop_132 = tmp_qloop_109 - tmp_qloop_110 - tmp_qloop_112 - tmp_qloop_131; const real_t tmp_qloop_133 = tmp_qloop_103*4.0; const real_t tmp_qloop_134 = tmp_qloop_111 - tmp_qloop_114 - tmp_qloop_133; - const real_t tmp_qloop_135 = 1.0 / (rho_dof_0*tmp_qloop_115 + rho_dof_1*tmp_qloop_127 + rho_dof_2*tmp_qloop_128 + rho_dof_3*tmp_qloop_129 + rho_dof_4*tmp_qloop_110 + rho_dof_5*tmp_qloop_113 + rho_dof_6*tmp_qloop_112 + rho_dof_7*tmp_qloop_130 + rho_dof_8*tmp_qloop_132 + rho_dof_9*tmp_qloop_134)*_data_q_w[q]; - const real_t tmp_qloop_136 = tmp_qloop_135*(1.0 - _data_q_p_0[q] - _data_q_p_1[q] - _data_q_p_2[q]); - const real_t tmp_qloop_142 = tmp_qloop_135*_data_q_p_0[q]; - const real_t tmp_qloop_144 = tmp_qloop_135*_data_q_p_2[q]; - const real_t tmp_qloop_147 = tmp_qloop_135*_data_q_p_1[q]; const real_t tmp_qloop_158 = tmp_qloop_105*tmp_qloop_111; - const real_t tmp_qloop_159 = tmp_qloop_135*tmp_qloop_158; - const real_t tmp_qloop_161 = tmp_qloop_107*tmp_qloop_135; const real_t jac_blending_0_0 = tmp_qloop_36*tmp_qloop_38; const real_t jac_blending_0_1 = tmp_qloop_18*tmp_qloop_44; const real_t jac_blending_0_2 = tmp_qloop_18*tmp_qloop_50; @@ -410,6 +427,13 @@ void P2VectorToP1ElementwiseGradRhoByRhoDotUIcosahedralShellMap::toMatrix_macro_ const real_t tmp_qloop_64 = jac_blending_0_0*tmp_qloop_59 - jac_blending_0_0*tmp_qloop_61 + jac_blending_0_2*jac_blending_1_0*jac_blending_2_1 - jac_blending_1_0*tmp_qloop_62 + jac_blending_2_0*tmp_qloop_60 - jac_blending_2_0*tmp_qloop_63; const real_t tmp_qloop_65 = 1.0 / (tmp_qloop_64); const real_t abs_det_jac_blending = tmp_qloop_64; + const real_t tmp_qloop_135 = abs_det_jac_affine_WHITE_UP*abs_det_jac_blending*1.0 / (rho_dof_0*tmp_qloop_115 + rho_dof_1*tmp_qloop_127 + rho_dof_2*tmp_qloop_128 + rho_dof_3*tmp_qloop_129 + rho_dof_4*tmp_qloop_110 + rho_dof_5*tmp_qloop_113 + rho_dof_6*tmp_qloop_112 + rho_dof_7*tmp_qloop_130 + rho_dof_8*tmp_qloop_132 + rho_dof_9*tmp_qloop_134)*_data_q_w[q]; + const real_t tmp_qloop_136 = tmp_qloop_135*(1.0 - _data_q_p_0[q] - _data_q_p_1[q] - _data_q_p_2[q]); + const real_t tmp_qloop_142 = tmp_qloop_135*_data_q_p_0[q]; + const real_t tmp_qloop_144 = tmp_qloop_135*_data_q_p_2[q]; + const real_t tmp_qloop_147 = tmp_qloop_135*_data_q_p_1[q]; + const real_t tmp_qloop_159 = tmp_qloop_135*tmp_qloop_158; + const real_t tmp_qloop_161 = tmp_qloop_107*tmp_qloop_135; const real_t jac_blending_inv_0_0 = tmp_qloop_65*(tmp_qloop_59 - tmp_qloop_61); const real_t jac_blending_inv_0_1 = tmp_qloop_65*(jac_blending_0_2*jac_blending_2_1 - tmp_qloop_62); const real_t jac_blending_inv_0_2 = tmp_qloop_65*(tmp_qloop_60 - tmp_qloop_63); @@ -998,73 +1022,93 @@ void P2VectorToP1ElementwiseGradRhoByRhoDotUIcosahedralShellMap::toMatrix_macro_ mat->addValues( _data_rowIdx, _data_colIdx, _data_mat ); } } + const real_t tmp_coords_jac_0_WHITE_DOWN = 1.0 / (micro_edges_per_macro_edge_float)*1.0; + const real_t tmp_coords_jac_1_WHITE_DOWN = tmp_coords_jac_0_WHITE_DOWN*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0); + const real_t tmp_coords_jac_2_WHITE_DOWN = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_WHITE_DOWN*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0); + const real_t tmp_coords_jac_3_WHITE_DOWN = tmp_coords_jac_1_WHITE_DOWN + tmp_coords_jac_2_WHITE_DOWN; + const real_t tmp_coords_jac_4_WHITE_DOWN = tmp_coords_jac_0_WHITE_DOWN*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1); + const real_t tmp_coords_jac_5_WHITE_DOWN = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_WHITE_DOWN*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1); + const real_t tmp_coords_jac_6_WHITE_DOWN = tmp_coords_jac_4_WHITE_DOWN + tmp_coords_jac_5_WHITE_DOWN; + const real_t tmp_coords_jac_7_WHITE_DOWN = tmp_coords_jac_0_WHITE_DOWN*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2); + const real_t tmp_coords_jac_8_WHITE_DOWN = macro_vertex_coord_id_0comp2 + tmp_coords_jac_0_WHITE_DOWN*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2); + const real_t tmp_coords_jac_9_WHITE_DOWN = tmp_coords_jac_7_WHITE_DOWN + tmp_coords_jac_8_WHITE_DOWN; + const real_t tmp_coords_jac_10_WHITE_DOWN = tmp_coords_jac_0_WHITE_DOWN*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0); + const real_t tmp_coords_jac_11_WHITE_DOWN = tmp_coords_jac_0_WHITE_DOWN*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1); + const real_t tmp_coords_jac_12_WHITE_DOWN = tmp_coords_jac_0_WHITE_DOWN*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2); + const real_t p_affine_const_0_0_WHITE_DOWN = tmp_coords_jac_3_WHITE_DOWN; + const real_t p_affine_const_0_1_WHITE_DOWN = tmp_coords_jac_6_WHITE_DOWN; + const real_t p_affine_const_0_2_WHITE_DOWN = tmp_coords_jac_9_WHITE_DOWN; + const real_t p_affine_const_1_0_WHITE_DOWN = tmp_coords_jac_10_WHITE_DOWN + tmp_coords_jac_2_WHITE_DOWN; + const real_t p_affine_const_1_1_WHITE_DOWN = tmp_coords_jac_11_WHITE_DOWN + tmp_coords_jac_5_WHITE_DOWN; + const real_t p_affine_const_1_2_WHITE_DOWN = tmp_coords_jac_12_WHITE_DOWN + tmp_coords_jac_8_WHITE_DOWN; + const real_t p_affine_const_2_0_WHITE_DOWN = macro_vertex_coord_id_0comp0 + tmp_coords_jac_10_WHITE_DOWN + tmp_coords_jac_1_WHITE_DOWN; + const real_t p_affine_const_2_1_WHITE_DOWN = macro_vertex_coord_id_0comp1 + tmp_coords_jac_11_WHITE_DOWN + tmp_coords_jac_4_WHITE_DOWN; + const real_t p_affine_const_2_2_WHITE_DOWN = macro_vertex_coord_id_0comp2 + tmp_coords_jac_12_WHITE_DOWN + tmp_coords_jac_7_WHITE_DOWN; + const real_t p_affine_const_3_0_WHITE_DOWN = tmp_coords_jac_10_WHITE_DOWN + tmp_coords_jac_3_WHITE_DOWN; + const real_t p_affine_const_3_1_WHITE_DOWN = tmp_coords_jac_11_WHITE_DOWN + tmp_coords_jac_6_WHITE_DOWN; + const real_t p_affine_const_3_2_WHITE_DOWN = tmp_coords_jac_12_WHITE_DOWN + tmp_coords_jac_9_WHITE_DOWN; + const real_t jac_affine_0_0_WHITE_DOWN = -p_affine_const_0_0_WHITE_DOWN + p_affine_const_1_0_WHITE_DOWN; + const real_t jac_affine_0_1_WHITE_DOWN = -p_affine_const_0_0_WHITE_DOWN + p_affine_const_2_0_WHITE_DOWN; + const real_t jac_affine_0_2_WHITE_DOWN = -p_affine_const_0_0_WHITE_DOWN + p_affine_const_3_0_WHITE_DOWN; + const real_t jac_affine_1_0_WHITE_DOWN = -p_affine_const_0_1_WHITE_DOWN + p_affine_const_1_1_WHITE_DOWN; + const real_t jac_affine_1_1_WHITE_DOWN = -p_affine_const_0_1_WHITE_DOWN + p_affine_const_2_1_WHITE_DOWN; + const real_t tmp_coords_jac_17_WHITE_DOWN = jac_affine_0_2_WHITE_DOWN*jac_affine_1_1_WHITE_DOWN; + const real_t jac_affine_1_2_WHITE_DOWN = -p_affine_const_0_1_WHITE_DOWN + p_affine_const_3_1_WHITE_DOWN; + const real_t tmp_coords_jac_15_WHITE_DOWN = jac_affine_0_1_WHITE_DOWN*jac_affine_1_2_WHITE_DOWN; + const real_t jac_affine_2_0_WHITE_DOWN = -p_affine_const_0_2_WHITE_DOWN + p_affine_const_1_2_WHITE_DOWN; + const real_t jac_affine_2_1_WHITE_DOWN = -p_affine_const_0_2_WHITE_DOWN + p_affine_const_2_2_WHITE_DOWN; + const real_t tmp_coords_jac_14_WHITE_DOWN = jac_affine_1_2_WHITE_DOWN*jac_affine_2_1_WHITE_DOWN; + const real_t jac_affine_2_2_WHITE_DOWN = -p_affine_const_0_2_WHITE_DOWN + p_affine_const_3_2_WHITE_DOWN; + const real_t tmp_coords_jac_13_WHITE_DOWN = jac_affine_1_1_WHITE_DOWN*jac_affine_2_2_WHITE_DOWN; + const real_t tmp_coords_jac_16_WHITE_DOWN = jac_affine_0_1_WHITE_DOWN*jac_affine_2_2_WHITE_DOWN; + const real_t tmp_coords_jac_18_WHITE_DOWN = jac_affine_0_0_WHITE_DOWN*tmp_coords_jac_13_WHITE_DOWN - jac_affine_0_0_WHITE_DOWN*tmp_coords_jac_14_WHITE_DOWN + jac_affine_0_2_WHITE_DOWN*jac_affine_1_0_WHITE_DOWN*jac_affine_2_1_WHITE_DOWN - jac_affine_1_0_WHITE_DOWN*tmp_coords_jac_16_WHITE_DOWN + jac_affine_2_0_WHITE_DOWN*tmp_coords_jac_15_WHITE_DOWN - jac_affine_2_0_WHITE_DOWN*tmp_coords_jac_17_WHITE_DOWN; + const real_t tmp_coords_jac_19_WHITE_DOWN = 1.0 / (tmp_coords_jac_18_WHITE_DOWN); + const real_t jac_affine_inv_0_0_WHITE_DOWN = tmp_coords_jac_19_WHITE_DOWN*(tmp_coords_jac_13_WHITE_DOWN - tmp_coords_jac_14_WHITE_DOWN); + const real_t jac_affine_inv_0_1_WHITE_DOWN = tmp_coords_jac_19_WHITE_DOWN*(jac_affine_0_2_WHITE_DOWN*jac_affine_2_1_WHITE_DOWN - tmp_coords_jac_16_WHITE_DOWN); + const real_t jac_affine_inv_0_2_WHITE_DOWN = tmp_coords_jac_19_WHITE_DOWN*(tmp_coords_jac_15_WHITE_DOWN - tmp_coords_jac_17_WHITE_DOWN); + const real_t jac_affine_inv_1_0_WHITE_DOWN = tmp_coords_jac_19_WHITE_DOWN*(-jac_affine_1_0_WHITE_DOWN*jac_affine_2_2_WHITE_DOWN + jac_affine_1_2_WHITE_DOWN*jac_affine_2_0_WHITE_DOWN); + const real_t jac_affine_inv_1_1_WHITE_DOWN = tmp_coords_jac_19_WHITE_DOWN*(jac_affine_0_0_WHITE_DOWN*jac_affine_2_2_WHITE_DOWN - jac_affine_0_2_WHITE_DOWN*jac_affine_2_0_WHITE_DOWN); + const real_t jac_affine_inv_1_2_WHITE_DOWN = tmp_coords_jac_19_WHITE_DOWN*(-jac_affine_0_0_WHITE_DOWN*jac_affine_1_2_WHITE_DOWN + jac_affine_0_2_WHITE_DOWN*jac_affine_1_0_WHITE_DOWN); + const real_t jac_affine_inv_2_0_WHITE_DOWN = tmp_coords_jac_19_WHITE_DOWN*(jac_affine_1_0_WHITE_DOWN*jac_affine_2_1_WHITE_DOWN - jac_affine_1_1_WHITE_DOWN*jac_affine_2_0_WHITE_DOWN); + const real_t jac_affine_inv_2_1_WHITE_DOWN = tmp_coords_jac_19_WHITE_DOWN*(-jac_affine_0_0_WHITE_DOWN*jac_affine_2_1_WHITE_DOWN + jac_affine_0_1_WHITE_DOWN*jac_affine_2_0_WHITE_DOWN); + const real_t jac_affine_inv_2_2_WHITE_DOWN = tmp_coords_jac_19_WHITE_DOWN*(jac_affine_0_0_WHITE_DOWN*jac_affine_1_1_WHITE_DOWN - jac_affine_0_1_WHITE_DOWN*jac_affine_1_0_WHITE_DOWN); + const real_t abs_det_jac_affine_WHITE_DOWN = abs(tmp_coords_jac_18_WHITE_DOWN); { /* CellType.WHITE_DOWN */ - const real_t tmp_coords_jac_0_WHITE_DOWN = 1.0 / (micro_edges_per_macro_edge_float)*1.0; - const real_t tmp_coords_jac_1_WHITE_DOWN = tmp_coords_jac_0_WHITE_DOWN*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0); - const real_t tmp_coords_jac_2_WHITE_DOWN = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_WHITE_DOWN*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0); - const real_t tmp_coords_jac_3_WHITE_DOWN = tmp_coords_jac_1_WHITE_DOWN + tmp_coords_jac_2_WHITE_DOWN; - const real_t tmp_coords_jac_4_WHITE_DOWN = tmp_coords_jac_0_WHITE_DOWN*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1); - const real_t tmp_coords_jac_5_WHITE_DOWN = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_WHITE_DOWN*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1); - const real_t tmp_coords_jac_6_WHITE_DOWN = tmp_coords_jac_4_WHITE_DOWN + tmp_coords_jac_5_WHITE_DOWN; - const real_t tmp_coords_jac_7_WHITE_DOWN = tmp_coords_jac_0_WHITE_DOWN*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2); - const real_t tmp_coords_jac_8_WHITE_DOWN = macro_vertex_coord_id_0comp2 + tmp_coords_jac_0_WHITE_DOWN*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2); - const real_t tmp_coords_jac_9_WHITE_DOWN = tmp_coords_jac_7_WHITE_DOWN + tmp_coords_jac_8_WHITE_DOWN; - const real_t tmp_coords_jac_10_WHITE_DOWN = tmp_coords_jac_0_WHITE_DOWN*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0); - const real_t tmp_coords_jac_11_WHITE_DOWN = tmp_coords_jac_0_WHITE_DOWN*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1); - const real_t tmp_coords_jac_12_WHITE_DOWN = tmp_coords_jac_0_WHITE_DOWN*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2); - const real_t p_affine_const_0_0_WHITE_DOWN = tmp_coords_jac_3_WHITE_DOWN; - const real_t p_affine_const_0_1_WHITE_DOWN = tmp_coords_jac_6_WHITE_DOWN; - const real_t p_affine_const_0_2_WHITE_DOWN = tmp_coords_jac_9_WHITE_DOWN; - const real_t p_affine_const_1_0_WHITE_DOWN = tmp_coords_jac_10_WHITE_DOWN + tmp_coords_jac_2_WHITE_DOWN; - const real_t p_affine_const_1_1_WHITE_DOWN = tmp_coords_jac_11_WHITE_DOWN + tmp_coords_jac_5_WHITE_DOWN; - const real_t p_affine_const_1_2_WHITE_DOWN = tmp_coords_jac_12_WHITE_DOWN + tmp_coords_jac_8_WHITE_DOWN; - const real_t p_affine_const_2_0_WHITE_DOWN = macro_vertex_coord_id_0comp0 + tmp_coords_jac_10_WHITE_DOWN + tmp_coords_jac_1_WHITE_DOWN; - const real_t p_affine_const_2_1_WHITE_DOWN = macro_vertex_coord_id_0comp1 + tmp_coords_jac_11_WHITE_DOWN + tmp_coords_jac_4_WHITE_DOWN; - const real_t p_affine_const_2_2_WHITE_DOWN = macro_vertex_coord_id_0comp2 + tmp_coords_jac_12_WHITE_DOWN + tmp_coords_jac_7_WHITE_DOWN; - const real_t p_affine_const_3_0_WHITE_DOWN = tmp_coords_jac_10_WHITE_DOWN + tmp_coords_jac_3_WHITE_DOWN; - const real_t p_affine_const_3_1_WHITE_DOWN = tmp_coords_jac_11_WHITE_DOWN + tmp_coords_jac_6_WHITE_DOWN; - const real_t p_affine_const_3_2_WHITE_DOWN = tmp_coords_jac_12_WHITE_DOWN + tmp_coords_jac_9_WHITE_DOWN; - const real_t jac_affine_0_0_WHITE_DOWN = -p_affine_const_0_0_WHITE_DOWN + p_affine_const_1_0_WHITE_DOWN; - const real_t jac_affine_0_1_WHITE_DOWN = -p_affine_const_0_0_WHITE_DOWN + p_affine_const_2_0_WHITE_DOWN; - const real_t jac_affine_0_2_WHITE_DOWN = -p_affine_const_0_0_WHITE_DOWN + p_affine_const_3_0_WHITE_DOWN; - const real_t jac_affine_1_0_WHITE_DOWN = -p_affine_const_0_1_WHITE_DOWN + p_affine_const_1_1_WHITE_DOWN; - const real_t jac_affine_1_1_WHITE_DOWN = -p_affine_const_0_1_WHITE_DOWN + p_affine_const_2_1_WHITE_DOWN; - const real_t tmp_coords_jac_17_WHITE_DOWN = jac_affine_0_2_WHITE_DOWN*jac_affine_1_1_WHITE_DOWN; - const real_t jac_affine_1_2_WHITE_DOWN = -p_affine_const_0_1_WHITE_DOWN + p_affine_const_3_1_WHITE_DOWN; - const real_t tmp_coords_jac_15_WHITE_DOWN = jac_affine_0_1_WHITE_DOWN*jac_affine_1_2_WHITE_DOWN; - const real_t jac_affine_2_0_WHITE_DOWN = -p_affine_const_0_2_WHITE_DOWN + p_affine_const_1_2_WHITE_DOWN; - const real_t jac_affine_2_1_WHITE_DOWN = -p_affine_const_0_2_WHITE_DOWN + p_affine_const_2_2_WHITE_DOWN; - const real_t tmp_coords_jac_14_WHITE_DOWN = jac_affine_1_2_WHITE_DOWN*jac_affine_2_1_WHITE_DOWN; - const real_t jac_affine_2_2_WHITE_DOWN = -p_affine_const_0_2_WHITE_DOWN + p_affine_const_3_2_WHITE_DOWN; - const real_t tmp_coords_jac_13_WHITE_DOWN = jac_affine_1_1_WHITE_DOWN*jac_affine_2_2_WHITE_DOWN; - const real_t tmp_coords_jac_16_WHITE_DOWN = jac_affine_0_1_WHITE_DOWN*jac_affine_2_2_WHITE_DOWN; - const real_t tmp_coords_jac_18_WHITE_DOWN = 1.0 / (jac_affine_0_0_WHITE_DOWN*tmp_coords_jac_13_WHITE_DOWN - jac_affine_0_0_WHITE_DOWN*tmp_coords_jac_14_WHITE_DOWN + jac_affine_0_2_WHITE_DOWN*jac_affine_1_0_WHITE_DOWN*jac_affine_2_1_WHITE_DOWN - jac_affine_1_0_WHITE_DOWN*tmp_coords_jac_16_WHITE_DOWN + jac_affine_2_0_WHITE_DOWN*tmp_coords_jac_15_WHITE_DOWN - jac_affine_2_0_WHITE_DOWN*tmp_coords_jac_17_WHITE_DOWN); - const real_t jac_affine_inv_0_0_WHITE_DOWN = tmp_coords_jac_18_WHITE_DOWN*(tmp_coords_jac_13_WHITE_DOWN - tmp_coords_jac_14_WHITE_DOWN); - const real_t jac_affine_inv_0_1_WHITE_DOWN = tmp_coords_jac_18_WHITE_DOWN*(jac_affine_0_2_WHITE_DOWN*jac_affine_2_1_WHITE_DOWN - tmp_coords_jac_16_WHITE_DOWN); - const real_t jac_affine_inv_0_2_WHITE_DOWN = tmp_coords_jac_18_WHITE_DOWN*(tmp_coords_jac_15_WHITE_DOWN - tmp_coords_jac_17_WHITE_DOWN); - const real_t jac_affine_inv_1_0_WHITE_DOWN = tmp_coords_jac_18_WHITE_DOWN*(-jac_affine_1_0_WHITE_DOWN*jac_affine_2_2_WHITE_DOWN + jac_affine_1_2_WHITE_DOWN*jac_affine_2_0_WHITE_DOWN); - const real_t jac_affine_inv_1_1_WHITE_DOWN = tmp_coords_jac_18_WHITE_DOWN*(jac_affine_0_0_WHITE_DOWN*jac_affine_2_2_WHITE_DOWN - jac_affine_0_2_WHITE_DOWN*jac_affine_2_0_WHITE_DOWN); - const real_t jac_affine_inv_1_2_WHITE_DOWN = tmp_coords_jac_18_WHITE_DOWN*(-jac_affine_0_0_WHITE_DOWN*jac_affine_1_2_WHITE_DOWN + jac_affine_0_2_WHITE_DOWN*jac_affine_1_0_WHITE_DOWN); - const real_t jac_affine_inv_2_0_WHITE_DOWN = tmp_coords_jac_18_WHITE_DOWN*(jac_affine_1_0_WHITE_DOWN*jac_affine_2_1_WHITE_DOWN - jac_affine_1_1_WHITE_DOWN*jac_affine_2_0_WHITE_DOWN); - const real_t jac_affine_inv_2_1_WHITE_DOWN = tmp_coords_jac_18_WHITE_DOWN*(-jac_affine_0_0_WHITE_DOWN*jac_affine_2_1_WHITE_DOWN + jac_affine_0_1_WHITE_DOWN*jac_affine_2_0_WHITE_DOWN); - const real_t jac_affine_inv_2_2_WHITE_DOWN = tmp_coords_jac_18_WHITE_DOWN*(jac_affine_0_0_WHITE_DOWN*jac_affine_1_1_WHITE_DOWN - jac_affine_0_1_WHITE_DOWN*jac_affine_1_0_WHITE_DOWN); for (int64_t ctr_2 = 0; ctr_2 < micro_edges_per_macro_edge; ctr_2 += 1) for (int64_t ctr_1 = 0; ctr_1 < -ctr_2 + micro_edges_per_macro_edge; ctr_1 += 1) for (int64_t ctr_0 = 0; ctr_0 < -ctr_1 - ctr_2 + micro_edges_per_macro_edge - 2; ctr_0 += 1) { - const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*((real_t)(ctr_1 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*((real_t)(ctr_2)); - const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*((real_t)(ctr_1 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*((real_t)(ctr_2)); - const real_t p_affine_0_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*((real_t)(ctr_1 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*((real_t)(ctr_2)); - const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*((real_t)(ctr_1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*((real_t)(ctr_2 + 1)); - const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*((real_t)(ctr_1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*((real_t)(ctr_2 + 1)); - const real_t p_affine_1_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*((real_t)(ctr_1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*((real_t)(ctr_2 + 1)); - const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*((real_t)(ctr_1 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*((real_t)(ctr_2 + 1)); - const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*((real_t)(ctr_1 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*((real_t)(ctr_2 + 1)); - const real_t p_affine_2_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*((real_t)(ctr_1 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*((real_t)(ctr_2 + 1)); - const real_t p_affine_3_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*((real_t)(ctr_1 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*((real_t)(ctr_2 + 1)); - const real_t p_affine_3_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*((real_t)(ctr_1 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*((real_t)(ctr_2 + 1)); - const real_t p_affine_3_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*((real_t)(ctr_1 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*((real_t)(ctr_2 + 1)); + + const int64_t phantom_ctr_0 = ctr_0; + real_t _data_float_loop_ctr_array_dim_0[4]; + _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0; + _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1; + _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2; + _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3; + real_t _data_float_loop_ctr_array_dim_1[4]; + _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1; + real_t _data_float_loop_ctr_array_dim_2[4]; + _data_float_loop_ctr_array_dim_2[0] = (real_t) ctr_2; + _data_float_loop_ctr_array_dim_2[1] = (real_t) ctr_2; + _data_float_loop_ctr_array_dim_2[2] = (real_t) ctr_2; + _data_float_loop_ctr_array_dim_2[3] = (real_t) ctr_2; + + const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]; + const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]; + const real_t p_affine_0_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]; + const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_1_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_2_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_3_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_3_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_3_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0; const real_t rho_dof_0 = _data_rhoVertex[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1]; const real_t rho_dof_1 = _data_rhoVertex[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1]; const real_t rho_dof_2 = _data_rhoVertex[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))]; @@ -1197,28 +1241,8 @@ void P2VectorToP1ElementwiseGradRhoByRhoDotUIcosahedralShellMap::toMatrix_macro_ real_t q_acc_3_29 = 0.0; for (int64_t q = 0; q < 5; q += 1) { - const real_t tmp_qloop_0 = rayVertex_1 - refVertex_1; - const real_t tmp_qloop_1 = -rayVertex_0; - const real_t tmp_qloop_2 = -forVertex_0 - tmp_qloop_1; - const real_t tmp_qloop_3 = rayVertex_2 - thrVertex_2; - const real_t tmp_qloop_4 = tmp_qloop_2*tmp_qloop_3; - const real_t tmp_qloop_5 = rayVertex_2 - refVertex_2; - const real_t tmp_qloop_6 = rayVertex_1 - thrVertex_1; - const real_t tmp_qloop_7 = tmp_qloop_2*tmp_qloop_6; - const real_t tmp_qloop_8 = -rayVertex_1; - const real_t tmp_qloop_9 = -forVertex_1 - tmp_qloop_8; - const real_t tmp_qloop_10 = rayVertex_0 - thrVertex_0; - const real_t tmp_qloop_11 = rayVertex_0 - refVertex_0; - const real_t tmp_qloop_12 = -rayVertex_2; - const real_t tmp_qloop_13 = -forVertex_2 - tmp_qloop_12; - const real_t tmp_qloop_14 = tmp_qloop_3*tmp_qloop_9; - const real_t tmp_qloop_15 = tmp_qloop_10*tmp_qloop_13; - const real_t tmp_qloop_16 = -tmp_qloop_0*tmp_qloop_15 + tmp_qloop_0*tmp_qloop_4 + tmp_qloop_10*tmp_qloop_5*tmp_qloop_9 + tmp_qloop_11*tmp_qloop_13*tmp_qloop_6 - tmp_qloop_11*tmp_qloop_14 - tmp_qloop_5*tmp_qloop_7; - const real_t tmp_qloop_17 = 1.0 / (tmp_qloop_16); const real_t tmp_qloop_18 = p_affine_0_0 + (-p_affine_0_0 + p_affine_1_0)*_data_q_p_0[q] + (-p_affine_0_0 + p_affine_2_0)*_data_q_p_1[q] + (-p_affine_0_0 + p_affine_3_0)*_data_q_p_2[q]; const real_t tmp_qloop_19 = (tmp_qloop_18*tmp_qloop_18); - const real_t tmp_qloop_20 = radRayVertex*tmp_qloop_16; - const real_t tmp_qloop_21 = radRayVertex - radRefVertex; const real_t tmp_qloop_22 = p_affine_0_2 + (-p_affine_0_2 + p_affine_1_2)*_data_q_p_0[q] + (-p_affine_0_2 + p_affine_2_2)*_data_q_p_1[q] + (-p_affine_0_2 + p_affine_3_2)*_data_q_p_2[q]; const real_t tmp_qloop_23 = -tmp_qloop_12 - tmp_qloop_22; const real_t tmp_qloop_24 = -tmp_qloop_1 - tmp_qloop_18; @@ -1229,28 +1253,21 @@ void P2VectorToP1ElementwiseGradRhoByRhoDotUIcosahedralShellMap::toMatrix_macro_ const real_t tmp_qloop_29 = (tmp_qloop_25*tmp_qloop_25); const real_t tmp_qloop_30 = (tmp_qloop_22*tmp_qloop_22); const real_t tmp_qloop_31 = tmp_qloop_19 + tmp_qloop_29 + tmp_qloop_30; - const real_t tmp_qloop_32 = -tmp_qloop_13*tmp_qloop_6 + tmp_qloop_14; - const real_t tmp_qloop_33 = -tmp_qloop_21*tmp_qloop_32; const real_t tmp_qloop_34 = tmp_qloop_18*tmp_qloop_33; const real_t tmp_qloop_35 = tmp_qloop_28 + tmp_qloop_34; const real_t tmp_qloop_36 = tmp_qloop_17*(-tmp_qloop_19*tmp_qloop_28 + tmp_qloop_31*tmp_qloop_35); const real_t tmp_qloop_37 = pow(tmp_qloop_31, -1.5000000000000000); const real_t tmp_qloop_38 = tmp_qloop_37*1.0; - const real_t tmp_qloop_39 = tmp_qloop_10*tmp_qloop_13 - tmp_qloop_4; - const real_t tmp_qloop_40 = tmp_qloop_21*tmp_qloop_39; const real_t tmp_qloop_41 = tmp_qloop_31*tmp_qloop_40; const real_t tmp_qloop_42 = -tmp_qloop_25*tmp_qloop_28 - tmp_qloop_41; const real_t tmp_qloop_43 = tmp_qloop_17*tmp_qloop_38; const real_t tmp_qloop_44 = tmp_qloop_42*tmp_qloop_43; - const real_t tmp_qloop_45 = -tmp_qloop_10*tmp_qloop_9 + tmp_qloop_7; - const real_t tmp_qloop_46 = -tmp_qloop_21*tmp_qloop_45; const real_t tmp_qloop_47 = -tmp_qloop_28; const real_t tmp_qloop_48 = tmp_qloop_22*tmp_qloop_47; const real_t tmp_qloop_49 = tmp_qloop_31*tmp_qloop_46 + tmp_qloop_48; const real_t tmp_qloop_50 = tmp_qloop_43*tmp_qloop_49; const real_t tmp_qloop_51 = tmp_qloop_18*tmp_qloop_47 + tmp_qloop_31*tmp_qloop_33; const real_t tmp_qloop_52 = tmp_qloop_43*tmp_qloop_51; - const real_t tmp_qloop_53 = -tmp_qloop_21*tmp_qloop_39; const real_t tmp_qloop_54 = tmp_qloop_25*tmp_qloop_53 + tmp_qloop_28; const real_t tmp_qloop_55 = tmp_qloop_29*tmp_qloop_47 + tmp_qloop_31*tmp_qloop_54; const real_t tmp_qloop_56 = tmp_qloop_22*tmp_qloop_46; @@ -1258,11 +1275,9 @@ void P2VectorToP1ElementwiseGradRhoByRhoDotUIcosahedralShellMap::toMatrix_macro_ const real_t tmp_qloop_58 = tmp_qloop_30*tmp_qloop_47 + tmp_qloop_31*tmp_qloop_57; const real_t tmp_qloop_66 = pow(tmp_qloop_31, -2.5000000000000000)*3.0; const real_t tmp_qloop_67 = tmp_qloop_36*tmp_qloop_66; - const real_t tmp_qloop_68 = tmp_qloop_21*tmp_qloop_32; const real_t tmp_qloop_69 = tmp_qloop_18*2.0; const real_t tmp_qloop_70 = -tmp_qloop_41; const real_t tmp_qloop_71 = tmp_qloop_35*2.0; - const real_t tmp_qloop_72 = tmp_qloop_21*tmp_qloop_45; const real_t tmp_qloop_73 = -tmp_qloop_31*tmp_qloop_72; const real_t tmp_qloop_74 = -tmp_qloop_20 + tmp_qloop_27; const real_t tmp_qloop_75 = tmp_qloop_18*tmp_qloop_68 + tmp_qloop_34*2.0 + tmp_qloop_74; @@ -1324,14 +1339,7 @@ void P2VectorToP1ElementwiseGradRhoByRhoDotUIcosahedralShellMap::toMatrix_macro_ const real_t tmp_qloop_132 = tmp_qloop_109 - tmp_qloop_110 - tmp_qloop_112 - tmp_qloop_131; const real_t tmp_qloop_133 = tmp_qloop_103*4.0; const real_t tmp_qloop_134 = tmp_qloop_111 - tmp_qloop_114 - tmp_qloop_133; - const real_t tmp_qloop_135 = 1.0 / (rho_dof_0*tmp_qloop_115 + rho_dof_1*tmp_qloop_127 + rho_dof_2*tmp_qloop_128 + rho_dof_3*tmp_qloop_129 + rho_dof_4*tmp_qloop_110 + rho_dof_5*tmp_qloop_113 + rho_dof_6*tmp_qloop_112 + rho_dof_7*tmp_qloop_130 + rho_dof_8*tmp_qloop_132 + rho_dof_9*tmp_qloop_134)*_data_q_w[q]; - const real_t tmp_qloop_136 = tmp_qloop_135*(1.0 - _data_q_p_0[q] - _data_q_p_1[q] - _data_q_p_2[q]); - const real_t tmp_qloop_142 = tmp_qloop_135*_data_q_p_0[q]; - const real_t tmp_qloop_144 = tmp_qloop_135*_data_q_p_2[q]; - const real_t tmp_qloop_147 = tmp_qloop_135*_data_q_p_1[q]; const real_t tmp_qloop_158 = tmp_qloop_105*tmp_qloop_111; - const real_t tmp_qloop_159 = tmp_qloop_135*tmp_qloop_158; - const real_t tmp_qloop_161 = tmp_qloop_107*tmp_qloop_135; const real_t jac_blending_0_0 = tmp_qloop_36*tmp_qloop_38; const real_t jac_blending_0_1 = tmp_qloop_18*tmp_qloop_44; const real_t jac_blending_0_2 = tmp_qloop_18*tmp_qloop_50; @@ -1349,6 +1357,13 @@ void P2VectorToP1ElementwiseGradRhoByRhoDotUIcosahedralShellMap::toMatrix_macro_ const real_t tmp_qloop_64 = jac_blending_0_0*tmp_qloop_59 - jac_blending_0_0*tmp_qloop_61 + jac_blending_0_2*jac_blending_1_0*jac_blending_2_1 - jac_blending_1_0*tmp_qloop_62 + jac_blending_2_0*tmp_qloop_60 - jac_blending_2_0*tmp_qloop_63; const real_t tmp_qloop_65 = 1.0 / (tmp_qloop_64); const real_t abs_det_jac_blending = tmp_qloop_64; + const real_t tmp_qloop_135 = abs_det_jac_affine_WHITE_DOWN*abs_det_jac_blending*1.0 / (rho_dof_0*tmp_qloop_115 + rho_dof_1*tmp_qloop_127 + rho_dof_2*tmp_qloop_128 + rho_dof_3*tmp_qloop_129 + rho_dof_4*tmp_qloop_110 + rho_dof_5*tmp_qloop_113 + rho_dof_6*tmp_qloop_112 + rho_dof_7*tmp_qloop_130 + rho_dof_8*tmp_qloop_132 + rho_dof_9*tmp_qloop_134)*_data_q_w[q]; + const real_t tmp_qloop_136 = tmp_qloop_135*(1.0 - _data_q_p_0[q] - _data_q_p_1[q] - _data_q_p_2[q]); + const real_t tmp_qloop_142 = tmp_qloop_135*_data_q_p_0[q]; + const real_t tmp_qloop_144 = tmp_qloop_135*_data_q_p_2[q]; + const real_t tmp_qloop_147 = tmp_qloop_135*_data_q_p_1[q]; + const real_t tmp_qloop_159 = tmp_qloop_135*tmp_qloop_158; + const real_t tmp_qloop_161 = tmp_qloop_107*tmp_qloop_135; const real_t jac_blending_inv_0_0 = tmp_qloop_65*(tmp_qloop_59 - tmp_qloop_61); const real_t jac_blending_inv_0_1 = tmp_qloop_65*(jac_blending_0_2*jac_blending_2_1 - tmp_qloop_62); const real_t jac_blending_inv_0_2 = tmp_qloop_65*(tmp_qloop_60 - tmp_qloop_63); @@ -1937,67 +1952,87 @@ void P2VectorToP1ElementwiseGradRhoByRhoDotUIcosahedralShellMap::toMatrix_macro_ mat->addValues( _data_rowIdx, _data_colIdx, _data_mat ); } } + const real_t tmp_coords_jac_0_BLUE_UP = 1.0 / (micro_edges_per_macro_edge_float)*1.0; + const real_t tmp_coords_jac_1_BLUE_UP = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_BLUE_UP*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0); + const real_t tmp_coords_jac_2_BLUE_UP = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_BLUE_UP*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1); + const real_t tmp_coords_jac_3_BLUE_UP = macro_vertex_coord_id_0comp2 + tmp_coords_jac_0_BLUE_UP*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2); + const real_t tmp_coords_jac_4_BLUE_UP = tmp_coords_jac_0_BLUE_UP*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0); + const real_t tmp_coords_jac_5_BLUE_UP = tmp_coords_jac_0_BLUE_UP*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1); + const real_t tmp_coords_jac_6_BLUE_UP = tmp_coords_jac_0_BLUE_UP*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2); + const real_t p_affine_const_0_0_BLUE_UP = tmp_coords_jac_1_BLUE_UP; + const real_t p_affine_const_0_1_BLUE_UP = tmp_coords_jac_2_BLUE_UP; + const real_t p_affine_const_0_2_BLUE_UP = tmp_coords_jac_3_BLUE_UP; + const real_t p_affine_const_1_0_BLUE_UP = macro_vertex_coord_id_0comp0 + tmp_coords_jac_4_BLUE_UP; + const real_t p_affine_const_1_1_BLUE_UP = macro_vertex_coord_id_0comp1 + tmp_coords_jac_5_BLUE_UP; + const real_t p_affine_const_1_2_BLUE_UP = macro_vertex_coord_id_0comp2 + tmp_coords_jac_6_BLUE_UP; + const real_t p_affine_const_2_0_BLUE_UP = tmp_coords_jac_1_BLUE_UP + tmp_coords_jac_4_BLUE_UP; + const real_t p_affine_const_2_1_BLUE_UP = tmp_coords_jac_2_BLUE_UP + tmp_coords_jac_5_BLUE_UP; + const real_t p_affine_const_2_2_BLUE_UP = tmp_coords_jac_3_BLUE_UP + tmp_coords_jac_6_BLUE_UP; + const real_t p_affine_const_3_0_BLUE_UP = tmp_coords_jac_0_BLUE_UP*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0) + tmp_coords_jac_1_BLUE_UP; + const real_t p_affine_const_3_1_BLUE_UP = tmp_coords_jac_0_BLUE_UP*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1) + tmp_coords_jac_2_BLUE_UP; + const real_t p_affine_const_3_2_BLUE_UP = tmp_coords_jac_0_BLUE_UP*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2) + tmp_coords_jac_3_BLUE_UP; + const real_t jac_affine_0_0_BLUE_UP = -p_affine_const_0_0_BLUE_UP + p_affine_const_1_0_BLUE_UP; + const real_t jac_affine_0_1_BLUE_UP = -p_affine_const_0_0_BLUE_UP + p_affine_const_2_0_BLUE_UP; + const real_t jac_affine_0_2_BLUE_UP = -p_affine_const_0_0_BLUE_UP + p_affine_const_3_0_BLUE_UP; + const real_t jac_affine_1_0_BLUE_UP = -p_affine_const_0_1_BLUE_UP + p_affine_const_1_1_BLUE_UP; + const real_t jac_affine_1_1_BLUE_UP = -p_affine_const_0_1_BLUE_UP + p_affine_const_2_1_BLUE_UP; + const real_t tmp_coords_jac_11_BLUE_UP = jac_affine_0_2_BLUE_UP*jac_affine_1_1_BLUE_UP; + const real_t jac_affine_1_2_BLUE_UP = -p_affine_const_0_1_BLUE_UP + p_affine_const_3_1_BLUE_UP; + const real_t tmp_coords_jac_9_BLUE_UP = jac_affine_0_1_BLUE_UP*jac_affine_1_2_BLUE_UP; + const real_t jac_affine_2_0_BLUE_UP = -p_affine_const_0_2_BLUE_UP + p_affine_const_1_2_BLUE_UP; + const real_t jac_affine_2_1_BLUE_UP = -p_affine_const_0_2_BLUE_UP + p_affine_const_2_2_BLUE_UP; + const real_t tmp_coords_jac_8_BLUE_UP = jac_affine_1_2_BLUE_UP*jac_affine_2_1_BLUE_UP; + const real_t jac_affine_2_2_BLUE_UP = -p_affine_const_0_2_BLUE_UP + p_affine_const_3_2_BLUE_UP; + const real_t tmp_coords_jac_7_BLUE_UP = jac_affine_1_1_BLUE_UP*jac_affine_2_2_BLUE_UP; + const real_t tmp_coords_jac_10_BLUE_UP = jac_affine_0_1_BLUE_UP*jac_affine_2_2_BLUE_UP; + const real_t tmp_coords_jac_12_BLUE_UP = jac_affine_0_0_BLUE_UP*tmp_coords_jac_7_BLUE_UP - jac_affine_0_0_BLUE_UP*tmp_coords_jac_8_BLUE_UP + jac_affine_0_2_BLUE_UP*jac_affine_1_0_BLUE_UP*jac_affine_2_1_BLUE_UP - jac_affine_1_0_BLUE_UP*tmp_coords_jac_10_BLUE_UP - jac_affine_2_0_BLUE_UP*tmp_coords_jac_11_BLUE_UP + jac_affine_2_0_BLUE_UP*tmp_coords_jac_9_BLUE_UP; + const real_t tmp_coords_jac_13_BLUE_UP = 1.0 / (tmp_coords_jac_12_BLUE_UP); + const real_t jac_affine_inv_0_0_BLUE_UP = tmp_coords_jac_13_BLUE_UP*(tmp_coords_jac_7_BLUE_UP - tmp_coords_jac_8_BLUE_UP); + const real_t jac_affine_inv_0_1_BLUE_UP = tmp_coords_jac_13_BLUE_UP*(jac_affine_0_2_BLUE_UP*jac_affine_2_1_BLUE_UP - tmp_coords_jac_10_BLUE_UP); + const real_t jac_affine_inv_0_2_BLUE_UP = tmp_coords_jac_13_BLUE_UP*(-tmp_coords_jac_11_BLUE_UP + tmp_coords_jac_9_BLUE_UP); + const real_t jac_affine_inv_1_0_BLUE_UP = tmp_coords_jac_13_BLUE_UP*(-jac_affine_1_0_BLUE_UP*jac_affine_2_2_BLUE_UP + jac_affine_1_2_BLUE_UP*jac_affine_2_0_BLUE_UP); + const real_t jac_affine_inv_1_1_BLUE_UP = tmp_coords_jac_13_BLUE_UP*(jac_affine_0_0_BLUE_UP*jac_affine_2_2_BLUE_UP - jac_affine_0_2_BLUE_UP*jac_affine_2_0_BLUE_UP); + const real_t jac_affine_inv_1_2_BLUE_UP = tmp_coords_jac_13_BLUE_UP*(-jac_affine_0_0_BLUE_UP*jac_affine_1_2_BLUE_UP + jac_affine_0_2_BLUE_UP*jac_affine_1_0_BLUE_UP); + const real_t jac_affine_inv_2_0_BLUE_UP = tmp_coords_jac_13_BLUE_UP*(jac_affine_1_0_BLUE_UP*jac_affine_2_1_BLUE_UP - jac_affine_1_1_BLUE_UP*jac_affine_2_0_BLUE_UP); + const real_t jac_affine_inv_2_1_BLUE_UP = tmp_coords_jac_13_BLUE_UP*(-jac_affine_0_0_BLUE_UP*jac_affine_2_1_BLUE_UP + jac_affine_0_1_BLUE_UP*jac_affine_2_0_BLUE_UP); + const real_t jac_affine_inv_2_2_BLUE_UP = tmp_coords_jac_13_BLUE_UP*(jac_affine_0_0_BLUE_UP*jac_affine_1_1_BLUE_UP - jac_affine_0_1_BLUE_UP*jac_affine_1_0_BLUE_UP); + const real_t abs_det_jac_affine_BLUE_UP = abs(tmp_coords_jac_12_BLUE_UP); { /* CellType.BLUE_UP */ - const real_t tmp_coords_jac_0_BLUE_UP = 1.0 / (micro_edges_per_macro_edge_float)*1.0; - const real_t tmp_coords_jac_1_BLUE_UP = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_BLUE_UP*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0); - const real_t tmp_coords_jac_2_BLUE_UP = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_BLUE_UP*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1); - const real_t tmp_coords_jac_3_BLUE_UP = macro_vertex_coord_id_0comp2 + tmp_coords_jac_0_BLUE_UP*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2); - const real_t tmp_coords_jac_4_BLUE_UP = tmp_coords_jac_0_BLUE_UP*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0); - const real_t tmp_coords_jac_5_BLUE_UP = tmp_coords_jac_0_BLUE_UP*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1); - const real_t tmp_coords_jac_6_BLUE_UP = tmp_coords_jac_0_BLUE_UP*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2); - const real_t p_affine_const_0_0_BLUE_UP = tmp_coords_jac_1_BLUE_UP; - const real_t p_affine_const_0_1_BLUE_UP = tmp_coords_jac_2_BLUE_UP; - const real_t p_affine_const_0_2_BLUE_UP = tmp_coords_jac_3_BLUE_UP; - const real_t p_affine_const_1_0_BLUE_UP = macro_vertex_coord_id_0comp0 + tmp_coords_jac_4_BLUE_UP; - const real_t p_affine_const_1_1_BLUE_UP = macro_vertex_coord_id_0comp1 + tmp_coords_jac_5_BLUE_UP; - const real_t p_affine_const_1_2_BLUE_UP = macro_vertex_coord_id_0comp2 + tmp_coords_jac_6_BLUE_UP; - const real_t p_affine_const_2_0_BLUE_UP = tmp_coords_jac_1_BLUE_UP + tmp_coords_jac_4_BLUE_UP; - const real_t p_affine_const_2_1_BLUE_UP = tmp_coords_jac_2_BLUE_UP + tmp_coords_jac_5_BLUE_UP; - const real_t p_affine_const_2_2_BLUE_UP = tmp_coords_jac_3_BLUE_UP + tmp_coords_jac_6_BLUE_UP; - const real_t p_affine_const_3_0_BLUE_UP = tmp_coords_jac_0_BLUE_UP*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0) + tmp_coords_jac_1_BLUE_UP; - const real_t p_affine_const_3_1_BLUE_UP = tmp_coords_jac_0_BLUE_UP*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1) + tmp_coords_jac_2_BLUE_UP; - const real_t p_affine_const_3_2_BLUE_UP = tmp_coords_jac_0_BLUE_UP*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2) + tmp_coords_jac_3_BLUE_UP; - const real_t jac_affine_0_0_BLUE_UP = -p_affine_const_0_0_BLUE_UP + p_affine_const_1_0_BLUE_UP; - const real_t jac_affine_0_1_BLUE_UP = -p_affine_const_0_0_BLUE_UP + p_affine_const_2_0_BLUE_UP; - const real_t jac_affine_0_2_BLUE_UP = -p_affine_const_0_0_BLUE_UP + p_affine_const_3_0_BLUE_UP; - const real_t jac_affine_1_0_BLUE_UP = -p_affine_const_0_1_BLUE_UP + p_affine_const_1_1_BLUE_UP; - const real_t jac_affine_1_1_BLUE_UP = -p_affine_const_0_1_BLUE_UP + p_affine_const_2_1_BLUE_UP; - const real_t tmp_coords_jac_11_BLUE_UP = jac_affine_0_2_BLUE_UP*jac_affine_1_1_BLUE_UP; - const real_t jac_affine_1_2_BLUE_UP = -p_affine_const_0_1_BLUE_UP + p_affine_const_3_1_BLUE_UP; - const real_t tmp_coords_jac_9_BLUE_UP = jac_affine_0_1_BLUE_UP*jac_affine_1_2_BLUE_UP; - const real_t jac_affine_2_0_BLUE_UP = -p_affine_const_0_2_BLUE_UP + p_affine_const_1_2_BLUE_UP; - const real_t jac_affine_2_1_BLUE_UP = -p_affine_const_0_2_BLUE_UP + p_affine_const_2_2_BLUE_UP; - const real_t tmp_coords_jac_8_BLUE_UP = jac_affine_1_2_BLUE_UP*jac_affine_2_1_BLUE_UP; - const real_t jac_affine_2_2_BLUE_UP = -p_affine_const_0_2_BLUE_UP + p_affine_const_3_2_BLUE_UP; - const real_t tmp_coords_jac_7_BLUE_UP = jac_affine_1_1_BLUE_UP*jac_affine_2_2_BLUE_UP; - const real_t tmp_coords_jac_10_BLUE_UP = jac_affine_0_1_BLUE_UP*jac_affine_2_2_BLUE_UP; - const real_t tmp_coords_jac_12_BLUE_UP = 1.0 / (jac_affine_0_0_BLUE_UP*tmp_coords_jac_7_BLUE_UP - jac_affine_0_0_BLUE_UP*tmp_coords_jac_8_BLUE_UP + jac_affine_0_2_BLUE_UP*jac_affine_1_0_BLUE_UP*jac_affine_2_1_BLUE_UP - jac_affine_1_0_BLUE_UP*tmp_coords_jac_10_BLUE_UP - jac_affine_2_0_BLUE_UP*tmp_coords_jac_11_BLUE_UP + jac_affine_2_0_BLUE_UP*tmp_coords_jac_9_BLUE_UP); - const real_t jac_affine_inv_0_0_BLUE_UP = tmp_coords_jac_12_BLUE_UP*(tmp_coords_jac_7_BLUE_UP - tmp_coords_jac_8_BLUE_UP); - const real_t jac_affine_inv_0_1_BLUE_UP = tmp_coords_jac_12_BLUE_UP*(jac_affine_0_2_BLUE_UP*jac_affine_2_1_BLUE_UP - tmp_coords_jac_10_BLUE_UP); - const real_t jac_affine_inv_0_2_BLUE_UP = tmp_coords_jac_12_BLUE_UP*(-tmp_coords_jac_11_BLUE_UP + tmp_coords_jac_9_BLUE_UP); - const real_t jac_affine_inv_1_0_BLUE_UP = tmp_coords_jac_12_BLUE_UP*(-jac_affine_1_0_BLUE_UP*jac_affine_2_2_BLUE_UP + jac_affine_1_2_BLUE_UP*jac_affine_2_0_BLUE_UP); - const real_t jac_affine_inv_1_1_BLUE_UP = tmp_coords_jac_12_BLUE_UP*(jac_affine_0_0_BLUE_UP*jac_affine_2_2_BLUE_UP - jac_affine_0_2_BLUE_UP*jac_affine_2_0_BLUE_UP); - const real_t jac_affine_inv_1_2_BLUE_UP = tmp_coords_jac_12_BLUE_UP*(-jac_affine_0_0_BLUE_UP*jac_affine_1_2_BLUE_UP + jac_affine_0_2_BLUE_UP*jac_affine_1_0_BLUE_UP); - const real_t jac_affine_inv_2_0_BLUE_UP = tmp_coords_jac_12_BLUE_UP*(jac_affine_1_0_BLUE_UP*jac_affine_2_1_BLUE_UP - jac_affine_1_1_BLUE_UP*jac_affine_2_0_BLUE_UP); - const real_t jac_affine_inv_2_1_BLUE_UP = tmp_coords_jac_12_BLUE_UP*(-jac_affine_0_0_BLUE_UP*jac_affine_2_1_BLUE_UP + jac_affine_0_1_BLUE_UP*jac_affine_2_0_BLUE_UP); - const real_t jac_affine_inv_2_2_BLUE_UP = tmp_coords_jac_12_BLUE_UP*(jac_affine_0_0_BLUE_UP*jac_affine_1_1_BLUE_UP - jac_affine_0_1_BLUE_UP*jac_affine_1_0_BLUE_UP); for (int64_t ctr_2 = 0; ctr_2 < micro_edges_per_macro_edge; ctr_2 += 1) for (int64_t ctr_1 = 0; ctr_1 < -ctr_2 + micro_edges_per_macro_edge; ctr_1 += 1) for (int64_t ctr_0 = 0; ctr_0 < -ctr_1 - ctr_2 + micro_edges_per_macro_edge - 1; ctr_0 += 1) { - const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*((real_t)(ctr_1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*((real_t)(ctr_2)); - const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*((real_t)(ctr_1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*((real_t)(ctr_2)); - const real_t p_affine_0_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*((real_t)(ctr_1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*((real_t)(ctr_2)); - const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*((real_t)(ctr_1 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*((real_t)(ctr_2)); - const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*((real_t)(ctr_1 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*((real_t)(ctr_2)); - const real_t p_affine_1_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*((real_t)(ctr_1 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*((real_t)(ctr_2)); - const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*((real_t)(ctr_1 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*((real_t)(ctr_2)); - const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*((real_t)(ctr_1 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*((real_t)(ctr_2)); - const real_t p_affine_2_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*((real_t)(ctr_1 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*((real_t)(ctr_2)); - const real_t p_affine_3_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*((real_t)(ctr_1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*((real_t)(ctr_2 + 1)); - const real_t p_affine_3_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*((real_t)(ctr_1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*((real_t)(ctr_2 + 1)); - const real_t p_affine_3_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*((real_t)(ctr_1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*((real_t)(ctr_2 + 1)); + + const int64_t phantom_ctr_0 = ctr_0; + real_t _data_float_loop_ctr_array_dim_0[4]; + _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0; + _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1; + _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2; + _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3; + real_t _data_float_loop_ctr_array_dim_1[4]; + _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1; + real_t _data_float_loop_ctr_array_dim_2[4]; + _data_float_loop_ctr_array_dim_2[0] = (real_t) ctr_2; + _data_float_loop_ctr_array_dim_2[1] = (real_t) ctr_2; + _data_float_loop_ctr_array_dim_2[2] = (real_t) ctr_2; + _data_float_loop_ctr_array_dim_2[3] = (real_t) ctr_2; + + const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]; + const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]; + const real_t p_affine_0_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]; + const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]; + const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]; + const real_t p_affine_1_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]; + const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]; + const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]; + const real_t p_affine_2_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]; + const real_t p_affine_3_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_3_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_3_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0; const real_t rho_dof_0 = _data_rhoVertex[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1]; const real_t rho_dof_1 = _data_rhoVertex[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))]; const real_t rho_dof_2 = _data_rhoVertex[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1]; @@ -2130,28 +2165,8 @@ void P2VectorToP1ElementwiseGradRhoByRhoDotUIcosahedralShellMap::toMatrix_macro_ real_t q_acc_3_29 = 0.0; for (int64_t q = 0; q < 5; q += 1) { - const real_t tmp_qloop_0 = rayVertex_1 - refVertex_1; - const real_t tmp_qloop_1 = -rayVertex_0; - const real_t tmp_qloop_2 = -forVertex_0 - tmp_qloop_1; - const real_t tmp_qloop_3 = rayVertex_2 - thrVertex_2; - const real_t tmp_qloop_4 = tmp_qloop_2*tmp_qloop_3; - const real_t tmp_qloop_5 = rayVertex_2 - refVertex_2; - const real_t tmp_qloop_6 = rayVertex_1 - thrVertex_1; - const real_t tmp_qloop_7 = tmp_qloop_2*tmp_qloop_6; - const real_t tmp_qloop_8 = -rayVertex_1; - const real_t tmp_qloop_9 = -forVertex_1 - tmp_qloop_8; - const real_t tmp_qloop_10 = rayVertex_0 - thrVertex_0; - const real_t tmp_qloop_11 = rayVertex_0 - refVertex_0; - const real_t tmp_qloop_12 = -rayVertex_2; - const real_t tmp_qloop_13 = -forVertex_2 - tmp_qloop_12; - const real_t tmp_qloop_14 = tmp_qloop_3*tmp_qloop_9; - const real_t tmp_qloop_15 = tmp_qloop_10*tmp_qloop_13; - const real_t tmp_qloop_16 = -tmp_qloop_0*tmp_qloop_15 + tmp_qloop_0*tmp_qloop_4 + tmp_qloop_10*tmp_qloop_5*tmp_qloop_9 + tmp_qloop_11*tmp_qloop_13*tmp_qloop_6 - tmp_qloop_11*tmp_qloop_14 - tmp_qloop_5*tmp_qloop_7; - const real_t tmp_qloop_17 = 1.0 / (tmp_qloop_16); const real_t tmp_qloop_18 = p_affine_0_0 + (-p_affine_0_0 + p_affine_1_0)*_data_q_p_0[q] + (-p_affine_0_0 + p_affine_2_0)*_data_q_p_1[q] + (-p_affine_0_0 + p_affine_3_0)*_data_q_p_2[q]; const real_t tmp_qloop_19 = (tmp_qloop_18*tmp_qloop_18); - const real_t tmp_qloop_20 = radRayVertex*tmp_qloop_16; - const real_t tmp_qloop_21 = radRayVertex - radRefVertex; const real_t tmp_qloop_22 = p_affine_0_2 + (-p_affine_0_2 + p_affine_1_2)*_data_q_p_0[q] + (-p_affine_0_2 + p_affine_2_2)*_data_q_p_1[q] + (-p_affine_0_2 + p_affine_3_2)*_data_q_p_2[q]; const real_t tmp_qloop_23 = -tmp_qloop_12 - tmp_qloop_22; const real_t tmp_qloop_24 = -tmp_qloop_1 - tmp_qloop_18; @@ -2162,28 +2177,21 @@ void P2VectorToP1ElementwiseGradRhoByRhoDotUIcosahedralShellMap::toMatrix_macro_ const real_t tmp_qloop_29 = (tmp_qloop_25*tmp_qloop_25); const real_t tmp_qloop_30 = (tmp_qloop_22*tmp_qloop_22); const real_t tmp_qloop_31 = tmp_qloop_19 + tmp_qloop_29 + tmp_qloop_30; - const real_t tmp_qloop_32 = -tmp_qloop_13*tmp_qloop_6 + tmp_qloop_14; - const real_t tmp_qloop_33 = -tmp_qloop_21*tmp_qloop_32; const real_t tmp_qloop_34 = tmp_qloop_18*tmp_qloop_33; const real_t tmp_qloop_35 = tmp_qloop_28 + tmp_qloop_34; const real_t tmp_qloop_36 = tmp_qloop_17*(-tmp_qloop_19*tmp_qloop_28 + tmp_qloop_31*tmp_qloop_35); const real_t tmp_qloop_37 = pow(tmp_qloop_31, -1.5000000000000000); const real_t tmp_qloop_38 = tmp_qloop_37*1.0; - const real_t tmp_qloop_39 = tmp_qloop_10*tmp_qloop_13 - tmp_qloop_4; - const real_t tmp_qloop_40 = tmp_qloop_21*tmp_qloop_39; const real_t tmp_qloop_41 = tmp_qloop_31*tmp_qloop_40; const real_t tmp_qloop_42 = -tmp_qloop_25*tmp_qloop_28 - tmp_qloop_41; const real_t tmp_qloop_43 = tmp_qloop_17*tmp_qloop_38; const real_t tmp_qloop_44 = tmp_qloop_42*tmp_qloop_43; - const real_t tmp_qloop_45 = -tmp_qloop_10*tmp_qloop_9 + tmp_qloop_7; - const real_t tmp_qloop_46 = -tmp_qloop_21*tmp_qloop_45; const real_t tmp_qloop_47 = -tmp_qloop_28; const real_t tmp_qloop_48 = tmp_qloop_22*tmp_qloop_47; const real_t tmp_qloop_49 = tmp_qloop_31*tmp_qloop_46 + tmp_qloop_48; const real_t tmp_qloop_50 = tmp_qloop_43*tmp_qloop_49; const real_t tmp_qloop_51 = tmp_qloop_18*tmp_qloop_47 + tmp_qloop_31*tmp_qloop_33; const real_t tmp_qloop_52 = tmp_qloop_43*tmp_qloop_51; - const real_t tmp_qloop_53 = -tmp_qloop_21*tmp_qloop_39; const real_t tmp_qloop_54 = tmp_qloop_25*tmp_qloop_53 + tmp_qloop_28; const real_t tmp_qloop_55 = tmp_qloop_29*tmp_qloop_47 + tmp_qloop_31*tmp_qloop_54; const real_t tmp_qloop_56 = tmp_qloop_22*tmp_qloop_46; @@ -2191,11 +2199,9 @@ void P2VectorToP1ElementwiseGradRhoByRhoDotUIcosahedralShellMap::toMatrix_macro_ const real_t tmp_qloop_58 = tmp_qloop_30*tmp_qloop_47 + tmp_qloop_31*tmp_qloop_57; const real_t tmp_qloop_66 = pow(tmp_qloop_31, -2.5000000000000000)*3.0; const real_t tmp_qloop_67 = tmp_qloop_36*tmp_qloop_66; - const real_t tmp_qloop_68 = tmp_qloop_21*tmp_qloop_32; const real_t tmp_qloop_69 = tmp_qloop_18*2.0; const real_t tmp_qloop_70 = -tmp_qloop_41; const real_t tmp_qloop_71 = tmp_qloop_35*2.0; - const real_t tmp_qloop_72 = tmp_qloop_21*tmp_qloop_45; const real_t tmp_qloop_73 = -tmp_qloop_31*tmp_qloop_72; const real_t tmp_qloop_74 = -tmp_qloop_20 + tmp_qloop_27; const real_t tmp_qloop_75 = tmp_qloop_18*tmp_qloop_68 + tmp_qloop_34*2.0 + tmp_qloop_74; @@ -2257,14 +2263,7 @@ void P2VectorToP1ElementwiseGradRhoByRhoDotUIcosahedralShellMap::toMatrix_macro_ const real_t tmp_qloop_132 = tmp_qloop_109 - tmp_qloop_110 - tmp_qloop_112 - tmp_qloop_131; const real_t tmp_qloop_133 = tmp_qloop_103*4.0; const real_t tmp_qloop_134 = tmp_qloop_111 - tmp_qloop_114 - tmp_qloop_133; - const real_t tmp_qloop_135 = 1.0 / (rho_dof_0*tmp_qloop_115 + rho_dof_1*tmp_qloop_127 + rho_dof_2*tmp_qloop_128 + rho_dof_3*tmp_qloop_129 + rho_dof_4*tmp_qloop_110 + rho_dof_5*tmp_qloop_113 + rho_dof_6*tmp_qloop_112 + rho_dof_7*tmp_qloop_130 + rho_dof_8*tmp_qloop_132 + rho_dof_9*tmp_qloop_134)*_data_q_w[q]; - const real_t tmp_qloop_136 = tmp_qloop_135*(1.0 - _data_q_p_0[q] - _data_q_p_1[q] - _data_q_p_2[q]); - const real_t tmp_qloop_142 = tmp_qloop_135*_data_q_p_0[q]; - const real_t tmp_qloop_144 = tmp_qloop_135*_data_q_p_2[q]; - const real_t tmp_qloop_147 = tmp_qloop_135*_data_q_p_1[q]; const real_t tmp_qloop_158 = tmp_qloop_105*tmp_qloop_111; - const real_t tmp_qloop_159 = tmp_qloop_135*tmp_qloop_158; - const real_t tmp_qloop_161 = tmp_qloop_107*tmp_qloop_135; const real_t jac_blending_0_0 = tmp_qloop_36*tmp_qloop_38; const real_t jac_blending_0_1 = tmp_qloop_18*tmp_qloop_44; const real_t jac_blending_0_2 = tmp_qloop_18*tmp_qloop_50; @@ -2282,6 +2281,13 @@ void P2VectorToP1ElementwiseGradRhoByRhoDotUIcosahedralShellMap::toMatrix_macro_ const real_t tmp_qloop_64 = jac_blending_0_0*tmp_qloop_59 - jac_blending_0_0*tmp_qloop_61 + jac_blending_0_2*jac_blending_1_0*jac_blending_2_1 - jac_blending_1_0*tmp_qloop_62 + jac_blending_2_0*tmp_qloop_60 - jac_blending_2_0*tmp_qloop_63; const real_t tmp_qloop_65 = 1.0 / (tmp_qloop_64); const real_t abs_det_jac_blending = tmp_qloop_64; + const real_t tmp_qloop_135 = abs_det_jac_affine_BLUE_UP*abs_det_jac_blending*1.0 / (rho_dof_0*tmp_qloop_115 + rho_dof_1*tmp_qloop_127 + rho_dof_2*tmp_qloop_128 + rho_dof_3*tmp_qloop_129 + rho_dof_4*tmp_qloop_110 + rho_dof_5*tmp_qloop_113 + rho_dof_6*tmp_qloop_112 + rho_dof_7*tmp_qloop_130 + rho_dof_8*tmp_qloop_132 + rho_dof_9*tmp_qloop_134)*_data_q_w[q]; + const real_t tmp_qloop_136 = tmp_qloop_135*(1.0 - _data_q_p_0[q] - _data_q_p_1[q] - _data_q_p_2[q]); + const real_t tmp_qloop_142 = tmp_qloop_135*_data_q_p_0[q]; + const real_t tmp_qloop_144 = tmp_qloop_135*_data_q_p_2[q]; + const real_t tmp_qloop_147 = tmp_qloop_135*_data_q_p_1[q]; + const real_t tmp_qloop_159 = tmp_qloop_135*tmp_qloop_158; + const real_t tmp_qloop_161 = tmp_qloop_107*tmp_qloop_135; const real_t jac_blending_inv_0_0 = tmp_qloop_65*(tmp_qloop_59 - tmp_qloop_61); const real_t jac_blending_inv_0_1 = tmp_qloop_65*(jac_blending_0_2*jac_blending_2_1 - tmp_qloop_62); const real_t jac_blending_inv_0_2 = tmp_qloop_65*(tmp_qloop_60 - tmp_qloop_63); @@ -2870,70 +2876,90 @@ void P2VectorToP1ElementwiseGradRhoByRhoDotUIcosahedralShellMap::toMatrix_macro_ mat->addValues( _data_rowIdx, _data_colIdx, _data_mat ); } } + const real_t tmp_coords_jac_0_BLUE_DOWN = 1.0 / (micro_edges_per_macro_edge_float)*1.0; + const real_t tmp_coords_jac_1_BLUE_DOWN = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_BLUE_DOWN*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0); + const real_t tmp_coords_jac_2_BLUE_DOWN = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_BLUE_DOWN*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1); + const real_t tmp_coords_jac_3_BLUE_DOWN = macro_vertex_coord_id_0comp2 + tmp_coords_jac_0_BLUE_DOWN*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2); + const real_t tmp_coords_jac_4_BLUE_DOWN = tmp_coords_jac_0_BLUE_DOWN*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0); + const real_t tmp_coords_jac_5_BLUE_DOWN = macro_vertex_coord_id_0comp0 + tmp_coords_jac_4_BLUE_DOWN; + const real_t tmp_coords_jac_6_BLUE_DOWN = tmp_coords_jac_0_BLUE_DOWN*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1); + const real_t tmp_coords_jac_7_BLUE_DOWN = macro_vertex_coord_id_0comp1 + tmp_coords_jac_6_BLUE_DOWN; + const real_t tmp_coords_jac_8_BLUE_DOWN = tmp_coords_jac_0_BLUE_DOWN*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2); + const real_t tmp_coords_jac_9_BLUE_DOWN = macro_vertex_coord_id_0comp2 + tmp_coords_jac_8_BLUE_DOWN; + const real_t p_affine_const_0_0_BLUE_DOWN = tmp_coords_jac_1_BLUE_DOWN; + const real_t p_affine_const_0_1_BLUE_DOWN = tmp_coords_jac_2_BLUE_DOWN; + const real_t p_affine_const_0_2_BLUE_DOWN = tmp_coords_jac_3_BLUE_DOWN; + const real_t p_affine_const_1_0_BLUE_DOWN = tmp_coords_jac_5_BLUE_DOWN; + const real_t p_affine_const_1_1_BLUE_DOWN = tmp_coords_jac_7_BLUE_DOWN; + const real_t p_affine_const_1_2_BLUE_DOWN = tmp_coords_jac_9_BLUE_DOWN; + const real_t p_affine_const_2_0_BLUE_DOWN = tmp_coords_jac_0_BLUE_DOWN*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0) + tmp_coords_jac_5_BLUE_DOWN; + const real_t p_affine_const_2_1_BLUE_DOWN = tmp_coords_jac_0_BLUE_DOWN*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1) + tmp_coords_jac_7_BLUE_DOWN; + const real_t p_affine_const_2_2_BLUE_DOWN = tmp_coords_jac_0_BLUE_DOWN*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2) + tmp_coords_jac_9_BLUE_DOWN; + const real_t p_affine_const_3_0_BLUE_DOWN = tmp_coords_jac_1_BLUE_DOWN + tmp_coords_jac_4_BLUE_DOWN; + const real_t p_affine_const_3_1_BLUE_DOWN = tmp_coords_jac_2_BLUE_DOWN + tmp_coords_jac_6_BLUE_DOWN; + const real_t p_affine_const_3_2_BLUE_DOWN = tmp_coords_jac_3_BLUE_DOWN + tmp_coords_jac_8_BLUE_DOWN; + const real_t jac_affine_0_0_BLUE_DOWN = -p_affine_const_0_0_BLUE_DOWN + p_affine_const_1_0_BLUE_DOWN; + const real_t jac_affine_0_1_BLUE_DOWN = -p_affine_const_0_0_BLUE_DOWN + p_affine_const_2_0_BLUE_DOWN; + const real_t jac_affine_0_2_BLUE_DOWN = -p_affine_const_0_0_BLUE_DOWN + p_affine_const_3_0_BLUE_DOWN; + const real_t jac_affine_1_0_BLUE_DOWN = -p_affine_const_0_1_BLUE_DOWN + p_affine_const_1_1_BLUE_DOWN; + const real_t jac_affine_1_1_BLUE_DOWN = -p_affine_const_0_1_BLUE_DOWN + p_affine_const_2_1_BLUE_DOWN; + const real_t tmp_coords_jac_14_BLUE_DOWN = jac_affine_0_2_BLUE_DOWN*jac_affine_1_1_BLUE_DOWN; + const real_t jac_affine_1_2_BLUE_DOWN = -p_affine_const_0_1_BLUE_DOWN + p_affine_const_3_1_BLUE_DOWN; + const real_t tmp_coords_jac_12_BLUE_DOWN = jac_affine_0_1_BLUE_DOWN*jac_affine_1_2_BLUE_DOWN; + const real_t jac_affine_2_0_BLUE_DOWN = -p_affine_const_0_2_BLUE_DOWN + p_affine_const_1_2_BLUE_DOWN; + const real_t jac_affine_2_1_BLUE_DOWN = -p_affine_const_0_2_BLUE_DOWN + p_affine_const_2_2_BLUE_DOWN; + const real_t tmp_coords_jac_11_BLUE_DOWN = jac_affine_1_2_BLUE_DOWN*jac_affine_2_1_BLUE_DOWN; + const real_t jac_affine_2_2_BLUE_DOWN = -p_affine_const_0_2_BLUE_DOWN + p_affine_const_3_2_BLUE_DOWN; + const real_t tmp_coords_jac_10_BLUE_DOWN = jac_affine_1_1_BLUE_DOWN*jac_affine_2_2_BLUE_DOWN; + const real_t tmp_coords_jac_13_BLUE_DOWN = jac_affine_0_1_BLUE_DOWN*jac_affine_2_2_BLUE_DOWN; + const real_t tmp_coords_jac_15_BLUE_DOWN = jac_affine_0_0_BLUE_DOWN*tmp_coords_jac_10_BLUE_DOWN - jac_affine_0_0_BLUE_DOWN*tmp_coords_jac_11_BLUE_DOWN + jac_affine_0_2_BLUE_DOWN*jac_affine_1_0_BLUE_DOWN*jac_affine_2_1_BLUE_DOWN - jac_affine_1_0_BLUE_DOWN*tmp_coords_jac_13_BLUE_DOWN + jac_affine_2_0_BLUE_DOWN*tmp_coords_jac_12_BLUE_DOWN - jac_affine_2_0_BLUE_DOWN*tmp_coords_jac_14_BLUE_DOWN; + const real_t tmp_coords_jac_16_BLUE_DOWN = 1.0 / (tmp_coords_jac_15_BLUE_DOWN); + const real_t jac_affine_inv_0_0_BLUE_DOWN = tmp_coords_jac_16_BLUE_DOWN*(tmp_coords_jac_10_BLUE_DOWN - tmp_coords_jac_11_BLUE_DOWN); + const real_t jac_affine_inv_0_1_BLUE_DOWN = tmp_coords_jac_16_BLUE_DOWN*(jac_affine_0_2_BLUE_DOWN*jac_affine_2_1_BLUE_DOWN - tmp_coords_jac_13_BLUE_DOWN); + const real_t jac_affine_inv_0_2_BLUE_DOWN = tmp_coords_jac_16_BLUE_DOWN*(tmp_coords_jac_12_BLUE_DOWN - tmp_coords_jac_14_BLUE_DOWN); + const real_t jac_affine_inv_1_0_BLUE_DOWN = tmp_coords_jac_16_BLUE_DOWN*(-jac_affine_1_0_BLUE_DOWN*jac_affine_2_2_BLUE_DOWN + jac_affine_1_2_BLUE_DOWN*jac_affine_2_0_BLUE_DOWN); + const real_t jac_affine_inv_1_1_BLUE_DOWN = tmp_coords_jac_16_BLUE_DOWN*(jac_affine_0_0_BLUE_DOWN*jac_affine_2_2_BLUE_DOWN - jac_affine_0_2_BLUE_DOWN*jac_affine_2_0_BLUE_DOWN); + const real_t jac_affine_inv_1_2_BLUE_DOWN = tmp_coords_jac_16_BLUE_DOWN*(-jac_affine_0_0_BLUE_DOWN*jac_affine_1_2_BLUE_DOWN + jac_affine_0_2_BLUE_DOWN*jac_affine_1_0_BLUE_DOWN); + const real_t jac_affine_inv_2_0_BLUE_DOWN = tmp_coords_jac_16_BLUE_DOWN*(jac_affine_1_0_BLUE_DOWN*jac_affine_2_1_BLUE_DOWN - jac_affine_1_1_BLUE_DOWN*jac_affine_2_0_BLUE_DOWN); + const real_t jac_affine_inv_2_1_BLUE_DOWN = tmp_coords_jac_16_BLUE_DOWN*(-jac_affine_0_0_BLUE_DOWN*jac_affine_2_1_BLUE_DOWN + jac_affine_0_1_BLUE_DOWN*jac_affine_2_0_BLUE_DOWN); + const real_t jac_affine_inv_2_2_BLUE_DOWN = tmp_coords_jac_16_BLUE_DOWN*(jac_affine_0_0_BLUE_DOWN*jac_affine_1_1_BLUE_DOWN - jac_affine_0_1_BLUE_DOWN*jac_affine_1_0_BLUE_DOWN); + const real_t abs_det_jac_affine_BLUE_DOWN = abs(tmp_coords_jac_15_BLUE_DOWN); { /* CellType.BLUE_DOWN */ - const real_t tmp_coords_jac_0_BLUE_DOWN = 1.0 / (micro_edges_per_macro_edge_float)*1.0; - const real_t tmp_coords_jac_1_BLUE_DOWN = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_BLUE_DOWN*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0); - const real_t tmp_coords_jac_2_BLUE_DOWN = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_BLUE_DOWN*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1); - const real_t tmp_coords_jac_3_BLUE_DOWN = macro_vertex_coord_id_0comp2 + tmp_coords_jac_0_BLUE_DOWN*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2); - const real_t tmp_coords_jac_4_BLUE_DOWN = tmp_coords_jac_0_BLUE_DOWN*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0); - const real_t tmp_coords_jac_5_BLUE_DOWN = macro_vertex_coord_id_0comp0 + tmp_coords_jac_4_BLUE_DOWN; - const real_t tmp_coords_jac_6_BLUE_DOWN = tmp_coords_jac_0_BLUE_DOWN*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1); - const real_t tmp_coords_jac_7_BLUE_DOWN = macro_vertex_coord_id_0comp1 + tmp_coords_jac_6_BLUE_DOWN; - const real_t tmp_coords_jac_8_BLUE_DOWN = tmp_coords_jac_0_BLUE_DOWN*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2); - const real_t tmp_coords_jac_9_BLUE_DOWN = macro_vertex_coord_id_0comp2 + tmp_coords_jac_8_BLUE_DOWN; - const real_t p_affine_const_0_0_BLUE_DOWN = tmp_coords_jac_1_BLUE_DOWN; - const real_t p_affine_const_0_1_BLUE_DOWN = tmp_coords_jac_2_BLUE_DOWN; - const real_t p_affine_const_0_2_BLUE_DOWN = tmp_coords_jac_3_BLUE_DOWN; - const real_t p_affine_const_1_0_BLUE_DOWN = tmp_coords_jac_5_BLUE_DOWN; - const real_t p_affine_const_1_1_BLUE_DOWN = tmp_coords_jac_7_BLUE_DOWN; - const real_t p_affine_const_1_2_BLUE_DOWN = tmp_coords_jac_9_BLUE_DOWN; - const real_t p_affine_const_2_0_BLUE_DOWN = tmp_coords_jac_0_BLUE_DOWN*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0) + tmp_coords_jac_5_BLUE_DOWN; - const real_t p_affine_const_2_1_BLUE_DOWN = tmp_coords_jac_0_BLUE_DOWN*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1) + tmp_coords_jac_7_BLUE_DOWN; - const real_t p_affine_const_2_2_BLUE_DOWN = tmp_coords_jac_0_BLUE_DOWN*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2) + tmp_coords_jac_9_BLUE_DOWN; - const real_t p_affine_const_3_0_BLUE_DOWN = tmp_coords_jac_1_BLUE_DOWN + tmp_coords_jac_4_BLUE_DOWN; - const real_t p_affine_const_3_1_BLUE_DOWN = tmp_coords_jac_2_BLUE_DOWN + tmp_coords_jac_6_BLUE_DOWN; - const real_t p_affine_const_3_2_BLUE_DOWN = tmp_coords_jac_3_BLUE_DOWN + tmp_coords_jac_8_BLUE_DOWN; - const real_t jac_affine_0_0_BLUE_DOWN = -p_affine_const_0_0_BLUE_DOWN + p_affine_const_1_0_BLUE_DOWN; - const real_t jac_affine_0_1_BLUE_DOWN = -p_affine_const_0_0_BLUE_DOWN + p_affine_const_2_0_BLUE_DOWN; - const real_t jac_affine_0_2_BLUE_DOWN = -p_affine_const_0_0_BLUE_DOWN + p_affine_const_3_0_BLUE_DOWN; - const real_t jac_affine_1_0_BLUE_DOWN = -p_affine_const_0_1_BLUE_DOWN + p_affine_const_1_1_BLUE_DOWN; - const real_t jac_affine_1_1_BLUE_DOWN = -p_affine_const_0_1_BLUE_DOWN + p_affine_const_2_1_BLUE_DOWN; - const real_t tmp_coords_jac_14_BLUE_DOWN = jac_affine_0_2_BLUE_DOWN*jac_affine_1_1_BLUE_DOWN; - const real_t jac_affine_1_2_BLUE_DOWN = -p_affine_const_0_1_BLUE_DOWN + p_affine_const_3_1_BLUE_DOWN; - const real_t tmp_coords_jac_12_BLUE_DOWN = jac_affine_0_1_BLUE_DOWN*jac_affine_1_2_BLUE_DOWN; - const real_t jac_affine_2_0_BLUE_DOWN = -p_affine_const_0_2_BLUE_DOWN + p_affine_const_1_2_BLUE_DOWN; - const real_t jac_affine_2_1_BLUE_DOWN = -p_affine_const_0_2_BLUE_DOWN + p_affine_const_2_2_BLUE_DOWN; - const real_t tmp_coords_jac_11_BLUE_DOWN = jac_affine_1_2_BLUE_DOWN*jac_affine_2_1_BLUE_DOWN; - const real_t jac_affine_2_2_BLUE_DOWN = -p_affine_const_0_2_BLUE_DOWN + p_affine_const_3_2_BLUE_DOWN; - const real_t tmp_coords_jac_10_BLUE_DOWN = jac_affine_1_1_BLUE_DOWN*jac_affine_2_2_BLUE_DOWN; - const real_t tmp_coords_jac_13_BLUE_DOWN = jac_affine_0_1_BLUE_DOWN*jac_affine_2_2_BLUE_DOWN; - const real_t tmp_coords_jac_15_BLUE_DOWN = 1.0 / (jac_affine_0_0_BLUE_DOWN*tmp_coords_jac_10_BLUE_DOWN - jac_affine_0_0_BLUE_DOWN*tmp_coords_jac_11_BLUE_DOWN + jac_affine_0_2_BLUE_DOWN*jac_affine_1_0_BLUE_DOWN*jac_affine_2_1_BLUE_DOWN - jac_affine_1_0_BLUE_DOWN*tmp_coords_jac_13_BLUE_DOWN + jac_affine_2_0_BLUE_DOWN*tmp_coords_jac_12_BLUE_DOWN - jac_affine_2_0_BLUE_DOWN*tmp_coords_jac_14_BLUE_DOWN); - const real_t jac_affine_inv_0_0_BLUE_DOWN = tmp_coords_jac_15_BLUE_DOWN*(tmp_coords_jac_10_BLUE_DOWN - tmp_coords_jac_11_BLUE_DOWN); - const real_t jac_affine_inv_0_1_BLUE_DOWN = tmp_coords_jac_15_BLUE_DOWN*(jac_affine_0_2_BLUE_DOWN*jac_affine_2_1_BLUE_DOWN - tmp_coords_jac_13_BLUE_DOWN); - const real_t jac_affine_inv_0_2_BLUE_DOWN = tmp_coords_jac_15_BLUE_DOWN*(tmp_coords_jac_12_BLUE_DOWN - tmp_coords_jac_14_BLUE_DOWN); - const real_t jac_affine_inv_1_0_BLUE_DOWN = tmp_coords_jac_15_BLUE_DOWN*(-jac_affine_1_0_BLUE_DOWN*jac_affine_2_2_BLUE_DOWN + jac_affine_1_2_BLUE_DOWN*jac_affine_2_0_BLUE_DOWN); - const real_t jac_affine_inv_1_1_BLUE_DOWN = tmp_coords_jac_15_BLUE_DOWN*(jac_affine_0_0_BLUE_DOWN*jac_affine_2_2_BLUE_DOWN - jac_affine_0_2_BLUE_DOWN*jac_affine_2_0_BLUE_DOWN); - const real_t jac_affine_inv_1_2_BLUE_DOWN = tmp_coords_jac_15_BLUE_DOWN*(-jac_affine_0_0_BLUE_DOWN*jac_affine_1_2_BLUE_DOWN + jac_affine_0_2_BLUE_DOWN*jac_affine_1_0_BLUE_DOWN); - const real_t jac_affine_inv_2_0_BLUE_DOWN = tmp_coords_jac_15_BLUE_DOWN*(jac_affine_1_0_BLUE_DOWN*jac_affine_2_1_BLUE_DOWN - jac_affine_1_1_BLUE_DOWN*jac_affine_2_0_BLUE_DOWN); - const real_t jac_affine_inv_2_1_BLUE_DOWN = tmp_coords_jac_15_BLUE_DOWN*(-jac_affine_0_0_BLUE_DOWN*jac_affine_2_1_BLUE_DOWN + jac_affine_0_1_BLUE_DOWN*jac_affine_2_0_BLUE_DOWN); - const real_t jac_affine_inv_2_2_BLUE_DOWN = tmp_coords_jac_15_BLUE_DOWN*(jac_affine_0_0_BLUE_DOWN*jac_affine_1_1_BLUE_DOWN - jac_affine_0_1_BLUE_DOWN*jac_affine_1_0_BLUE_DOWN); for (int64_t ctr_2 = 0; ctr_2 < micro_edges_per_macro_edge; ctr_2 += 1) for (int64_t ctr_1 = 0; ctr_1 < -ctr_2 + micro_edges_per_macro_edge; ctr_1 += 1) for (int64_t ctr_0 = 0; ctr_0 < -ctr_1 - ctr_2 + micro_edges_per_macro_edge - 1; ctr_0 += 1) { - const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*((real_t)(ctr_1 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*((real_t)(ctr_2)); - const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*((real_t)(ctr_1 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*((real_t)(ctr_2)); - const real_t p_affine_0_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*((real_t)(ctr_1 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*((real_t)(ctr_2)); - const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*((real_t)(ctr_1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*((real_t)(ctr_2 + 1)); - const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*((real_t)(ctr_1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*((real_t)(ctr_2 + 1)); - const real_t p_affine_1_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*((real_t)(ctr_1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*((real_t)(ctr_2 + 1)); - const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*((real_t)(ctr_1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*((real_t)(ctr_2 + 1)); - const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*((real_t)(ctr_1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*((real_t)(ctr_2 + 1)); - const real_t p_affine_2_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*((real_t)(ctr_1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*((real_t)(ctr_2 + 1)); - const real_t p_affine_3_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*((real_t)(ctr_1 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*((real_t)(ctr_2 + 1)); - const real_t p_affine_3_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*((real_t)(ctr_1 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*((real_t)(ctr_2 + 1)); - const real_t p_affine_3_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*((real_t)(ctr_1 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*((real_t)(ctr_2 + 1)); + + const int64_t phantom_ctr_0 = ctr_0; + real_t _data_float_loop_ctr_array_dim_0[4]; + _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0; + _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1; + _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2; + _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3; + real_t _data_float_loop_ctr_array_dim_1[4]; + _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1; + real_t _data_float_loop_ctr_array_dim_2[4]; + _data_float_loop_ctr_array_dim_2[0] = (real_t) ctr_2; + _data_float_loop_ctr_array_dim_2[1] = (real_t) ctr_2; + _data_float_loop_ctr_array_dim_2[2] = (real_t) ctr_2; + _data_float_loop_ctr_array_dim_2[3] = (real_t) ctr_2; + + const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]; + const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]; + const real_t p_affine_0_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]; + const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_1_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_2_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_3_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_3_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_3_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0; const real_t rho_dof_0 = _data_rhoVertex[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))]; const real_t rho_dof_1 = _data_rhoVertex[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))]; const real_t rho_dof_2 = _data_rhoVertex[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1]; @@ -3066,28 +3092,8 @@ void P2VectorToP1ElementwiseGradRhoByRhoDotUIcosahedralShellMap::toMatrix_macro_ real_t q_acc_3_29 = 0.0; for (int64_t q = 0; q < 5; q += 1) { - const real_t tmp_qloop_0 = rayVertex_1 - refVertex_1; - const real_t tmp_qloop_1 = -rayVertex_0; - const real_t tmp_qloop_2 = -forVertex_0 - tmp_qloop_1; - const real_t tmp_qloop_3 = rayVertex_2 - thrVertex_2; - const real_t tmp_qloop_4 = tmp_qloop_2*tmp_qloop_3; - const real_t tmp_qloop_5 = rayVertex_2 - refVertex_2; - const real_t tmp_qloop_6 = rayVertex_1 - thrVertex_1; - const real_t tmp_qloop_7 = tmp_qloop_2*tmp_qloop_6; - const real_t tmp_qloop_8 = -rayVertex_1; - const real_t tmp_qloop_9 = -forVertex_1 - tmp_qloop_8; - const real_t tmp_qloop_10 = rayVertex_0 - thrVertex_0; - const real_t tmp_qloop_11 = rayVertex_0 - refVertex_0; - const real_t tmp_qloop_12 = -rayVertex_2; - const real_t tmp_qloop_13 = -forVertex_2 - tmp_qloop_12; - const real_t tmp_qloop_14 = tmp_qloop_3*tmp_qloop_9; - const real_t tmp_qloop_15 = tmp_qloop_10*tmp_qloop_13; - const real_t tmp_qloop_16 = -tmp_qloop_0*tmp_qloop_15 + tmp_qloop_0*tmp_qloop_4 + tmp_qloop_10*tmp_qloop_5*tmp_qloop_9 + tmp_qloop_11*tmp_qloop_13*tmp_qloop_6 - tmp_qloop_11*tmp_qloop_14 - tmp_qloop_5*tmp_qloop_7; - const real_t tmp_qloop_17 = 1.0 / (tmp_qloop_16); const real_t tmp_qloop_18 = p_affine_0_0 + (-p_affine_0_0 + p_affine_1_0)*_data_q_p_0[q] + (-p_affine_0_0 + p_affine_2_0)*_data_q_p_1[q] + (-p_affine_0_0 + p_affine_3_0)*_data_q_p_2[q]; const real_t tmp_qloop_19 = (tmp_qloop_18*tmp_qloop_18); - const real_t tmp_qloop_20 = radRayVertex*tmp_qloop_16; - const real_t tmp_qloop_21 = radRayVertex - radRefVertex; const real_t tmp_qloop_22 = p_affine_0_2 + (-p_affine_0_2 + p_affine_1_2)*_data_q_p_0[q] + (-p_affine_0_2 + p_affine_2_2)*_data_q_p_1[q] + (-p_affine_0_2 + p_affine_3_2)*_data_q_p_2[q]; const real_t tmp_qloop_23 = -tmp_qloop_12 - tmp_qloop_22; const real_t tmp_qloop_24 = -tmp_qloop_1 - tmp_qloop_18; @@ -3098,28 +3104,21 @@ void P2VectorToP1ElementwiseGradRhoByRhoDotUIcosahedralShellMap::toMatrix_macro_ const real_t tmp_qloop_29 = (tmp_qloop_25*tmp_qloop_25); const real_t tmp_qloop_30 = (tmp_qloop_22*tmp_qloop_22); const real_t tmp_qloop_31 = tmp_qloop_19 + tmp_qloop_29 + tmp_qloop_30; - const real_t tmp_qloop_32 = -tmp_qloop_13*tmp_qloop_6 + tmp_qloop_14; - const real_t tmp_qloop_33 = -tmp_qloop_21*tmp_qloop_32; const real_t tmp_qloop_34 = tmp_qloop_18*tmp_qloop_33; const real_t tmp_qloop_35 = tmp_qloop_28 + tmp_qloop_34; const real_t tmp_qloop_36 = tmp_qloop_17*(-tmp_qloop_19*tmp_qloop_28 + tmp_qloop_31*tmp_qloop_35); const real_t tmp_qloop_37 = pow(tmp_qloop_31, -1.5000000000000000); const real_t tmp_qloop_38 = tmp_qloop_37*1.0; - const real_t tmp_qloop_39 = tmp_qloop_10*tmp_qloop_13 - tmp_qloop_4; - const real_t tmp_qloop_40 = tmp_qloop_21*tmp_qloop_39; const real_t tmp_qloop_41 = tmp_qloop_31*tmp_qloop_40; const real_t tmp_qloop_42 = -tmp_qloop_25*tmp_qloop_28 - tmp_qloop_41; const real_t tmp_qloop_43 = tmp_qloop_17*tmp_qloop_38; const real_t tmp_qloop_44 = tmp_qloop_42*tmp_qloop_43; - const real_t tmp_qloop_45 = -tmp_qloop_10*tmp_qloop_9 + tmp_qloop_7; - const real_t tmp_qloop_46 = -tmp_qloop_21*tmp_qloop_45; const real_t tmp_qloop_47 = -tmp_qloop_28; const real_t tmp_qloop_48 = tmp_qloop_22*tmp_qloop_47; const real_t tmp_qloop_49 = tmp_qloop_31*tmp_qloop_46 + tmp_qloop_48; const real_t tmp_qloop_50 = tmp_qloop_43*tmp_qloop_49; const real_t tmp_qloop_51 = tmp_qloop_18*tmp_qloop_47 + tmp_qloop_31*tmp_qloop_33; const real_t tmp_qloop_52 = tmp_qloop_43*tmp_qloop_51; - const real_t tmp_qloop_53 = -tmp_qloop_21*tmp_qloop_39; const real_t tmp_qloop_54 = tmp_qloop_25*tmp_qloop_53 + tmp_qloop_28; const real_t tmp_qloop_55 = tmp_qloop_29*tmp_qloop_47 + tmp_qloop_31*tmp_qloop_54; const real_t tmp_qloop_56 = tmp_qloop_22*tmp_qloop_46; @@ -3127,11 +3126,9 @@ void P2VectorToP1ElementwiseGradRhoByRhoDotUIcosahedralShellMap::toMatrix_macro_ const real_t tmp_qloop_58 = tmp_qloop_30*tmp_qloop_47 + tmp_qloop_31*tmp_qloop_57; const real_t tmp_qloop_66 = pow(tmp_qloop_31, -2.5000000000000000)*3.0; const real_t tmp_qloop_67 = tmp_qloop_36*tmp_qloop_66; - const real_t tmp_qloop_68 = tmp_qloop_21*tmp_qloop_32; const real_t tmp_qloop_69 = tmp_qloop_18*2.0; const real_t tmp_qloop_70 = -tmp_qloop_41; const real_t tmp_qloop_71 = tmp_qloop_35*2.0; - const real_t tmp_qloop_72 = tmp_qloop_21*tmp_qloop_45; const real_t tmp_qloop_73 = -tmp_qloop_31*tmp_qloop_72; const real_t tmp_qloop_74 = -tmp_qloop_20 + tmp_qloop_27; const real_t tmp_qloop_75 = tmp_qloop_18*tmp_qloop_68 + tmp_qloop_34*2.0 + tmp_qloop_74; @@ -3193,14 +3190,7 @@ void P2VectorToP1ElementwiseGradRhoByRhoDotUIcosahedralShellMap::toMatrix_macro_ const real_t tmp_qloop_132 = tmp_qloop_109 - tmp_qloop_110 - tmp_qloop_112 - tmp_qloop_131; const real_t tmp_qloop_133 = tmp_qloop_103*4.0; const real_t tmp_qloop_134 = tmp_qloop_111 - tmp_qloop_114 - tmp_qloop_133; - const real_t tmp_qloop_135 = 1.0 / (rho_dof_0*tmp_qloop_115 + rho_dof_1*tmp_qloop_127 + rho_dof_2*tmp_qloop_128 + rho_dof_3*tmp_qloop_129 + rho_dof_4*tmp_qloop_110 + rho_dof_5*tmp_qloop_113 + rho_dof_6*tmp_qloop_112 + rho_dof_7*tmp_qloop_130 + rho_dof_8*tmp_qloop_132 + rho_dof_9*tmp_qloop_134)*_data_q_w[q]; - const real_t tmp_qloop_136 = tmp_qloop_135*(1.0 - _data_q_p_0[q] - _data_q_p_1[q] - _data_q_p_2[q]); - const real_t tmp_qloop_142 = tmp_qloop_135*_data_q_p_0[q]; - const real_t tmp_qloop_144 = tmp_qloop_135*_data_q_p_2[q]; - const real_t tmp_qloop_147 = tmp_qloop_135*_data_q_p_1[q]; const real_t tmp_qloop_158 = tmp_qloop_105*tmp_qloop_111; - const real_t tmp_qloop_159 = tmp_qloop_135*tmp_qloop_158; - const real_t tmp_qloop_161 = tmp_qloop_107*tmp_qloop_135; const real_t jac_blending_0_0 = tmp_qloop_36*tmp_qloop_38; const real_t jac_blending_0_1 = tmp_qloop_18*tmp_qloop_44; const real_t jac_blending_0_2 = tmp_qloop_18*tmp_qloop_50; @@ -3218,6 +3208,13 @@ void P2VectorToP1ElementwiseGradRhoByRhoDotUIcosahedralShellMap::toMatrix_macro_ const real_t tmp_qloop_64 = jac_blending_0_0*tmp_qloop_59 - jac_blending_0_0*tmp_qloop_61 + jac_blending_0_2*jac_blending_1_0*jac_blending_2_1 - jac_blending_1_0*tmp_qloop_62 + jac_blending_2_0*tmp_qloop_60 - jac_blending_2_0*tmp_qloop_63; const real_t tmp_qloop_65 = 1.0 / (tmp_qloop_64); const real_t abs_det_jac_blending = tmp_qloop_64; + const real_t tmp_qloop_135 = abs_det_jac_affine_BLUE_DOWN*abs_det_jac_blending*1.0 / (rho_dof_0*tmp_qloop_115 + rho_dof_1*tmp_qloop_127 + rho_dof_2*tmp_qloop_128 + rho_dof_3*tmp_qloop_129 + rho_dof_4*tmp_qloop_110 + rho_dof_5*tmp_qloop_113 + rho_dof_6*tmp_qloop_112 + rho_dof_7*tmp_qloop_130 + rho_dof_8*tmp_qloop_132 + rho_dof_9*tmp_qloop_134)*_data_q_w[q]; + const real_t tmp_qloop_136 = tmp_qloop_135*(1.0 - _data_q_p_0[q] - _data_q_p_1[q] - _data_q_p_2[q]); + const real_t tmp_qloop_142 = tmp_qloop_135*_data_q_p_0[q]; + const real_t tmp_qloop_144 = tmp_qloop_135*_data_q_p_2[q]; + const real_t tmp_qloop_147 = tmp_qloop_135*_data_q_p_1[q]; + const real_t tmp_qloop_159 = tmp_qloop_135*tmp_qloop_158; + const real_t tmp_qloop_161 = tmp_qloop_107*tmp_qloop_135; const real_t jac_blending_inv_0_0 = tmp_qloop_65*(tmp_qloop_59 - tmp_qloop_61); const real_t jac_blending_inv_0_1 = tmp_qloop_65*(jac_blending_0_2*jac_blending_2_1 - tmp_qloop_62); const real_t jac_blending_inv_0_2 = tmp_qloop_65*(tmp_qloop_60 - tmp_qloop_63); @@ -3806,67 +3803,87 @@ void P2VectorToP1ElementwiseGradRhoByRhoDotUIcosahedralShellMap::toMatrix_macro_ mat->addValues( _data_rowIdx, _data_colIdx, _data_mat ); } } + const real_t tmp_coords_jac_0_GREEN_UP = 1.0 / (micro_edges_per_macro_edge_float)*1.0; + const real_t tmp_coords_jac_1_GREEN_UP = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_GREEN_UP*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0); + const real_t tmp_coords_jac_2_GREEN_UP = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_GREEN_UP*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1); + const real_t tmp_coords_jac_3_GREEN_UP = macro_vertex_coord_id_0comp2 + tmp_coords_jac_0_GREEN_UP*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2); + const real_t tmp_coords_jac_4_GREEN_UP = tmp_coords_jac_0_GREEN_UP*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0); + const real_t tmp_coords_jac_5_GREEN_UP = tmp_coords_jac_0_GREEN_UP*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1); + const real_t tmp_coords_jac_6_GREEN_UP = tmp_coords_jac_0_GREEN_UP*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2); + const real_t p_affine_const_0_0_GREEN_UP = tmp_coords_jac_1_GREEN_UP; + const real_t p_affine_const_0_1_GREEN_UP = tmp_coords_jac_2_GREEN_UP; + const real_t p_affine_const_0_2_GREEN_UP = tmp_coords_jac_3_GREEN_UP; + const real_t p_affine_const_1_0_GREEN_UP = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_GREEN_UP*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0); + const real_t p_affine_const_1_1_GREEN_UP = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_GREEN_UP*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1); + const real_t p_affine_const_1_2_GREEN_UP = macro_vertex_coord_id_0comp2 + tmp_coords_jac_0_GREEN_UP*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2); + const real_t p_affine_const_2_0_GREEN_UP = macro_vertex_coord_id_0comp0 + tmp_coords_jac_4_GREEN_UP; + const real_t p_affine_const_2_1_GREEN_UP = macro_vertex_coord_id_0comp1 + tmp_coords_jac_5_GREEN_UP; + const real_t p_affine_const_2_2_GREEN_UP = macro_vertex_coord_id_0comp2 + tmp_coords_jac_6_GREEN_UP; + const real_t p_affine_const_3_0_GREEN_UP = tmp_coords_jac_1_GREEN_UP + tmp_coords_jac_4_GREEN_UP; + const real_t p_affine_const_3_1_GREEN_UP = tmp_coords_jac_2_GREEN_UP + tmp_coords_jac_5_GREEN_UP; + const real_t p_affine_const_3_2_GREEN_UP = tmp_coords_jac_3_GREEN_UP + tmp_coords_jac_6_GREEN_UP; + const real_t jac_affine_0_0_GREEN_UP = -p_affine_const_0_0_GREEN_UP + p_affine_const_1_0_GREEN_UP; + const real_t jac_affine_0_1_GREEN_UP = -p_affine_const_0_0_GREEN_UP + p_affine_const_2_0_GREEN_UP; + const real_t jac_affine_0_2_GREEN_UP = -p_affine_const_0_0_GREEN_UP + p_affine_const_3_0_GREEN_UP; + const real_t jac_affine_1_0_GREEN_UP = -p_affine_const_0_1_GREEN_UP + p_affine_const_1_1_GREEN_UP; + const real_t jac_affine_1_1_GREEN_UP = -p_affine_const_0_1_GREEN_UP + p_affine_const_2_1_GREEN_UP; + const real_t tmp_coords_jac_11_GREEN_UP = jac_affine_0_2_GREEN_UP*jac_affine_1_1_GREEN_UP; + const real_t jac_affine_1_2_GREEN_UP = -p_affine_const_0_1_GREEN_UP + p_affine_const_3_1_GREEN_UP; + const real_t tmp_coords_jac_9_GREEN_UP = jac_affine_0_1_GREEN_UP*jac_affine_1_2_GREEN_UP; + const real_t jac_affine_2_0_GREEN_UP = -p_affine_const_0_2_GREEN_UP + p_affine_const_1_2_GREEN_UP; + const real_t jac_affine_2_1_GREEN_UP = -p_affine_const_0_2_GREEN_UP + p_affine_const_2_2_GREEN_UP; + const real_t tmp_coords_jac_8_GREEN_UP = jac_affine_1_2_GREEN_UP*jac_affine_2_1_GREEN_UP; + const real_t jac_affine_2_2_GREEN_UP = -p_affine_const_0_2_GREEN_UP + p_affine_const_3_2_GREEN_UP; + const real_t tmp_coords_jac_7_GREEN_UP = jac_affine_1_1_GREEN_UP*jac_affine_2_2_GREEN_UP; + const real_t tmp_coords_jac_10_GREEN_UP = jac_affine_0_1_GREEN_UP*jac_affine_2_2_GREEN_UP; + const real_t tmp_coords_jac_12_GREEN_UP = jac_affine_0_0_GREEN_UP*tmp_coords_jac_7_GREEN_UP - jac_affine_0_0_GREEN_UP*tmp_coords_jac_8_GREEN_UP + jac_affine_0_2_GREEN_UP*jac_affine_1_0_GREEN_UP*jac_affine_2_1_GREEN_UP - jac_affine_1_0_GREEN_UP*tmp_coords_jac_10_GREEN_UP - jac_affine_2_0_GREEN_UP*tmp_coords_jac_11_GREEN_UP + jac_affine_2_0_GREEN_UP*tmp_coords_jac_9_GREEN_UP; + const real_t tmp_coords_jac_13_GREEN_UP = 1.0 / (tmp_coords_jac_12_GREEN_UP); + const real_t jac_affine_inv_0_0_GREEN_UP = tmp_coords_jac_13_GREEN_UP*(tmp_coords_jac_7_GREEN_UP - tmp_coords_jac_8_GREEN_UP); + const real_t jac_affine_inv_0_1_GREEN_UP = tmp_coords_jac_13_GREEN_UP*(jac_affine_0_2_GREEN_UP*jac_affine_2_1_GREEN_UP - tmp_coords_jac_10_GREEN_UP); + const real_t jac_affine_inv_0_2_GREEN_UP = tmp_coords_jac_13_GREEN_UP*(-tmp_coords_jac_11_GREEN_UP + tmp_coords_jac_9_GREEN_UP); + const real_t jac_affine_inv_1_0_GREEN_UP = tmp_coords_jac_13_GREEN_UP*(-jac_affine_1_0_GREEN_UP*jac_affine_2_2_GREEN_UP + jac_affine_1_2_GREEN_UP*jac_affine_2_0_GREEN_UP); + const real_t jac_affine_inv_1_1_GREEN_UP = tmp_coords_jac_13_GREEN_UP*(jac_affine_0_0_GREEN_UP*jac_affine_2_2_GREEN_UP - jac_affine_0_2_GREEN_UP*jac_affine_2_0_GREEN_UP); + const real_t jac_affine_inv_1_2_GREEN_UP = tmp_coords_jac_13_GREEN_UP*(-jac_affine_0_0_GREEN_UP*jac_affine_1_2_GREEN_UP + jac_affine_0_2_GREEN_UP*jac_affine_1_0_GREEN_UP); + const real_t jac_affine_inv_2_0_GREEN_UP = tmp_coords_jac_13_GREEN_UP*(jac_affine_1_0_GREEN_UP*jac_affine_2_1_GREEN_UP - jac_affine_1_1_GREEN_UP*jac_affine_2_0_GREEN_UP); + const real_t jac_affine_inv_2_1_GREEN_UP = tmp_coords_jac_13_GREEN_UP*(-jac_affine_0_0_GREEN_UP*jac_affine_2_1_GREEN_UP + jac_affine_0_1_GREEN_UP*jac_affine_2_0_GREEN_UP); + const real_t jac_affine_inv_2_2_GREEN_UP = tmp_coords_jac_13_GREEN_UP*(jac_affine_0_0_GREEN_UP*jac_affine_1_1_GREEN_UP - jac_affine_0_1_GREEN_UP*jac_affine_1_0_GREEN_UP); + const real_t abs_det_jac_affine_GREEN_UP = abs(tmp_coords_jac_12_GREEN_UP); { /* CellType.GREEN_UP */ - const real_t tmp_coords_jac_0_GREEN_UP = 1.0 / (micro_edges_per_macro_edge_float)*1.0; - const real_t tmp_coords_jac_1_GREEN_UP = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_GREEN_UP*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0); - const real_t tmp_coords_jac_2_GREEN_UP = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_GREEN_UP*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1); - const real_t tmp_coords_jac_3_GREEN_UP = macro_vertex_coord_id_0comp2 + tmp_coords_jac_0_GREEN_UP*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2); - const real_t tmp_coords_jac_4_GREEN_UP = tmp_coords_jac_0_GREEN_UP*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0); - const real_t tmp_coords_jac_5_GREEN_UP = tmp_coords_jac_0_GREEN_UP*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1); - const real_t tmp_coords_jac_6_GREEN_UP = tmp_coords_jac_0_GREEN_UP*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2); - const real_t p_affine_const_0_0_GREEN_UP = tmp_coords_jac_1_GREEN_UP; - const real_t p_affine_const_0_1_GREEN_UP = tmp_coords_jac_2_GREEN_UP; - const real_t p_affine_const_0_2_GREEN_UP = tmp_coords_jac_3_GREEN_UP; - const real_t p_affine_const_1_0_GREEN_UP = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_GREEN_UP*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0); - const real_t p_affine_const_1_1_GREEN_UP = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_GREEN_UP*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1); - const real_t p_affine_const_1_2_GREEN_UP = macro_vertex_coord_id_0comp2 + tmp_coords_jac_0_GREEN_UP*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2); - const real_t p_affine_const_2_0_GREEN_UP = macro_vertex_coord_id_0comp0 + tmp_coords_jac_4_GREEN_UP; - const real_t p_affine_const_2_1_GREEN_UP = macro_vertex_coord_id_0comp1 + tmp_coords_jac_5_GREEN_UP; - const real_t p_affine_const_2_2_GREEN_UP = macro_vertex_coord_id_0comp2 + tmp_coords_jac_6_GREEN_UP; - const real_t p_affine_const_3_0_GREEN_UP = tmp_coords_jac_1_GREEN_UP + tmp_coords_jac_4_GREEN_UP; - const real_t p_affine_const_3_1_GREEN_UP = tmp_coords_jac_2_GREEN_UP + tmp_coords_jac_5_GREEN_UP; - const real_t p_affine_const_3_2_GREEN_UP = tmp_coords_jac_3_GREEN_UP + tmp_coords_jac_6_GREEN_UP; - const real_t jac_affine_0_0_GREEN_UP = -p_affine_const_0_0_GREEN_UP + p_affine_const_1_0_GREEN_UP; - const real_t jac_affine_0_1_GREEN_UP = -p_affine_const_0_0_GREEN_UP + p_affine_const_2_0_GREEN_UP; - const real_t jac_affine_0_2_GREEN_UP = -p_affine_const_0_0_GREEN_UP + p_affine_const_3_0_GREEN_UP; - const real_t jac_affine_1_0_GREEN_UP = -p_affine_const_0_1_GREEN_UP + p_affine_const_1_1_GREEN_UP; - const real_t jac_affine_1_1_GREEN_UP = -p_affine_const_0_1_GREEN_UP + p_affine_const_2_1_GREEN_UP; - const real_t tmp_coords_jac_11_GREEN_UP = jac_affine_0_2_GREEN_UP*jac_affine_1_1_GREEN_UP; - const real_t jac_affine_1_2_GREEN_UP = -p_affine_const_0_1_GREEN_UP + p_affine_const_3_1_GREEN_UP; - const real_t tmp_coords_jac_9_GREEN_UP = jac_affine_0_1_GREEN_UP*jac_affine_1_2_GREEN_UP; - const real_t jac_affine_2_0_GREEN_UP = -p_affine_const_0_2_GREEN_UP + p_affine_const_1_2_GREEN_UP; - const real_t jac_affine_2_1_GREEN_UP = -p_affine_const_0_2_GREEN_UP + p_affine_const_2_2_GREEN_UP; - const real_t tmp_coords_jac_8_GREEN_UP = jac_affine_1_2_GREEN_UP*jac_affine_2_1_GREEN_UP; - const real_t jac_affine_2_2_GREEN_UP = -p_affine_const_0_2_GREEN_UP + p_affine_const_3_2_GREEN_UP; - const real_t tmp_coords_jac_7_GREEN_UP = jac_affine_1_1_GREEN_UP*jac_affine_2_2_GREEN_UP; - const real_t tmp_coords_jac_10_GREEN_UP = jac_affine_0_1_GREEN_UP*jac_affine_2_2_GREEN_UP; - const real_t tmp_coords_jac_12_GREEN_UP = 1.0 / (jac_affine_0_0_GREEN_UP*tmp_coords_jac_7_GREEN_UP - jac_affine_0_0_GREEN_UP*tmp_coords_jac_8_GREEN_UP + jac_affine_0_2_GREEN_UP*jac_affine_1_0_GREEN_UP*jac_affine_2_1_GREEN_UP - jac_affine_1_0_GREEN_UP*tmp_coords_jac_10_GREEN_UP - jac_affine_2_0_GREEN_UP*tmp_coords_jac_11_GREEN_UP + jac_affine_2_0_GREEN_UP*tmp_coords_jac_9_GREEN_UP); - const real_t jac_affine_inv_0_0_GREEN_UP = tmp_coords_jac_12_GREEN_UP*(tmp_coords_jac_7_GREEN_UP - tmp_coords_jac_8_GREEN_UP); - const real_t jac_affine_inv_0_1_GREEN_UP = tmp_coords_jac_12_GREEN_UP*(jac_affine_0_2_GREEN_UP*jac_affine_2_1_GREEN_UP - tmp_coords_jac_10_GREEN_UP); - const real_t jac_affine_inv_0_2_GREEN_UP = tmp_coords_jac_12_GREEN_UP*(-tmp_coords_jac_11_GREEN_UP + tmp_coords_jac_9_GREEN_UP); - const real_t jac_affine_inv_1_0_GREEN_UP = tmp_coords_jac_12_GREEN_UP*(-jac_affine_1_0_GREEN_UP*jac_affine_2_2_GREEN_UP + jac_affine_1_2_GREEN_UP*jac_affine_2_0_GREEN_UP); - const real_t jac_affine_inv_1_1_GREEN_UP = tmp_coords_jac_12_GREEN_UP*(jac_affine_0_0_GREEN_UP*jac_affine_2_2_GREEN_UP - jac_affine_0_2_GREEN_UP*jac_affine_2_0_GREEN_UP); - const real_t jac_affine_inv_1_2_GREEN_UP = tmp_coords_jac_12_GREEN_UP*(-jac_affine_0_0_GREEN_UP*jac_affine_1_2_GREEN_UP + jac_affine_0_2_GREEN_UP*jac_affine_1_0_GREEN_UP); - const real_t jac_affine_inv_2_0_GREEN_UP = tmp_coords_jac_12_GREEN_UP*(jac_affine_1_0_GREEN_UP*jac_affine_2_1_GREEN_UP - jac_affine_1_1_GREEN_UP*jac_affine_2_0_GREEN_UP); - const real_t jac_affine_inv_2_1_GREEN_UP = tmp_coords_jac_12_GREEN_UP*(-jac_affine_0_0_GREEN_UP*jac_affine_2_1_GREEN_UP + jac_affine_0_1_GREEN_UP*jac_affine_2_0_GREEN_UP); - const real_t jac_affine_inv_2_2_GREEN_UP = tmp_coords_jac_12_GREEN_UP*(jac_affine_0_0_GREEN_UP*jac_affine_1_1_GREEN_UP - jac_affine_0_1_GREEN_UP*jac_affine_1_0_GREEN_UP); for (int64_t ctr_2 = 0; ctr_2 < micro_edges_per_macro_edge; ctr_2 += 1) for (int64_t ctr_1 = 0; ctr_1 < -ctr_2 + micro_edges_per_macro_edge; ctr_1 += 1) for (int64_t ctr_0 = 0; ctr_0 < -ctr_1 - ctr_2 + micro_edges_per_macro_edge - 1; ctr_0 += 1) { - const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*((real_t)(ctr_1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*((real_t)(ctr_2)); - const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*((real_t)(ctr_1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*((real_t)(ctr_2)); - const real_t p_affine_0_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*((real_t)(ctr_1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*((real_t)(ctr_2)); - const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*((real_t)(ctr_1 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*((real_t)(ctr_2)); - const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*((real_t)(ctr_1 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*((real_t)(ctr_2)); - const real_t p_affine_1_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*((real_t)(ctr_1 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*((real_t)(ctr_2)); - const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*((real_t)(ctr_1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*((real_t)(ctr_2 + 1)); - const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*((real_t)(ctr_1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*((real_t)(ctr_2 + 1)); - const real_t p_affine_2_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*((real_t)(ctr_1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*((real_t)(ctr_2 + 1)); - const real_t p_affine_3_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*((real_t)(ctr_1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*((real_t)(ctr_2 + 1)); - const real_t p_affine_3_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*((real_t)(ctr_1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*((real_t)(ctr_2 + 1)); - const real_t p_affine_3_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*((real_t)(ctr_1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*((real_t)(ctr_2 + 1)); + + const int64_t phantom_ctr_0 = ctr_0; + real_t _data_float_loop_ctr_array_dim_0[4]; + _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0; + _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1; + _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2; + _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3; + real_t _data_float_loop_ctr_array_dim_1[4]; + _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1; + real_t _data_float_loop_ctr_array_dim_2[4]; + _data_float_loop_ctr_array_dim_2[0] = (real_t) ctr_2; + _data_float_loop_ctr_array_dim_2[1] = (real_t) ctr_2; + _data_float_loop_ctr_array_dim_2[2] = (real_t) ctr_2; + _data_float_loop_ctr_array_dim_2[3] = (real_t) ctr_2; + + const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]; + const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]; + const real_t p_affine_0_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]; + const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]; + const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]; + const real_t p_affine_1_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]; + const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_2_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_3_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_3_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_3_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0; const real_t rho_dof_0 = _data_rhoVertex[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1]; const real_t rho_dof_1 = _data_rhoVertex[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))]; const real_t rho_dof_2 = _data_rhoVertex[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))]; @@ -3999,28 +4016,8 @@ void P2VectorToP1ElementwiseGradRhoByRhoDotUIcosahedralShellMap::toMatrix_macro_ real_t q_acc_3_29 = 0.0; for (int64_t q = 0; q < 5; q += 1) { - const real_t tmp_qloop_0 = rayVertex_1 - refVertex_1; - const real_t tmp_qloop_1 = -rayVertex_0; - const real_t tmp_qloop_2 = -forVertex_0 - tmp_qloop_1; - const real_t tmp_qloop_3 = rayVertex_2 - thrVertex_2; - const real_t tmp_qloop_4 = tmp_qloop_2*tmp_qloop_3; - const real_t tmp_qloop_5 = rayVertex_2 - refVertex_2; - const real_t tmp_qloop_6 = rayVertex_1 - thrVertex_1; - const real_t tmp_qloop_7 = tmp_qloop_2*tmp_qloop_6; - const real_t tmp_qloop_8 = -rayVertex_1; - const real_t tmp_qloop_9 = -forVertex_1 - tmp_qloop_8; - const real_t tmp_qloop_10 = rayVertex_0 - thrVertex_0; - const real_t tmp_qloop_11 = rayVertex_0 - refVertex_0; - const real_t tmp_qloop_12 = -rayVertex_2; - const real_t tmp_qloop_13 = -forVertex_2 - tmp_qloop_12; - const real_t tmp_qloop_14 = tmp_qloop_3*tmp_qloop_9; - const real_t tmp_qloop_15 = tmp_qloop_10*tmp_qloop_13; - const real_t tmp_qloop_16 = -tmp_qloop_0*tmp_qloop_15 + tmp_qloop_0*tmp_qloop_4 + tmp_qloop_10*tmp_qloop_5*tmp_qloop_9 + tmp_qloop_11*tmp_qloop_13*tmp_qloop_6 - tmp_qloop_11*tmp_qloop_14 - tmp_qloop_5*tmp_qloop_7; - const real_t tmp_qloop_17 = 1.0 / (tmp_qloop_16); const real_t tmp_qloop_18 = p_affine_0_0 + (-p_affine_0_0 + p_affine_1_0)*_data_q_p_0[q] + (-p_affine_0_0 + p_affine_2_0)*_data_q_p_1[q] + (-p_affine_0_0 + p_affine_3_0)*_data_q_p_2[q]; const real_t tmp_qloop_19 = (tmp_qloop_18*tmp_qloop_18); - const real_t tmp_qloop_20 = radRayVertex*tmp_qloop_16; - const real_t tmp_qloop_21 = radRayVertex - radRefVertex; const real_t tmp_qloop_22 = p_affine_0_2 + (-p_affine_0_2 + p_affine_1_2)*_data_q_p_0[q] + (-p_affine_0_2 + p_affine_2_2)*_data_q_p_1[q] + (-p_affine_0_2 + p_affine_3_2)*_data_q_p_2[q]; const real_t tmp_qloop_23 = -tmp_qloop_12 - tmp_qloop_22; const real_t tmp_qloop_24 = -tmp_qloop_1 - tmp_qloop_18; @@ -4031,28 +4028,21 @@ void P2VectorToP1ElementwiseGradRhoByRhoDotUIcosahedralShellMap::toMatrix_macro_ const real_t tmp_qloop_29 = (tmp_qloop_25*tmp_qloop_25); const real_t tmp_qloop_30 = (tmp_qloop_22*tmp_qloop_22); const real_t tmp_qloop_31 = tmp_qloop_19 + tmp_qloop_29 + tmp_qloop_30; - const real_t tmp_qloop_32 = -tmp_qloop_13*tmp_qloop_6 + tmp_qloop_14; - const real_t tmp_qloop_33 = -tmp_qloop_21*tmp_qloop_32; const real_t tmp_qloop_34 = tmp_qloop_18*tmp_qloop_33; const real_t tmp_qloop_35 = tmp_qloop_28 + tmp_qloop_34; const real_t tmp_qloop_36 = tmp_qloop_17*(-tmp_qloop_19*tmp_qloop_28 + tmp_qloop_31*tmp_qloop_35); const real_t tmp_qloop_37 = pow(tmp_qloop_31, -1.5000000000000000); const real_t tmp_qloop_38 = tmp_qloop_37*1.0; - const real_t tmp_qloop_39 = tmp_qloop_10*tmp_qloop_13 - tmp_qloop_4; - const real_t tmp_qloop_40 = tmp_qloop_21*tmp_qloop_39; const real_t tmp_qloop_41 = tmp_qloop_31*tmp_qloop_40; const real_t tmp_qloop_42 = -tmp_qloop_25*tmp_qloop_28 - tmp_qloop_41; const real_t tmp_qloop_43 = tmp_qloop_17*tmp_qloop_38; const real_t tmp_qloop_44 = tmp_qloop_42*tmp_qloop_43; - const real_t tmp_qloop_45 = -tmp_qloop_10*tmp_qloop_9 + tmp_qloop_7; - const real_t tmp_qloop_46 = -tmp_qloop_21*tmp_qloop_45; const real_t tmp_qloop_47 = -tmp_qloop_28; const real_t tmp_qloop_48 = tmp_qloop_22*tmp_qloop_47; const real_t tmp_qloop_49 = tmp_qloop_31*tmp_qloop_46 + tmp_qloop_48; const real_t tmp_qloop_50 = tmp_qloop_43*tmp_qloop_49; const real_t tmp_qloop_51 = tmp_qloop_18*tmp_qloop_47 + tmp_qloop_31*tmp_qloop_33; const real_t tmp_qloop_52 = tmp_qloop_43*tmp_qloop_51; - const real_t tmp_qloop_53 = -tmp_qloop_21*tmp_qloop_39; const real_t tmp_qloop_54 = tmp_qloop_25*tmp_qloop_53 + tmp_qloop_28; const real_t tmp_qloop_55 = tmp_qloop_29*tmp_qloop_47 + tmp_qloop_31*tmp_qloop_54; const real_t tmp_qloop_56 = tmp_qloop_22*tmp_qloop_46; @@ -4060,11 +4050,9 @@ void P2VectorToP1ElementwiseGradRhoByRhoDotUIcosahedralShellMap::toMatrix_macro_ const real_t tmp_qloop_58 = tmp_qloop_30*tmp_qloop_47 + tmp_qloop_31*tmp_qloop_57; const real_t tmp_qloop_66 = pow(tmp_qloop_31, -2.5000000000000000)*3.0; const real_t tmp_qloop_67 = tmp_qloop_36*tmp_qloop_66; - const real_t tmp_qloop_68 = tmp_qloop_21*tmp_qloop_32; const real_t tmp_qloop_69 = tmp_qloop_18*2.0; const real_t tmp_qloop_70 = -tmp_qloop_41; const real_t tmp_qloop_71 = tmp_qloop_35*2.0; - const real_t tmp_qloop_72 = tmp_qloop_21*tmp_qloop_45; const real_t tmp_qloop_73 = -tmp_qloop_31*tmp_qloop_72; const real_t tmp_qloop_74 = -tmp_qloop_20 + tmp_qloop_27; const real_t tmp_qloop_75 = tmp_qloop_18*tmp_qloop_68 + tmp_qloop_34*2.0 + tmp_qloop_74; @@ -4126,14 +4114,7 @@ void P2VectorToP1ElementwiseGradRhoByRhoDotUIcosahedralShellMap::toMatrix_macro_ const real_t tmp_qloop_132 = tmp_qloop_109 - tmp_qloop_110 - tmp_qloop_112 - tmp_qloop_131; const real_t tmp_qloop_133 = tmp_qloop_103*4.0; const real_t tmp_qloop_134 = tmp_qloop_111 - tmp_qloop_114 - tmp_qloop_133; - const real_t tmp_qloop_135 = 1.0 / (rho_dof_0*tmp_qloop_115 + rho_dof_1*tmp_qloop_127 + rho_dof_2*tmp_qloop_128 + rho_dof_3*tmp_qloop_129 + rho_dof_4*tmp_qloop_110 + rho_dof_5*tmp_qloop_113 + rho_dof_6*tmp_qloop_112 + rho_dof_7*tmp_qloop_130 + rho_dof_8*tmp_qloop_132 + rho_dof_9*tmp_qloop_134)*_data_q_w[q]; - const real_t tmp_qloop_136 = tmp_qloop_135*(1.0 - _data_q_p_0[q] - _data_q_p_1[q] - _data_q_p_2[q]); - const real_t tmp_qloop_142 = tmp_qloop_135*_data_q_p_0[q]; - const real_t tmp_qloop_144 = tmp_qloop_135*_data_q_p_2[q]; - const real_t tmp_qloop_147 = tmp_qloop_135*_data_q_p_1[q]; const real_t tmp_qloop_158 = tmp_qloop_105*tmp_qloop_111; - const real_t tmp_qloop_159 = tmp_qloop_135*tmp_qloop_158; - const real_t tmp_qloop_161 = tmp_qloop_107*tmp_qloop_135; const real_t jac_blending_0_0 = tmp_qloop_36*tmp_qloop_38; const real_t jac_blending_0_1 = tmp_qloop_18*tmp_qloop_44; const real_t jac_blending_0_2 = tmp_qloop_18*tmp_qloop_50; @@ -4151,6 +4132,13 @@ void P2VectorToP1ElementwiseGradRhoByRhoDotUIcosahedralShellMap::toMatrix_macro_ const real_t tmp_qloop_64 = jac_blending_0_0*tmp_qloop_59 - jac_blending_0_0*tmp_qloop_61 + jac_blending_0_2*jac_blending_1_0*jac_blending_2_1 - jac_blending_1_0*tmp_qloop_62 + jac_blending_2_0*tmp_qloop_60 - jac_blending_2_0*tmp_qloop_63; const real_t tmp_qloop_65 = 1.0 / (tmp_qloop_64); const real_t abs_det_jac_blending = tmp_qloop_64; + const real_t tmp_qloop_135 = abs_det_jac_affine_GREEN_UP*abs_det_jac_blending*1.0 / (rho_dof_0*tmp_qloop_115 + rho_dof_1*tmp_qloop_127 + rho_dof_2*tmp_qloop_128 + rho_dof_3*tmp_qloop_129 + rho_dof_4*tmp_qloop_110 + rho_dof_5*tmp_qloop_113 + rho_dof_6*tmp_qloop_112 + rho_dof_7*tmp_qloop_130 + rho_dof_8*tmp_qloop_132 + rho_dof_9*tmp_qloop_134)*_data_q_w[q]; + const real_t tmp_qloop_136 = tmp_qloop_135*(1.0 - _data_q_p_0[q] - _data_q_p_1[q] - _data_q_p_2[q]); + const real_t tmp_qloop_142 = tmp_qloop_135*_data_q_p_0[q]; + const real_t tmp_qloop_144 = tmp_qloop_135*_data_q_p_2[q]; + const real_t tmp_qloop_147 = tmp_qloop_135*_data_q_p_1[q]; + const real_t tmp_qloop_159 = tmp_qloop_135*tmp_qloop_158; + const real_t tmp_qloop_161 = tmp_qloop_107*tmp_qloop_135; const real_t jac_blending_inv_0_0 = tmp_qloop_65*(tmp_qloop_59 - tmp_qloop_61); const real_t jac_blending_inv_0_1 = tmp_qloop_65*(jac_blending_0_2*jac_blending_2_1 - tmp_qloop_62); const real_t jac_blending_inv_0_2 = tmp_qloop_65*(tmp_qloop_60 - tmp_qloop_63); @@ -4739,70 +4727,90 @@ void P2VectorToP1ElementwiseGradRhoByRhoDotUIcosahedralShellMap::toMatrix_macro_ mat->addValues( _data_rowIdx, _data_colIdx, _data_mat ); } } + const real_t tmp_coords_jac_0_GREEN_DOWN = 1.0 / (micro_edges_per_macro_edge_float)*1.0; + const real_t tmp_coords_jac_1_GREEN_DOWN = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_GREEN_DOWN*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0); + const real_t tmp_coords_jac_2_GREEN_DOWN = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_GREEN_DOWN*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1); + const real_t tmp_coords_jac_3_GREEN_DOWN = macro_vertex_coord_id_0comp2 + tmp_coords_jac_0_GREEN_DOWN*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2); + const real_t tmp_coords_jac_4_GREEN_DOWN = tmp_coords_jac_0_GREEN_DOWN*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0); + const real_t tmp_coords_jac_5_GREEN_DOWN = tmp_coords_jac_0_GREEN_DOWN*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1); + const real_t tmp_coords_jac_6_GREEN_DOWN = tmp_coords_jac_0_GREEN_DOWN*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2); + const real_t tmp_coords_jac_7_GREEN_DOWN = tmp_coords_jac_0_GREEN_DOWN*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0); + const real_t tmp_coords_jac_8_GREEN_DOWN = tmp_coords_jac_0_GREEN_DOWN*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1); + const real_t tmp_coords_jac_9_GREEN_DOWN = tmp_coords_jac_0_GREEN_DOWN*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2); + const real_t p_affine_const_0_0_GREEN_DOWN = tmp_coords_jac_1_GREEN_DOWN; + const real_t p_affine_const_0_1_GREEN_DOWN = tmp_coords_jac_2_GREEN_DOWN; + const real_t p_affine_const_0_2_GREEN_DOWN = tmp_coords_jac_3_GREEN_DOWN; + const real_t p_affine_const_1_0_GREEN_DOWN = tmp_coords_jac_1_GREEN_DOWN + tmp_coords_jac_4_GREEN_DOWN; + const real_t p_affine_const_1_1_GREEN_DOWN = tmp_coords_jac_2_GREEN_DOWN + tmp_coords_jac_5_GREEN_DOWN; + const real_t p_affine_const_1_2_GREEN_DOWN = tmp_coords_jac_3_GREEN_DOWN + tmp_coords_jac_6_GREEN_DOWN; + const real_t p_affine_const_2_0_GREEN_DOWN = macro_vertex_coord_id_0comp0 + tmp_coords_jac_4_GREEN_DOWN + tmp_coords_jac_7_GREEN_DOWN; + const real_t p_affine_const_2_1_GREEN_DOWN = macro_vertex_coord_id_0comp1 + tmp_coords_jac_5_GREEN_DOWN + tmp_coords_jac_8_GREEN_DOWN; + const real_t p_affine_const_2_2_GREEN_DOWN = macro_vertex_coord_id_0comp2 + tmp_coords_jac_6_GREEN_DOWN + tmp_coords_jac_9_GREEN_DOWN; + const real_t p_affine_const_3_0_GREEN_DOWN = tmp_coords_jac_1_GREEN_DOWN + tmp_coords_jac_7_GREEN_DOWN; + const real_t p_affine_const_3_1_GREEN_DOWN = tmp_coords_jac_2_GREEN_DOWN + tmp_coords_jac_8_GREEN_DOWN; + const real_t p_affine_const_3_2_GREEN_DOWN = tmp_coords_jac_3_GREEN_DOWN + tmp_coords_jac_9_GREEN_DOWN; + const real_t jac_affine_0_0_GREEN_DOWN = -p_affine_const_0_0_GREEN_DOWN + p_affine_const_1_0_GREEN_DOWN; + const real_t jac_affine_0_1_GREEN_DOWN = -p_affine_const_0_0_GREEN_DOWN + p_affine_const_2_0_GREEN_DOWN; + const real_t jac_affine_0_2_GREEN_DOWN = -p_affine_const_0_0_GREEN_DOWN + p_affine_const_3_0_GREEN_DOWN; + const real_t jac_affine_1_0_GREEN_DOWN = -p_affine_const_0_1_GREEN_DOWN + p_affine_const_1_1_GREEN_DOWN; + const real_t jac_affine_1_1_GREEN_DOWN = -p_affine_const_0_1_GREEN_DOWN + p_affine_const_2_1_GREEN_DOWN; + const real_t tmp_coords_jac_14_GREEN_DOWN = jac_affine_0_2_GREEN_DOWN*jac_affine_1_1_GREEN_DOWN; + const real_t jac_affine_1_2_GREEN_DOWN = -p_affine_const_0_1_GREEN_DOWN + p_affine_const_3_1_GREEN_DOWN; + const real_t tmp_coords_jac_12_GREEN_DOWN = jac_affine_0_1_GREEN_DOWN*jac_affine_1_2_GREEN_DOWN; + const real_t jac_affine_2_0_GREEN_DOWN = -p_affine_const_0_2_GREEN_DOWN + p_affine_const_1_2_GREEN_DOWN; + const real_t jac_affine_2_1_GREEN_DOWN = -p_affine_const_0_2_GREEN_DOWN + p_affine_const_2_2_GREEN_DOWN; + const real_t tmp_coords_jac_11_GREEN_DOWN = jac_affine_1_2_GREEN_DOWN*jac_affine_2_1_GREEN_DOWN; + const real_t jac_affine_2_2_GREEN_DOWN = -p_affine_const_0_2_GREEN_DOWN + p_affine_const_3_2_GREEN_DOWN; + const real_t tmp_coords_jac_10_GREEN_DOWN = jac_affine_1_1_GREEN_DOWN*jac_affine_2_2_GREEN_DOWN; + const real_t tmp_coords_jac_13_GREEN_DOWN = jac_affine_0_1_GREEN_DOWN*jac_affine_2_2_GREEN_DOWN; + const real_t tmp_coords_jac_15_GREEN_DOWN = jac_affine_0_0_GREEN_DOWN*tmp_coords_jac_10_GREEN_DOWN - jac_affine_0_0_GREEN_DOWN*tmp_coords_jac_11_GREEN_DOWN + jac_affine_0_2_GREEN_DOWN*jac_affine_1_0_GREEN_DOWN*jac_affine_2_1_GREEN_DOWN - jac_affine_1_0_GREEN_DOWN*tmp_coords_jac_13_GREEN_DOWN + jac_affine_2_0_GREEN_DOWN*tmp_coords_jac_12_GREEN_DOWN - jac_affine_2_0_GREEN_DOWN*tmp_coords_jac_14_GREEN_DOWN; + const real_t tmp_coords_jac_16_GREEN_DOWN = 1.0 / (tmp_coords_jac_15_GREEN_DOWN); + const real_t jac_affine_inv_0_0_GREEN_DOWN = tmp_coords_jac_16_GREEN_DOWN*(tmp_coords_jac_10_GREEN_DOWN - tmp_coords_jac_11_GREEN_DOWN); + const real_t jac_affine_inv_0_1_GREEN_DOWN = tmp_coords_jac_16_GREEN_DOWN*(jac_affine_0_2_GREEN_DOWN*jac_affine_2_1_GREEN_DOWN - tmp_coords_jac_13_GREEN_DOWN); + const real_t jac_affine_inv_0_2_GREEN_DOWN = tmp_coords_jac_16_GREEN_DOWN*(tmp_coords_jac_12_GREEN_DOWN - tmp_coords_jac_14_GREEN_DOWN); + const real_t jac_affine_inv_1_0_GREEN_DOWN = tmp_coords_jac_16_GREEN_DOWN*(-jac_affine_1_0_GREEN_DOWN*jac_affine_2_2_GREEN_DOWN + jac_affine_1_2_GREEN_DOWN*jac_affine_2_0_GREEN_DOWN); + const real_t jac_affine_inv_1_1_GREEN_DOWN = tmp_coords_jac_16_GREEN_DOWN*(jac_affine_0_0_GREEN_DOWN*jac_affine_2_2_GREEN_DOWN - jac_affine_0_2_GREEN_DOWN*jac_affine_2_0_GREEN_DOWN); + const real_t jac_affine_inv_1_2_GREEN_DOWN = tmp_coords_jac_16_GREEN_DOWN*(-jac_affine_0_0_GREEN_DOWN*jac_affine_1_2_GREEN_DOWN + jac_affine_0_2_GREEN_DOWN*jac_affine_1_0_GREEN_DOWN); + const real_t jac_affine_inv_2_0_GREEN_DOWN = tmp_coords_jac_16_GREEN_DOWN*(jac_affine_1_0_GREEN_DOWN*jac_affine_2_1_GREEN_DOWN - jac_affine_1_1_GREEN_DOWN*jac_affine_2_0_GREEN_DOWN); + const real_t jac_affine_inv_2_1_GREEN_DOWN = tmp_coords_jac_16_GREEN_DOWN*(-jac_affine_0_0_GREEN_DOWN*jac_affine_2_1_GREEN_DOWN + jac_affine_0_1_GREEN_DOWN*jac_affine_2_0_GREEN_DOWN); + const real_t jac_affine_inv_2_2_GREEN_DOWN = tmp_coords_jac_16_GREEN_DOWN*(jac_affine_0_0_GREEN_DOWN*jac_affine_1_1_GREEN_DOWN - jac_affine_0_1_GREEN_DOWN*jac_affine_1_0_GREEN_DOWN); + const real_t abs_det_jac_affine_GREEN_DOWN = abs(tmp_coords_jac_15_GREEN_DOWN); { /* CellType.GREEN_DOWN */ - const real_t tmp_coords_jac_0_GREEN_DOWN = 1.0 / (micro_edges_per_macro_edge_float)*1.0; - const real_t tmp_coords_jac_1_GREEN_DOWN = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_GREEN_DOWN*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0); - const real_t tmp_coords_jac_2_GREEN_DOWN = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_GREEN_DOWN*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1); - const real_t tmp_coords_jac_3_GREEN_DOWN = macro_vertex_coord_id_0comp2 + tmp_coords_jac_0_GREEN_DOWN*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2); - const real_t tmp_coords_jac_4_GREEN_DOWN = tmp_coords_jac_0_GREEN_DOWN*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0); - const real_t tmp_coords_jac_5_GREEN_DOWN = tmp_coords_jac_0_GREEN_DOWN*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1); - const real_t tmp_coords_jac_6_GREEN_DOWN = tmp_coords_jac_0_GREEN_DOWN*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2); - const real_t tmp_coords_jac_7_GREEN_DOWN = tmp_coords_jac_0_GREEN_DOWN*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0); - const real_t tmp_coords_jac_8_GREEN_DOWN = tmp_coords_jac_0_GREEN_DOWN*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1); - const real_t tmp_coords_jac_9_GREEN_DOWN = tmp_coords_jac_0_GREEN_DOWN*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2); - const real_t p_affine_const_0_0_GREEN_DOWN = tmp_coords_jac_1_GREEN_DOWN; - const real_t p_affine_const_0_1_GREEN_DOWN = tmp_coords_jac_2_GREEN_DOWN; - const real_t p_affine_const_0_2_GREEN_DOWN = tmp_coords_jac_3_GREEN_DOWN; - const real_t p_affine_const_1_0_GREEN_DOWN = tmp_coords_jac_1_GREEN_DOWN + tmp_coords_jac_4_GREEN_DOWN; - const real_t p_affine_const_1_1_GREEN_DOWN = tmp_coords_jac_2_GREEN_DOWN + tmp_coords_jac_5_GREEN_DOWN; - const real_t p_affine_const_1_2_GREEN_DOWN = tmp_coords_jac_3_GREEN_DOWN + tmp_coords_jac_6_GREEN_DOWN; - const real_t p_affine_const_2_0_GREEN_DOWN = macro_vertex_coord_id_0comp0 + tmp_coords_jac_4_GREEN_DOWN + tmp_coords_jac_7_GREEN_DOWN; - const real_t p_affine_const_2_1_GREEN_DOWN = macro_vertex_coord_id_0comp1 + tmp_coords_jac_5_GREEN_DOWN + tmp_coords_jac_8_GREEN_DOWN; - const real_t p_affine_const_2_2_GREEN_DOWN = macro_vertex_coord_id_0comp2 + tmp_coords_jac_6_GREEN_DOWN + tmp_coords_jac_9_GREEN_DOWN; - const real_t p_affine_const_3_0_GREEN_DOWN = tmp_coords_jac_1_GREEN_DOWN + tmp_coords_jac_7_GREEN_DOWN; - const real_t p_affine_const_3_1_GREEN_DOWN = tmp_coords_jac_2_GREEN_DOWN + tmp_coords_jac_8_GREEN_DOWN; - const real_t p_affine_const_3_2_GREEN_DOWN = tmp_coords_jac_3_GREEN_DOWN + tmp_coords_jac_9_GREEN_DOWN; - const real_t jac_affine_0_0_GREEN_DOWN = -p_affine_const_0_0_GREEN_DOWN + p_affine_const_1_0_GREEN_DOWN; - const real_t jac_affine_0_1_GREEN_DOWN = -p_affine_const_0_0_GREEN_DOWN + p_affine_const_2_0_GREEN_DOWN; - const real_t jac_affine_0_2_GREEN_DOWN = -p_affine_const_0_0_GREEN_DOWN + p_affine_const_3_0_GREEN_DOWN; - const real_t jac_affine_1_0_GREEN_DOWN = -p_affine_const_0_1_GREEN_DOWN + p_affine_const_1_1_GREEN_DOWN; - const real_t jac_affine_1_1_GREEN_DOWN = -p_affine_const_0_1_GREEN_DOWN + p_affine_const_2_1_GREEN_DOWN; - const real_t tmp_coords_jac_14_GREEN_DOWN = jac_affine_0_2_GREEN_DOWN*jac_affine_1_1_GREEN_DOWN; - const real_t jac_affine_1_2_GREEN_DOWN = -p_affine_const_0_1_GREEN_DOWN + p_affine_const_3_1_GREEN_DOWN; - const real_t tmp_coords_jac_12_GREEN_DOWN = jac_affine_0_1_GREEN_DOWN*jac_affine_1_2_GREEN_DOWN; - const real_t jac_affine_2_0_GREEN_DOWN = -p_affine_const_0_2_GREEN_DOWN + p_affine_const_1_2_GREEN_DOWN; - const real_t jac_affine_2_1_GREEN_DOWN = -p_affine_const_0_2_GREEN_DOWN + p_affine_const_2_2_GREEN_DOWN; - const real_t tmp_coords_jac_11_GREEN_DOWN = jac_affine_1_2_GREEN_DOWN*jac_affine_2_1_GREEN_DOWN; - const real_t jac_affine_2_2_GREEN_DOWN = -p_affine_const_0_2_GREEN_DOWN + p_affine_const_3_2_GREEN_DOWN; - const real_t tmp_coords_jac_10_GREEN_DOWN = jac_affine_1_1_GREEN_DOWN*jac_affine_2_2_GREEN_DOWN; - const real_t tmp_coords_jac_13_GREEN_DOWN = jac_affine_0_1_GREEN_DOWN*jac_affine_2_2_GREEN_DOWN; - const real_t tmp_coords_jac_15_GREEN_DOWN = 1.0 / (jac_affine_0_0_GREEN_DOWN*tmp_coords_jac_10_GREEN_DOWN - jac_affine_0_0_GREEN_DOWN*tmp_coords_jac_11_GREEN_DOWN + jac_affine_0_2_GREEN_DOWN*jac_affine_1_0_GREEN_DOWN*jac_affine_2_1_GREEN_DOWN - jac_affine_1_0_GREEN_DOWN*tmp_coords_jac_13_GREEN_DOWN + jac_affine_2_0_GREEN_DOWN*tmp_coords_jac_12_GREEN_DOWN - jac_affine_2_0_GREEN_DOWN*tmp_coords_jac_14_GREEN_DOWN); - const real_t jac_affine_inv_0_0_GREEN_DOWN = tmp_coords_jac_15_GREEN_DOWN*(tmp_coords_jac_10_GREEN_DOWN - tmp_coords_jac_11_GREEN_DOWN); - const real_t jac_affine_inv_0_1_GREEN_DOWN = tmp_coords_jac_15_GREEN_DOWN*(jac_affine_0_2_GREEN_DOWN*jac_affine_2_1_GREEN_DOWN - tmp_coords_jac_13_GREEN_DOWN); - const real_t jac_affine_inv_0_2_GREEN_DOWN = tmp_coords_jac_15_GREEN_DOWN*(tmp_coords_jac_12_GREEN_DOWN - tmp_coords_jac_14_GREEN_DOWN); - const real_t jac_affine_inv_1_0_GREEN_DOWN = tmp_coords_jac_15_GREEN_DOWN*(-jac_affine_1_0_GREEN_DOWN*jac_affine_2_2_GREEN_DOWN + jac_affine_1_2_GREEN_DOWN*jac_affine_2_0_GREEN_DOWN); - const real_t jac_affine_inv_1_1_GREEN_DOWN = tmp_coords_jac_15_GREEN_DOWN*(jac_affine_0_0_GREEN_DOWN*jac_affine_2_2_GREEN_DOWN - jac_affine_0_2_GREEN_DOWN*jac_affine_2_0_GREEN_DOWN); - const real_t jac_affine_inv_1_2_GREEN_DOWN = tmp_coords_jac_15_GREEN_DOWN*(-jac_affine_0_0_GREEN_DOWN*jac_affine_1_2_GREEN_DOWN + jac_affine_0_2_GREEN_DOWN*jac_affine_1_0_GREEN_DOWN); - const real_t jac_affine_inv_2_0_GREEN_DOWN = tmp_coords_jac_15_GREEN_DOWN*(jac_affine_1_0_GREEN_DOWN*jac_affine_2_1_GREEN_DOWN - jac_affine_1_1_GREEN_DOWN*jac_affine_2_0_GREEN_DOWN); - const real_t jac_affine_inv_2_1_GREEN_DOWN = tmp_coords_jac_15_GREEN_DOWN*(-jac_affine_0_0_GREEN_DOWN*jac_affine_2_1_GREEN_DOWN + jac_affine_0_1_GREEN_DOWN*jac_affine_2_0_GREEN_DOWN); - const real_t jac_affine_inv_2_2_GREEN_DOWN = tmp_coords_jac_15_GREEN_DOWN*(jac_affine_0_0_GREEN_DOWN*jac_affine_1_1_GREEN_DOWN - jac_affine_0_1_GREEN_DOWN*jac_affine_1_0_GREEN_DOWN); for (int64_t ctr_2 = 0; ctr_2 < micro_edges_per_macro_edge; ctr_2 += 1) for (int64_t ctr_1 = 0; ctr_1 < -ctr_2 + micro_edges_per_macro_edge; ctr_1 += 1) for (int64_t ctr_0 = 0; ctr_0 < -ctr_1 - ctr_2 + micro_edges_per_macro_edge - 1; ctr_0 += 1) { - const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*((real_t)(ctr_1 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*((real_t)(ctr_2)); - const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*((real_t)(ctr_1 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*((real_t)(ctr_2)); - const real_t p_affine_0_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*((real_t)(ctr_1 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*((real_t)(ctr_2)); - const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*((real_t)(ctr_1 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*((real_t)(ctr_2)); - const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*((real_t)(ctr_1 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*((real_t)(ctr_2)); - const real_t p_affine_1_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*((real_t)(ctr_1 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*((real_t)(ctr_2)); - const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*((real_t)(ctr_1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*((real_t)(ctr_2 + 1)); - const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*((real_t)(ctr_1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*((real_t)(ctr_2 + 1)); - const real_t p_affine_2_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*((real_t)(ctr_1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*((real_t)(ctr_2 + 1)); - const real_t p_affine_3_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*((real_t)(ctr_1 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*((real_t)(ctr_2 + 1)); - const real_t p_affine_3_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*((real_t)(ctr_1 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*((real_t)(ctr_2 + 1)); - const real_t p_affine_3_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*((real_t)(ctr_1 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*((real_t)(ctr_2 + 1)); + + const int64_t phantom_ctr_0 = ctr_0; + real_t _data_float_loop_ctr_array_dim_0[4]; + _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0; + _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1; + _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2; + _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3; + real_t _data_float_loop_ctr_array_dim_1[4]; + _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1; + real_t _data_float_loop_ctr_array_dim_2[4]; + _data_float_loop_ctr_array_dim_2[0] = (real_t) ctr_2; + _data_float_loop_ctr_array_dim_2[1] = (real_t) ctr_2; + _data_float_loop_ctr_array_dim_2[2] = (real_t) ctr_2; + _data_float_loop_ctr_array_dim_2[3] = (real_t) ctr_2; + + const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]; + const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]; + const real_t p_affine_0_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]; + const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]; + const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]; + const real_t p_affine_1_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]; + const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_2_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_3_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_3_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_3_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0; const real_t rho_dof_0 = _data_rhoVertex[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))]; const real_t rho_dof_1 = _data_rhoVertex[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1]; const real_t rho_dof_2 = _data_rhoVertex[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1]; @@ -4935,28 +4943,8 @@ void P2VectorToP1ElementwiseGradRhoByRhoDotUIcosahedralShellMap::toMatrix_macro_ real_t q_acc_3_29 = 0.0; for (int64_t q = 0; q < 5; q += 1) { - const real_t tmp_qloop_0 = rayVertex_1 - refVertex_1; - const real_t tmp_qloop_1 = -rayVertex_0; - const real_t tmp_qloop_2 = -forVertex_0 - tmp_qloop_1; - const real_t tmp_qloop_3 = rayVertex_2 - thrVertex_2; - const real_t tmp_qloop_4 = tmp_qloop_2*tmp_qloop_3; - const real_t tmp_qloop_5 = rayVertex_2 - refVertex_2; - const real_t tmp_qloop_6 = rayVertex_1 - thrVertex_1; - const real_t tmp_qloop_7 = tmp_qloop_2*tmp_qloop_6; - const real_t tmp_qloop_8 = -rayVertex_1; - const real_t tmp_qloop_9 = -forVertex_1 - tmp_qloop_8; - const real_t tmp_qloop_10 = rayVertex_0 - thrVertex_0; - const real_t tmp_qloop_11 = rayVertex_0 - refVertex_0; - const real_t tmp_qloop_12 = -rayVertex_2; - const real_t tmp_qloop_13 = -forVertex_2 - tmp_qloop_12; - const real_t tmp_qloop_14 = tmp_qloop_3*tmp_qloop_9; - const real_t tmp_qloop_15 = tmp_qloop_10*tmp_qloop_13; - const real_t tmp_qloop_16 = -tmp_qloop_0*tmp_qloop_15 + tmp_qloop_0*tmp_qloop_4 + tmp_qloop_10*tmp_qloop_5*tmp_qloop_9 + tmp_qloop_11*tmp_qloop_13*tmp_qloop_6 - tmp_qloop_11*tmp_qloop_14 - tmp_qloop_5*tmp_qloop_7; - const real_t tmp_qloop_17 = 1.0 / (tmp_qloop_16); const real_t tmp_qloop_18 = p_affine_0_0 + (-p_affine_0_0 + p_affine_1_0)*_data_q_p_0[q] + (-p_affine_0_0 + p_affine_2_0)*_data_q_p_1[q] + (-p_affine_0_0 + p_affine_3_0)*_data_q_p_2[q]; const real_t tmp_qloop_19 = (tmp_qloop_18*tmp_qloop_18); - const real_t tmp_qloop_20 = radRayVertex*tmp_qloop_16; - const real_t tmp_qloop_21 = radRayVertex - radRefVertex; const real_t tmp_qloop_22 = p_affine_0_2 + (-p_affine_0_2 + p_affine_1_2)*_data_q_p_0[q] + (-p_affine_0_2 + p_affine_2_2)*_data_q_p_1[q] + (-p_affine_0_2 + p_affine_3_2)*_data_q_p_2[q]; const real_t tmp_qloop_23 = -tmp_qloop_12 - tmp_qloop_22; const real_t tmp_qloop_24 = -tmp_qloop_1 - tmp_qloop_18; @@ -4967,28 +4955,21 @@ void P2VectorToP1ElementwiseGradRhoByRhoDotUIcosahedralShellMap::toMatrix_macro_ const real_t tmp_qloop_29 = (tmp_qloop_25*tmp_qloop_25); const real_t tmp_qloop_30 = (tmp_qloop_22*tmp_qloop_22); const real_t tmp_qloop_31 = tmp_qloop_19 + tmp_qloop_29 + tmp_qloop_30; - const real_t tmp_qloop_32 = -tmp_qloop_13*tmp_qloop_6 + tmp_qloop_14; - const real_t tmp_qloop_33 = -tmp_qloop_21*tmp_qloop_32; const real_t tmp_qloop_34 = tmp_qloop_18*tmp_qloop_33; const real_t tmp_qloop_35 = tmp_qloop_28 + tmp_qloop_34; const real_t tmp_qloop_36 = tmp_qloop_17*(-tmp_qloop_19*tmp_qloop_28 + tmp_qloop_31*tmp_qloop_35); const real_t tmp_qloop_37 = pow(tmp_qloop_31, -1.5000000000000000); const real_t tmp_qloop_38 = tmp_qloop_37*1.0; - const real_t tmp_qloop_39 = tmp_qloop_10*tmp_qloop_13 - tmp_qloop_4; - const real_t tmp_qloop_40 = tmp_qloop_21*tmp_qloop_39; const real_t tmp_qloop_41 = tmp_qloop_31*tmp_qloop_40; const real_t tmp_qloop_42 = -tmp_qloop_25*tmp_qloop_28 - tmp_qloop_41; const real_t tmp_qloop_43 = tmp_qloop_17*tmp_qloop_38; const real_t tmp_qloop_44 = tmp_qloop_42*tmp_qloop_43; - const real_t tmp_qloop_45 = -tmp_qloop_10*tmp_qloop_9 + tmp_qloop_7; - const real_t tmp_qloop_46 = -tmp_qloop_21*tmp_qloop_45; const real_t tmp_qloop_47 = -tmp_qloop_28; const real_t tmp_qloop_48 = tmp_qloop_22*tmp_qloop_47; const real_t tmp_qloop_49 = tmp_qloop_31*tmp_qloop_46 + tmp_qloop_48; const real_t tmp_qloop_50 = tmp_qloop_43*tmp_qloop_49; const real_t tmp_qloop_51 = tmp_qloop_18*tmp_qloop_47 + tmp_qloop_31*tmp_qloop_33; const real_t tmp_qloop_52 = tmp_qloop_43*tmp_qloop_51; - const real_t tmp_qloop_53 = -tmp_qloop_21*tmp_qloop_39; const real_t tmp_qloop_54 = tmp_qloop_25*tmp_qloop_53 + tmp_qloop_28; const real_t tmp_qloop_55 = tmp_qloop_29*tmp_qloop_47 + tmp_qloop_31*tmp_qloop_54; const real_t tmp_qloop_56 = tmp_qloop_22*tmp_qloop_46; @@ -4996,11 +4977,9 @@ void P2VectorToP1ElementwiseGradRhoByRhoDotUIcosahedralShellMap::toMatrix_macro_ const real_t tmp_qloop_58 = tmp_qloop_30*tmp_qloop_47 + tmp_qloop_31*tmp_qloop_57; const real_t tmp_qloop_66 = pow(tmp_qloop_31, -2.5000000000000000)*3.0; const real_t tmp_qloop_67 = tmp_qloop_36*tmp_qloop_66; - const real_t tmp_qloop_68 = tmp_qloop_21*tmp_qloop_32; const real_t tmp_qloop_69 = tmp_qloop_18*2.0; const real_t tmp_qloop_70 = -tmp_qloop_41; const real_t tmp_qloop_71 = tmp_qloop_35*2.0; - const real_t tmp_qloop_72 = tmp_qloop_21*tmp_qloop_45; const real_t tmp_qloop_73 = -tmp_qloop_31*tmp_qloop_72; const real_t tmp_qloop_74 = -tmp_qloop_20 + tmp_qloop_27; const real_t tmp_qloop_75 = tmp_qloop_18*tmp_qloop_68 + tmp_qloop_34*2.0 + tmp_qloop_74; @@ -5062,14 +5041,7 @@ void P2VectorToP1ElementwiseGradRhoByRhoDotUIcosahedralShellMap::toMatrix_macro_ const real_t tmp_qloop_132 = tmp_qloop_109 - tmp_qloop_110 - tmp_qloop_112 - tmp_qloop_131; const real_t tmp_qloop_133 = tmp_qloop_103*4.0; const real_t tmp_qloop_134 = tmp_qloop_111 - tmp_qloop_114 - tmp_qloop_133; - const real_t tmp_qloop_135 = 1.0 / (rho_dof_0*tmp_qloop_115 + rho_dof_1*tmp_qloop_127 + rho_dof_2*tmp_qloop_128 + rho_dof_3*tmp_qloop_129 + rho_dof_4*tmp_qloop_110 + rho_dof_5*tmp_qloop_113 + rho_dof_6*tmp_qloop_112 + rho_dof_7*tmp_qloop_130 + rho_dof_8*tmp_qloop_132 + rho_dof_9*tmp_qloop_134)*_data_q_w[q]; - const real_t tmp_qloop_136 = tmp_qloop_135*(1.0 - _data_q_p_0[q] - _data_q_p_1[q] - _data_q_p_2[q]); - const real_t tmp_qloop_142 = tmp_qloop_135*_data_q_p_0[q]; - const real_t tmp_qloop_144 = tmp_qloop_135*_data_q_p_2[q]; - const real_t tmp_qloop_147 = tmp_qloop_135*_data_q_p_1[q]; const real_t tmp_qloop_158 = tmp_qloop_105*tmp_qloop_111; - const real_t tmp_qloop_159 = tmp_qloop_135*tmp_qloop_158; - const real_t tmp_qloop_161 = tmp_qloop_107*tmp_qloop_135; const real_t jac_blending_0_0 = tmp_qloop_36*tmp_qloop_38; const real_t jac_blending_0_1 = tmp_qloop_18*tmp_qloop_44; const real_t jac_blending_0_2 = tmp_qloop_18*tmp_qloop_50; @@ -5087,6 +5059,13 @@ void P2VectorToP1ElementwiseGradRhoByRhoDotUIcosahedralShellMap::toMatrix_macro_ const real_t tmp_qloop_64 = jac_blending_0_0*tmp_qloop_59 - jac_blending_0_0*tmp_qloop_61 + jac_blending_0_2*jac_blending_1_0*jac_blending_2_1 - jac_blending_1_0*tmp_qloop_62 + jac_blending_2_0*tmp_qloop_60 - jac_blending_2_0*tmp_qloop_63; const real_t tmp_qloop_65 = 1.0 / (tmp_qloop_64); const real_t abs_det_jac_blending = tmp_qloop_64; + const real_t tmp_qloop_135 = abs_det_jac_affine_GREEN_DOWN*abs_det_jac_blending*1.0 / (rho_dof_0*tmp_qloop_115 + rho_dof_1*tmp_qloop_127 + rho_dof_2*tmp_qloop_128 + rho_dof_3*tmp_qloop_129 + rho_dof_4*tmp_qloop_110 + rho_dof_5*tmp_qloop_113 + rho_dof_6*tmp_qloop_112 + rho_dof_7*tmp_qloop_130 + rho_dof_8*tmp_qloop_132 + rho_dof_9*tmp_qloop_134)*_data_q_w[q]; + const real_t tmp_qloop_136 = tmp_qloop_135*(1.0 - _data_q_p_0[q] - _data_q_p_1[q] - _data_q_p_2[q]); + const real_t tmp_qloop_142 = tmp_qloop_135*_data_q_p_0[q]; + const real_t tmp_qloop_144 = tmp_qloop_135*_data_q_p_2[q]; + const real_t tmp_qloop_147 = tmp_qloop_135*_data_q_p_1[q]; + const real_t tmp_qloop_159 = tmp_qloop_135*tmp_qloop_158; + const real_t tmp_qloop_161 = tmp_qloop_107*tmp_qloop_135; const real_t jac_blending_inv_0_0 = tmp_qloop_65*(tmp_qloop_59 - tmp_qloop_61); const real_t jac_blending_inv_0_1 = tmp_qloop_65*(jac_blending_0_2*jac_blending_2_1 - tmp_qloop_62); const real_t jac_blending_inv_0_2 = tmp_qloop_65*(tmp_qloop_60 - tmp_qloop_63); diff --git a/operators/grad_rho_by_rho_dot_u/noarch/P2VectorToP1ElementwiseGradRhoByRhoDotU_apply_macro_2D.cpp b/operators/grad_rho_by_rho_dot_u/noarch/P2VectorToP1ElementwiseGradRhoByRhoDotU_apply_P2VectorToP1ElementwiseGradRhoByRhoDotU_macro_2D.cpp similarity index 74% rename from operators/grad_rho_by_rho_dot_u/noarch/P2VectorToP1ElementwiseGradRhoByRhoDotU_apply_macro_2D.cpp rename to operators/grad_rho_by_rho_dot_u/noarch/P2VectorToP1ElementwiseGradRhoByRhoDotU_apply_P2VectorToP1ElementwiseGradRhoByRhoDotU_macro_2D.cpp index a161a3a891e6c0ac30f296941dcb4638b4cc48bb..f1f6f39b8279793a6dadb807e4222cf4625b6aaa 100644 --- a/operators/grad_rho_by_rho_dot_u/noarch/P2VectorToP1ElementwiseGradRhoByRhoDotU_apply_macro_2D.cpp +++ b/operators/grad_rho_by_rho_dot_u/noarch/P2VectorToP1ElementwiseGradRhoByRhoDotU_apply_P2VectorToP1ElementwiseGradRhoByRhoDotU_macro_2D.cpp @@ -45,6 +45,10 @@ + + + + @@ -58,7 +62,7 @@ namespace hyteg { namespace operatorgeneration { -void P2VectorToP1ElementwiseGradRhoByRhoDotU::apply_macro_2D( real_t * RESTRICT _data_dst, real_t * RESTRICT _data_rhoEdge, real_t * RESTRICT _data_rhoVertex, real_t * RESTRICT _data_src_edge_0, real_t * RESTRICT _data_src_edge_1, real_t * RESTRICT _data_src_vertex_0, real_t * RESTRICT _data_src_vertex_1, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P2VectorToP1ElementwiseGradRhoByRhoDotU::apply_P2VectorToP1ElementwiseGradRhoByRhoDotU_macro_2D( real_t * RESTRICT _data_dst, real_t * RESTRICT _data_rhoEdge, real_t * RESTRICT _data_rhoVertex, real_t * RESTRICT _data_src_edge_0, real_t * RESTRICT _data_src_edge_1, real_t * RESTRICT _data_src_vertex_0, real_t * RESTRICT _data_src_vertex_1, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t _data_q_w [] = {-0.28125, 0.26041666666666669, 0.26041666666666669, 0.26041666666666669}; @@ -67,33 +71,48 @@ void P2VectorToP1ElementwiseGradRhoByRhoDotU::apply_macro_2D( real_t * RESTRICT const real_t _data_q_p_1 [] = {0.33333333333333331, 0.59999999999999998, 0.20000000000000001, 0.20000000000000001}; + const real_t tmp_coords_jac_0_GRAY = 1.0 / (micro_edges_per_macro_edge_float)*1.0; + const real_t p_affine_const_0_0_GRAY = macro_vertex_coord_id_0comp0; + const real_t p_affine_const_0_1_GRAY = macro_vertex_coord_id_0comp1; + const real_t p_affine_const_1_0_GRAY = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0); + const real_t p_affine_const_1_1_GRAY = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1); + const real_t p_affine_const_2_0_GRAY = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0); + const real_t p_affine_const_2_1_GRAY = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1); + const real_t jac_affine_0_0_GRAY = -p_affine_const_0_0_GRAY + p_affine_const_1_0_GRAY; + const real_t jac_affine_0_1_GRAY = -p_affine_const_0_0_GRAY + p_affine_const_2_0_GRAY; + const real_t jac_affine_1_0_GRAY = -p_affine_const_0_1_GRAY + p_affine_const_1_1_GRAY; + const real_t jac_affine_1_1_GRAY = -p_affine_const_0_1_GRAY + p_affine_const_2_1_GRAY; + const real_t tmp_coords_jac_1_GRAY = jac_affine_0_0_GRAY*jac_affine_1_1_GRAY - jac_affine_0_1_GRAY*jac_affine_1_0_GRAY; + const real_t tmp_coords_jac_2_GRAY = 1.0 / (tmp_coords_jac_1_GRAY); + const real_t jac_affine_inv_0_0_GRAY = jac_affine_1_1_GRAY*tmp_coords_jac_2_GRAY; + const real_t jac_affine_inv_0_1_GRAY = -jac_affine_0_1_GRAY*tmp_coords_jac_2_GRAY; + const real_t jac_affine_inv_1_0_GRAY = -jac_affine_1_0_GRAY*tmp_coords_jac_2_GRAY; + const real_t jac_affine_inv_1_1_GRAY = jac_affine_0_0_GRAY*tmp_coords_jac_2_GRAY; + const real_t abs_det_jac_affine_GRAY = abs(tmp_coords_jac_1_GRAY); { /* FaceType.GRAY */ - const real_t tmp_coords_jac_0_GRAY = 1.0 / (micro_edges_per_macro_edge_float)*1.0; - const real_t p_affine_const_0_0_GRAY = macro_vertex_coord_id_0comp0; - const real_t p_affine_const_0_1_GRAY = macro_vertex_coord_id_0comp1; - const real_t p_affine_const_1_0_GRAY = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0); - const real_t p_affine_const_1_1_GRAY = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1); - const real_t p_affine_const_2_0_GRAY = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0); - const real_t p_affine_const_2_1_GRAY = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1); - const real_t jac_affine_0_0_GRAY = -p_affine_const_0_0_GRAY + p_affine_const_1_0_GRAY; - const real_t jac_affine_0_1_GRAY = -p_affine_const_0_0_GRAY + p_affine_const_2_0_GRAY; - const real_t jac_affine_1_0_GRAY = -p_affine_const_0_1_GRAY + p_affine_const_1_1_GRAY; - const real_t jac_affine_1_1_GRAY = -p_affine_const_0_1_GRAY + p_affine_const_2_1_GRAY; - const real_t tmp_coords_jac_1_GRAY = 1.0 / (jac_affine_0_0_GRAY*jac_affine_1_1_GRAY - jac_affine_0_1_GRAY*jac_affine_1_0_GRAY); - const real_t jac_affine_inv_0_0_GRAY = jac_affine_1_1_GRAY*tmp_coords_jac_1_GRAY; - const real_t jac_affine_inv_0_1_GRAY = -jac_affine_0_1_GRAY*tmp_coords_jac_1_GRAY; - const real_t jac_affine_inv_1_0_GRAY = -jac_affine_1_0_GRAY*tmp_coords_jac_1_GRAY; - const real_t jac_affine_inv_1_1_GRAY = jac_affine_0_0_GRAY*tmp_coords_jac_1_GRAY; for (int64_t ctr_1 = 0; ctr_1 < micro_edges_per_macro_edge; ctr_1 += 1) for (int64_t ctr_0 = 0; ctr_0 < -ctr_1 + micro_edges_per_macro_edge; ctr_0 += 1) { - const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*((real_t)(ctr_1)); - const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*((real_t)(ctr_1)); - const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*((real_t)(ctr_1)); - const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*((real_t)(ctr_1)); - const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*((real_t)(ctr_1 + 1)); - const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*((real_t)(ctr_1 + 1)); + + const int64_t phantom_ctr_0 = ctr_0; + real_t _data_float_loop_ctr_array_dim_0[4]; + _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0; + _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1; + _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2; + _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3; + real_t _data_float_loop_ctr_array_dim_1[4]; + _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1; + + const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]; + const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]; + const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]; + const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]; + const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0; const real_t src_dof_0 = _data_src_vertex_0[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))]; const real_t src_dof_1 = _data_src_vertex_0[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]; const real_t src_dof_2 = _data_src_vertex_0[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]; @@ -167,7 +186,7 @@ void P2VectorToP1ElementwiseGradRhoByRhoDotU::apply_macro_2D( real_t * RESTRICT const real_t tmp_qloop_14 = -tmp_qloop_1 + tmp_qloop_4*-4.0 + tmp_qloop_7; const real_t tmp_qloop_15 = tmp_qloop_2*4.0; const real_t tmp_qloop_16 = tmp_qloop_0 - tmp_qloop_1 - tmp_qloop_15; - const real_t tmp_qloop_17 = 1.0 / (rho_dof_0*tmp_qloop_6 + rho_dof_1*tmp_qloop_12 + rho_dof_2*tmp_qloop_13 + rho_dof_3*tmp_qloop_1 + rho_dof_4*tmp_qloop_14 + rho_dof_5*tmp_qloop_16)*_data_q_w[q]; + const real_t tmp_qloop_17 = abs_det_jac_affine_GRAY*1.0 / (rho_dof_0*tmp_qloop_6 + rho_dof_1*tmp_qloop_12 + rho_dof_2*tmp_qloop_13 + rho_dof_3*tmp_qloop_1 + rho_dof_4*tmp_qloop_14 + rho_dof_5*tmp_qloop_16)*_data_q_w[q]; const real_t tmp_qloop_18 = tmp_qloop_17*(1.0 - _data_q_p_0[q] - _data_q_p_1[q]); const real_t tmp_qloop_19 = tmp_qloop_11*tmp_qloop_18; const real_t tmp_qloop_20 = jac_affine_inv_0_1_GRAY*tmp_qloop_9 + jac_affine_inv_1_1_GRAY*tmp_qloop_10; @@ -260,37 +279,52 @@ void P2VectorToP1ElementwiseGradRhoByRhoDotU::apply_macro_2D( real_t * RESTRICT _data_dst[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))] = elMatVec_2 + _data_dst[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]; } } + const real_t tmp_coords_jac_0_BLUE = 1.0 / (micro_edges_per_macro_edge_float)*1.0; + const real_t tmp_coords_jac_1_BLUE = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0); + const real_t tmp_coords_jac_2_BLUE = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1); + const real_t tmp_coords_jac_3_BLUE = tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0); + const real_t tmp_coords_jac_4_BLUE = tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1); + const real_t p_affine_const_0_0_BLUE = tmp_coords_jac_1_BLUE; + const real_t p_affine_const_0_1_BLUE = tmp_coords_jac_2_BLUE; + const real_t p_affine_const_1_0_BLUE = macro_vertex_coord_id_0comp0 + tmp_coords_jac_3_BLUE; + const real_t p_affine_const_1_1_BLUE = macro_vertex_coord_id_0comp1 + tmp_coords_jac_4_BLUE; + const real_t p_affine_const_2_0_BLUE = tmp_coords_jac_1_BLUE + tmp_coords_jac_3_BLUE; + const real_t p_affine_const_2_1_BLUE = tmp_coords_jac_2_BLUE + tmp_coords_jac_4_BLUE; + const real_t jac_affine_0_0_BLUE = -p_affine_const_0_0_BLUE + p_affine_const_1_0_BLUE; + const real_t jac_affine_0_1_BLUE = -p_affine_const_0_0_BLUE + p_affine_const_2_0_BLUE; + const real_t jac_affine_1_0_BLUE = -p_affine_const_0_1_BLUE + p_affine_const_1_1_BLUE; + const real_t jac_affine_1_1_BLUE = -p_affine_const_0_1_BLUE + p_affine_const_2_1_BLUE; + const real_t tmp_coords_jac_5_BLUE = jac_affine_0_0_BLUE*jac_affine_1_1_BLUE - jac_affine_0_1_BLUE*jac_affine_1_0_BLUE; + const real_t tmp_coords_jac_6_BLUE = 1.0 / (tmp_coords_jac_5_BLUE); + const real_t jac_affine_inv_0_0_BLUE = jac_affine_1_1_BLUE*tmp_coords_jac_6_BLUE; + const real_t jac_affine_inv_0_1_BLUE = -jac_affine_0_1_BLUE*tmp_coords_jac_6_BLUE; + const real_t jac_affine_inv_1_0_BLUE = -jac_affine_1_0_BLUE*tmp_coords_jac_6_BLUE; + const real_t jac_affine_inv_1_1_BLUE = jac_affine_0_0_BLUE*tmp_coords_jac_6_BLUE; + const real_t abs_det_jac_affine_BLUE = abs(tmp_coords_jac_5_BLUE); { /* FaceType.BLUE */ - const real_t tmp_coords_jac_0_BLUE = 1.0 / (micro_edges_per_macro_edge_float)*1.0; - const real_t tmp_coords_jac_1_BLUE = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0); - const real_t tmp_coords_jac_2_BLUE = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1); - const real_t tmp_coords_jac_3_BLUE = tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0); - const real_t tmp_coords_jac_4_BLUE = tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1); - const real_t p_affine_const_0_0_BLUE = tmp_coords_jac_1_BLUE; - const real_t p_affine_const_0_1_BLUE = tmp_coords_jac_2_BLUE; - const real_t p_affine_const_1_0_BLUE = macro_vertex_coord_id_0comp0 + tmp_coords_jac_3_BLUE; - const real_t p_affine_const_1_1_BLUE = macro_vertex_coord_id_0comp1 + tmp_coords_jac_4_BLUE; - const real_t p_affine_const_2_0_BLUE = tmp_coords_jac_1_BLUE + tmp_coords_jac_3_BLUE; - const real_t p_affine_const_2_1_BLUE = tmp_coords_jac_2_BLUE + tmp_coords_jac_4_BLUE; - const real_t jac_affine_0_0_BLUE = -p_affine_const_0_0_BLUE + p_affine_const_1_0_BLUE; - const real_t jac_affine_0_1_BLUE = -p_affine_const_0_0_BLUE + p_affine_const_2_0_BLUE; - const real_t jac_affine_1_0_BLUE = -p_affine_const_0_1_BLUE + p_affine_const_1_1_BLUE; - const real_t jac_affine_1_1_BLUE = -p_affine_const_0_1_BLUE + p_affine_const_2_1_BLUE; - const real_t tmp_coords_jac_5_BLUE = 1.0 / (jac_affine_0_0_BLUE*jac_affine_1_1_BLUE - jac_affine_0_1_BLUE*jac_affine_1_0_BLUE); - const real_t jac_affine_inv_0_0_BLUE = jac_affine_1_1_BLUE*tmp_coords_jac_5_BLUE; - const real_t jac_affine_inv_0_1_BLUE = -jac_affine_0_1_BLUE*tmp_coords_jac_5_BLUE; - const real_t jac_affine_inv_1_0_BLUE = -jac_affine_1_0_BLUE*tmp_coords_jac_5_BLUE; - const real_t jac_affine_inv_1_1_BLUE = jac_affine_0_0_BLUE*tmp_coords_jac_5_BLUE; for (int64_t ctr_1 = 0; ctr_1 < micro_edges_per_macro_edge; ctr_1 += 1) for (int64_t ctr_0 = 0; ctr_0 < -ctr_1 + micro_edges_per_macro_edge - 1; ctr_0 += 1) { - const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*((real_t)(ctr_1)); - const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*((real_t)(ctr_1)); - const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*((real_t)(ctr_1 + 1)); - const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*((real_t)(ctr_1 + 1)); - const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*((real_t)(ctr_1 + 1)); - const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*((real_t)(ctr_1 + 1)); + + const int64_t phantom_ctr_0 = ctr_0; + real_t _data_float_loop_ctr_array_dim_0[4]; + _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0; + _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1; + _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2; + _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3; + real_t _data_float_loop_ctr_array_dim_1[4]; + _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1; + + const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]; + const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]; + const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0; const real_t src_dof_0 = _data_src_vertex_0[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]; const real_t src_dof_1 = _data_src_vertex_0[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]; const real_t src_dof_2 = _data_src_vertex_0[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1]; @@ -364,7 +398,7 @@ void P2VectorToP1ElementwiseGradRhoByRhoDotU::apply_macro_2D( real_t * RESTRICT const real_t tmp_qloop_14 = -tmp_qloop_1 + tmp_qloop_4*-4.0 + tmp_qloop_7; const real_t tmp_qloop_15 = tmp_qloop_2*4.0; const real_t tmp_qloop_16 = tmp_qloop_0 - tmp_qloop_1 - tmp_qloop_15; - const real_t tmp_qloop_17 = 1.0 / (rho_dof_0*tmp_qloop_6 + rho_dof_1*tmp_qloop_12 + rho_dof_2*tmp_qloop_13 + rho_dof_3*tmp_qloop_1 + rho_dof_4*tmp_qloop_14 + rho_dof_5*tmp_qloop_16)*_data_q_w[q]; + const real_t tmp_qloop_17 = abs_det_jac_affine_BLUE*1.0 / (rho_dof_0*tmp_qloop_6 + rho_dof_1*tmp_qloop_12 + rho_dof_2*tmp_qloop_13 + rho_dof_3*tmp_qloop_1 + rho_dof_4*tmp_qloop_14 + rho_dof_5*tmp_qloop_16)*_data_q_w[q]; const real_t tmp_qloop_18 = tmp_qloop_17*(1.0 - _data_q_p_0[q] - _data_q_p_1[q]); const real_t tmp_qloop_19 = tmp_qloop_11*tmp_qloop_18; const real_t tmp_qloop_20 = jac_affine_inv_0_1_BLUE*tmp_qloop_9 + jac_affine_inv_1_1_BLUE*tmp_qloop_10; diff --git a/operators/grad_rho_by_rho_dot_u/noarch/P2VectorToP1ElementwiseGradRhoByRhoDotU_apply_macro_3D.cpp b/operators/grad_rho_by_rho_dot_u/noarch/P2VectorToP1ElementwiseGradRhoByRhoDotU_apply_P2VectorToP1ElementwiseGradRhoByRhoDotU_macro_3D.cpp similarity index 78% rename from operators/grad_rho_by_rho_dot_u/noarch/P2VectorToP1ElementwiseGradRhoByRhoDotU_apply_macro_3D.cpp rename to operators/grad_rho_by_rho_dot_u/noarch/P2VectorToP1ElementwiseGradRhoByRhoDotU_apply_P2VectorToP1ElementwiseGradRhoByRhoDotU_macro_3D.cpp index 45073514b3c4518e98077e3e54bc3fea9aaa8f0b..f8bf62a37d732dbd46704fd2933f668391cd44d9 100644 --- a/operators/grad_rho_by_rho_dot_u/noarch/P2VectorToP1ElementwiseGradRhoByRhoDotU_apply_macro_3D.cpp +++ b/operators/grad_rho_by_rho_dot_u/noarch/P2VectorToP1ElementwiseGradRhoByRhoDotU_apply_P2VectorToP1ElementwiseGradRhoByRhoDotU_macro_3D.cpp @@ -45,6 +45,10 @@ + + + + @@ -58,7 +62,7 @@ namespace hyteg { namespace operatorgeneration { -void P2VectorToP1ElementwiseGradRhoByRhoDotU::apply_macro_3D( real_t * RESTRICT _data_dst, real_t * RESTRICT _data_rhoEdge, real_t * RESTRICT _data_rhoVertex, real_t * RESTRICT _data_src_edge_0, real_t * RESTRICT _data_src_edge_1, real_t * RESTRICT _data_src_edge_2, real_t * RESTRICT _data_src_vertex_0, real_t * RESTRICT _data_src_vertex_1, real_t * RESTRICT _data_src_vertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P2VectorToP1ElementwiseGradRhoByRhoDotU::apply_P2VectorToP1ElementwiseGradRhoByRhoDotU_macro_3D( real_t * RESTRICT _data_dst, real_t * RESTRICT _data_rhoEdge, real_t * RESTRICT _data_rhoVertex, real_t * RESTRICT _data_src_edge_0, real_t * RESTRICT _data_src_edge_1, real_t * RESTRICT _data_src_edge_2, real_t * RESTRICT _data_src_vertex_0, real_t * RESTRICT _data_src_vertex_1, real_t * RESTRICT _data_src_vertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t _data_q_w [] = {-0.1333333333333333, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983}; @@ -69,61 +73,81 @@ void P2VectorToP1ElementwiseGradRhoByRhoDotU::apply_macro_3D( real_t * RESTRICT const real_t _data_q_p_2 [] = {0.25, 0.5, 0.16666666666666666, 0.16666666666666666, 0.16666666666666666}; + const real_t tmp_coords_jac_0_WHITE_UP = 1.0 / (micro_edges_per_macro_edge_float)*1.0; + const real_t p_affine_const_0_0_WHITE_UP = macro_vertex_coord_id_0comp0; + const real_t p_affine_const_0_1_WHITE_UP = macro_vertex_coord_id_0comp1; + const real_t p_affine_const_0_2_WHITE_UP = macro_vertex_coord_id_0comp2; + const real_t p_affine_const_1_0_WHITE_UP = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_WHITE_UP*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0); + const real_t p_affine_const_1_1_WHITE_UP = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_WHITE_UP*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1); + const real_t p_affine_const_1_2_WHITE_UP = macro_vertex_coord_id_0comp2 + tmp_coords_jac_0_WHITE_UP*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2); + const real_t p_affine_const_2_0_WHITE_UP = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_WHITE_UP*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0); + const real_t p_affine_const_2_1_WHITE_UP = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_WHITE_UP*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1); + const real_t p_affine_const_2_2_WHITE_UP = macro_vertex_coord_id_0comp2 + tmp_coords_jac_0_WHITE_UP*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2); + const real_t p_affine_const_3_0_WHITE_UP = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_WHITE_UP*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0); + const real_t p_affine_const_3_1_WHITE_UP = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_WHITE_UP*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1); + const real_t p_affine_const_3_2_WHITE_UP = macro_vertex_coord_id_0comp2 + tmp_coords_jac_0_WHITE_UP*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2); + const real_t jac_affine_0_0_WHITE_UP = -p_affine_const_0_0_WHITE_UP + p_affine_const_1_0_WHITE_UP; + const real_t jac_affine_0_1_WHITE_UP = -p_affine_const_0_0_WHITE_UP + p_affine_const_2_0_WHITE_UP; + const real_t jac_affine_0_2_WHITE_UP = -p_affine_const_0_0_WHITE_UP + p_affine_const_3_0_WHITE_UP; + const real_t jac_affine_1_0_WHITE_UP = -p_affine_const_0_1_WHITE_UP + p_affine_const_1_1_WHITE_UP; + const real_t jac_affine_1_1_WHITE_UP = -p_affine_const_0_1_WHITE_UP + p_affine_const_2_1_WHITE_UP; + const real_t tmp_coords_jac_5_WHITE_UP = jac_affine_0_2_WHITE_UP*jac_affine_1_1_WHITE_UP; + const real_t jac_affine_1_2_WHITE_UP = -p_affine_const_0_1_WHITE_UP + p_affine_const_3_1_WHITE_UP; + const real_t tmp_coords_jac_3_WHITE_UP = jac_affine_0_1_WHITE_UP*jac_affine_1_2_WHITE_UP; + const real_t jac_affine_2_0_WHITE_UP = -p_affine_const_0_2_WHITE_UP + p_affine_const_1_2_WHITE_UP; + const real_t jac_affine_2_1_WHITE_UP = -p_affine_const_0_2_WHITE_UP + p_affine_const_2_2_WHITE_UP; + const real_t tmp_coords_jac_2_WHITE_UP = jac_affine_1_2_WHITE_UP*jac_affine_2_1_WHITE_UP; + const real_t jac_affine_2_2_WHITE_UP = -p_affine_const_0_2_WHITE_UP + p_affine_const_3_2_WHITE_UP; + const real_t tmp_coords_jac_1_WHITE_UP = jac_affine_1_1_WHITE_UP*jac_affine_2_2_WHITE_UP; + const real_t tmp_coords_jac_4_WHITE_UP = jac_affine_0_1_WHITE_UP*jac_affine_2_2_WHITE_UP; + const real_t tmp_coords_jac_6_WHITE_UP = jac_affine_0_0_WHITE_UP*tmp_coords_jac_1_WHITE_UP - jac_affine_0_0_WHITE_UP*tmp_coords_jac_2_WHITE_UP + jac_affine_0_2_WHITE_UP*jac_affine_1_0_WHITE_UP*jac_affine_2_1_WHITE_UP - jac_affine_1_0_WHITE_UP*tmp_coords_jac_4_WHITE_UP + jac_affine_2_0_WHITE_UP*tmp_coords_jac_3_WHITE_UP - jac_affine_2_0_WHITE_UP*tmp_coords_jac_5_WHITE_UP; + const real_t tmp_coords_jac_7_WHITE_UP = 1.0 / (tmp_coords_jac_6_WHITE_UP); + const real_t jac_affine_inv_0_0_WHITE_UP = tmp_coords_jac_7_WHITE_UP*(tmp_coords_jac_1_WHITE_UP - tmp_coords_jac_2_WHITE_UP); + const real_t jac_affine_inv_0_1_WHITE_UP = tmp_coords_jac_7_WHITE_UP*(jac_affine_0_2_WHITE_UP*jac_affine_2_1_WHITE_UP - tmp_coords_jac_4_WHITE_UP); + const real_t jac_affine_inv_0_2_WHITE_UP = tmp_coords_jac_7_WHITE_UP*(tmp_coords_jac_3_WHITE_UP - tmp_coords_jac_5_WHITE_UP); + const real_t jac_affine_inv_1_0_WHITE_UP = tmp_coords_jac_7_WHITE_UP*(-jac_affine_1_0_WHITE_UP*jac_affine_2_2_WHITE_UP + jac_affine_1_2_WHITE_UP*jac_affine_2_0_WHITE_UP); + const real_t jac_affine_inv_1_1_WHITE_UP = tmp_coords_jac_7_WHITE_UP*(jac_affine_0_0_WHITE_UP*jac_affine_2_2_WHITE_UP - jac_affine_0_2_WHITE_UP*jac_affine_2_0_WHITE_UP); + const real_t jac_affine_inv_1_2_WHITE_UP = tmp_coords_jac_7_WHITE_UP*(-jac_affine_0_0_WHITE_UP*jac_affine_1_2_WHITE_UP + jac_affine_0_2_WHITE_UP*jac_affine_1_0_WHITE_UP); + const real_t jac_affine_inv_2_0_WHITE_UP = tmp_coords_jac_7_WHITE_UP*(jac_affine_1_0_WHITE_UP*jac_affine_2_1_WHITE_UP - jac_affine_1_1_WHITE_UP*jac_affine_2_0_WHITE_UP); + const real_t jac_affine_inv_2_1_WHITE_UP = tmp_coords_jac_7_WHITE_UP*(-jac_affine_0_0_WHITE_UP*jac_affine_2_1_WHITE_UP + jac_affine_0_1_WHITE_UP*jac_affine_2_0_WHITE_UP); + const real_t jac_affine_inv_2_2_WHITE_UP = tmp_coords_jac_7_WHITE_UP*(jac_affine_0_0_WHITE_UP*jac_affine_1_1_WHITE_UP - jac_affine_0_1_WHITE_UP*jac_affine_1_0_WHITE_UP); + const real_t abs_det_jac_affine_WHITE_UP = abs(tmp_coords_jac_6_WHITE_UP); { /* CellType.WHITE_UP */ - const real_t tmp_coords_jac_0_WHITE_UP = 1.0 / (micro_edges_per_macro_edge_float)*1.0; - const real_t p_affine_const_0_0_WHITE_UP = macro_vertex_coord_id_0comp0; - const real_t p_affine_const_0_1_WHITE_UP = macro_vertex_coord_id_0comp1; - const real_t p_affine_const_0_2_WHITE_UP = macro_vertex_coord_id_0comp2; - const real_t p_affine_const_1_0_WHITE_UP = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_WHITE_UP*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0); - const real_t p_affine_const_1_1_WHITE_UP = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_WHITE_UP*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1); - const real_t p_affine_const_1_2_WHITE_UP = macro_vertex_coord_id_0comp2 + tmp_coords_jac_0_WHITE_UP*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2); - const real_t p_affine_const_2_0_WHITE_UP = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_WHITE_UP*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0); - const real_t p_affine_const_2_1_WHITE_UP = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_WHITE_UP*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1); - const real_t p_affine_const_2_2_WHITE_UP = macro_vertex_coord_id_0comp2 + tmp_coords_jac_0_WHITE_UP*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2); - const real_t p_affine_const_3_0_WHITE_UP = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_WHITE_UP*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0); - const real_t p_affine_const_3_1_WHITE_UP = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_WHITE_UP*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1); - const real_t p_affine_const_3_2_WHITE_UP = macro_vertex_coord_id_0comp2 + tmp_coords_jac_0_WHITE_UP*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2); - const real_t jac_affine_0_0_WHITE_UP = -p_affine_const_0_0_WHITE_UP + p_affine_const_1_0_WHITE_UP; - const real_t jac_affine_0_1_WHITE_UP = -p_affine_const_0_0_WHITE_UP + p_affine_const_2_0_WHITE_UP; - const real_t jac_affine_0_2_WHITE_UP = -p_affine_const_0_0_WHITE_UP + p_affine_const_3_0_WHITE_UP; - const real_t jac_affine_1_0_WHITE_UP = -p_affine_const_0_1_WHITE_UP + p_affine_const_1_1_WHITE_UP; - const real_t jac_affine_1_1_WHITE_UP = -p_affine_const_0_1_WHITE_UP + p_affine_const_2_1_WHITE_UP; - const real_t tmp_coords_jac_5_WHITE_UP = jac_affine_0_2_WHITE_UP*jac_affine_1_1_WHITE_UP; - const real_t jac_affine_1_2_WHITE_UP = -p_affine_const_0_1_WHITE_UP + p_affine_const_3_1_WHITE_UP; - const real_t tmp_coords_jac_3_WHITE_UP = jac_affine_0_1_WHITE_UP*jac_affine_1_2_WHITE_UP; - const real_t jac_affine_2_0_WHITE_UP = -p_affine_const_0_2_WHITE_UP + p_affine_const_1_2_WHITE_UP; - const real_t jac_affine_2_1_WHITE_UP = -p_affine_const_0_2_WHITE_UP + p_affine_const_2_2_WHITE_UP; - const real_t tmp_coords_jac_2_WHITE_UP = jac_affine_1_2_WHITE_UP*jac_affine_2_1_WHITE_UP; - const real_t jac_affine_2_2_WHITE_UP = -p_affine_const_0_2_WHITE_UP + p_affine_const_3_2_WHITE_UP; - const real_t tmp_coords_jac_1_WHITE_UP = jac_affine_1_1_WHITE_UP*jac_affine_2_2_WHITE_UP; - const real_t tmp_coords_jac_4_WHITE_UP = jac_affine_0_1_WHITE_UP*jac_affine_2_2_WHITE_UP; - const real_t tmp_coords_jac_6_WHITE_UP = 1.0 / (jac_affine_0_0_WHITE_UP*tmp_coords_jac_1_WHITE_UP - jac_affine_0_0_WHITE_UP*tmp_coords_jac_2_WHITE_UP + jac_affine_0_2_WHITE_UP*jac_affine_1_0_WHITE_UP*jac_affine_2_1_WHITE_UP - jac_affine_1_0_WHITE_UP*tmp_coords_jac_4_WHITE_UP + jac_affine_2_0_WHITE_UP*tmp_coords_jac_3_WHITE_UP - jac_affine_2_0_WHITE_UP*tmp_coords_jac_5_WHITE_UP); - const real_t jac_affine_inv_0_0_WHITE_UP = tmp_coords_jac_6_WHITE_UP*(tmp_coords_jac_1_WHITE_UP - tmp_coords_jac_2_WHITE_UP); - const real_t jac_affine_inv_0_1_WHITE_UP = tmp_coords_jac_6_WHITE_UP*(jac_affine_0_2_WHITE_UP*jac_affine_2_1_WHITE_UP - tmp_coords_jac_4_WHITE_UP); - const real_t jac_affine_inv_0_2_WHITE_UP = tmp_coords_jac_6_WHITE_UP*(tmp_coords_jac_3_WHITE_UP - tmp_coords_jac_5_WHITE_UP); - const real_t jac_affine_inv_1_0_WHITE_UP = tmp_coords_jac_6_WHITE_UP*(-jac_affine_1_0_WHITE_UP*jac_affine_2_2_WHITE_UP + jac_affine_1_2_WHITE_UP*jac_affine_2_0_WHITE_UP); - const real_t jac_affine_inv_1_1_WHITE_UP = tmp_coords_jac_6_WHITE_UP*(jac_affine_0_0_WHITE_UP*jac_affine_2_2_WHITE_UP - jac_affine_0_2_WHITE_UP*jac_affine_2_0_WHITE_UP); - const real_t jac_affine_inv_1_2_WHITE_UP = tmp_coords_jac_6_WHITE_UP*(-jac_affine_0_0_WHITE_UP*jac_affine_1_2_WHITE_UP + jac_affine_0_2_WHITE_UP*jac_affine_1_0_WHITE_UP); - const real_t jac_affine_inv_2_0_WHITE_UP = tmp_coords_jac_6_WHITE_UP*(jac_affine_1_0_WHITE_UP*jac_affine_2_1_WHITE_UP - jac_affine_1_1_WHITE_UP*jac_affine_2_0_WHITE_UP); - const real_t jac_affine_inv_2_1_WHITE_UP = tmp_coords_jac_6_WHITE_UP*(-jac_affine_0_0_WHITE_UP*jac_affine_2_1_WHITE_UP + jac_affine_0_1_WHITE_UP*jac_affine_2_0_WHITE_UP); - const real_t jac_affine_inv_2_2_WHITE_UP = tmp_coords_jac_6_WHITE_UP*(jac_affine_0_0_WHITE_UP*jac_affine_1_1_WHITE_UP - jac_affine_0_1_WHITE_UP*jac_affine_1_0_WHITE_UP); for (int64_t ctr_2 = 0; ctr_2 < micro_edges_per_macro_edge; ctr_2 += 1) for (int64_t ctr_1 = 0; ctr_1 < -ctr_2 + micro_edges_per_macro_edge; ctr_1 += 1) for (int64_t ctr_0 = 0; ctr_0 < -ctr_1 - ctr_2 + micro_edges_per_macro_edge; ctr_0 += 1) { - const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*((real_t)(ctr_1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*((real_t)(ctr_2)); - const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*((real_t)(ctr_1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*((real_t)(ctr_2)); - const real_t p_affine_0_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*((real_t)(ctr_1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*((real_t)(ctr_2)); - const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*((real_t)(ctr_1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*((real_t)(ctr_2)); - const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*((real_t)(ctr_1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*((real_t)(ctr_2)); - const real_t p_affine_1_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*((real_t)(ctr_1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*((real_t)(ctr_2)); - const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*((real_t)(ctr_1 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*((real_t)(ctr_2)); - const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*((real_t)(ctr_1 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*((real_t)(ctr_2)); - const real_t p_affine_2_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*((real_t)(ctr_1 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*((real_t)(ctr_2)); - const real_t p_affine_3_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*((real_t)(ctr_1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*((real_t)(ctr_2 + 1)); - const real_t p_affine_3_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*((real_t)(ctr_1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*((real_t)(ctr_2 + 1)); - const real_t p_affine_3_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*((real_t)(ctr_1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*((real_t)(ctr_2 + 1)); + + const int64_t phantom_ctr_0 = ctr_0; + real_t _data_float_loop_ctr_array_dim_0[4]; + _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0; + _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1; + _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2; + _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3; + real_t _data_float_loop_ctr_array_dim_1[4]; + _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1; + real_t _data_float_loop_ctr_array_dim_2[4]; + _data_float_loop_ctr_array_dim_2[0] = (real_t) ctr_2; + _data_float_loop_ctr_array_dim_2[1] = (real_t) ctr_2; + _data_float_loop_ctr_array_dim_2[2] = (real_t) ctr_2; + _data_float_loop_ctr_array_dim_2[3] = (real_t) ctr_2; + + const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]; + const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]; + const real_t p_affine_0_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]; + const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]; + const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]; + const real_t p_affine_1_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]; + const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]; + const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]; + const real_t p_affine_2_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]; + const real_t p_affine_3_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_3_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_3_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0; const real_t src_dof_0 = _data_src_vertex_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))]; const real_t src_dof_1 = _data_src_vertex_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1]; const real_t src_dof_2 = _data_src_vertex_0[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))]; @@ -318,7 +342,7 @@ void P2VectorToP1ElementwiseGradRhoByRhoDotU::apply_macro_3D( real_t * RESTRICT const real_t tmp_qloop_29 = -tmp_qloop_28 + tmp_qloop_6 - tmp_qloop_7 - tmp_qloop_9; const real_t tmp_qloop_30 = tmp_qloop_0*4.0; const real_t tmp_qloop_31 = -tmp_qloop_11 - tmp_qloop_30 + tmp_qloop_8; - const real_t tmp_qloop_32 = 1.0 / (rho_dof_0*tmp_qloop_12 + rho_dof_1*tmp_qloop_24 + rho_dof_2*tmp_qloop_25 + rho_dof_3*tmp_qloop_26 + rho_dof_4*tmp_qloop_7 + rho_dof_5*tmp_qloop_10 + rho_dof_6*tmp_qloop_9 + rho_dof_7*tmp_qloop_27 + rho_dof_8*tmp_qloop_29 + rho_dof_9*tmp_qloop_31)*_data_q_w[q]; + const real_t tmp_qloop_32 = abs_det_jac_affine_WHITE_UP*1.0 / (rho_dof_0*tmp_qloop_12 + rho_dof_1*tmp_qloop_24 + rho_dof_2*tmp_qloop_25 + rho_dof_3*tmp_qloop_26 + rho_dof_4*tmp_qloop_7 + rho_dof_5*tmp_qloop_10 + rho_dof_6*tmp_qloop_9 + rho_dof_7*tmp_qloop_27 + rho_dof_8*tmp_qloop_29 + rho_dof_9*tmp_qloop_31)*_data_q_w[q]; const real_t tmp_qloop_33 = tmp_qloop_32*(1.0 - _data_q_p_0[q] - _data_q_p_1[q] - _data_q_p_2[q]); const real_t tmp_qloop_34 = tmp_qloop_23*tmp_qloop_33; const real_t tmp_qloop_35 = jac_affine_inv_0_1_WHITE_UP*tmp_qloop_18 + jac_affine_inv_1_1_WHITE_UP*tmp_qloop_21 + jac_affine_inv_2_1_WHITE_UP*tmp_qloop_22; @@ -598,73 +622,93 @@ void P2VectorToP1ElementwiseGradRhoByRhoDotU::apply_macro_3D( real_t * RESTRICT _data_dst[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))] = elMatVec_3 + _data_dst[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))]; } } + const real_t tmp_coords_jac_0_WHITE_DOWN = 1.0 / (micro_edges_per_macro_edge_float)*1.0; + const real_t tmp_coords_jac_1_WHITE_DOWN = tmp_coords_jac_0_WHITE_DOWN*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0); + const real_t tmp_coords_jac_2_WHITE_DOWN = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_WHITE_DOWN*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0); + const real_t tmp_coords_jac_3_WHITE_DOWN = tmp_coords_jac_1_WHITE_DOWN + tmp_coords_jac_2_WHITE_DOWN; + const real_t tmp_coords_jac_4_WHITE_DOWN = tmp_coords_jac_0_WHITE_DOWN*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1); + const real_t tmp_coords_jac_5_WHITE_DOWN = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_WHITE_DOWN*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1); + const real_t tmp_coords_jac_6_WHITE_DOWN = tmp_coords_jac_4_WHITE_DOWN + tmp_coords_jac_5_WHITE_DOWN; + const real_t tmp_coords_jac_7_WHITE_DOWN = tmp_coords_jac_0_WHITE_DOWN*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2); + const real_t tmp_coords_jac_8_WHITE_DOWN = macro_vertex_coord_id_0comp2 + tmp_coords_jac_0_WHITE_DOWN*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2); + const real_t tmp_coords_jac_9_WHITE_DOWN = tmp_coords_jac_7_WHITE_DOWN + tmp_coords_jac_8_WHITE_DOWN; + const real_t tmp_coords_jac_10_WHITE_DOWN = tmp_coords_jac_0_WHITE_DOWN*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0); + const real_t tmp_coords_jac_11_WHITE_DOWN = tmp_coords_jac_0_WHITE_DOWN*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1); + const real_t tmp_coords_jac_12_WHITE_DOWN = tmp_coords_jac_0_WHITE_DOWN*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2); + const real_t p_affine_const_0_0_WHITE_DOWN = tmp_coords_jac_3_WHITE_DOWN; + const real_t p_affine_const_0_1_WHITE_DOWN = tmp_coords_jac_6_WHITE_DOWN; + const real_t p_affine_const_0_2_WHITE_DOWN = tmp_coords_jac_9_WHITE_DOWN; + const real_t p_affine_const_1_0_WHITE_DOWN = tmp_coords_jac_10_WHITE_DOWN + tmp_coords_jac_2_WHITE_DOWN; + const real_t p_affine_const_1_1_WHITE_DOWN = tmp_coords_jac_11_WHITE_DOWN + tmp_coords_jac_5_WHITE_DOWN; + const real_t p_affine_const_1_2_WHITE_DOWN = tmp_coords_jac_12_WHITE_DOWN + tmp_coords_jac_8_WHITE_DOWN; + const real_t p_affine_const_2_0_WHITE_DOWN = macro_vertex_coord_id_0comp0 + tmp_coords_jac_10_WHITE_DOWN + tmp_coords_jac_1_WHITE_DOWN; + const real_t p_affine_const_2_1_WHITE_DOWN = macro_vertex_coord_id_0comp1 + tmp_coords_jac_11_WHITE_DOWN + tmp_coords_jac_4_WHITE_DOWN; + const real_t p_affine_const_2_2_WHITE_DOWN = macro_vertex_coord_id_0comp2 + tmp_coords_jac_12_WHITE_DOWN + tmp_coords_jac_7_WHITE_DOWN; + const real_t p_affine_const_3_0_WHITE_DOWN = tmp_coords_jac_10_WHITE_DOWN + tmp_coords_jac_3_WHITE_DOWN; + const real_t p_affine_const_3_1_WHITE_DOWN = tmp_coords_jac_11_WHITE_DOWN + tmp_coords_jac_6_WHITE_DOWN; + const real_t p_affine_const_3_2_WHITE_DOWN = tmp_coords_jac_12_WHITE_DOWN + tmp_coords_jac_9_WHITE_DOWN; + const real_t jac_affine_0_0_WHITE_DOWN = -p_affine_const_0_0_WHITE_DOWN + p_affine_const_1_0_WHITE_DOWN; + const real_t jac_affine_0_1_WHITE_DOWN = -p_affine_const_0_0_WHITE_DOWN + p_affine_const_2_0_WHITE_DOWN; + const real_t jac_affine_0_2_WHITE_DOWN = -p_affine_const_0_0_WHITE_DOWN + p_affine_const_3_0_WHITE_DOWN; + const real_t jac_affine_1_0_WHITE_DOWN = -p_affine_const_0_1_WHITE_DOWN + p_affine_const_1_1_WHITE_DOWN; + const real_t jac_affine_1_1_WHITE_DOWN = -p_affine_const_0_1_WHITE_DOWN + p_affine_const_2_1_WHITE_DOWN; + const real_t tmp_coords_jac_17_WHITE_DOWN = jac_affine_0_2_WHITE_DOWN*jac_affine_1_1_WHITE_DOWN; + const real_t jac_affine_1_2_WHITE_DOWN = -p_affine_const_0_1_WHITE_DOWN + p_affine_const_3_1_WHITE_DOWN; + const real_t tmp_coords_jac_15_WHITE_DOWN = jac_affine_0_1_WHITE_DOWN*jac_affine_1_2_WHITE_DOWN; + const real_t jac_affine_2_0_WHITE_DOWN = -p_affine_const_0_2_WHITE_DOWN + p_affine_const_1_2_WHITE_DOWN; + const real_t jac_affine_2_1_WHITE_DOWN = -p_affine_const_0_2_WHITE_DOWN + p_affine_const_2_2_WHITE_DOWN; + const real_t tmp_coords_jac_14_WHITE_DOWN = jac_affine_1_2_WHITE_DOWN*jac_affine_2_1_WHITE_DOWN; + const real_t jac_affine_2_2_WHITE_DOWN = -p_affine_const_0_2_WHITE_DOWN + p_affine_const_3_2_WHITE_DOWN; + const real_t tmp_coords_jac_13_WHITE_DOWN = jac_affine_1_1_WHITE_DOWN*jac_affine_2_2_WHITE_DOWN; + const real_t tmp_coords_jac_16_WHITE_DOWN = jac_affine_0_1_WHITE_DOWN*jac_affine_2_2_WHITE_DOWN; + const real_t tmp_coords_jac_18_WHITE_DOWN = jac_affine_0_0_WHITE_DOWN*tmp_coords_jac_13_WHITE_DOWN - jac_affine_0_0_WHITE_DOWN*tmp_coords_jac_14_WHITE_DOWN + jac_affine_0_2_WHITE_DOWN*jac_affine_1_0_WHITE_DOWN*jac_affine_2_1_WHITE_DOWN - jac_affine_1_0_WHITE_DOWN*tmp_coords_jac_16_WHITE_DOWN + jac_affine_2_0_WHITE_DOWN*tmp_coords_jac_15_WHITE_DOWN - jac_affine_2_0_WHITE_DOWN*tmp_coords_jac_17_WHITE_DOWN; + const real_t tmp_coords_jac_19_WHITE_DOWN = 1.0 / (tmp_coords_jac_18_WHITE_DOWN); + const real_t jac_affine_inv_0_0_WHITE_DOWN = tmp_coords_jac_19_WHITE_DOWN*(tmp_coords_jac_13_WHITE_DOWN - tmp_coords_jac_14_WHITE_DOWN); + const real_t jac_affine_inv_0_1_WHITE_DOWN = tmp_coords_jac_19_WHITE_DOWN*(jac_affine_0_2_WHITE_DOWN*jac_affine_2_1_WHITE_DOWN - tmp_coords_jac_16_WHITE_DOWN); + const real_t jac_affine_inv_0_2_WHITE_DOWN = tmp_coords_jac_19_WHITE_DOWN*(tmp_coords_jac_15_WHITE_DOWN - tmp_coords_jac_17_WHITE_DOWN); + const real_t jac_affine_inv_1_0_WHITE_DOWN = tmp_coords_jac_19_WHITE_DOWN*(-jac_affine_1_0_WHITE_DOWN*jac_affine_2_2_WHITE_DOWN + jac_affine_1_2_WHITE_DOWN*jac_affine_2_0_WHITE_DOWN); + const real_t jac_affine_inv_1_1_WHITE_DOWN = tmp_coords_jac_19_WHITE_DOWN*(jac_affine_0_0_WHITE_DOWN*jac_affine_2_2_WHITE_DOWN - jac_affine_0_2_WHITE_DOWN*jac_affine_2_0_WHITE_DOWN); + const real_t jac_affine_inv_1_2_WHITE_DOWN = tmp_coords_jac_19_WHITE_DOWN*(-jac_affine_0_0_WHITE_DOWN*jac_affine_1_2_WHITE_DOWN + jac_affine_0_2_WHITE_DOWN*jac_affine_1_0_WHITE_DOWN); + const real_t jac_affine_inv_2_0_WHITE_DOWN = tmp_coords_jac_19_WHITE_DOWN*(jac_affine_1_0_WHITE_DOWN*jac_affine_2_1_WHITE_DOWN - jac_affine_1_1_WHITE_DOWN*jac_affine_2_0_WHITE_DOWN); + const real_t jac_affine_inv_2_1_WHITE_DOWN = tmp_coords_jac_19_WHITE_DOWN*(-jac_affine_0_0_WHITE_DOWN*jac_affine_2_1_WHITE_DOWN + jac_affine_0_1_WHITE_DOWN*jac_affine_2_0_WHITE_DOWN); + const real_t jac_affine_inv_2_2_WHITE_DOWN = tmp_coords_jac_19_WHITE_DOWN*(jac_affine_0_0_WHITE_DOWN*jac_affine_1_1_WHITE_DOWN - jac_affine_0_1_WHITE_DOWN*jac_affine_1_0_WHITE_DOWN); + const real_t abs_det_jac_affine_WHITE_DOWN = abs(tmp_coords_jac_18_WHITE_DOWN); { /* CellType.WHITE_DOWN */ - const real_t tmp_coords_jac_0_WHITE_DOWN = 1.0 / (micro_edges_per_macro_edge_float)*1.0; - const real_t tmp_coords_jac_1_WHITE_DOWN = tmp_coords_jac_0_WHITE_DOWN*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0); - const real_t tmp_coords_jac_2_WHITE_DOWN = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_WHITE_DOWN*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0); - const real_t tmp_coords_jac_3_WHITE_DOWN = tmp_coords_jac_1_WHITE_DOWN + tmp_coords_jac_2_WHITE_DOWN; - const real_t tmp_coords_jac_4_WHITE_DOWN = tmp_coords_jac_0_WHITE_DOWN*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1); - const real_t tmp_coords_jac_5_WHITE_DOWN = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_WHITE_DOWN*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1); - const real_t tmp_coords_jac_6_WHITE_DOWN = tmp_coords_jac_4_WHITE_DOWN + tmp_coords_jac_5_WHITE_DOWN; - const real_t tmp_coords_jac_7_WHITE_DOWN = tmp_coords_jac_0_WHITE_DOWN*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2); - const real_t tmp_coords_jac_8_WHITE_DOWN = macro_vertex_coord_id_0comp2 + tmp_coords_jac_0_WHITE_DOWN*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2); - const real_t tmp_coords_jac_9_WHITE_DOWN = tmp_coords_jac_7_WHITE_DOWN + tmp_coords_jac_8_WHITE_DOWN; - const real_t tmp_coords_jac_10_WHITE_DOWN = tmp_coords_jac_0_WHITE_DOWN*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0); - const real_t tmp_coords_jac_11_WHITE_DOWN = tmp_coords_jac_0_WHITE_DOWN*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1); - const real_t tmp_coords_jac_12_WHITE_DOWN = tmp_coords_jac_0_WHITE_DOWN*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2); - const real_t p_affine_const_0_0_WHITE_DOWN = tmp_coords_jac_3_WHITE_DOWN; - const real_t p_affine_const_0_1_WHITE_DOWN = tmp_coords_jac_6_WHITE_DOWN; - const real_t p_affine_const_0_2_WHITE_DOWN = tmp_coords_jac_9_WHITE_DOWN; - const real_t p_affine_const_1_0_WHITE_DOWN = tmp_coords_jac_10_WHITE_DOWN + tmp_coords_jac_2_WHITE_DOWN; - const real_t p_affine_const_1_1_WHITE_DOWN = tmp_coords_jac_11_WHITE_DOWN + tmp_coords_jac_5_WHITE_DOWN; - const real_t p_affine_const_1_2_WHITE_DOWN = tmp_coords_jac_12_WHITE_DOWN + tmp_coords_jac_8_WHITE_DOWN; - const real_t p_affine_const_2_0_WHITE_DOWN = macro_vertex_coord_id_0comp0 + tmp_coords_jac_10_WHITE_DOWN + tmp_coords_jac_1_WHITE_DOWN; - const real_t p_affine_const_2_1_WHITE_DOWN = macro_vertex_coord_id_0comp1 + tmp_coords_jac_11_WHITE_DOWN + tmp_coords_jac_4_WHITE_DOWN; - const real_t p_affine_const_2_2_WHITE_DOWN = macro_vertex_coord_id_0comp2 + tmp_coords_jac_12_WHITE_DOWN + tmp_coords_jac_7_WHITE_DOWN; - const real_t p_affine_const_3_0_WHITE_DOWN = tmp_coords_jac_10_WHITE_DOWN + tmp_coords_jac_3_WHITE_DOWN; - const real_t p_affine_const_3_1_WHITE_DOWN = tmp_coords_jac_11_WHITE_DOWN + tmp_coords_jac_6_WHITE_DOWN; - const real_t p_affine_const_3_2_WHITE_DOWN = tmp_coords_jac_12_WHITE_DOWN + tmp_coords_jac_9_WHITE_DOWN; - const real_t jac_affine_0_0_WHITE_DOWN = -p_affine_const_0_0_WHITE_DOWN + p_affine_const_1_0_WHITE_DOWN; - const real_t jac_affine_0_1_WHITE_DOWN = -p_affine_const_0_0_WHITE_DOWN + p_affine_const_2_0_WHITE_DOWN; - const real_t jac_affine_0_2_WHITE_DOWN = -p_affine_const_0_0_WHITE_DOWN + p_affine_const_3_0_WHITE_DOWN; - const real_t jac_affine_1_0_WHITE_DOWN = -p_affine_const_0_1_WHITE_DOWN + p_affine_const_1_1_WHITE_DOWN; - const real_t jac_affine_1_1_WHITE_DOWN = -p_affine_const_0_1_WHITE_DOWN + p_affine_const_2_1_WHITE_DOWN; - const real_t tmp_coords_jac_17_WHITE_DOWN = jac_affine_0_2_WHITE_DOWN*jac_affine_1_1_WHITE_DOWN; - const real_t jac_affine_1_2_WHITE_DOWN = -p_affine_const_0_1_WHITE_DOWN + p_affine_const_3_1_WHITE_DOWN; - const real_t tmp_coords_jac_15_WHITE_DOWN = jac_affine_0_1_WHITE_DOWN*jac_affine_1_2_WHITE_DOWN; - const real_t jac_affine_2_0_WHITE_DOWN = -p_affine_const_0_2_WHITE_DOWN + p_affine_const_1_2_WHITE_DOWN; - const real_t jac_affine_2_1_WHITE_DOWN = -p_affine_const_0_2_WHITE_DOWN + p_affine_const_2_2_WHITE_DOWN; - const real_t tmp_coords_jac_14_WHITE_DOWN = jac_affine_1_2_WHITE_DOWN*jac_affine_2_1_WHITE_DOWN; - const real_t jac_affine_2_2_WHITE_DOWN = -p_affine_const_0_2_WHITE_DOWN + p_affine_const_3_2_WHITE_DOWN; - const real_t tmp_coords_jac_13_WHITE_DOWN = jac_affine_1_1_WHITE_DOWN*jac_affine_2_2_WHITE_DOWN; - const real_t tmp_coords_jac_16_WHITE_DOWN = jac_affine_0_1_WHITE_DOWN*jac_affine_2_2_WHITE_DOWN; - const real_t tmp_coords_jac_18_WHITE_DOWN = 1.0 / (jac_affine_0_0_WHITE_DOWN*tmp_coords_jac_13_WHITE_DOWN - jac_affine_0_0_WHITE_DOWN*tmp_coords_jac_14_WHITE_DOWN + jac_affine_0_2_WHITE_DOWN*jac_affine_1_0_WHITE_DOWN*jac_affine_2_1_WHITE_DOWN - jac_affine_1_0_WHITE_DOWN*tmp_coords_jac_16_WHITE_DOWN + jac_affine_2_0_WHITE_DOWN*tmp_coords_jac_15_WHITE_DOWN - jac_affine_2_0_WHITE_DOWN*tmp_coords_jac_17_WHITE_DOWN); - const real_t jac_affine_inv_0_0_WHITE_DOWN = tmp_coords_jac_18_WHITE_DOWN*(tmp_coords_jac_13_WHITE_DOWN - tmp_coords_jac_14_WHITE_DOWN); - const real_t jac_affine_inv_0_1_WHITE_DOWN = tmp_coords_jac_18_WHITE_DOWN*(jac_affine_0_2_WHITE_DOWN*jac_affine_2_1_WHITE_DOWN - tmp_coords_jac_16_WHITE_DOWN); - const real_t jac_affine_inv_0_2_WHITE_DOWN = tmp_coords_jac_18_WHITE_DOWN*(tmp_coords_jac_15_WHITE_DOWN - tmp_coords_jac_17_WHITE_DOWN); - const real_t jac_affine_inv_1_0_WHITE_DOWN = tmp_coords_jac_18_WHITE_DOWN*(-jac_affine_1_0_WHITE_DOWN*jac_affine_2_2_WHITE_DOWN + jac_affine_1_2_WHITE_DOWN*jac_affine_2_0_WHITE_DOWN); - const real_t jac_affine_inv_1_1_WHITE_DOWN = tmp_coords_jac_18_WHITE_DOWN*(jac_affine_0_0_WHITE_DOWN*jac_affine_2_2_WHITE_DOWN - jac_affine_0_2_WHITE_DOWN*jac_affine_2_0_WHITE_DOWN); - const real_t jac_affine_inv_1_2_WHITE_DOWN = tmp_coords_jac_18_WHITE_DOWN*(-jac_affine_0_0_WHITE_DOWN*jac_affine_1_2_WHITE_DOWN + jac_affine_0_2_WHITE_DOWN*jac_affine_1_0_WHITE_DOWN); - const real_t jac_affine_inv_2_0_WHITE_DOWN = tmp_coords_jac_18_WHITE_DOWN*(jac_affine_1_0_WHITE_DOWN*jac_affine_2_1_WHITE_DOWN - jac_affine_1_1_WHITE_DOWN*jac_affine_2_0_WHITE_DOWN); - const real_t jac_affine_inv_2_1_WHITE_DOWN = tmp_coords_jac_18_WHITE_DOWN*(-jac_affine_0_0_WHITE_DOWN*jac_affine_2_1_WHITE_DOWN + jac_affine_0_1_WHITE_DOWN*jac_affine_2_0_WHITE_DOWN); - const real_t jac_affine_inv_2_2_WHITE_DOWN = tmp_coords_jac_18_WHITE_DOWN*(jac_affine_0_0_WHITE_DOWN*jac_affine_1_1_WHITE_DOWN - jac_affine_0_1_WHITE_DOWN*jac_affine_1_0_WHITE_DOWN); for (int64_t ctr_2 = 0; ctr_2 < micro_edges_per_macro_edge; ctr_2 += 1) for (int64_t ctr_1 = 0; ctr_1 < -ctr_2 + micro_edges_per_macro_edge; ctr_1 += 1) for (int64_t ctr_0 = 0; ctr_0 < -ctr_1 - ctr_2 + micro_edges_per_macro_edge - 2; ctr_0 += 1) { - const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*((real_t)(ctr_1 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*((real_t)(ctr_2)); - const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*((real_t)(ctr_1 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*((real_t)(ctr_2)); - const real_t p_affine_0_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*((real_t)(ctr_1 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*((real_t)(ctr_2)); - const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*((real_t)(ctr_1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*((real_t)(ctr_2 + 1)); - const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*((real_t)(ctr_1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*((real_t)(ctr_2 + 1)); - const real_t p_affine_1_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*((real_t)(ctr_1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*((real_t)(ctr_2 + 1)); - const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*((real_t)(ctr_1 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*((real_t)(ctr_2 + 1)); - const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*((real_t)(ctr_1 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*((real_t)(ctr_2 + 1)); - const real_t p_affine_2_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*((real_t)(ctr_1 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*((real_t)(ctr_2 + 1)); - const real_t p_affine_3_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*((real_t)(ctr_1 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*((real_t)(ctr_2 + 1)); - const real_t p_affine_3_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*((real_t)(ctr_1 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*((real_t)(ctr_2 + 1)); - const real_t p_affine_3_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*((real_t)(ctr_1 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*((real_t)(ctr_2 + 1)); + + const int64_t phantom_ctr_0 = ctr_0; + real_t _data_float_loop_ctr_array_dim_0[4]; + _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0; + _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1; + _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2; + _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3; + real_t _data_float_loop_ctr_array_dim_1[4]; + _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1; + real_t _data_float_loop_ctr_array_dim_2[4]; + _data_float_loop_ctr_array_dim_2[0] = (real_t) ctr_2; + _data_float_loop_ctr_array_dim_2[1] = (real_t) ctr_2; + _data_float_loop_ctr_array_dim_2[2] = (real_t) ctr_2; + _data_float_loop_ctr_array_dim_2[3] = (real_t) ctr_2; + + const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]; + const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]; + const real_t p_affine_0_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]; + const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_1_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_2_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_3_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_3_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_3_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0; const real_t src_dof_0 = _data_src_vertex_0[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1]; const real_t src_dof_1 = _data_src_vertex_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1]; const real_t src_dof_2 = _data_src_vertex_0[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))]; @@ -859,7 +903,7 @@ void P2VectorToP1ElementwiseGradRhoByRhoDotU::apply_macro_3D( real_t * RESTRICT const real_t tmp_qloop_29 = -tmp_qloop_28 + tmp_qloop_6 - tmp_qloop_7 - tmp_qloop_9; const real_t tmp_qloop_30 = tmp_qloop_0*4.0; const real_t tmp_qloop_31 = -tmp_qloop_11 - tmp_qloop_30 + tmp_qloop_8; - const real_t tmp_qloop_32 = 1.0 / (rho_dof_0*tmp_qloop_12 + rho_dof_1*tmp_qloop_24 + rho_dof_2*tmp_qloop_25 + rho_dof_3*tmp_qloop_26 + rho_dof_4*tmp_qloop_7 + rho_dof_5*tmp_qloop_10 + rho_dof_6*tmp_qloop_9 + rho_dof_7*tmp_qloop_27 + rho_dof_8*tmp_qloop_29 + rho_dof_9*tmp_qloop_31)*_data_q_w[q]; + const real_t tmp_qloop_32 = abs_det_jac_affine_WHITE_DOWN*1.0 / (rho_dof_0*tmp_qloop_12 + rho_dof_1*tmp_qloop_24 + rho_dof_2*tmp_qloop_25 + rho_dof_3*tmp_qloop_26 + rho_dof_4*tmp_qloop_7 + rho_dof_5*tmp_qloop_10 + rho_dof_6*tmp_qloop_9 + rho_dof_7*tmp_qloop_27 + rho_dof_8*tmp_qloop_29 + rho_dof_9*tmp_qloop_31)*_data_q_w[q]; const real_t tmp_qloop_33 = tmp_qloop_32*(1.0 - _data_q_p_0[q] - _data_q_p_1[q] - _data_q_p_2[q]); const real_t tmp_qloop_34 = tmp_qloop_23*tmp_qloop_33; const real_t tmp_qloop_35 = jac_affine_inv_0_1_WHITE_DOWN*tmp_qloop_18 + jac_affine_inv_1_1_WHITE_DOWN*tmp_qloop_21 + jac_affine_inv_2_1_WHITE_DOWN*tmp_qloop_22; @@ -1139,67 +1183,87 @@ void P2VectorToP1ElementwiseGradRhoByRhoDotU::apply_macro_3D( real_t * RESTRICT _data_dst[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1] = elMatVec_3 + _data_dst[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1]; } } + const real_t tmp_coords_jac_0_BLUE_UP = 1.0 / (micro_edges_per_macro_edge_float)*1.0; + const real_t tmp_coords_jac_1_BLUE_UP = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_BLUE_UP*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0); + const real_t tmp_coords_jac_2_BLUE_UP = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_BLUE_UP*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1); + const real_t tmp_coords_jac_3_BLUE_UP = macro_vertex_coord_id_0comp2 + tmp_coords_jac_0_BLUE_UP*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2); + const real_t tmp_coords_jac_4_BLUE_UP = tmp_coords_jac_0_BLUE_UP*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0); + const real_t tmp_coords_jac_5_BLUE_UP = tmp_coords_jac_0_BLUE_UP*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1); + const real_t tmp_coords_jac_6_BLUE_UP = tmp_coords_jac_0_BLUE_UP*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2); + const real_t p_affine_const_0_0_BLUE_UP = tmp_coords_jac_1_BLUE_UP; + const real_t p_affine_const_0_1_BLUE_UP = tmp_coords_jac_2_BLUE_UP; + const real_t p_affine_const_0_2_BLUE_UP = tmp_coords_jac_3_BLUE_UP; + const real_t p_affine_const_1_0_BLUE_UP = macro_vertex_coord_id_0comp0 + tmp_coords_jac_4_BLUE_UP; + const real_t p_affine_const_1_1_BLUE_UP = macro_vertex_coord_id_0comp1 + tmp_coords_jac_5_BLUE_UP; + const real_t p_affine_const_1_2_BLUE_UP = macro_vertex_coord_id_0comp2 + tmp_coords_jac_6_BLUE_UP; + const real_t p_affine_const_2_0_BLUE_UP = tmp_coords_jac_1_BLUE_UP + tmp_coords_jac_4_BLUE_UP; + const real_t p_affine_const_2_1_BLUE_UP = tmp_coords_jac_2_BLUE_UP + tmp_coords_jac_5_BLUE_UP; + const real_t p_affine_const_2_2_BLUE_UP = tmp_coords_jac_3_BLUE_UP + tmp_coords_jac_6_BLUE_UP; + const real_t p_affine_const_3_0_BLUE_UP = tmp_coords_jac_0_BLUE_UP*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0) + tmp_coords_jac_1_BLUE_UP; + const real_t p_affine_const_3_1_BLUE_UP = tmp_coords_jac_0_BLUE_UP*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1) + tmp_coords_jac_2_BLUE_UP; + const real_t p_affine_const_3_2_BLUE_UP = tmp_coords_jac_0_BLUE_UP*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2) + tmp_coords_jac_3_BLUE_UP; + const real_t jac_affine_0_0_BLUE_UP = -p_affine_const_0_0_BLUE_UP + p_affine_const_1_0_BLUE_UP; + const real_t jac_affine_0_1_BLUE_UP = -p_affine_const_0_0_BLUE_UP + p_affine_const_2_0_BLUE_UP; + const real_t jac_affine_0_2_BLUE_UP = -p_affine_const_0_0_BLUE_UP + p_affine_const_3_0_BLUE_UP; + const real_t jac_affine_1_0_BLUE_UP = -p_affine_const_0_1_BLUE_UP + p_affine_const_1_1_BLUE_UP; + const real_t jac_affine_1_1_BLUE_UP = -p_affine_const_0_1_BLUE_UP + p_affine_const_2_1_BLUE_UP; + const real_t tmp_coords_jac_11_BLUE_UP = jac_affine_0_2_BLUE_UP*jac_affine_1_1_BLUE_UP; + const real_t jac_affine_1_2_BLUE_UP = -p_affine_const_0_1_BLUE_UP + p_affine_const_3_1_BLUE_UP; + const real_t tmp_coords_jac_9_BLUE_UP = jac_affine_0_1_BLUE_UP*jac_affine_1_2_BLUE_UP; + const real_t jac_affine_2_0_BLUE_UP = -p_affine_const_0_2_BLUE_UP + p_affine_const_1_2_BLUE_UP; + const real_t jac_affine_2_1_BLUE_UP = -p_affine_const_0_2_BLUE_UP + p_affine_const_2_2_BLUE_UP; + const real_t tmp_coords_jac_8_BLUE_UP = jac_affine_1_2_BLUE_UP*jac_affine_2_1_BLUE_UP; + const real_t jac_affine_2_2_BLUE_UP = -p_affine_const_0_2_BLUE_UP + p_affine_const_3_2_BLUE_UP; + const real_t tmp_coords_jac_7_BLUE_UP = jac_affine_1_1_BLUE_UP*jac_affine_2_2_BLUE_UP; + const real_t tmp_coords_jac_10_BLUE_UP = jac_affine_0_1_BLUE_UP*jac_affine_2_2_BLUE_UP; + const real_t tmp_coords_jac_12_BLUE_UP = jac_affine_0_0_BLUE_UP*tmp_coords_jac_7_BLUE_UP - jac_affine_0_0_BLUE_UP*tmp_coords_jac_8_BLUE_UP + jac_affine_0_2_BLUE_UP*jac_affine_1_0_BLUE_UP*jac_affine_2_1_BLUE_UP - jac_affine_1_0_BLUE_UP*tmp_coords_jac_10_BLUE_UP - jac_affine_2_0_BLUE_UP*tmp_coords_jac_11_BLUE_UP + jac_affine_2_0_BLUE_UP*tmp_coords_jac_9_BLUE_UP; + const real_t tmp_coords_jac_13_BLUE_UP = 1.0 / (tmp_coords_jac_12_BLUE_UP); + const real_t jac_affine_inv_0_0_BLUE_UP = tmp_coords_jac_13_BLUE_UP*(tmp_coords_jac_7_BLUE_UP - tmp_coords_jac_8_BLUE_UP); + const real_t jac_affine_inv_0_1_BLUE_UP = tmp_coords_jac_13_BLUE_UP*(jac_affine_0_2_BLUE_UP*jac_affine_2_1_BLUE_UP - tmp_coords_jac_10_BLUE_UP); + const real_t jac_affine_inv_0_2_BLUE_UP = tmp_coords_jac_13_BLUE_UP*(-tmp_coords_jac_11_BLUE_UP + tmp_coords_jac_9_BLUE_UP); + const real_t jac_affine_inv_1_0_BLUE_UP = tmp_coords_jac_13_BLUE_UP*(-jac_affine_1_0_BLUE_UP*jac_affine_2_2_BLUE_UP + jac_affine_1_2_BLUE_UP*jac_affine_2_0_BLUE_UP); + const real_t jac_affine_inv_1_1_BLUE_UP = tmp_coords_jac_13_BLUE_UP*(jac_affine_0_0_BLUE_UP*jac_affine_2_2_BLUE_UP - jac_affine_0_2_BLUE_UP*jac_affine_2_0_BLUE_UP); + const real_t jac_affine_inv_1_2_BLUE_UP = tmp_coords_jac_13_BLUE_UP*(-jac_affine_0_0_BLUE_UP*jac_affine_1_2_BLUE_UP + jac_affine_0_2_BLUE_UP*jac_affine_1_0_BLUE_UP); + const real_t jac_affine_inv_2_0_BLUE_UP = tmp_coords_jac_13_BLUE_UP*(jac_affine_1_0_BLUE_UP*jac_affine_2_1_BLUE_UP - jac_affine_1_1_BLUE_UP*jac_affine_2_0_BLUE_UP); + const real_t jac_affine_inv_2_1_BLUE_UP = tmp_coords_jac_13_BLUE_UP*(-jac_affine_0_0_BLUE_UP*jac_affine_2_1_BLUE_UP + jac_affine_0_1_BLUE_UP*jac_affine_2_0_BLUE_UP); + const real_t jac_affine_inv_2_2_BLUE_UP = tmp_coords_jac_13_BLUE_UP*(jac_affine_0_0_BLUE_UP*jac_affine_1_1_BLUE_UP - jac_affine_0_1_BLUE_UP*jac_affine_1_0_BLUE_UP); + const real_t abs_det_jac_affine_BLUE_UP = abs(tmp_coords_jac_12_BLUE_UP); { /* CellType.BLUE_UP */ - const real_t tmp_coords_jac_0_BLUE_UP = 1.0 / (micro_edges_per_macro_edge_float)*1.0; - const real_t tmp_coords_jac_1_BLUE_UP = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_BLUE_UP*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0); - const real_t tmp_coords_jac_2_BLUE_UP = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_BLUE_UP*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1); - const real_t tmp_coords_jac_3_BLUE_UP = macro_vertex_coord_id_0comp2 + tmp_coords_jac_0_BLUE_UP*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2); - const real_t tmp_coords_jac_4_BLUE_UP = tmp_coords_jac_0_BLUE_UP*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0); - const real_t tmp_coords_jac_5_BLUE_UP = tmp_coords_jac_0_BLUE_UP*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1); - const real_t tmp_coords_jac_6_BLUE_UP = tmp_coords_jac_0_BLUE_UP*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2); - const real_t p_affine_const_0_0_BLUE_UP = tmp_coords_jac_1_BLUE_UP; - const real_t p_affine_const_0_1_BLUE_UP = tmp_coords_jac_2_BLUE_UP; - const real_t p_affine_const_0_2_BLUE_UP = tmp_coords_jac_3_BLUE_UP; - const real_t p_affine_const_1_0_BLUE_UP = macro_vertex_coord_id_0comp0 + tmp_coords_jac_4_BLUE_UP; - const real_t p_affine_const_1_1_BLUE_UP = macro_vertex_coord_id_0comp1 + tmp_coords_jac_5_BLUE_UP; - const real_t p_affine_const_1_2_BLUE_UP = macro_vertex_coord_id_0comp2 + tmp_coords_jac_6_BLUE_UP; - const real_t p_affine_const_2_0_BLUE_UP = tmp_coords_jac_1_BLUE_UP + tmp_coords_jac_4_BLUE_UP; - const real_t p_affine_const_2_1_BLUE_UP = tmp_coords_jac_2_BLUE_UP + tmp_coords_jac_5_BLUE_UP; - const real_t p_affine_const_2_2_BLUE_UP = tmp_coords_jac_3_BLUE_UP + tmp_coords_jac_6_BLUE_UP; - const real_t p_affine_const_3_0_BLUE_UP = tmp_coords_jac_0_BLUE_UP*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0) + tmp_coords_jac_1_BLUE_UP; - const real_t p_affine_const_3_1_BLUE_UP = tmp_coords_jac_0_BLUE_UP*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1) + tmp_coords_jac_2_BLUE_UP; - const real_t p_affine_const_3_2_BLUE_UP = tmp_coords_jac_0_BLUE_UP*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2) + tmp_coords_jac_3_BLUE_UP; - const real_t jac_affine_0_0_BLUE_UP = -p_affine_const_0_0_BLUE_UP + p_affine_const_1_0_BLUE_UP; - const real_t jac_affine_0_1_BLUE_UP = -p_affine_const_0_0_BLUE_UP + p_affine_const_2_0_BLUE_UP; - const real_t jac_affine_0_2_BLUE_UP = -p_affine_const_0_0_BLUE_UP + p_affine_const_3_0_BLUE_UP; - const real_t jac_affine_1_0_BLUE_UP = -p_affine_const_0_1_BLUE_UP + p_affine_const_1_1_BLUE_UP; - const real_t jac_affine_1_1_BLUE_UP = -p_affine_const_0_1_BLUE_UP + p_affine_const_2_1_BLUE_UP; - const real_t tmp_coords_jac_11_BLUE_UP = jac_affine_0_2_BLUE_UP*jac_affine_1_1_BLUE_UP; - const real_t jac_affine_1_2_BLUE_UP = -p_affine_const_0_1_BLUE_UP + p_affine_const_3_1_BLUE_UP; - const real_t tmp_coords_jac_9_BLUE_UP = jac_affine_0_1_BLUE_UP*jac_affine_1_2_BLUE_UP; - const real_t jac_affine_2_0_BLUE_UP = -p_affine_const_0_2_BLUE_UP + p_affine_const_1_2_BLUE_UP; - const real_t jac_affine_2_1_BLUE_UP = -p_affine_const_0_2_BLUE_UP + p_affine_const_2_2_BLUE_UP; - const real_t tmp_coords_jac_8_BLUE_UP = jac_affine_1_2_BLUE_UP*jac_affine_2_1_BLUE_UP; - const real_t jac_affine_2_2_BLUE_UP = -p_affine_const_0_2_BLUE_UP + p_affine_const_3_2_BLUE_UP; - const real_t tmp_coords_jac_7_BLUE_UP = jac_affine_1_1_BLUE_UP*jac_affine_2_2_BLUE_UP; - const real_t tmp_coords_jac_10_BLUE_UP = jac_affine_0_1_BLUE_UP*jac_affine_2_2_BLUE_UP; - const real_t tmp_coords_jac_12_BLUE_UP = 1.0 / (jac_affine_0_0_BLUE_UP*tmp_coords_jac_7_BLUE_UP - jac_affine_0_0_BLUE_UP*tmp_coords_jac_8_BLUE_UP + jac_affine_0_2_BLUE_UP*jac_affine_1_0_BLUE_UP*jac_affine_2_1_BLUE_UP - jac_affine_1_0_BLUE_UP*tmp_coords_jac_10_BLUE_UP - jac_affine_2_0_BLUE_UP*tmp_coords_jac_11_BLUE_UP + jac_affine_2_0_BLUE_UP*tmp_coords_jac_9_BLUE_UP); - const real_t jac_affine_inv_0_0_BLUE_UP = tmp_coords_jac_12_BLUE_UP*(tmp_coords_jac_7_BLUE_UP - tmp_coords_jac_8_BLUE_UP); - const real_t jac_affine_inv_0_1_BLUE_UP = tmp_coords_jac_12_BLUE_UP*(jac_affine_0_2_BLUE_UP*jac_affine_2_1_BLUE_UP - tmp_coords_jac_10_BLUE_UP); - const real_t jac_affine_inv_0_2_BLUE_UP = tmp_coords_jac_12_BLUE_UP*(-tmp_coords_jac_11_BLUE_UP + tmp_coords_jac_9_BLUE_UP); - const real_t jac_affine_inv_1_0_BLUE_UP = tmp_coords_jac_12_BLUE_UP*(-jac_affine_1_0_BLUE_UP*jac_affine_2_2_BLUE_UP + jac_affine_1_2_BLUE_UP*jac_affine_2_0_BLUE_UP); - const real_t jac_affine_inv_1_1_BLUE_UP = tmp_coords_jac_12_BLUE_UP*(jac_affine_0_0_BLUE_UP*jac_affine_2_2_BLUE_UP - jac_affine_0_2_BLUE_UP*jac_affine_2_0_BLUE_UP); - const real_t jac_affine_inv_1_2_BLUE_UP = tmp_coords_jac_12_BLUE_UP*(-jac_affine_0_0_BLUE_UP*jac_affine_1_2_BLUE_UP + jac_affine_0_2_BLUE_UP*jac_affine_1_0_BLUE_UP); - const real_t jac_affine_inv_2_0_BLUE_UP = tmp_coords_jac_12_BLUE_UP*(jac_affine_1_0_BLUE_UP*jac_affine_2_1_BLUE_UP - jac_affine_1_1_BLUE_UP*jac_affine_2_0_BLUE_UP); - const real_t jac_affine_inv_2_1_BLUE_UP = tmp_coords_jac_12_BLUE_UP*(-jac_affine_0_0_BLUE_UP*jac_affine_2_1_BLUE_UP + jac_affine_0_1_BLUE_UP*jac_affine_2_0_BLUE_UP); - const real_t jac_affine_inv_2_2_BLUE_UP = tmp_coords_jac_12_BLUE_UP*(jac_affine_0_0_BLUE_UP*jac_affine_1_1_BLUE_UP - jac_affine_0_1_BLUE_UP*jac_affine_1_0_BLUE_UP); for (int64_t ctr_2 = 0; ctr_2 < micro_edges_per_macro_edge; ctr_2 += 1) for (int64_t ctr_1 = 0; ctr_1 < -ctr_2 + micro_edges_per_macro_edge; ctr_1 += 1) for (int64_t ctr_0 = 0; ctr_0 < -ctr_1 - ctr_2 + micro_edges_per_macro_edge - 1; ctr_0 += 1) { - const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*((real_t)(ctr_1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*((real_t)(ctr_2)); - const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*((real_t)(ctr_1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*((real_t)(ctr_2)); - const real_t p_affine_0_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*((real_t)(ctr_1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*((real_t)(ctr_2)); - const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*((real_t)(ctr_1 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*((real_t)(ctr_2)); - const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*((real_t)(ctr_1 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*((real_t)(ctr_2)); - const real_t p_affine_1_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*((real_t)(ctr_1 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*((real_t)(ctr_2)); - const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*((real_t)(ctr_1 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*((real_t)(ctr_2)); - const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*((real_t)(ctr_1 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*((real_t)(ctr_2)); - const real_t p_affine_2_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*((real_t)(ctr_1 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*((real_t)(ctr_2)); - const real_t p_affine_3_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*((real_t)(ctr_1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*((real_t)(ctr_2 + 1)); - const real_t p_affine_3_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*((real_t)(ctr_1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*((real_t)(ctr_2 + 1)); - const real_t p_affine_3_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*((real_t)(ctr_1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*((real_t)(ctr_2 + 1)); + + const int64_t phantom_ctr_0 = ctr_0; + real_t _data_float_loop_ctr_array_dim_0[4]; + _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0; + _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1; + _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2; + _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3; + real_t _data_float_loop_ctr_array_dim_1[4]; + _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1; + real_t _data_float_loop_ctr_array_dim_2[4]; + _data_float_loop_ctr_array_dim_2[0] = (real_t) ctr_2; + _data_float_loop_ctr_array_dim_2[1] = (real_t) ctr_2; + _data_float_loop_ctr_array_dim_2[2] = (real_t) ctr_2; + _data_float_loop_ctr_array_dim_2[3] = (real_t) ctr_2; + + const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]; + const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]; + const real_t p_affine_0_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]; + const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]; + const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]; + const real_t p_affine_1_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]; + const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]; + const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]; + const real_t p_affine_2_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]; + const real_t p_affine_3_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_3_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_3_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0; const real_t src_dof_0 = _data_src_vertex_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1]; const real_t src_dof_1 = _data_src_vertex_0[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))]; const real_t src_dof_2 = _data_src_vertex_0[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1]; @@ -1394,7 +1458,7 @@ void P2VectorToP1ElementwiseGradRhoByRhoDotU::apply_macro_3D( real_t * RESTRICT const real_t tmp_qloop_29 = -tmp_qloop_28 + tmp_qloop_6 - tmp_qloop_7 - tmp_qloop_9; const real_t tmp_qloop_30 = tmp_qloop_0*4.0; const real_t tmp_qloop_31 = -tmp_qloop_11 - tmp_qloop_30 + tmp_qloop_8; - const real_t tmp_qloop_32 = 1.0 / (rho_dof_0*tmp_qloop_12 + rho_dof_1*tmp_qloop_24 + rho_dof_2*tmp_qloop_25 + rho_dof_3*tmp_qloop_26 + rho_dof_4*tmp_qloop_7 + rho_dof_5*tmp_qloop_10 + rho_dof_6*tmp_qloop_9 + rho_dof_7*tmp_qloop_27 + rho_dof_8*tmp_qloop_29 + rho_dof_9*tmp_qloop_31)*_data_q_w[q]; + const real_t tmp_qloop_32 = abs_det_jac_affine_BLUE_UP*1.0 / (rho_dof_0*tmp_qloop_12 + rho_dof_1*tmp_qloop_24 + rho_dof_2*tmp_qloop_25 + rho_dof_3*tmp_qloop_26 + rho_dof_4*tmp_qloop_7 + rho_dof_5*tmp_qloop_10 + rho_dof_6*tmp_qloop_9 + rho_dof_7*tmp_qloop_27 + rho_dof_8*tmp_qloop_29 + rho_dof_9*tmp_qloop_31)*_data_q_w[q]; const real_t tmp_qloop_33 = tmp_qloop_32*(1.0 - _data_q_p_0[q] - _data_q_p_1[q] - _data_q_p_2[q]); const real_t tmp_qloop_34 = tmp_qloop_23*tmp_qloop_33; const real_t tmp_qloop_35 = jac_affine_inv_0_1_BLUE_UP*tmp_qloop_18 + jac_affine_inv_1_1_BLUE_UP*tmp_qloop_21 + jac_affine_inv_2_1_BLUE_UP*tmp_qloop_22; @@ -1674,70 +1738,90 @@ void P2VectorToP1ElementwiseGradRhoByRhoDotU::apply_macro_3D( real_t * RESTRICT _data_dst[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1] = elMatVec_3 + _data_dst[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1]; } } + const real_t tmp_coords_jac_0_BLUE_DOWN = 1.0 / (micro_edges_per_macro_edge_float)*1.0; + const real_t tmp_coords_jac_1_BLUE_DOWN = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_BLUE_DOWN*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0); + const real_t tmp_coords_jac_2_BLUE_DOWN = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_BLUE_DOWN*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1); + const real_t tmp_coords_jac_3_BLUE_DOWN = macro_vertex_coord_id_0comp2 + tmp_coords_jac_0_BLUE_DOWN*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2); + const real_t tmp_coords_jac_4_BLUE_DOWN = tmp_coords_jac_0_BLUE_DOWN*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0); + const real_t tmp_coords_jac_5_BLUE_DOWN = macro_vertex_coord_id_0comp0 + tmp_coords_jac_4_BLUE_DOWN; + const real_t tmp_coords_jac_6_BLUE_DOWN = tmp_coords_jac_0_BLUE_DOWN*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1); + const real_t tmp_coords_jac_7_BLUE_DOWN = macro_vertex_coord_id_0comp1 + tmp_coords_jac_6_BLUE_DOWN; + const real_t tmp_coords_jac_8_BLUE_DOWN = tmp_coords_jac_0_BLUE_DOWN*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2); + const real_t tmp_coords_jac_9_BLUE_DOWN = macro_vertex_coord_id_0comp2 + tmp_coords_jac_8_BLUE_DOWN; + const real_t p_affine_const_0_0_BLUE_DOWN = tmp_coords_jac_1_BLUE_DOWN; + const real_t p_affine_const_0_1_BLUE_DOWN = tmp_coords_jac_2_BLUE_DOWN; + const real_t p_affine_const_0_2_BLUE_DOWN = tmp_coords_jac_3_BLUE_DOWN; + const real_t p_affine_const_1_0_BLUE_DOWN = tmp_coords_jac_5_BLUE_DOWN; + const real_t p_affine_const_1_1_BLUE_DOWN = tmp_coords_jac_7_BLUE_DOWN; + const real_t p_affine_const_1_2_BLUE_DOWN = tmp_coords_jac_9_BLUE_DOWN; + const real_t p_affine_const_2_0_BLUE_DOWN = tmp_coords_jac_0_BLUE_DOWN*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0) + tmp_coords_jac_5_BLUE_DOWN; + const real_t p_affine_const_2_1_BLUE_DOWN = tmp_coords_jac_0_BLUE_DOWN*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1) + tmp_coords_jac_7_BLUE_DOWN; + const real_t p_affine_const_2_2_BLUE_DOWN = tmp_coords_jac_0_BLUE_DOWN*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2) + tmp_coords_jac_9_BLUE_DOWN; + const real_t p_affine_const_3_0_BLUE_DOWN = tmp_coords_jac_1_BLUE_DOWN + tmp_coords_jac_4_BLUE_DOWN; + const real_t p_affine_const_3_1_BLUE_DOWN = tmp_coords_jac_2_BLUE_DOWN + tmp_coords_jac_6_BLUE_DOWN; + const real_t p_affine_const_3_2_BLUE_DOWN = tmp_coords_jac_3_BLUE_DOWN + tmp_coords_jac_8_BLUE_DOWN; + const real_t jac_affine_0_0_BLUE_DOWN = -p_affine_const_0_0_BLUE_DOWN + p_affine_const_1_0_BLUE_DOWN; + const real_t jac_affine_0_1_BLUE_DOWN = -p_affine_const_0_0_BLUE_DOWN + p_affine_const_2_0_BLUE_DOWN; + const real_t jac_affine_0_2_BLUE_DOWN = -p_affine_const_0_0_BLUE_DOWN + p_affine_const_3_0_BLUE_DOWN; + const real_t jac_affine_1_0_BLUE_DOWN = -p_affine_const_0_1_BLUE_DOWN + p_affine_const_1_1_BLUE_DOWN; + const real_t jac_affine_1_1_BLUE_DOWN = -p_affine_const_0_1_BLUE_DOWN + p_affine_const_2_1_BLUE_DOWN; + const real_t tmp_coords_jac_14_BLUE_DOWN = jac_affine_0_2_BLUE_DOWN*jac_affine_1_1_BLUE_DOWN; + const real_t jac_affine_1_2_BLUE_DOWN = -p_affine_const_0_1_BLUE_DOWN + p_affine_const_3_1_BLUE_DOWN; + const real_t tmp_coords_jac_12_BLUE_DOWN = jac_affine_0_1_BLUE_DOWN*jac_affine_1_2_BLUE_DOWN; + const real_t jac_affine_2_0_BLUE_DOWN = -p_affine_const_0_2_BLUE_DOWN + p_affine_const_1_2_BLUE_DOWN; + const real_t jac_affine_2_1_BLUE_DOWN = -p_affine_const_0_2_BLUE_DOWN + p_affine_const_2_2_BLUE_DOWN; + const real_t tmp_coords_jac_11_BLUE_DOWN = jac_affine_1_2_BLUE_DOWN*jac_affine_2_1_BLUE_DOWN; + const real_t jac_affine_2_2_BLUE_DOWN = -p_affine_const_0_2_BLUE_DOWN + p_affine_const_3_2_BLUE_DOWN; + const real_t tmp_coords_jac_10_BLUE_DOWN = jac_affine_1_1_BLUE_DOWN*jac_affine_2_2_BLUE_DOWN; + const real_t tmp_coords_jac_13_BLUE_DOWN = jac_affine_0_1_BLUE_DOWN*jac_affine_2_2_BLUE_DOWN; + const real_t tmp_coords_jac_15_BLUE_DOWN = jac_affine_0_0_BLUE_DOWN*tmp_coords_jac_10_BLUE_DOWN - jac_affine_0_0_BLUE_DOWN*tmp_coords_jac_11_BLUE_DOWN + jac_affine_0_2_BLUE_DOWN*jac_affine_1_0_BLUE_DOWN*jac_affine_2_1_BLUE_DOWN - jac_affine_1_0_BLUE_DOWN*tmp_coords_jac_13_BLUE_DOWN + jac_affine_2_0_BLUE_DOWN*tmp_coords_jac_12_BLUE_DOWN - jac_affine_2_0_BLUE_DOWN*tmp_coords_jac_14_BLUE_DOWN; + const real_t tmp_coords_jac_16_BLUE_DOWN = 1.0 / (tmp_coords_jac_15_BLUE_DOWN); + const real_t jac_affine_inv_0_0_BLUE_DOWN = tmp_coords_jac_16_BLUE_DOWN*(tmp_coords_jac_10_BLUE_DOWN - tmp_coords_jac_11_BLUE_DOWN); + const real_t jac_affine_inv_0_1_BLUE_DOWN = tmp_coords_jac_16_BLUE_DOWN*(jac_affine_0_2_BLUE_DOWN*jac_affine_2_1_BLUE_DOWN - tmp_coords_jac_13_BLUE_DOWN); + const real_t jac_affine_inv_0_2_BLUE_DOWN = tmp_coords_jac_16_BLUE_DOWN*(tmp_coords_jac_12_BLUE_DOWN - tmp_coords_jac_14_BLUE_DOWN); + const real_t jac_affine_inv_1_0_BLUE_DOWN = tmp_coords_jac_16_BLUE_DOWN*(-jac_affine_1_0_BLUE_DOWN*jac_affine_2_2_BLUE_DOWN + jac_affine_1_2_BLUE_DOWN*jac_affine_2_0_BLUE_DOWN); + const real_t jac_affine_inv_1_1_BLUE_DOWN = tmp_coords_jac_16_BLUE_DOWN*(jac_affine_0_0_BLUE_DOWN*jac_affine_2_2_BLUE_DOWN - jac_affine_0_2_BLUE_DOWN*jac_affine_2_0_BLUE_DOWN); + const real_t jac_affine_inv_1_2_BLUE_DOWN = tmp_coords_jac_16_BLUE_DOWN*(-jac_affine_0_0_BLUE_DOWN*jac_affine_1_2_BLUE_DOWN + jac_affine_0_2_BLUE_DOWN*jac_affine_1_0_BLUE_DOWN); + const real_t jac_affine_inv_2_0_BLUE_DOWN = tmp_coords_jac_16_BLUE_DOWN*(jac_affine_1_0_BLUE_DOWN*jac_affine_2_1_BLUE_DOWN - jac_affine_1_1_BLUE_DOWN*jac_affine_2_0_BLUE_DOWN); + const real_t jac_affine_inv_2_1_BLUE_DOWN = tmp_coords_jac_16_BLUE_DOWN*(-jac_affine_0_0_BLUE_DOWN*jac_affine_2_1_BLUE_DOWN + jac_affine_0_1_BLUE_DOWN*jac_affine_2_0_BLUE_DOWN); + const real_t jac_affine_inv_2_2_BLUE_DOWN = tmp_coords_jac_16_BLUE_DOWN*(jac_affine_0_0_BLUE_DOWN*jac_affine_1_1_BLUE_DOWN - jac_affine_0_1_BLUE_DOWN*jac_affine_1_0_BLUE_DOWN); + const real_t abs_det_jac_affine_BLUE_DOWN = abs(tmp_coords_jac_15_BLUE_DOWN); { /* CellType.BLUE_DOWN */ - const real_t tmp_coords_jac_0_BLUE_DOWN = 1.0 / (micro_edges_per_macro_edge_float)*1.0; - const real_t tmp_coords_jac_1_BLUE_DOWN = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_BLUE_DOWN*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0); - const real_t tmp_coords_jac_2_BLUE_DOWN = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_BLUE_DOWN*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1); - const real_t tmp_coords_jac_3_BLUE_DOWN = macro_vertex_coord_id_0comp2 + tmp_coords_jac_0_BLUE_DOWN*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2); - const real_t tmp_coords_jac_4_BLUE_DOWN = tmp_coords_jac_0_BLUE_DOWN*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0); - const real_t tmp_coords_jac_5_BLUE_DOWN = macro_vertex_coord_id_0comp0 + tmp_coords_jac_4_BLUE_DOWN; - const real_t tmp_coords_jac_6_BLUE_DOWN = tmp_coords_jac_0_BLUE_DOWN*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1); - const real_t tmp_coords_jac_7_BLUE_DOWN = macro_vertex_coord_id_0comp1 + tmp_coords_jac_6_BLUE_DOWN; - const real_t tmp_coords_jac_8_BLUE_DOWN = tmp_coords_jac_0_BLUE_DOWN*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2); - const real_t tmp_coords_jac_9_BLUE_DOWN = macro_vertex_coord_id_0comp2 + tmp_coords_jac_8_BLUE_DOWN; - const real_t p_affine_const_0_0_BLUE_DOWN = tmp_coords_jac_1_BLUE_DOWN; - const real_t p_affine_const_0_1_BLUE_DOWN = tmp_coords_jac_2_BLUE_DOWN; - const real_t p_affine_const_0_2_BLUE_DOWN = tmp_coords_jac_3_BLUE_DOWN; - const real_t p_affine_const_1_0_BLUE_DOWN = tmp_coords_jac_5_BLUE_DOWN; - const real_t p_affine_const_1_1_BLUE_DOWN = tmp_coords_jac_7_BLUE_DOWN; - const real_t p_affine_const_1_2_BLUE_DOWN = tmp_coords_jac_9_BLUE_DOWN; - const real_t p_affine_const_2_0_BLUE_DOWN = tmp_coords_jac_0_BLUE_DOWN*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0) + tmp_coords_jac_5_BLUE_DOWN; - const real_t p_affine_const_2_1_BLUE_DOWN = tmp_coords_jac_0_BLUE_DOWN*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1) + tmp_coords_jac_7_BLUE_DOWN; - const real_t p_affine_const_2_2_BLUE_DOWN = tmp_coords_jac_0_BLUE_DOWN*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2) + tmp_coords_jac_9_BLUE_DOWN; - const real_t p_affine_const_3_0_BLUE_DOWN = tmp_coords_jac_1_BLUE_DOWN + tmp_coords_jac_4_BLUE_DOWN; - const real_t p_affine_const_3_1_BLUE_DOWN = tmp_coords_jac_2_BLUE_DOWN + tmp_coords_jac_6_BLUE_DOWN; - const real_t p_affine_const_3_2_BLUE_DOWN = tmp_coords_jac_3_BLUE_DOWN + tmp_coords_jac_8_BLUE_DOWN; - const real_t jac_affine_0_0_BLUE_DOWN = -p_affine_const_0_0_BLUE_DOWN + p_affine_const_1_0_BLUE_DOWN; - const real_t jac_affine_0_1_BLUE_DOWN = -p_affine_const_0_0_BLUE_DOWN + p_affine_const_2_0_BLUE_DOWN; - const real_t jac_affine_0_2_BLUE_DOWN = -p_affine_const_0_0_BLUE_DOWN + p_affine_const_3_0_BLUE_DOWN; - const real_t jac_affine_1_0_BLUE_DOWN = -p_affine_const_0_1_BLUE_DOWN + p_affine_const_1_1_BLUE_DOWN; - const real_t jac_affine_1_1_BLUE_DOWN = -p_affine_const_0_1_BLUE_DOWN + p_affine_const_2_1_BLUE_DOWN; - const real_t tmp_coords_jac_14_BLUE_DOWN = jac_affine_0_2_BLUE_DOWN*jac_affine_1_1_BLUE_DOWN; - const real_t jac_affine_1_2_BLUE_DOWN = -p_affine_const_0_1_BLUE_DOWN + p_affine_const_3_1_BLUE_DOWN; - const real_t tmp_coords_jac_12_BLUE_DOWN = jac_affine_0_1_BLUE_DOWN*jac_affine_1_2_BLUE_DOWN; - const real_t jac_affine_2_0_BLUE_DOWN = -p_affine_const_0_2_BLUE_DOWN + p_affine_const_1_2_BLUE_DOWN; - const real_t jac_affine_2_1_BLUE_DOWN = -p_affine_const_0_2_BLUE_DOWN + p_affine_const_2_2_BLUE_DOWN; - const real_t tmp_coords_jac_11_BLUE_DOWN = jac_affine_1_2_BLUE_DOWN*jac_affine_2_1_BLUE_DOWN; - const real_t jac_affine_2_2_BLUE_DOWN = -p_affine_const_0_2_BLUE_DOWN + p_affine_const_3_2_BLUE_DOWN; - const real_t tmp_coords_jac_10_BLUE_DOWN = jac_affine_1_1_BLUE_DOWN*jac_affine_2_2_BLUE_DOWN; - const real_t tmp_coords_jac_13_BLUE_DOWN = jac_affine_0_1_BLUE_DOWN*jac_affine_2_2_BLUE_DOWN; - const real_t tmp_coords_jac_15_BLUE_DOWN = 1.0 / (jac_affine_0_0_BLUE_DOWN*tmp_coords_jac_10_BLUE_DOWN - jac_affine_0_0_BLUE_DOWN*tmp_coords_jac_11_BLUE_DOWN + jac_affine_0_2_BLUE_DOWN*jac_affine_1_0_BLUE_DOWN*jac_affine_2_1_BLUE_DOWN - jac_affine_1_0_BLUE_DOWN*tmp_coords_jac_13_BLUE_DOWN + jac_affine_2_0_BLUE_DOWN*tmp_coords_jac_12_BLUE_DOWN - jac_affine_2_0_BLUE_DOWN*tmp_coords_jac_14_BLUE_DOWN); - const real_t jac_affine_inv_0_0_BLUE_DOWN = tmp_coords_jac_15_BLUE_DOWN*(tmp_coords_jac_10_BLUE_DOWN - tmp_coords_jac_11_BLUE_DOWN); - const real_t jac_affine_inv_0_1_BLUE_DOWN = tmp_coords_jac_15_BLUE_DOWN*(jac_affine_0_2_BLUE_DOWN*jac_affine_2_1_BLUE_DOWN - tmp_coords_jac_13_BLUE_DOWN); - const real_t jac_affine_inv_0_2_BLUE_DOWN = tmp_coords_jac_15_BLUE_DOWN*(tmp_coords_jac_12_BLUE_DOWN - tmp_coords_jac_14_BLUE_DOWN); - const real_t jac_affine_inv_1_0_BLUE_DOWN = tmp_coords_jac_15_BLUE_DOWN*(-jac_affine_1_0_BLUE_DOWN*jac_affine_2_2_BLUE_DOWN + jac_affine_1_2_BLUE_DOWN*jac_affine_2_0_BLUE_DOWN); - const real_t jac_affine_inv_1_1_BLUE_DOWN = tmp_coords_jac_15_BLUE_DOWN*(jac_affine_0_0_BLUE_DOWN*jac_affine_2_2_BLUE_DOWN - jac_affine_0_2_BLUE_DOWN*jac_affine_2_0_BLUE_DOWN); - const real_t jac_affine_inv_1_2_BLUE_DOWN = tmp_coords_jac_15_BLUE_DOWN*(-jac_affine_0_0_BLUE_DOWN*jac_affine_1_2_BLUE_DOWN + jac_affine_0_2_BLUE_DOWN*jac_affine_1_0_BLUE_DOWN); - const real_t jac_affine_inv_2_0_BLUE_DOWN = tmp_coords_jac_15_BLUE_DOWN*(jac_affine_1_0_BLUE_DOWN*jac_affine_2_1_BLUE_DOWN - jac_affine_1_1_BLUE_DOWN*jac_affine_2_0_BLUE_DOWN); - const real_t jac_affine_inv_2_1_BLUE_DOWN = tmp_coords_jac_15_BLUE_DOWN*(-jac_affine_0_0_BLUE_DOWN*jac_affine_2_1_BLUE_DOWN + jac_affine_0_1_BLUE_DOWN*jac_affine_2_0_BLUE_DOWN); - const real_t jac_affine_inv_2_2_BLUE_DOWN = tmp_coords_jac_15_BLUE_DOWN*(jac_affine_0_0_BLUE_DOWN*jac_affine_1_1_BLUE_DOWN - jac_affine_0_1_BLUE_DOWN*jac_affine_1_0_BLUE_DOWN); for (int64_t ctr_2 = 0; ctr_2 < micro_edges_per_macro_edge; ctr_2 += 1) for (int64_t ctr_1 = 0; ctr_1 < -ctr_2 + micro_edges_per_macro_edge; ctr_1 += 1) for (int64_t ctr_0 = 0; ctr_0 < -ctr_1 - ctr_2 + micro_edges_per_macro_edge - 1; ctr_0 += 1) { - const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*((real_t)(ctr_1 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*((real_t)(ctr_2)); - const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*((real_t)(ctr_1 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*((real_t)(ctr_2)); - const real_t p_affine_0_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*((real_t)(ctr_1 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*((real_t)(ctr_2)); - const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*((real_t)(ctr_1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*((real_t)(ctr_2 + 1)); - const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*((real_t)(ctr_1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*((real_t)(ctr_2 + 1)); - const real_t p_affine_1_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*((real_t)(ctr_1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*((real_t)(ctr_2 + 1)); - const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*((real_t)(ctr_1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*((real_t)(ctr_2 + 1)); - const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*((real_t)(ctr_1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*((real_t)(ctr_2 + 1)); - const real_t p_affine_2_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*((real_t)(ctr_1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*((real_t)(ctr_2 + 1)); - const real_t p_affine_3_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*((real_t)(ctr_1 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*((real_t)(ctr_2 + 1)); - const real_t p_affine_3_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*((real_t)(ctr_1 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*((real_t)(ctr_2 + 1)); - const real_t p_affine_3_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*((real_t)(ctr_1 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*((real_t)(ctr_2 + 1)); + + const int64_t phantom_ctr_0 = ctr_0; + real_t _data_float_loop_ctr_array_dim_0[4]; + _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0; + _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1; + _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2; + _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3; + real_t _data_float_loop_ctr_array_dim_1[4]; + _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1; + real_t _data_float_loop_ctr_array_dim_2[4]; + _data_float_loop_ctr_array_dim_2[0] = (real_t) ctr_2; + _data_float_loop_ctr_array_dim_2[1] = (real_t) ctr_2; + _data_float_loop_ctr_array_dim_2[2] = (real_t) ctr_2; + _data_float_loop_ctr_array_dim_2[3] = (real_t) ctr_2; + + const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]; + const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]; + const real_t p_affine_0_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]; + const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_1_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_2_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_3_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_3_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_3_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0; const real_t src_dof_0 = _data_src_vertex_0[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))]; const real_t src_dof_1 = _data_src_vertex_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))]; const real_t src_dof_2 = _data_src_vertex_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1]; @@ -1932,7 +2016,7 @@ void P2VectorToP1ElementwiseGradRhoByRhoDotU::apply_macro_3D( real_t * RESTRICT const real_t tmp_qloop_29 = -tmp_qloop_28 + tmp_qloop_6 - tmp_qloop_7 - tmp_qloop_9; const real_t tmp_qloop_30 = tmp_qloop_0*4.0; const real_t tmp_qloop_31 = -tmp_qloop_11 - tmp_qloop_30 + tmp_qloop_8; - const real_t tmp_qloop_32 = 1.0 / (rho_dof_0*tmp_qloop_12 + rho_dof_1*tmp_qloop_24 + rho_dof_2*tmp_qloop_25 + rho_dof_3*tmp_qloop_26 + rho_dof_4*tmp_qloop_7 + rho_dof_5*tmp_qloop_10 + rho_dof_6*tmp_qloop_9 + rho_dof_7*tmp_qloop_27 + rho_dof_8*tmp_qloop_29 + rho_dof_9*tmp_qloop_31)*_data_q_w[q]; + const real_t tmp_qloop_32 = abs_det_jac_affine_BLUE_DOWN*1.0 / (rho_dof_0*tmp_qloop_12 + rho_dof_1*tmp_qloop_24 + rho_dof_2*tmp_qloop_25 + rho_dof_3*tmp_qloop_26 + rho_dof_4*tmp_qloop_7 + rho_dof_5*tmp_qloop_10 + rho_dof_6*tmp_qloop_9 + rho_dof_7*tmp_qloop_27 + rho_dof_8*tmp_qloop_29 + rho_dof_9*tmp_qloop_31)*_data_q_w[q]; const real_t tmp_qloop_33 = tmp_qloop_32*(1.0 - _data_q_p_0[q] - _data_q_p_1[q] - _data_q_p_2[q]); const real_t tmp_qloop_34 = tmp_qloop_23*tmp_qloop_33; const real_t tmp_qloop_35 = jac_affine_inv_0_1_BLUE_DOWN*tmp_qloop_18 + jac_affine_inv_1_1_BLUE_DOWN*tmp_qloop_21 + jac_affine_inv_2_1_BLUE_DOWN*tmp_qloop_22; @@ -2212,67 +2296,87 @@ void P2VectorToP1ElementwiseGradRhoByRhoDotU::apply_macro_3D( real_t * RESTRICT _data_dst[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))] = elMatVec_3 + _data_dst[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))]; } } + const real_t tmp_coords_jac_0_GREEN_UP = 1.0 / (micro_edges_per_macro_edge_float)*1.0; + const real_t tmp_coords_jac_1_GREEN_UP = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_GREEN_UP*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0); + const real_t tmp_coords_jac_2_GREEN_UP = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_GREEN_UP*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1); + const real_t tmp_coords_jac_3_GREEN_UP = macro_vertex_coord_id_0comp2 + tmp_coords_jac_0_GREEN_UP*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2); + const real_t tmp_coords_jac_4_GREEN_UP = tmp_coords_jac_0_GREEN_UP*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0); + const real_t tmp_coords_jac_5_GREEN_UP = tmp_coords_jac_0_GREEN_UP*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1); + const real_t tmp_coords_jac_6_GREEN_UP = tmp_coords_jac_0_GREEN_UP*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2); + const real_t p_affine_const_0_0_GREEN_UP = tmp_coords_jac_1_GREEN_UP; + const real_t p_affine_const_0_1_GREEN_UP = tmp_coords_jac_2_GREEN_UP; + const real_t p_affine_const_0_2_GREEN_UP = tmp_coords_jac_3_GREEN_UP; + const real_t p_affine_const_1_0_GREEN_UP = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_GREEN_UP*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0); + const real_t p_affine_const_1_1_GREEN_UP = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_GREEN_UP*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1); + const real_t p_affine_const_1_2_GREEN_UP = macro_vertex_coord_id_0comp2 + tmp_coords_jac_0_GREEN_UP*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2); + const real_t p_affine_const_2_0_GREEN_UP = macro_vertex_coord_id_0comp0 + tmp_coords_jac_4_GREEN_UP; + const real_t p_affine_const_2_1_GREEN_UP = macro_vertex_coord_id_0comp1 + tmp_coords_jac_5_GREEN_UP; + const real_t p_affine_const_2_2_GREEN_UP = macro_vertex_coord_id_0comp2 + tmp_coords_jac_6_GREEN_UP; + const real_t p_affine_const_3_0_GREEN_UP = tmp_coords_jac_1_GREEN_UP + tmp_coords_jac_4_GREEN_UP; + const real_t p_affine_const_3_1_GREEN_UP = tmp_coords_jac_2_GREEN_UP + tmp_coords_jac_5_GREEN_UP; + const real_t p_affine_const_3_2_GREEN_UP = tmp_coords_jac_3_GREEN_UP + tmp_coords_jac_6_GREEN_UP; + const real_t jac_affine_0_0_GREEN_UP = -p_affine_const_0_0_GREEN_UP + p_affine_const_1_0_GREEN_UP; + const real_t jac_affine_0_1_GREEN_UP = -p_affine_const_0_0_GREEN_UP + p_affine_const_2_0_GREEN_UP; + const real_t jac_affine_0_2_GREEN_UP = -p_affine_const_0_0_GREEN_UP + p_affine_const_3_0_GREEN_UP; + const real_t jac_affine_1_0_GREEN_UP = -p_affine_const_0_1_GREEN_UP + p_affine_const_1_1_GREEN_UP; + const real_t jac_affine_1_1_GREEN_UP = -p_affine_const_0_1_GREEN_UP + p_affine_const_2_1_GREEN_UP; + const real_t tmp_coords_jac_11_GREEN_UP = jac_affine_0_2_GREEN_UP*jac_affine_1_1_GREEN_UP; + const real_t jac_affine_1_2_GREEN_UP = -p_affine_const_0_1_GREEN_UP + p_affine_const_3_1_GREEN_UP; + const real_t tmp_coords_jac_9_GREEN_UP = jac_affine_0_1_GREEN_UP*jac_affine_1_2_GREEN_UP; + const real_t jac_affine_2_0_GREEN_UP = -p_affine_const_0_2_GREEN_UP + p_affine_const_1_2_GREEN_UP; + const real_t jac_affine_2_1_GREEN_UP = -p_affine_const_0_2_GREEN_UP + p_affine_const_2_2_GREEN_UP; + const real_t tmp_coords_jac_8_GREEN_UP = jac_affine_1_2_GREEN_UP*jac_affine_2_1_GREEN_UP; + const real_t jac_affine_2_2_GREEN_UP = -p_affine_const_0_2_GREEN_UP + p_affine_const_3_2_GREEN_UP; + const real_t tmp_coords_jac_7_GREEN_UP = jac_affine_1_1_GREEN_UP*jac_affine_2_2_GREEN_UP; + const real_t tmp_coords_jac_10_GREEN_UP = jac_affine_0_1_GREEN_UP*jac_affine_2_2_GREEN_UP; + const real_t tmp_coords_jac_12_GREEN_UP = jac_affine_0_0_GREEN_UP*tmp_coords_jac_7_GREEN_UP - jac_affine_0_0_GREEN_UP*tmp_coords_jac_8_GREEN_UP + jac_affine_0_2_GREEN_UP*jac_affine_1_0_GREEN_UP*jac_affine_2_1_GREEN_UP - jac_affine_1_0_GREEN_UP*tmp_coords_jac_10_GREEN_UP - jac_affine_2_0_GREEN_UP*tmp_coords_jac_11_GREEN_UP + jac_affine_2_0_GREEN_UP*tmp_coords_jac_9_GREEN_UP; + const real_t tmp_coords_jac_13_GREEN_UP = 1.0 / (tmp_coords_jac_12_GREEN_UP); + const real_t jac_affine_inv_0_0_GREEN_UP = tmp_coords_jac_13_GREEN_UP*(tmp_coords_jac_7_GREEN_UP - tmp_coords_jac_8_GREEN_UP); + const real_t jac_affine_inv_0_1_GREEN_UP = tmp_coords_jac_13_GREEN_UP*(jac_affine_0_2_GREEN_UP*jac_affine_2_1_GREEN_UP - tmp_coords_jac_10_GREEN_UP); + const real_t jac_affine_inv_0_2_GREEN_UP = tmp_coords_jac_13_GREEN_UP*(-tmp_coords_jac_11_GREEN_UP + tmp_coords_jac_9_GREEN_UP); + const real_t jac_affine_inv_1_0_GREEN_UP = tmp_coords_jac_13_GREEN_UP*(-jac_affine_1_0_GREEN_UP*jac_affine_2_2_GREEN_UP + jac_affine_1_2_GREEN_UP*jac_affine_2_0_GREEN_UP); + const real_t jac_affine_inv_1_1_GREEN_UP = tmp_coords_jac_13_GREEN_UP*(jac_affine_0_0_GREEN_UP*jac_affine_2_2_GREEN_UP - jac_affine_0_2_GREEN_UP*jac_affine_2_0_GREEN_UP); + const real_t jac_affine_inv_1_2_GREEN_UP = tmp_coords_jac_13_GREEN_UP*(-jac_affine_0_0_GREEN_UP*jac_affine_1_2_GREEN_UP + jac_affine_0_2_GREEN_UP*jac_affine_1_0_GREEN_UP); + const real_t jac_affine_inv_2_0_GREEN_UP = tmp_coords_jac_13_GREEN_UP*(jac_affine_1_0_GREEN_UP*jac_affine_2_1_GREEN_UP - jac_affine_1_1_GREEN_UP*jac_affine_2_0_GREEN_UP); + const real_t jac_affine_inv_2_1_GREEN_UP = tmp_coords_jac_13_GREEN_UP*(-jac_affine_0_0_GREEN_UP*jac_affine_2_1_GREEN_UP + jac_affine_0_1_GREEN_UP*jac_affine_2_0_GREEN_UP); + const real_t jac_affine_inv_2_2_GREEN_UP = tmp_coords_jac_13_GREEN_UP*(jac_affine_0_0_GREEN_UP*jac_affine_1_1_GREEN_UP - jac_affine_0_1_GREEN_UP*jac_affine_1_0_GREEN_UP); + const real_t abs_det_jac_affine_GREEN_UP = abs(tmp_coords_jac_12_GREEN_UP); { /* CellType.GREEN_UP */ - const real_t tmp_coords_jac_0_GREEN_UP = 1.0 / (micro_edges_per_macro_edge_float)*1.0; - const real_t tmp_coords_jac_1_GREEN_UP = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_GREEN_UP*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0); - const real_t tmp_coords_jac_2_GREEN_UP = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_GREEN_UP*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1); - const real_t tmp_coords_jac_3_GREEN_UP = macro_vertex_coord_id_0comp2 + tmp_coords_jac_0_GREEN_UP*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2); - const real_t tmp_coords_jac_4_GREEN_UP = tmp_coords_jac_0_GREEN_UP*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0); - const real_t tmp_coords_jac_5_GREEN_UP = tmp_coords_jac_0_GREEN_UP*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1); - const real_t tmp_coords_jac_6_GREEN_UP = tmp_coords_jac_0_GREEN_UP*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2); - const real_t p_affine_const_0_0_GREEN_UP = tmp_coords_jac_1_GREEN_UP; - const real_t p_affine_const_0_1_GREEN_UP = tmp_coords_jac_2_GREEN_UP; - const real_t p_affine_const_0_2_GREEN_UP = tmp_coords_jac_3_GREEN_UP; - const real_t p_affine_const_1_0_GREEN_UP = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_GREEN_UP*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0); - const real_t p_affine_const_1_1_GREEN_UP = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_GREEN_UP*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1); - const real_t p_affine_const_1_2_GREEN_UP = macro_vertex_coord_id_0comp2 + tmp_coords_jac_0_GREEN_UP*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2); - const real_t p_affine_const_2_0_GREEN_UP = macro_vertex_coord_id_0comp0 + tmp_coords_jac_4_GREEN_UP; - const real_t p_affine_const_2_1_GREEN_UP = macro_vertex_coord_id_0comp1 + tmp_coords_jac_5_GREEN_UP; - const real_t p_affine_const_2_2_GREEN_UP = macro_vertex_coord_id_0comp2 + tmp_coords_jac_6_GREEN_UP; - const real_t p_affine_const_3_0_GREEN_UP = tmp_coords_jac_1_GREEN_UP + tmp_coords_jac_4_GREEN_UP; - const real_t p_affine_const_3_1_GREEN_UP = tmp_coords_jac_2_GREEN_UP + tmp_coords_jac_5_GREEN_UP; - const real_t p_affine_const_3_2_GREEN_UP = tmp_coords_jac_3_GREEN_UP + tmp_coords_jac_6_GREEN_UP; - const real_t jac_affine_0_0_GREEN_UP = -p_affine_const_0_0_GREEN_UP + p_affine_const_1_0_GREEN_UP; - const real_t jac_affine_0_1_GREEN_UP = -p_affine_const_0_0_GREEN_UP + p_affine_const_2_0_GREEN_UP; - const real_t jac_affine_0_2_GREEN_UP = -p_affine_const_0_0_GREEN_UP + p_affine_const_3_0_GREEN_UP; - const real_t jac_affine_1_0_GREEN_UP = -p_affine_const_0_1_GREEN_UP + p_affine_const_1_1_GREEN_UP; - const real_t jac_affine_1_1_GREEN_UP = -p_affine_const_0_1_GREEN_UP + p_affine_const_2_1_GREEN_UP; - const real_t tmp_coords_jac_11_GREEN_UP = jac_affine_0_2_GREEN_UP*jac_affine_1_1_GREEN_UP; - const real_t jac_affine_1_2_GREEN_UP = -p_affine_const_0_1_GREEN_UP + p_affine_const_3_1_GREEN_UP; - const real_t tmp_coords_jac_9_GREEN_UP = jac_affine_0_1_GREEN_UP*jac_affine_1_2_GREEN_UP; - const real_t jac_affine_2_0_GREEN_UP = -p_affine_const_0_2_GREEN_UP + p_affine_const_1_2_GREEN_UP; - const real_t jac_affine_2_1_GREEN_UP = -p_affine_const_0_2_GREEN_UP + p_affine_const_2_2_GREEN_UP; - const real_t tmp_coords_jac_8_GREEN_UP = jac_affine_1_2_GREEN_UP*jac_affine_2_1_GREEN_UP; - const real_t jac_affine_2_2_GREEN_UP = -p_affine_const_0_2_GREEN_UP + p_affine_const_3_2_GREEN_UP; - const real_t tmp_coords_jac_7_GREEN_UP = jac_affine_1_1_GREEN_UP*jac_affine_2_2_GREEN_UP; - const real_t tmp_coords_jac_10_GREEN_UP = jac_affine_0_1_GREEN_UP*jac_affine_2_2_GREEN_UP; - const real_t tmp_coords_jac_12_GREEN_UP = 1.0 / (jac_affine_0_0_GREEN_UP*tmp_coords_jac_7_GREEN_UP - jac_affine_0_0_GREEN_UP*tmp_coords_jac_8_GREEN_UP + jac_affine_0_2_GREEN_UP*jac_affine_1_0_GREEN_UP*jac_affine_2_1_GREEN_UP - jac_affine_1_0_GREEN_UP*tmp_coords_jac_10_GREEN_UP - jac_affine_2_0_GREEN_UP*tmp_coords_jac_11_GREEN_UP + jac_affine_2_0_GREEN_UP*tmp_coords_jac_9_GREEN_UP); - const real_t jac_affine_inv_0_0_GREEN_UP = tmp_coords_jac_12_GREEN_UP*(tmp_coords_jac_7_GREEN_UP - tmp_coords_jac_8_GREEN_UP); - const real_t jac_affine_inv_0_1_GREEN_UP = tmp_coords_jac_12_GREEN_UP*(jac_affine_0_2_GREEN_UP*jac_affine_2_1_GREEN_UP - tmp_coords_jac_10_GREEN_UP); - const real_t jac_affine_inv_0_2_GREEN_UP = tmp_coords_jac_12_GREEN_UP*(-tmp_coords_jac_11_GREEN_UP + tmp_coords_jac_9_GREEN_UP); - const real_t jac_affine_inv_1_0_GREEN_UP = tmp_coords_jac_12_GREEN_UP*(-jac_affine_1_0_GREEN_UP*jac_affine_2_2_GREEN_UP + jac_affine_1_2_GREEN_UP*jac_affine_2_0_GREEN_UP); - const real_t jac_affine_inv_1_1_GREEN_UP = tmp_coords_jac_12_GREEN_UP*(jac_affine_0_0_GREEN_UP*jac_affine_2_2_GREEN_UP - jac_affine_0_2_GREEN_UP*jac_affine_2_0_GREEN_UP); - const real_t jac_affine_inv_1_2_GREEN_UP = tmp_coords_jac_12_GREEN_UP*(-jac_affine_0_0_GREEN_UP*jac_affine_1_2_GREEN_UP + jac_affine_0_2_GREEN_UP*jac_affine_1_0_GREEN_UP); - const real_t jac_affine_inv_2_0_GREEN_UP = tmp_coords_jac_12_GREEN_UP*(jac_affine_1_0_GREEN_UP*jac_affine_2_1_GREEN_UP - jac_affine_1_1_GREEN_UP*jac_affine_2_0_GREEN_UP); - const real_t jac_affine_inv_2_1_GREEN_UP = tmp_coords_jac_12_GREEN_UP*(-jac_affine_0_0_GREEN_UP*jac_affine_2_1_GREEN_UP + jac_affine_0_1_GREEN_UP*jac_affine_2_0_GREEN_UP); - const real_t jac_affine_inv_2_2_GREEN_UP = tmp_coords_jac_12_GREEN_UP*(jac_affine_0_0_GREEN_UP*jac_affine_1_1_GREEN_UP - jac_affine_0_1_GREEN_UP*jac_affine_1_0_GREEN_UP); for (int64_t ctr_2 = 0; ctr_2 < micro_edges_per_macro_edge; ctr_2 += 1) for (int64_t ctr_1 = 0; ctr_1 < -ctr_2 + micro_edges_per_macro_edge; ctr_1 += 1) for (int64_t ctr_0 = 0; ctr_0 < -ctr_1 - ctr_2 + micro_edges_per_macro_edge - 1; ctr_0 += 1) { - const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*((real_t)(ctr_1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*((real_t)(ctr_2)); - const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*((real_t)(ctr_1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*((real_t)(ctr_2)); - const real_t p_affine_0_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*((real_t)(ctr_1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*((real_t)(ctr_2)); - const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*((real_t)(ctr_1 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*((real_t)(ctr_2)); - const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*((real_t)(ctr_1 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*((real_t)(ctr_2)); - const real_t p_affine_1_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*((real_t)(ctr_1 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*((real_t)(ctr_2)); - const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*((real_t)(ctr_1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*((real_t)(ctr_2 + 1)); - const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*((real_t)(ctr_1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*((real_t)(ctr_2 + 1)); - const real_t p_affine_2_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*((real_t)(ctr_1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*((real_t)(ctr_2 + 1)); - const real_t p_affine_3_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*((real_t)(ctr_1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*((real_t)(ctr_2 + 1)); - const real_t p_affine_3_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*((real_t)(ctr_1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*((real_t)(ctr_2 + 1)); - const real_t p_affine_3_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*((real_t)(ctr_1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*((real_t)(ctr_2 + 1)); + + const int64_t phantom_ctr_0 = ctr_0; + real_t _data_float_loop_ctr_array_dim_0[4]; + _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0; + _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1; + _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2; + _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3; + real_t _data_float_loop_ctr_array_dim_1[4]; + _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1; + real_t _data_float_loop_ctr_array_dim_2[4]; + _data_float_loop_ctr_array_dim_2[0] = (real_t) ctr_2; + _data_float_loop_ctr_array_dim_2[1] = (real_t) ctr_2; + _data_float_loop_ctr_array_dim_2[2] = (real_t) ctr_2; + _data_float_loop_ctr_array_dim_2[3] = (real_t) ctr_2; + + const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]; + const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]; + const real_t p_affine_0_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]; + const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]; + const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]; + const real_t p_affine_1_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]; + const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_2_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_3_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_3_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_3_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0; const real_t src_dof_0 = _data_src_vertex_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1]; const real_t src_dof_1 = _data_src_vertex_0[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))]; const real_t src_dof_2 = _data_src_vertex_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))]; @@ -2467,7 +2571,7 @@ void P2VectorToP1ElementwiseGradRhoByRhoDotU::apply_macro_3D( real_t * RESTRICT const real_t tmp_qloop_29 = -tmp_qloop_28 + tmp_qloop_6 - tmp_qloop_7 - tmp_qloop_9; const real_t tmp_qloop_30 = tmp_qloop_0*4.0; const real_t tmp_qloop_31 = -tmp_qloop_11 - tmp_qloop_30 + tmp_qloop_8; - const real_t tmp_qloop_32 = 1.0 / (rho_dof_0*tmp_qloop_12 + rho_dof_1*tmp_qloop_24 + rho_dof_2*tmp_qloop_25 + rho_dof_3*tmp_qloop_26 + rho_dof_4*tmp_qloop_7 + rho_dof_5*tmp_qloop_10 + rho_dof_6*tmp_qloop_9 + rho_dof_7*tmp_qloop_27 + rho_dof_8*tmp_qloop_29 + rho_dof_9*tmp_qloop_31)*_data_q_w[q]; + const real_t tmp_qloop_32 = abs_det_jac_affine_GREEN_UP*1.0 / (rho_dof_0*tmp_qloop_12 + rho_dof_1*tmp_qloop_24 + rho_dof_2*tmp_qloop_25 + rho_dof_3*tmp_qloop_26 + rho_dof_4*tmp_qloop_7 + rho_dof_5*tmp_qloop_10 + rho_dof_6*tmp_qloop_9 + rho_dof_7*tmp_qloop_27 + rho_dof_8*tmp_qloop_29 + rho_dof_9*tmp_qloop_31)*_data_q_w[q]; const real_t tmp_qloop_33 = tmp_qloop_32*(1.0 - _data_q_p_0[q] - _data_q_p_1[q] - _data_q_p_2[q]); const real_t tmp_qloop_34 = tmp_qloop_23*tmp_qloop_33; const real_t tmp_qloop_35 = jac_affine_inv_0_1_GREEN_UP*tmp_qloop_18 + jac_affine_inv_1_1_GREEN_UP*tmp_qloop_21 + jac_affine_inv_2_1_GREEN_UP*tmp_qloop_22; @@ -2747,70 +2851,90 @@ void P2VectorToP1ElementwiseGradRhoByRhoDotU::apply_macro_3D( real_t * RESTRICT _data_dst[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1] = elMatVec_3 + _data_dst[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1]; } } + const real_t tmp_coords_jac_0_GREEN_DOWN = 1.0 / (micro_edges_per_macro_edge_float)*1.0; + const real_t tmp_coords_jac_1_GREEN_DOWN = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_GREEN_DOWN*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0); + const real_t tmp_coords_jac_2_GREEN_DOWN = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_GREEN_DOWN*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1); + const real_t tmp_coords_jac_3_GREEN_DOWN = macro_vertex_coord_id_0comp2 + tmp_coords_jac_0_GREEN_DOWN*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2); + const real_t tmp_coords_jac_4_GREEN_DOWN = tmp_coords_jac_0_GREEN_DOWN*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0); + const real_t tmp_coords_jac_5_GREEN_DOWN = tmp_coords_jac_0_GREEN_DOWN*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1); + const real_t tmp_coords_jac_6_GREEN_DOWN = tmp_coords_jac_0_GREEN_DOWN*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2); + const real_t tmp_coords_jac_7_GREEN_DOWN = tmp_coords_jac_0_GREEN_DOWN*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0); + const real_t tmp_coords_jac_8_GREEN_DOWN = tmp_coords_jac_0_GREEN_DOWN*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1); + const real_t tmp_coords_jac_9_GREEN_DOWN = tmp_coords_jac_0_GREEN_DOWN*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2); + const real_t p_affine_const_0_0_GREEN_DOWN = tmp_coords_jac_1_GREEN_DOWN; + const real_t p_affine_const_0_1_GREEN_DOWN = tmp_coords_jac_2_GREEN_DOWN; + const real_t p_affine_const_0_2_GREEN_DOWN = tmp_coords_jac_3_GREEN_DOWN; + const real_t p_affine_const_1_0_GREEN_DOWN = tmp_coords_jac_1_GREEN_DOWN + tmp_coords_jac_4_GREEN_DOWN; + const real_t p_affine_const_1_1_GREEN_DOWN = tmp_coords_jac_2_GREEN_DOWN + tmp_coords_jac_5_GREEN_DOWN; + const real_t p_affine_const_1_2_GREEN_DOWN = tmp_coords_jac_3_GREEN_DOWN + tmp_coords_jac_6_GREEN_DOWN; + const real_t p_affine_const_2_0_GREEN_DOWN = macro_vertex_coord_id_0comp0 + tmp_coords_jac_4_GREEN_DOWN + tmp_coords_jac_7_GREEN_DOWN; + const real_t p_affine_const_2_1_GREEN_DOWN = macro_vertex_coord_id_0comp1 + tmp_coords_jac_5_GREEN_DOWN + tmp_coords_jac_8_GREEN_DOWN; + const real_t p_affine_const_2_2_GREEN_DOWN = macro_vertex_coord_id_0comp2 + tmp_coords_jac_6_GREEN_DOWN + tmp_coords_jac_9_GREEN_DOWN; + const real_t p_affine_const_3_0_GREEN_DOWN = tmp_coords_jac_1_GREEN_DOWN + tmp_coords_jac_7_GREEN_DOWN; + const real_t p_affine_const_3_1_GREEN_DOWN = tmp_coords_jac_2_GREEN_DOWN + tmp_coords_jac_8_GREEN_DOWN; + const real_t p_affine_const_3_2_GREEN_DOWN = tmp_coords_jac_3_GREEN_DOWN + tmp_coords_jac_9_GREEN_DOWN; + const real_t jac_affine_0_0_GREEN_DOWN = -p_affine_const_0_0_GREEN_DOWN + p_affine_const_1_0_GREEN_DOWN; + const real_t jac_affine_0_1_GREEN_DOWN = -p_affine_const_0_0_GREEN_DOWN + p_affine_const_2_0_GREEN_DOWN; + const real_t jac_affine_0_2_GREEN_DOWN = -p_affine_const_0_0_GREEN_DOWN + p_affine_const_3_0_GREEN_DOWN; + const real_t jac_affine_1_0_GREEN_DOWN = -p_affine_const_0_1_GREEN_DOWN + p_affine_const_1_1_GREEN_DOWN; + const real_t jac_affine_1_1_GREEN_DOWN = -p_affine_const_0_1_GREEN_DOWN + p_affine_const_2_1_GREEN_DOWN; + const real_t tmp_coords_jac_14_GREEN_DOWN = jac_affine_0_2_GREEN_DOWN*jac_affine_1_1_GREEN_DOWN; + const real_t jac_affine_1_2_GREEN_DOWN = -p_affine_const_0_1_GREEN_DOWN + p_affine_const_3_1_GREEN_DOWN; + const real_t tmp_coords_jac_12_GREEN_DOWN = jac_affine_0_1_GREEN_DOWN*jac_affine_1_2_GREEN_DOWN; + const real_t jac_affine_2_0_GREEN_DOWN = -p_affine_const_0_2_GREEN_DOWN + p_affine_const_1_2_GREEN_DOWN; + const real_t jac_affine_2_1_GREEN_DOWN = -p_affine_const_0_2_GREEN_DOWN + p_affine_const_2_2_GREEN_DOWN; + const real_t tmp_coords_jac_11_GREEN_DOWN = jac_affine_1_2_GREEN_DOWN*jac_affine_2_1_GREEN_DOWN; + const real_t jac_affine_2_2_GREEN_DOWN = -p_affine_const_0_2_GREEN_DOWN + p_affine_const_3_2_GREEN_DOWN; + const real_t tmp_coords_jac_10_GREEN_DOWN = jac_affine_1_1_GREEN_DOWN*jac_affine_2_2_GREEN_DOWN; + const real_t tmp_coords_jac_13_GREEN_DOWN = jac_affine_0_1_GREEN_DOWN*jac_affine_2_2_GREEN_DOWN; + const real_t tmp_coords_jac_15_GREEN_DOWN = jac_affine_0_0_GREEN_DOWN*tmp_coords_jac_10_GREEN_DOWN - jac_affine_0_0_GREEN_DOWN*tmp_coords_jac_11_GREEN_DOWN + jac_affine_0_2_GREEN_DOWN*jac_affine_1_0_GREEN_DOWN*jac_affine_2_1_GREEN_DOWN - jac_affine_1_0_GREEN_DOWN*tmp_coords_jac_13_GREEN_DOWN + jac_affine_2_0_GREEN_DOWN*tmp_coords_jac_12_GREEN_DOWN - jac_affine_2_0_GREEN_DOWN*tmp_coords_jac_14_GREEN_DOWN; + const real_t tmp_coords_jac_16_GREEN_DOWN = 1.0 / (tmp_coords_jac_15_GREEN_DOWN); + const real_t jac_affine_inv_0_0_GREEN_DOWN = tmp_coords_jac_16_GREEN_DOWN*(tmp_coords_jac_10_GREEN_DOWN - tmp_coords_jac_11_GREEN_DOWN); + const real_t jac_affine_inv_0_1_GREEN_DOWN = tmp_coords_jac_16_GREEN_DOWN*(jac_affine_0_2_GREEN_DOWN*jac_affine_2_1_GREEN_DOWN - tmp_coords_jac_13_GREEN_DOWN); + const real_t jac_affine_inv_0_2_GREEN_DOWN = tmp_coords_jac_16_GREEN_DOWN*(tmp_coords_jac_12_GREEN_DOWN - tmp_coords_jac_14_GREEN_DOWN); + const real_t jac_affine_inv_1_0_GREEN_DOWN = tmp_coords_jac_16_GREEN_DOWN*(-jac_affine_1_0_GREEN_DOWN*jac_affine_2_2_GREEN_DOWN + jac_affine_1_2_GREEN_DOWN*jac_affine_2_0_GREEN_DOWN); + const real_t jac_affine_inv_1_1_GREEN_DOWN = tmp_coords_jac_16_GREEN_DOWN*(jac_affine_0_0_GREEN_DOWN*jac_affine_2_2_GREEN_DOWN - jac_affine_0_2_GREEN_DOWN*jac_affine_2_0_GREEN_DOWN); + const real_t jac_affine_inv_1_2_GREEN_DOWN = tmp_coords_jac_16_GREEN_DOWN*(-jac_affine_0_0_GREEN_DOWN*jac_affine_1_2_GREEN_DOWN + jac_affine_0_2_GREEN_DOWN*jac_affine_1_0_GREEN_DOWN); + const real_t jac_affine_inv_2_0_GREEN_DOWN = tmp_coords_jac_16_GREEN_DOWN*(jac_affine_1_0_GREEN_DOWN*jac_affine_2_1_GREEN_DOWN - jac_affine_1_1_GREEN_DOWN*jac_affine_2_0_GREEN_DOWN); + const real_t jac_affine_inv_2_1_GREEN_DOWN = tmp_coords_jac_16_GREEN_DOWN*(-jac_affine_0_0_GREEN_DOWN*jac_affine_2_1_GREEN_DOWN + jac_affine_0_1_GREEN_DOWN*jac_affine_2_0_GREEN_DOWN); + const real_t jac_affine_inv_2_2_GREEN_DOWN = tmp_coords_jac_16_GREEN_DOWN*(jac_affine_0_0_GREEN_DOWN*jac_affine_1_1_GREEN_DOWN - jac_affine_0_1_GREEN_DOWN*jac_affine_1_0_GREEN_DOWN); + const real_t abs_det_jac_affine_GREEN_DOWN = abs(tmp_coords_jac_15_GREEN_DOWN); { /* CellType.GREEN_DOWN */ - const real_t tmp_coords_jac_0_GREEN_DOWN = 1.0 / (micro_edges_per_macro_edge_float)*1.0; - const real_t tmp_coords_jac_1_GREEN_DOWN = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_GREEN_DOWN*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0); - const real_t tmp_coords_jac_2_GREEN_DOWN = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_GREEN_DOWN*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1); - const real_t tmp_coords_jac_3_GREEN_DOWN = macro_vertex_coord_id_0comp2 + tmp_coords_jac_0_GREEN_DOWN*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2); - const real_t tmp_coords_jac_4_GREEN_DOWN = tmp_coords_jac_0_GREEN_DOWN*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0); - const real_t tmp_coords_jac_5_GREEN_DOWN = tmp_coords_jac_0_GREEN_DOWN*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1); - const real_t tmp_coords_jac_6_GREEN_DOWN = tmp_coords_jac_0_GREEN_DOWN*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2); - const real_t tmp_coords_jac_7_GREEN_DOWN = tmp_coords_jac_0_GREEN_DOWN*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0); - const real_t tmp_coords_jac_8_GREEN_DOWN = tmp_coords_jac_0_GREEN_DOWN*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1); - const real_t tmp_coords_jac_9_GREEN_DOWN = tmp_coords_jac_0_GREEN_DOWN*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2); - const real_t p_affine_const_0_0_GREEN_DOWN = tmp_coords_jac_1_GREEN_DOWN; - const real_t p_affine_const_0_1_GREEN_DOWN = tmp_coords_jac_2_GREEN_DOWN; - const real_t p_affine_const_0_2_GREEN_DOWN = tmp_coords_jac_3_GREEN_DOWN; - const real_t p_affine_const_1_0_GREEN_DOWN = tmp_coords_jac_1_GREEN_DOWN + tmp_coords_jac_4_GREEN_DOWN; - const real_t p_affine_const_1_1_GREEN_DOWN = tmp_coords_jac_2_GREEN_DOWN + tmp_coords_jac_5_GREEN_DOWN; - const real_t p_affine_const_1_2_GREEN_DOWN = tmp_coords_jac_3_GREEN_DOWN + tmp_coords_jac_6_GREEN_DOWN; - const real_t p_affine_const_2_0_GREEN_DOWN = macro_vertex_coord_id_0comp0 + tmp_coords_jac_4_GREEN_DOWN + tmp_coords_jac_7_GREEN_DOWN; - const real_t p_affine_const_2_1_GREEN_DOWN = macro_vertex_coord_id_0comp1 + tmp_coords_jac_5_GREEN_DOWN + tmp_coords_jac_8_GREEN_DOWN; - const real_t p_affine_const_2_2_GREEN_DOWN = macro_vertex_coord_id_0comp2 + tmp_coords_jac_6_GREEN_DOWN + tmp_coords_jac_9_GREEN_DOWN; - const real_t p_affine_const_3_0_GREEN_DOWN = tmp_coords_jac_1_GREEN_DOWN + tmp_coords_jac_7_GREEN_DOWN; - const real_t p_affine_const_3_1_GREEN_DOWN = tmp_coords_jac_2_GREEN_DOWN + tmp_coords_jac_8_GREEN_DOWN; - const real_t p_affine_const_3_2_GREEN_DOWN = tmp_coords_jac_3_GREEN_DOWN + tmp_coords_jac_9_GREEN_DOWN; - const real_t jac_affine_0_0_GREEN_DOWN = -p_affine_const_0_0_GREEN_DOWN + p_affine_const_1_0_GREEN_DOWN; - const real_t jac_affine_0_1_GREEN_DOWN = -p_affine_const_0_0_GREEN_DOWN + p_affine_const_2_0_GREEN_DOWN; - const real_t jac_affine_0_2_GREEN_DOWN = -p_affine_const_0_0_GREEN_DOWN + p_affine_const_3_0_GREEN_DOWN; - const real_t jac_affine_1_0_GREEN_DOWN = -p_affine_const_0_1_GREEN_DOWN + p_affine_const_1_1_GREEN_DOWN; - const real_t jac_affine_1_1_GREEN_DOWN = -p_affine_const_0_1_GREEN_DOWN + p_affine_const_2_1_GREEN_DOWN; - const real_t tmp_coords_jac_14_GREEN_DOWN = jac_affine_0_2_GREEN_DOWN*jac_affine_1_1_GREEN_DOWN; - const real_t jac_affine_1_2_GREEN_DOWN = -p_affine_const_0_1_GREEN_DOWN + p_affine_const_3_1_GREEN_DOWN; - const real_t tmp_coords_jac_12_GREEN_DOWN = jac_affine_0_1_GREEN_DOWN*jac_affine_1_2_GREEN_DOWN; - const real_t jac_affine_2_0_GREEN_DOWN = -p_affine_const_0_2_GREEN_DOWN + p_affine_const_1_2_GREEN_DOWN; - const real_t jac_affine_2_1_GREEN_DOWN = -p_affine_const_0_2_GREEN_DOWN + p_affine_const_2_2_GREEN_DOWN; - const real_t tmp_coords_jac_11_GREEN_DOWN = jac_affine_1_2_GREEN_DOWN*jac_affine_2_1_GREEN_DOWN; - const real_t jac_affine_2_2_GREEN_DOWN = -p_affine_const_0_2_GREEN_DOWN + p_affine_const_3_2_GREEN_DOWN; - const real_t tmp_coords_jac_10_GREEN_DOWN = jac_affine_1_1_GREEN_DOWN*jac_affine_2_2_GREEN_DOWN; - const real_t tmp_coords_jac_13_GREEN_DOWN = jac_affine_0_1_GREEN_DOWN*jac_affine_2_2_GREEN_DOWN; - const real_t tmp_coords_jac_15_GREEN_DOWN = 1.0 / (jac_affine_0_0_GREEN_DOWN*tmp_coords_jac_10_GREEN_DOWN - jac_affine_0_0_GREEN_DOWN*tmp_coords_jac_11_GREEN_DOWN + jac_affine_0_2_GREEN_DOWN*jac_affine_1_0_GREEN_DOWN*jac_affine_2_1_GREEN_DOWN - jac_affine_1_0_GREEN_DOWN*tmp_coords_jac_13_GREEN_DOWN + jac_affine_2_0_GREEN_DOWN*tmp_coords_jac_12_GREEN_DOWN - jac_affine_2_0_GREEN_DOWN*tmp_coords_jac_14_GREEN_DOWN); - const real_t jac_affine_inv_0_0_GREEN_DOWN = tmp_coords_jac_15_GREEN_DOWN*(tmp_coords_jac_10_GREEN_DOWN - tmp_coords_jac_11_GREEN_DOWN); - const real_t jac_affine_inv_0_1_GREEN_DOWN = tmp_coords_jac_15_GREEN_DOWN*(jac_affine_0_2_GREEN_DOWN*jac_affine_2_1_GREEN_DOWN - tmp_coords_jac_13_GREEN_DOWN); - const real_t jac_affine_inv_0_2_GREEN_DOWN = tmp_coords_jac_15_GREEN_DOWN*(tmp_coords_jac_12_GREEN_DOWN - tmp_coords_jac_14_GREEN_DOWN); - const real_t jac_affine_inv_1_0_GREEN_DOWN = tmp_coords_jac_15_GREEN_DOWN*(-jac_affine_1_0_GREEN_DOWN*jac_affine_2_2_GREEN_DOWN + jac_affine_1_2_GREEN_DOWN*jac_affine_2_0_GREEN_DOWN); - const real_t jac_affine_inv_1_1_GREEN_DOWN = tmp_coords_jac_15_GREEN_DOWN*(jac_affine_0_0_GREEN_DOWN*jac_affine_2_2_GREEN_DOWN - jac_affine_0_2_GREEN_DOWN*jac_affine_2_0_GREEN_DOWN); - const real_t jac_affine_inv_1_2_GREEN_DOWN = tmp_coords_jac_15_GREEN_DOWN*(-jac_affine_0_0_GREEN_DOWN*jac_affine_1_2_GREEN_DOWN + jac_affine_0_2_GREEN_DOWN*jac_affine_1_0_GREEN_DOWN); - const real_t jac_affine_inv_2_0_GREEN_DOWN = tmp_coords_jac_15_GREEN_DOWN*(jac_affine_1_0_GREEN_DOWN*jac_affine_2_1_GREEN_DOWN - jac_affine_1_1_GREEN_DOWN*jac_affine_2_0_GREEN_DOWN); - const real_t jac_affine_inv_2_1_GREEN_DOWN = tmp_coords_jac_15_GREEN_DOWN*(-jac_affine_0_0_GREEN_DOWN*jac_affine_2_1_GREEN_DOWN + jac_affine_0_1_GREEN_DOWN*jac_affine_2_0_GREEN_DOWN); - const real_t jac_affine_inv_2_2_GREEN_DOWN = tmp_coords_jac_15_GREEN_DOWN*(jac_affine_0_0_GREEN_DOWN*jac_affine_1_1_GREEN_DOWN - jac_affine_0_1_GREEN_DOWN*jac_affine_1_0_GREEN_DOWN); for (int64_t ctr_2 = 0; ctr_2 < micro_edges_per_macro_edge; ctr_2 += 1) for (int64_t ctr_1 = 0; ctr_1 < -ctr_2 + micro_edges_per_macro_edge; ctr_1 += 1) for (int64_t ctr_0 = 0; ctr_0 < -ctr_1 - ctr_2 + micro_edges_per_macro_edge - 1; ctr_0 += 1) { - const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*((real_t)(ctr_1 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*((real_t)(ctr_2)); - const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*((real_t)(ctr_1 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*((real_t)(ctr_2)); - const real_t p_affine_0_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*((real_t)(ctr_1 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*((real_t)(ctr_2)); - const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*((real_t)(ctr_1 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*((real_t)(ctr_2)); - const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*((real_t)(ctr_1 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*((real_t)(ctr_2)); - const real_t p_affine_1_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*((real_t)(ctr_1 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*((real_t)(ctr_2)); - const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*((real_t)(ctr_1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*((real_t)(ctr_2 + 1)); - const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*((real_t)(ctr_1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*((real_t)(ctr_2 + 1)); - const real_t p_affine_2_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*((real_t)(ctr_1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*((real_t)(ctr_2 + 1)); - const real_t p_affine_3_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*((real_t)(ctr_1 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*((real_t)(ctr_2 + 1)); - const real_t p_affine_3_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*((real_t)(ctr_1 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*((real_t)(ctr_2 + 1)); - const real_t p_affine_3_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*((real_t)(ctr_1 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*((real_t)(ctr_2 + 1)); + + const int64_t phantom_ctr_0 = ctr_0; + real_t _data_float_loop_ctr_array_dim_0[4]; + _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0; + _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1; + _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2; + _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3; + real_t _data_float_loop_ctr_array_dim_1[4]; + _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1; + real_t _data_float_loop_ctr_array_dim_2[4]; + _data_float_loop_ctr_array_dim_2[0] = (real_t) ctr_2; + _data_float_loop_ctr_array_dim_2[1] = (real_t) ctr_2; + _data_float_loop_ctr_array_dim_2[2] = (real_t) ctr_2; + _data_float_loop_ctr_array_dim_2[3] = (real_t) ctr_2; + + const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]; + const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]; + const real_t p_affine_0_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]; + const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]; + const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]; + const real_t p_affine_1_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]; + const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_2_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_3_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_3_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_3_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0; const real_t src_dof_0 = _data_src_vertex_0[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))]; const real_t src_dof_1 = _data_src_vertex_0[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1]; const real_t src_dof_2 = _data_src_vertex_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1]; @@ -3005,7 +3129,7 @@ void P2VectorToP1ElementwiseGradRhoByRhoDotU::apply_macro_3D( real_t * RESTRICT const real_t tmp_qloop_29 = -tmp_qloop_28 + tmp_qloop_6 - tmp_qloop_7 - tmp_qloop_9; const real_t tmp_qloop_30 = tmp_qloop_0*4.0; const real_t tmp_qloop_31 = -tmp_qloop_11 - tmp_qloop_30 + tmp_qloop_8; - const real_t tmp_qloop_32 = 1.0 / (rho_dof_0*tmp_qloop_12 + rho_dof_1*tmp_qloop_24 + rho_dof_2*tmp_qloop_25 + rho_dof_3*tmp_qloop_26 + rho_dof_4*tmp_qloop_7 + rho_dof_5*tmp_qloop_10 + rho_dof_6*tmp_qloop_9 + rho_dof_7*tmp_qloop_27 + rho_dof_8*tmp_qloop_29 + rho_dof_9*tmp_qloop_31)*_data_q_w[q]; + const real_t tmp_qloop_32 = abs_det_jac_affine_GREEN_DOWN*1.0 / (rho_dof_0*tmp_qloop_12 + rho_dof_1*tmp_qloop_24 + rho_dof_2*tmp_qloop_25 + rho_dof_3*tmp_qloop_26 + rho_dof_4*tmp_qloop_7 + rho_dof_5*tmp_qloop_10 + rho_dof_6*tmp_qloop_9 + rho_dof_7*tmp_qloop_27 + rho_dof_8*tmp_qloop_29 + rho_dof_9*tmp_qloop_31)*_data_q_w[q]; const real_t tmp_qloop_33 = tmp_qloop_32*(1.0 - _data_q_p_0[q] - _data_q_p_1[q] - _data_q_p_2[q]); const real_t tmp_qloop_34 = tmp_qloop_23*tmp_qloop_33; const real_t tmp_qloop_35 = jac_affine_inv_0_1_GREEN_DOWN*tmp_qloop_18 + jac_affine_inv_1_1_GREEN_DOWN*tmp_qloop_21 + jac_affine_inv_2_1_GREEN_DOWN*tmp_qloop_22; diff --git a/operators/grad_rho_by_rho_dot_u/noarch/P2VectorToP1ElementwiseGradRhoByRhoDotU_toMatrix_macro_2D.cpp b/operators/grad_rho_by_rho_dot_u/noarch/P2VectorToP1ElementwiseGradRhoByRhoDotU_toMatrix_P2VectorToP1ElementwiseGradRhoByRhoDotU_macro_2D.cpp similarity index 77% rename from operators/grad_rho_by_rho_dot_u/noarch/P2VectorToP1ElementwiseGradRhoByRhoDotU_toMatrix_macro_2D.cpp rename to operators/grad_rho_by_rho_dot_u/noarch/P2VectorToP1ElementwiseGradRhoByRhoDotU_toMatrix_P2VectorToP1ElementwiseGradRhoByRhoDotU_macro_2D.cpp index 8d57d0886cb9b5125b602fb36326a9ed2e7c512d..d13817407729ac76800502046b086bf051d2d9b0 100644 --- a/operators/grad_rho_by_rho_dot_u/noarch/P2VectorToP1ElementwiseGradRhoByRhoDotU_toMatrix_macro_2D.cpp +++ b/operators/grad_rho_by_rho_dot_u/noarch/P2VectorToP1ElementwiseGradRhoByRhoDotU_toMatrix_P2VectorToP1ElementwiseGradRhoByRhoDotU_macro_2D.cpp @@ -45,6 +45,10 @@ + + + + @@ -58,7 +62,7 @@ namespace hyteg { namespace operatorgeneration { -void P2VectorToP1ElementwiseGradRhoByRhoDotU::toMatrix_macro_2D( idx_t * RESTRICT _data_dst, real_t * RESTRICT _data_rhoEdge, real_t * RESTRICT _data_rhoVertex, idx_t * RESTRICT _data_src_edge_0, idx_t * RESTRICT _data_src_edge_1, idx_t * RESTRICT _data_src_vertex_0, idx_t * RESTRICT _data_src_vertex_1, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P2VectorToP1ElementwiseGradRhoByRhoDotU::toMatrix_P2VectorToP1ElementwiseGradRhoByRhoDotU_macro_2D( idx_t * RESTRICT _data_dst, real_t * RESTRICT _data_rhoEdge, real_t * RESTRICT _data_rhoVertex, idx_t * RESTRICT _data_src_edge_0, idx_t * RESTRICT _data_src_edge_1, idx_t * RESTRICT _data_src_vertex_0, idx_t * RESTRICT _data_src_vertex_1, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t _data_q_w [] = {-0.28125, 0.26041666666666669, 0.26041666666666669, 0.26041666666666669}; @@ -67,33 +71,48 @@ void P2VectorToP1ElementwiseGradRhoByRhoDotU::toMatrix_macro_2D( idx_t * RESTRIC const real_t _data_q_p_1 [] = {0.33333333333333331, 0.59999999999999998, 0.20000000000000001, 0.20000000000000001}; + const real_t tmp_coords_jac_0_GRAY = 1.0 / (micro_edges_per_macro_edge_float)*1.0; + const real_t p_affine_const_0_0_GRAY = macro_vertex_coord_id_0comp0; + const real_t p_affine_const_0_1_GRAY = macro_vertex_coord_id_0comp1; + const real_t p_affine_const_1_0_GRAY = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0); + const real_t p_affine_const_1_1_GRAY = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1); + const real_t p_affine_const_2_0_GRAY = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0); + const real_t p_affine_const_2_1_GRAY = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1); + const real_t jac_affine_0_0_GRAY = -p_affine_const_0_0_GRAY + p_affine_const_1_0_GRAY; + const real_t jac_affine_0_1_GRAY = -p_affine_const_0_0_GRAY + p_affine_const_2_0_GRAY; + const real_t jac_affine_1_0_GRAY = -p_affine_const_0_1_GRAY + p_affine_const_1_1_GRAY; + const real_t jac_affine_1_1_GRAY = -p_affine_const_0_1_GRAY + p_affine_const_2_1_GRAY; + const real_t tmp_coords_jac_1_GRAY = jac_affine_0_0_GRAY*jac_affine_1_1_GRAY - jac_affine_0_1_GRAY*jac_affine_1_0_GRAY; + const real_t tmp_coords_jac_2_GRAY = 1.0 / (tmp_coords_jac_1_GRAY); + const real_t jac_affine_inv_0_0_GRAY = jac_affine_1_1_GRAY*tmp_coords_jac_2_GRAY; + const real_t jac_affine_inv_0_1_GRAY = -jac_affine_0_1_GRAY*tmp_coords_jac_2_GRAY; + const real_t jac_affine_inv_1_0_GRAY = -jac_affine_1_0_GRAY*tmp_coords_jac_2_GRAY; + const real_t jac_affine_inv_1_1_GRAY = jac_affine_0_0_GRAY*tmp_coords_jac_2_GRAY; + const real_t abs_det_jac_affine_GRAY = abs(tmp_coords_jac_1_GRAY); { /* FaceType.GRAY */ - const real_t tmp_coords_jac_0_GRAY = 1.0 / (micro_edges_per_macro_edge_float)*1.0; - const real_t p_affine_const_0_0_GRAY = macro_vertex_coord_id_0comp0; - const real_t p_affine_const_0_1_GRAY = macro_vertex_coord_id_0comp1; - const real_t p_affine_const_1_0_GRAY = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0); - const real_t p_affine_const_1_1_GRAY = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1); - const real_t p_affine_const_2_0_GRAY = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0); - const real_t p_affine_const_2_1_GRAY = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1); - const real_t jac_affine_0_0_GRAY = -p_affine_const_0_0_GRAY + p_affine_const_1_0_GRAY; - const real_t jac_affine_0_1_GRAY = -p_affine_const_0_0_GRAY + p_affine_const_2_0_GRAY; - const real_t jac_affine_1_0_GRAY = -p_affine_const_0_1_GRAY + p_affine_const_1_1_GRAY; - const real_t jac_affine_1_1_GRAY = -p_affine_const_0_1_GRAY + p_affine_const_2_1_GRAY; - const real_t tmp_coords_jac_1_GRAY = 1.0 / (jac_affine_0_0_GRAY*jac_affine_1_1_GRAY - jac_affine_0_1_GRAY*jac_affine_1_0_GRAY); - const real_t jac_affine_inv_0_0_GRAY = jac_affine_1_1_GRAY*tmp_coords_jac_1_GRAY; - const real_t jac_affine_inv_0_1_GRAY = -jac_affine_0_1_GRAY*tmp_coords_jac_1_GRAY; - const real_t jac_affine_inv_1_0_GRAY = -jac_affine_1_0_GRAY*tmp_coords_jac_1_GRAY; - const real_t jac_affine_inv_1_1_GRAY = jac_affine_0_0_GRAY*tmp_coords_jac_1_GRAY; for (int64_t ctr_1 = 0; ctr_1 < micro_edges_per_macro_edge; ctr_1 += 1) for (int64_t ctr_0 = 0; ctr_0 < -ctr_1 + micro_edges_per_macro_edge; ctr_0 += 1) { - const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*((real_t)(ctr_1)); - const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*((real_t)(ctr_1)); - const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*((real_t)(ctr_1)); - const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*((real_t)(ctr_1)); - const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*((real_t)(ctr_1 + 1)); - const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*((real_t)(ctr_1 + 1)); + + const int64_t phantom_ctr_0 = ctr_0; + real_t _data_float_loop_ctr_array_dim_0[4]; + _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0; + _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1; + _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2; + _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3; + real_t _data_float_loop_ctr_array_dim_1[4]; + _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1; + + const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]; + const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]; + const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]; + const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]; + const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0; const real_t rho_dof_0 = _data_rhoVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))]; const real_t rho_dof_1 = _data_rhoVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]; const real_t rho_dof_2 = _data_rhoVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]; @@ -155,7 +174,7 @@ void P2VectorToP1ElementwiseGradRhoByRhoDotU::toMatrix_macro_2D( idx_t * RESTRIC const real_t tmp_qloop_14 = -tmp_qloop_1 + tmp_qloop_4*-4.0 + tmp_qloop_7; const real_t tmp_qloop_15 = tmp_qloop_2*4.0; const real_t tmp_qloop_16 = tmp_qloop_0 - tmp_qloop_1 - tmp_qloop_15; - const real_t tmp_qloop_17 = 1.0 / (rho_dof_0*tmp_qloop_6 + rho_dof_1*tmp_qloop_12 + rho_dof_2*tmp_qloop_13 + rho_dof_3*tmp_qloop_1 + rho_dof_4*tmp_qloop_14 + rho_dof_5*tmp_qloop_16)*_data_q_w[q]; + const real_t tmp_qloop_17 = abs_det_jac_affine_GRAY*1.0 / (rho_dof_0*tmp_qloop_6 + rho_dof_1*tmp_qloop_12 + rho_dof_2*tmp_qloop_13 + rho_dof_3*tmp_qloop_1 + rho_dof_4*tmp_qloop_14 + rho_dof_5*tmp_qloop_16)*_data_q_w[q]; const real_t tmp_qloop_18 = tmp_qloop_17*(1.0 - _data_q_p_0[q] - _data_q_p_1[q]); const real_t tmp_qloop_19 = tmp_qloop_11*tmp_qloop_18; const real_t tmp_qloop_20 = jac_affine_inv_0_1_GRAY*tmp_qloop_9 + jac_affine_inv_1_1_GRAY*tmp_qloop_10; @@ -342,37 +361,52 @@ void P2VectorToP1ElementwiseGradRhoByRhoDotU::toMatrix_macro_2D( idx_t * RESTRIC mat->addValues( _data_rowIdx, _data_colIdx, _data_mat ); } } + const real_t tmp_coords_jac_0_BLUE = 1.0 / (micro_edges_per_macro_edge_float)*1.0; + const real_t tmp_coords_jac_1_BLUE = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0); + const real_t tmp_coords_jac_2_BLUE = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1); + const real_t tmp_coords_jac_3_BLUE = tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0); + const real_t tmp_coords_jac_4_BLUE = tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1); + const real_t p_affine_const_0_0_BLUE = tmp_coords_jac_1_BLUE; + const real_t p_affine_const_0_1_BLUE = tmp_coords_jac_2_BLUE; + const real_t p_affine_const_1_0_BLUE = macro_vertex_coord_id_0comp0 + tmp_coords_jac_3_BLUE; + const real_t p_affine_const_1_1_BLUE = macro_vertex_coord_id_0comp1 + tmp_coords_jac_4_BLUE; + const real_t p_affine_const_2_0_BLUE = tmp_coords_jac_1_BLUE + tmp_coords_jac_3_BLUE; + const real_t p_affine_const_2_1_BLUE = tmp_coords_jac_2_BLUE + tmp_coords_jac_4_BLUE; + const real_t jac_affine_0_0_BLUE = -p_affine_const_0_0_BLUE + p_affine_const_1_0_BLUE; + const real_t jac_affine_0_1_BLUE = -p_affine_const_0_0_BLUE + p_affine_const_2_0_BLUE; + const real_t jac_affine_1_0_BLUE = -p_affine_const_0_1_BLUE + p_affine_const_1_1_BLUE; + const real_t jac_affine_1_1_BLUE = -p_affine_const_0_1_BLUE + p_affine_const_2_1_BLUE; + const real_t tmp_coords_jac_5_BLUE = jac_affine_0_0_BLUE*jac_affine_1_1_BLUE - jac_affine_0_1_BLUE*jac_affine_1_0_BLUE; + const real_t tmp_coords_jac_6_BLUE = 1.0 / (tmp_coords_jac_5_BLUE); + const real_t jac_affine_inv_0_0_BLUE = jac_affine_1_1_BLUE*tmp_coords_jac_6_BLUE; + const real_t jac_affine_inv_0_1_BLUE = -jac_affine_0_1_BLUE*tmp_coords_jac_6_BLUE; + const real_t jac_affine_inv_1_0_BLUE = -jac_affine_1_0_BLUE*tmp_coords_jac_6_BLUE; + const real_t jac_affine_inv_1_1_BLUE = jac_affine_0_0_BLUE*tmp_coords_jac_6_BLUE; + const real_t abs_det_jac_affine_BLUE = abs(tmp_coords_jac_5_BLUE); { /* FaceType.BLUE */ - const real_t tmp_coords_jac_0_BLUE = 1.0 / (micro_edges_per_macro_edge_float)*1.0; - const real_t tmp_coords_jac_1_BLUE = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0); - const real_t tmp_coords_jac_2_BLUE = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1); - const real_t tmp_coords_jac_3_BLUE = tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0); - const real_t tmp_coords_jac_4_BLUE = tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1); - const real_t p_affine_const_0_0_BLUE = tmp_coords_jac_1_BLUE; - const real_t p_affine_const_0_1_BLUE = tmp_coords_jac_2_BLUE; - const real_t p_affine_const_1_0_BLUE = macro_vertex_coord_id_0comp0 + tmp_coords_jac_3_BLUE; - const real_t p_affine_const_1_1_BLUE = macro_vertex_coord_id_0comp1 + tmp_coords_jac_4_BLUE; - const real_t p_affine_const_2_0_BLUE = tmp_coords_jac_1_BLUE + tmp_coords_jac_3_BLUE; - const real_t p_affine_const_2_1_BLUE = tmp_coords_jac_2_BLUE + tmp_coords_jac_4_BLUE; - const real_t jac_affine_0_0_BLUE = -p_affine_const_0_0_BLUE + p_affine_const_1_0_BLUE; - const real_t jac_affine_0_1_BLUE = -p_affine_const_0_0_BLUE + p_affine_const_2_0_BLUE; - const real_t jac_affine_1_0_BLUE = -p_affine_const_0_1_BLUE + p_affine_const_1_1_BLUE; - const real_t jac_affine_1_1_BLUE = -p_affine_const_0_1_BLUE + p_affine_const_2_1_BLUE; - const real_t tmp_coords_jac_5_BLUE = 1.0 / (jac_affine_0_0_BLUE*jac_affine_1_1_BLUE - jac_affine_0_1_BLUE*jac_affine_1_0_BLUE); - const real_t jac_affine_inv_0_0_BLUE = jac_affine_1_1_BLUE*tmp_coords_jac_5_BLUE; - const real_t jac_affine_inv_0_1_BLUE = -jac_affine_0_1_BLUE*tmp_coords_jac_5_BLUE; - const real_t jac_affine_inv_1_0_BLUE = -jac_affine_1_0_BLUE*tmp_coords_jac_5_BLUE; - const real_t jac_affine_inv_1_1_BLUE = jac_affine_0_0_BLUE*tmp_coords_jac_5_BLUE; for (int64_t ctr_1 = 0; ctr_1 < micro_edges_per_macro_edge; ctr_1 += 1) for (int64_t ctr_0 = 0; ctr_0 < -ctr_1 + micro_edges_per_macro_edge - 1; ctr_0 += 1) { - const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*((real_t)(ctr_1)); - const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*((real_t)(ctr_1)); - const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*((real_t)(ctr_1 + 1)); - const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*((real_t)(ctr_1 + 1)); - const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*((real_t)(ctr_1 + 1)); - const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*((real_t)(ctr_1 + 1)); + + const int64_t phantom_ctr_0 = ctr_0; + real_t _data_float_loop_ctr_array_dim_0[4]; + _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0; + _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1; + _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2; + _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3; + real_t _data_float_loop_ctr_array_dim_1[4]; + _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1; + + const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]; + const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]; + const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0; const real_t rho_dof_0 = _data_rhoVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]; const real_t rho_dof_1 = _data_rhoVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]; const real_t rho_dof_2 = _data_rhoVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1]; @@ -434,7 +468,7 @@ void P2VectorToP1ElementwiseGradRhoByRhoDotU::toMatrix_macro_2D( idx_t * RESTRIC const real_t tmp_qloop_14 = -tmp_qloop_1 + tmp_qloop_4*-4.0 + tmp_qloop_7; const real_t tmp_qloop_15 = tmp_qloop_2*4.0; const real_t tmp_qloop_16 = tmp_qloop_0 - tmp_qloop_1 - tmp_qloop_15; - const real_t tmp_qloop_17 = 1.0 / (rho_dof_0*tmp_qloop_6 + rho_dof_1*tmp_qloop_12 + rho_dof_2*tmp_qloop_13 + rho_dof_3*tmp_qloop_1 + rho_dof_4*tmp_qloop_14 + rho_dof_5*tmp_qloop_16)*_data_q_w[q]; + const real_t tmp_qloop_17 = abs_det_jac_affine_BLUE*1.0 / (rho_dof_0*tmp_qloop_6 + rho_dof_1*tmp_qloop_12 + rho_dof_2*tmp_qloop_13 + rho_dof_3*tmp_qloop_1 + rho_dof_4*tmp_qloop_14 + rho_dof_5*tmp_qloop_16)*_data_q_w[q]; const real_t tmp_qloop_18 = tmp_qloop_17*(1.0 - _data_q_p_0[q] - _data_q_p_1[q]); const real_t tmp_qloop_19 = tmp_qloop_11*tmp_qloop_18; const real_t tmp_qloop_20 = jac_affine_inv_0_1_BLUE*tmp_qloop_9 + jac_affine_inv_1_1_BLUE*tmp_qloop_10; diff --git a/operators/grad_rho_by_rho_dot_u/noarch/P2VectorToP1ElementwiseGradRhoByRhoDotU_toMatrix_macro_3D.cpp b/operators/grad_rho_by_rho_dot_u/noarch/P2VectorToP1ElementwiseGradRhoByRhoDotU_toMatrix_P2VectorToP1ElementwiseGradRhoByRhoDotU_macro_3D.cpp similarity index 81% rename from operators/grad_rho_by_rho_dot_u/noarch/P2VectorToP1ElementwiseGradRhoByRhoDotU_toMatrix_macro_3D.cpp rename to operators/grad_rho_by_rho_dot_u/noarch/P2VectorToP1ElementwiseGradRhoByRhoDotU_toMatrix_P2VectorToP1ElementwiseGradRhoByRhoDotU_macro_3D.cpp index 0945e0d939022c49e6f992754781d28b7894493a..a624289cd9de0c83e8c7a6f39514c60cae9ec0ff 100644 --- a/operators/grad_rho_by_rho_dot_u/noarch/P2VectorToP1ElementwiseGradRhoByRhoDotU_toMatrix_macro_3D.cpp +++ b/operators/grad_rho_by_rho_dot_u/noarch/P2VectorToP1ElementwiseGradRhoByRhoDotU_toMatrix_P2VectorToP1ElementwiseGradRhoByRhoDotU_macro_3D.cpp @@ -45,6 +45,10 @@ + + + + @@ -58,7 +62,7 @@ namespace hyteg { namespace operatorgeneration { -void P2VectorToP1ElementwiseGradRhoByRhoDotU::toMatrix_macro_3D( idx_t * RESTRICT _data_dst, real_t * RESTRICT _data_rhoEdge, real_t * RESTRICT _data_rhoVertex, idx_t * RESTRICT _data_src_edge_0, idx_t * RESTRICT _data_src_edge_1, idx_t * RESTRICT _data_src_edge_2, idx_t * RESTRICT _data_src_vertex_0, idx_t * RESTRICT _data_src_vertex_1, idx_t * RESTRICT _data_src_vertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P2VectorToP1ElementwiseGradRhoByRhoDotU::toMatrix_P2VectorToP1ElementwiseGradRhoByRhoDotU_macro_3D( idx_t * RESTRICT _data_dst, real_t * RESTRICT _data_rhoEdge, real_t * RESTRICT _data_rhoVertex, idx_t * RESTRICT _data_src_edge_0, idx_t * RESTRICT _data_src_edge_1, idx_t * RESTRICT _data_src_edge_2, idx_t * RESTRICT _data_src_vertex_0, idx_t * RESTRICT _data_src_vertex_1, idx_t * RESTRICT _data_src_vertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t _data_q_w [] = {-0.1333333333333333, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983}; @@ -69,61 +73,81 @@ void P2VectorToP1ElementwiseGradRhoByRhoDotU::toMatrix_macro_3D( idx_t * RESTRIC const real_t _data_q_p_2 [] = {0.25, 0.5, 0.16666666666666666, 0.16666666666666666, 0.16666666666666666}; + const real_t tmp_coords_jac_0_WHITE_UP = 1.0 / (micro_edges_per_macro_edge_float)*1.0; + const real_t p_affine_const_0_0_WHITE_UP = macro_vertex_coord_id_0comp0; + const real_t p_affine_const_0_1_WHITE_UP = macro_vertex_coord_id_0comp1; + const real_t p_affine_const_0_2_WHITE_UP = macro_vertex_coord_id_0comp2; + const real_t p_affine_const_1_0_WHITE_UP = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_WHITE_UP*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0); + const real_t p_affine_const_1_1_WHITE_UP = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_WHITE_UP*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1); + const real_t p_affine_const_1_2_WHITE_UP = macro_vertex_coord_id_0comp2 + tmp_coords_jac_0_WHITE_UP*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2); + const real_t p_affine_const_2_0_WHITE_UP = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_WHITE_UP*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0); + const real_t p_affine_const_2_1_WHITE_UP = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_WHITE_UP*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1); + const real_t p_affine_const_2_2_WHITE_UP = macro_vertex_coord_id_0comp2 + tmp_coords_jac_0_WHITE_UP*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2); + const real_t p_affine_const_3_0_WHITE_UP = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_WHITE_UP*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0); + const real_t p_affine_const_3_1_WHITE_UP = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_WHITE_UP*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1); + const real_t p_affine_const_3_2_WHITE_UP = macro_vertex_coord_id_0comp2 + tmp_coords_jac_0_WHITE_UP*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2); + const real_t jac_affine_0_0_WHITE_UP = -p_affine_const_0_0_WHITE_UP + p_affine_const_1_0_WHITE_UP; + const real_t jac_affine_0_1_WHITE_UP = -p_affine_const_0_0_WHITE_UP + p_affine_const_2_0_WHITE_UP; + const real_t jac_affine_0_2_WHITE_UP = -p_affine_const_0_0_WHITE_UP + p_affine_const_3_0_WHITE_UP; + const real_t jac_affine_1_0_WHITE_UP = -p_affine_const_0_1_WHITE_UP + p_affine_const_1_1_WHITE_UP; + const real_t jac_affine_1_1_WHITE_UP = -p_affine_const_0_1_WHITE_UP + p_affine_const_2_1_WHITE_UP; + const real_t tmp_coords_jac_5_WHITE_UP = jac_affine_0_2_WHITE_UP*jac_affine_1_1_WHITE_UP; + const real_t jac_affine_1_2_WHITE_UP = -p_affine_const_0_1_WHITE_UP + p_affine_const_3_1_WHITE_UP; + const real_t tmp_coords_jac_3_WHITE_UP = jac_affine_0_1_WHITE_UP*jac_affine_1_2_WHITE_UP; + const real_t jac_affine_2_0_WHITE_UP = -p_affine_const_0_2_WHITE_UP + p_affine_const_1_2_WHITE_UP; + const real_t jac_affine_2_1_WHITE_UP = -p_affine_const_0_2_WHITE_UP + p_affine_const_2_2_WHITE_UP; + const real_t tmp_coords_jac_2_WHITE_UP = jac_affine_1_2_WHITE_UP*jac_affine_2_1_WHITE_UP; + const real_t jac_affine_2_2_WHITE_UP = -p_affine_const_0_2_WHITE_UP + p_affine_const_3_2_WHITE_UP; + const real_t tmp_coords_jac_1_WHITE_UP = jac_affine_1_1_WHITE_UP*jac_affine_2_2_WHITE_UP; + const real_t tmp_coords_jac_4_WHITE_UP = jac_affine_0_1_WHITE_UP*jac_affine_2_2_WHITE_UP; + const real_t tmp_coords_jac_6_WHITE_UP = jac_affine_0_0_WHITE_UP*tmp_coords_jac_1_WHITE_UP - jac_affine_0_0_WHITE_UP*tmp_coords_jac_2_WHITE_UP + jac_affine_0_2_WHITE_UP*jac_affine_1_0_WHITE_UP*jac_affine_2_1_WHITE_UP - jac_affine_1_0_WHITE_UP*tmp_coords_jac_4_WHITE_UP + jac_affine_2_0_WHITE_UP*tmp_coords_jac_3_WHITE_UP - jac_affine_2_0_WHITE_UP*tmp_coords_jac_5_WHITE_UP; + const real_t tmp_coords_jac_7_WHITE_UP = 1.0 / (tmp_coords_jac_6_WHITE_UP); + const real_t jac_affine_inv_0_0_WHITE_UP = tmp_coords_jac_7_WHITE_UP*(tmp_coords_jac_1_WHITE_UP - tmp_coords_jac_2_WHITE_UP); + const real_t jac_affine_inv_0_1_WHITE_UP = tmp_coords_jac_7_WHITE_UP*(jac_affine_0_2_WHITE_UP*jac_affine_2_1_WHITE_UP - tmp_coords_jac_4_WHITE_UP); + const real_t jac_affine_inv_0_2_WHITE_UP = tmp_coords_jac_7_WHITE_UP*(tmp_coords_jac_3_WHITE_UP - tmp_coords_jac_5_WHITE_UP); + const real_t jac_affine_inv_1_0_WHITE_UP = tmp_coords_jac_7_WHITE_UP*(-jac_affine_1_0_WHITE_UP*jac_affine_2_2_WHITE_UP + jac_affine_1_2_WHITE_UP*jac_affine_2_0_WHITE_UP); + const real_t jac_affine_inv_1_1_WHITE_UP = tmp_coords_jac_7_WHITE_UP*(jac_affine_0_0_WHITE_UP*jac_affine_2_2_WHITE_UP - jac_affine_0_2_WHITE_UP*jac_affine_2_0_WHITE_UP); + const real_t jac_affine_inv_1_2_WHITE_UP = tmp_coords_jac_7_WHITE_UP*(-jac_affine_0_0_WHITE_UP*jac_affine_1_2_WHITE_UP + jac_affine_0_2_WHITE_UP*jac_affine_1_0_WHITE_UP); + const real_t jac_affine_inv_2_0_WHITE_UP = tmp_coords_jac_7_WHITE_UP*(jac_affine_1_0_WHITE_UP*jac_affine_2_1_WHITE_UP - jac_affine_1_1_WHITE_UP*jac_affine_2_0_WHITE_UP); + const real_t jac_affine_inv_2_1_WHITE_UP = tmp_coords_jac_7_WHITE_UP*(-jac_affine_0_0_WHITE_UP*jac_affine_2_1_WHITE_UP + jac_affine_0_1_WHITE_UP*jac_affine_2_0_WHITE_UP); + const real_t jac_affine_inv_2_2_WHITE_UP = tmp_coords_jac_7_WHITE_UP*(jac_affine_0_0_WHITE_UP*jac_affine_1_1_WHITE_UP - jac_affine_0_1_WHITE_UP*jac_affine_1_0_WHITE_UP); + const real_t abs_det_jac_affine_WHITE_UP = abs(tmp_coords_jac_6_WHITE_UP); { /* CellType.WHITE_UP */ - const real_t tmp_coords_jac_0_WHITE_UP = 1.0 / (micro_edges_per_macro_edge_float)*1.0; - const real_t p_affine_const_0_0_WHITE_UP = macro_vertex_coord_id_0comp0; - const real_t p_affine_const_0_1_WHITE_UP = macro_vertex_coord_id_0comp1; - const real_t p_affine_const_0_2_WHITE_UP = macro_vertex_coord_id_0comp2; - const real_t p_affine_const_1_0_WHITE_UP = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_WHITE_UP*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0); - const real_t p_affine_const_1_1_WHITE_UP = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_WHITE_UP*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1); - const real_t p_affine_const_1_2_WHITE_UP = macro_vertex_coord_id_0comp2 + tmp_coords_jac_0_WHITE_UP*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2); - const real_t p_affine_const_2_0_WHITE_UP = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_WHITE_UP*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0); - const real_t p_affine_const_2_1_WHITE_UP = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_WHITE_UP*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1); - const real_t p_affine_const_2_2_WHITE_UP = macro_vertex_coord_id_0comp2 + tmp_coords_jac_0_WHITE_UP*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2); - const real_t p_affine_const_3_0_WHITE_UP = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_WHITE_UP*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0); - const real_t p_affine_const_3_1_WHITE_UP = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_WHITE_UP*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1); - const real_t p_affine_const_3_2_WHITE_UP = macro_vertex_coord_id_0comp2 + tmp_coords_jac_0_WHITE_UP*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2); - const real_t jac_affine_0_0_WHITE_UP = -p_affine_const_0_0_WHITE_UP + p_affine_const_1_0_WHITE_UP; - const real_t jac_affine_0_1_WHITE_UP = -p_affine_const_0_0_WHITE_UP + p_affine_const_2_0_WHITE_UP; - const real_t jac_affine_0_2_WHITE_UP = -p_affine_const_0_0_WHITE_UP + p_affine_const_3_0_WHITE_UP; - const real_t jac_affine_1_0_WHITE_UP = -p_affine_const_0_1_WHITE_UP + p_affine_const_1_1_WHITE_UP; - const real_t jac_affine_1_1_WHITE_UP = -p_affine_const_0_1_WHITE_UP + p_affine_const_2_1_WHITE_UP; - const real_t tmp_coords_jac_5_WHITE_UP = jac_affine_0_2_WHITE_UP*jac_affine_1_1_WHITE_UP; - const real_t jac_affine_1_2_WHITE_UP = -p_affine_const_0_1_WHITE_UP + p_affine_const_3_1_WHITE_UP; - const real_t tmp_coords_jac_3_WHITE_UP = jac_affine_0_1_WHITE_UP*jac_affine_1_2_WHITE_UP; - const real_t jac_affine_2_0_WHITE_UP = -p_affine_const_0_2_WHITE_UP + p_affine_const_1_2_WHITE_UP; - const real_t jac_affine_2_1_WHITE_UP = -p_affine_const_0_2_WHITE_UP + p_affine_const_2_2_WHITE_UP; - const real_t tmp_coords_jac_2_WHITE_UP = jac_affine_1_2_WHITE_UP*jac_affine_2_1_WHITE_UP; - const real_t jac_affine_2_2_WHITE_UP = -p_affine_const_0_2_WHITE_UP + p_affine_const_3_2_WHITE_UP; - const real_t tmp_coords_jac_1_WHITE_UP = jac_affine_1_1_WHITE_UP*jac_affine_2_2_WHITE_UP; - const real_t tmp_coords_jac_4_WHITE_UP = jac_affine_0_1_WHITE_UP*jac_affine_2_2_WHITE_UP; - const real_t tmp_coords_jac_6_WHITE_UP = 1.0 / (jac_affine_0_0_WHITE_UP*tmp_coords_jac_1_WHITE_UP - jac_affine_0_0_WHITE_UP*tmp_coords_jac_2_WHITE_UP + jac_affine_0_2_WHITE_UP*jac_affine_1_0_WHITE_UP*jac_affine_2_1_WHITE_UP - jac_affine_1_0_WHITE_UP*tmp_coords_jac_4_WHITE_UP + jac_affine_2_0_WHITE_UP*tmp_coords_jac_3_WHITE_UP - jac_affine_2_0_WHITE_UP*tmp_coords_jac_5_WHITE_UP); - const real_t jac_affine_inv_0_0_WHITE_UP = tmp_coords_jac_6_WHITE_UP*(tmp_coords_jac_1_WHITE_UP - tmp_coords_jac_2_WHITE_UP); - const real_t jac_affine_inv_0_1_WHITE_UP = tmp_coords_jac_6_WHITE_UP*(jac_affine_0_2_WHITE_UP*jac_affine_2_1_WHITE_UP - tmp_coords_jac_4_WHITE_UP); - const real_t jac_affine_inv_0_2_WHITE_UP = tmp_coords_jac_6_WHITE_UP*(tmp_coords_jac_3_WHITE_UP - tmp_coords_jac_5_WHITE_UP); - const real_t jac_affine_inv_1_0_WHITE_UP = tmp_coords_jac_6_WHITE_UP*(-jac_affine_1_0_WHITE_UP*jac_affine_2_2_WHITE_UP + jac_affine_1_2_WHITE_UP*jac_affine_2_0_WHITE_UP); - const real_t jac_affine_inv_1_1_WHITE_UP = tmp_coords_jac_6_WHITE_UP*(jac_affine_0_0_WHITE_UP*jac_affine_2_2_WHITE_UP - jac_affine_0_2_WHITE_UP*jac_affine_2_0_WHITE_UP); - const real_t jac_affine_inv_1_2_WHITE_UP = tmp_coords_jac_6_WHITE_UP*(-jac_affine_0_0_WHITE_UP*jac_affine_1_2_WHITE_UP + jac_affine_0_2_WHITE_UP*jac_affine_1_0_WHITE_UP); - const real_t jac_affine_inv_2_0_WHITE_UP = tmp_coords_jac_6_WHITE_UP*(jac_affine_1_0_WHITE_UP*jac_affine_2_1_WHITE_UP - jac_affine_1_1_WHITE_UP*jac_affine_2_0_WHITE_UP); - const real_t jac_affine_inv_2_1_WHITE_UP = tmp_coords_jac_6_WHITE_UP*(-jac_affine_0_0_WHITE_UP*jac_affine_2_1_WHITE_UP + jac_affine_0_1_WHITE_UP*jac_affine_2_0_WHITE_UP); - const real_t jac_affine_inv_2_2_WHITE_UP = tmp_coords_jac_6_WHITE_UP*(jac_affine_0_0_WHITE_UP*jac_affine_1_1_WHITE_UP - jac_affine_0_1_WHITE_UP*jac_affine_1_0_WHITE_UP); for (int64_t ctr_2 = 0; ctr_2 < micro_edges_per_macro_edge; ctr_2 += 1) for (int64_t ctr_1 = 0; ctr_1 < -ctr_2 + micro_edges_per_macro_edge; ctr_1 += 1) for (int64_t ctr_0 = 0; ctr_0 < -ctr_1 - ctr_2 + micro_edges_per_macro_edge; ctr_0 += 1) { - const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*((real_t)(ctr_1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*((real_t)(ctr_2)); - const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*((real_t)(ctr_1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*((real_t)(ctr_2)); - const real_t p_affine_0_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*((real_t)(ctr_1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*((real_t)(ctr_2)); - const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*((real_t)(ctr_1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*((real_t)(ctr_2)); - const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*((real_t)(ctr_1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*((real_t)(ctr_2)); - const real_t p_affine_1_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*((real_t)(ctr_1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*((real_t)(ctr_2)); - const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*((real_t)(ctr_1 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*((real_t)(ctr_2)); - const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*((real_t)(ctr_1 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*((real_t)(ctr_2)); - const real_t p_affine_2_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*((real_t)(ctr_1 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*((real_t)(ctr_2)); - const real_t p_affine_3_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*((real_t)(ctr_1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*((real_t)(ctr_2 + 1)); - const real_t p_affine_3_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*((real_t)(ctr_1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*((real_t)(ctr_2 + 1)); - const real_t p_affine_3_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*((real_t)(ctr_1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*((real_t)(ctr_2 + 1)); + + const int64_t phantom_ctr_0 = ctr_0; + real_t _data_float_loop_ctr_array_dim_0[4]; + _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0; + _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1; + _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2; + _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3; + real_t _data_float_loop_ctr_array_dim_1[4]; + _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1; + real_t _data_float_loop_ctr_array_dim_2[4]; + _data_float_loop_ctr_array_dim_2[0] = (real_t) ctr_2; + _data_float_loop_ctr_array_dim_2[1] = (real_t) ctr_2; + _data_float_loop_ctr_array_dim_2[2] = (real_t) ctr_2; + _data_float_loop_ctr_array_dim_2[3] = (real_t) ctr_2; + + const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]; + const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]; + const real_t p_affine_0_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]; + const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]; + const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]; + const real_t p_affine_1_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]; + const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]; + const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]; + const real_t p_affine_2_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]; + const real_t p_affine_3_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_3_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_3_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0; const real_t rho_dof_0 = _data_rhoVertex[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))]; const real_t rho_dof_1 = _data_rhoVertex[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1]; const real_t rho_dof_2 = _data_rhoVertex[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))]; @@ -288,7 +312,7 @@ void P2VectorToP1ElementwiseGradRhoByRhoDotU::toMatrix_macro_3D( idx_t * RESTRIC const real_t tmp_qloop_29 = -tmp_qloop_28 + tmp_qloop_6 - tmp_qloop_7 - tmp_qloop_9; const real_t tmp_qloop_30 = tmp_qloop_0*4.0; const real_t tmp_qloop_31 = -tmp_qloop_11 - tmp_qloop_30 + tmp_qloop_8; - const real_t tmp_qloop_32 = 1.0 / (rho_dof_0*tmp_qloop_12 + rho_dof_1*tmp_qloop_24 + rho_dof_2*tmp_qloop_25 + rho_dof_3*tmp_qloop_26 + rho_dof_4*tmp_qloop_7 + rho_dof_5*tmp_qloop_10 + rho_dof_6*tmp_qloop_9 + rho_dof_7*tmp_qloop_27 + rho_dof_8*tmp_qloop_29 + rho_dof_9*tmp_qloop_31)*_data_q_w[q]; + const real_t tmp_qloop_32 = abs_det_jac_affine_WHITE_UP*1.0 / (rho_dof_0*tmp_qloop_12 + rho_dof_1*tmp_qloop_24 + rho_dof_2*tmp_qloop_25 + rho_dof_3*tmp_qloop_26 + rho_dof_4*tmp_qloop_7 + rho_dof_5*tmp_qloop_10 + rho_dof_6*tmp_qloop_9 + rho_dof_7*tmp_qloop_27 + rho_dof_8*tmp_qloop_29 + rho_dof_9*tmp_qloop_31)*_data_q_w[q]; const real_t tmp_qloop_33 = tmp_qloop_32*(1.0 - _data_q_p_0[q] - _data_q_p_1[q] - _data_q_p_2[q]); const real_t tmp_qloop_34 = tmp_qloop_23*tmp_qloop_33; const real_t tmp_qloop_35 = jac_affine_inv_0_1_WHITE_UP*tmp_qloop_18 + jac_affine_inv_1_1_WHITE_UP*tmp_qloop_21 + jac_affine_inv_2_1_WHITE_UP*tmp_qloop_22; @@ -847,73 +871,93 @@ void P2VectorToP1ElementwiseGradRhoByRhoDotU::toMatrix_macro_3D( idx_t * RESTRIC mat->addValues( _data_rowIdx, _data_colIdx, _data_mat ); } } + const real_t tmp_coords_jac_0_WHITE_DOWN = 1.0 / (micro_edges_per_macro_edge_float)*1.0; + const real_t tmp_coords_jac_1_WHITE_DOWN = tmp_coords_jac_0_WHITE_DOWN*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0); + const real_t tmp_coords_jac_2_WHITE_DOWN = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_WHITE_DOWN*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0); + const real_t tmp_coords_jac_3_WHITE_DOWN = tmp_coords_jac_1_WHITE_DOWN + tmp_coords_jac_2_WHITE_DOWN; + const real_t tmp_coords_jac_4_WHITE_DOWN = tmp_coords_jac_0_WHITE_DOWN*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1); + const real_t tmp_coords_jac_5_WHITE_DOWN = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_WHITE_DOWN*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1); + const real_t tmp_coords_jac_6_WHITE_DOWN = tmp_coords_jac_4_WHITE_DOWN + tmp_coords_jac_5_WHITE_DOWN; + const real_t tmp_coords_jac_7_WHITE_DOWN = tmp_coords_jac_0_WHITE_DOWN*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2); + const real_t tmp_coords_jac_8_WHITE_DOWN = macro_vertex_coord_id_0comp2 + tmp_coords_jac_0_WHITE_DOWN*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2); + const real_t tmp_coords_jac_9_WHITE_DOWN = tmp_coords_jac_7_WHITE_DOWN + tmp_coords_jac_8_WHITE_DOWN; + const real_t tmp_coords_jac_10_WHITE_DOWN = tmp_coords_jac_0_WHITE_DOWN*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0); + const real_t tmp_coords_jac_11_WHITE_DOWN = tmp_coords_jac_0_WHITE_DOWN*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1); + const real_t tmp_coords_jac_12_WHITE_DOWN = tmp_coords_jac_0_WHITE_DOWN*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2); + const real_t p_affine_const_0_0_WHITE_DOWN = tmp_coords_jac_3_WHITE_DOWN; + const real_t p_affine_const_0_1_WHITE_DOWN = tmp_coords_jac_6_WHITE_DOWN; + const real_t p_affine_const_0_2_WHITE_DOWN = tmp_coords_jac_9_WHITE_DOWN; + const real_t p_affine_const_1_0_WHITE_DOWN = tmp_coords_jac_10_WHITE_DOWN + tmp_coords_jac_2_WHITE_DOWN; + const real_t p_affine_const_1_1_WHITE_DOWN = tmp_coords_jac_11_WHITE_DOWN + tmp_coords_jac_5_WHITE_DOWN; + const real_t p_affine_const_1_2_WHITE_DOWN = tmp_coords_jac_12_WHITE_DOWN + tmp_coords_jac_8_WHITE_DOWN; + const real_t p_affine_const_2_0_WHITE_DOWN = macro_vertex_coord_id_0comp0 + tmp_coords_jac_10_WHITE_DOWN + tmp_coords_jac_1_WHITE_DOWN; + const real_t p_affine_const_2_1_WHITE_DOWN = macro_vertex_coord_id_0comp1 + tmp_coords_jac_11_WHITE_DOWN + tmp_coords_jac_4_WHITE_DOWN; + const real_t p_affine_const_2_2_WHITE_DOWN = macro_vertex_coord_id_0comp2 + tmp_coords_jac_12_WHITE_DOWN + tmp_coords_jac_7_WHITE_DOWN; + const real_t p_affine_const_3_0_WHITE_DOWN = tmp_coords_jac_10_WHITE_DOWN + tmp_coords_jac_3_WHITE_DOWN; + const real_t p_affine_const_3_1_WHITE_DOWN = tmp_coords_jac_11_WHITE_DOWN + tmp_coords_jac_6_WHITE_DOWN; + const real_t p_affine_const_3_2_WHITE_DOWN = tmp_coords_jac_12_WHITE_DOWN + tmp_coords_jac_9_WHITE_DOWN; + const real_t jac_affine_0_0_WHITE_DOWN = -p_affine_const_0_0_WHITE_DOWN + p_affine_const_1_0_WHITE_DOWN; + const real_t jac_affine_0_1_WHITE_DOWN = -p_affine_const_0_0_WHITE_DOWN + p_affine_const_2_0_WHITE_DOWN; + const real_t jac_affine_0_2_WHITE_DOWN = -p_affine_const_0_0_WHITE_DOWN + p_affine_const_3_0_WHITE_DOWN; + const real_t jac_affine_1_0_WHITE_DOWN = -p_affine_const_0_1_WHITE_DOWN + p_affine_const_1_1_WHITE_DOWN; + const real_t jac_affine_1_1_WHITE_DOWN = -p_affine_const_0_1_WHITE_DOWN + p_affine_const_2_1_WHITE_DOWN; + const real_t tmp_coords_jac_17_WHITE_DOWN = jac_affine_0_2_WHITE_DOWN*jac_affine_1_1_WHITE_DOWN; + const real_t jac_affine_1_2_WHITE_DOWN = -p_affine_const_0_1_WHITE_DOWN + p_affine_const_3_1_WHITE_DOWN; + const real_t tmp_coords_jac_15_WHITE_DOWN = jac_affine_0_1_WHITE_DOWN*jac_affine_1_2_WHITE_DOWN; + const real_t jac_affine_2_0_WHITE_DOWN = -p_affine_const_0_2_WHITE_DOWN + p_affine_const_1_2_WHITE_DOWN; + const real_t jac_affine_2_1_WHITE_DOWN = -p_affine_const_0_2_WHITE_DOWN + p_affine_const_2_2_WHITE_DOWN; + const real_t tmp_coords_jac_14_WHITE_DOWN = jac_affine_1_2_WHITE_DOWN*jac_affine_2_1_WHITE_DOWN; + const real_t jac_affine_2_2_WHITE_DOWN = -p_affine_const_0_2_WHITE_DOWN + p_affine_const_3_2_WHITE_DOWN; + const real_t tmp_coords_jac_13_WHITE_DOWN = jac_affine_1_1_WHITE_DOWN*jac_affine_2_2_WHITE_DOWN; + const real_t tmp_coords_jac_16_WHITE_DOWN = jac_affine_0_1_WHITE_DOWN*jac_affine_2_2_WHITE_DOWN; + const real_t tmp_coords_jac_18_WHITE_DOWN = jac_affine_0_0_WHITE_DOWN*tmp_coords_jac_13_WHITE_DOWN - jac_affine_0_0_WHITE_DOWN*tmp_coords_jac_14_WHITE_DOWN + jac_affine_0_2_WHITE_DOWN*jac_affine_1_0_WHITE_DOWN*jac_affine_2_1_WHITE_DOWN - jac_affine_1_0_WHITE_DOWN*tmp_coords_jac_16_WHITE_DOWN + jac_affine_2_0_WHITE_DOWN*tmp_coords_jac_15_WHITE_DOWN - jac_affine_2_0_WHITE_DOWN*tmp_coords_jac_17_WHITE_DOWN; + const real_t tmp_coords_jac_19_WHITE_DOWN = 1.0 / (tmp_coords_jac_18_WHITE_DOWN); + const real_t jac_affine_inv_0_0_WHITE_DOWN = tmp_coords_jac_19_WHITE_DOWN*(tmp_coords_jac_13_WHITE_DOWN - tmp_coords_jac_14_WHITE_DOWN); + const real_t jac_affine_inv_0_1_WHITE_DOWN = tmp_coords_jac_19_WHITE_DOWN*(jac_affine_0_2_WHITE_DOWN*jac_affine_2_1_WHITE_DOWN - tmp_coords_jac_16_WHITE_DOWN); + const real_t jac_affine_inv_0_2_WHITE_DOWN = tmp_coords_jac_19_WHITE_DOWN*(tmp_coords_jac_15_WHITE_DOWN - tmp_coords_jac_17_WHITE_DOWN); + const real_t jac_affine_inv_1_0_WHITE_DOWN = tmp_coords_jac_19_WHITE_DOWN*(-jac_affine_1_0_WHITE_DOWN*jac_affine_2_2_WHITE_DOWN + jac_affine_1_2_WHITE_DOWN*jac_affine_2_0_WHITE_DOWN); + const real_t jac_affine_inv_1_1_WHITE_DOWN = tmp_coords_jac_19_WHITE_DOWN*(jac_affine_0_0_WHITE_DOWN*jac_affine_2_2_WHITE_DOWN - jac_affine_0_2_WHITE_DOWN*jac_affine_2_0_WHITE_DOWN); + const real_t jac_affine_inv_1_2_WHITE_DOWN = tmp_coords_jac_19_WHITE_DOWN*(-jac_affine_0_0_WHITE_DOWN*jac_affine_1_2_WHITE_DOWN + jac_affine_0_2_WHITE_DOWN*jac_affine_1_0_WHITE_DOWN); + const real_t jac_affine_inv_2_0_WHITE_DOWN = tmp_coords_jac_19_WHITE_DOWN*(jac_affine_1_0_WHITE_DOWN*jac_affine_2_1_WHITE_DOWN - jac_affine_1_1_WHITE_DOWN*jac_affine_2_0_WHITE_DOWN); + const real_t jac_affine_inv_2_1_WHITE_DOWN = tmp_coords_jac_19_WHITE_DOWN*(-jac_affine_0_0_WHITE_DOWN*jac_affine_2_1_WHITE_DOWN + jac_affine_0_1_WHITE_DOWN*jac_affine_2_0_WHITE_DOWN); + const real_t jac_affine_inv_2_2_WHITE_DOWN = tmp_coords_jac_19_WHITE_DOWN*(jac_affine_0_0_WHITE_DOWN*jac_affine_1_1_WHITE_DOWN - jac_affine_0_1_WHITE_DOWN*jac_affine_1_0_WHITE_DOWN); + const real_t abs_det_jac_affine_WHITE_DOWN = abs(tmp_coords_jac_18_WHITE_DOWN); { /* CellType.WHITE_DOWN */ - const real_t tmp_coords_jac_0_WHITE_DOWN = 1.0 / (micro_edges_per_macro_edge_float)*1.0; - const real_t tmp_coords_jac_1_WHITE_DOWN = tmp_coords_jac_0_WHITE_DOWN*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0); - const real_t tmp_coords_jac_2_WHITE_DOWN = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_WHITE_DOWN*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0); - const real_t tmp_coords_jac_3_WHITE_DOWN = tmp_coords_jac_1_WHITE_DOWN + tmp_coords_jac_2_WHITE_DOWN; - const real_t tmp_coords_jac_4_WHITE_DOWN = tmp_coords_jac_0_WHITE_DOWN*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1); - const real_t tmp_coords_jac_5_WHITE_DOWN = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_WHITE_DOWN*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1); - const real_t tmp_coords_jac_6_WHITE_DOWN = tmp_coords_jac_4_WHITE_DOWN + tmp_coords_jac_5_WHITE_DOWN; - const real_t tmp_coords_jac_7_WHITE_DOWN = tmp_coords_jac_0_WHITE_DOWN*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2); - const real_t tmp_coords_jac_8_WHITE_DOWN = macro_vertex_coord_id_0comp2 + tmp_coords_jac_0_WHITE_DOWN*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2); - const real_t tmp_coords_jac_9_WHITE_DOWN = tmp_coords_jac_7_WHITE_DOWN + tmp_coords_jac_8_WHITE_DOWN; - const real_t tmp_coords_jac_10_WHITE_DOWN = tmp_coords_jac_0_WHITE_DOWN*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0); - const real_t tmp_coords_jac_11_WHITE_DOWN = tmp_coords_jac_0_WHITE_DOWN*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1); - const real_t tmp_coords_jac_12_WHITE_DOWN = tmp_coords_jac_0_WHITE_DOWN*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2); - const real_t p_affine_const_0_0_WHITE_DOWN = tmp_coords_jac_3_WHITE_DOWN; - const real_t p_affine_const_0_1_WHITE_DOWN = tmp_coords_jac_6_WHITE_DOWN; - const real_t p_affine_const_0_2_WHITE_DOWN = tmp_coords_jac_9_WHITE_DOWN; - const real_t p_affine_const_1_0_WHITE_DOWN = tmp_coords_jac_10_WHITE_DOWN + tmp_coords_jac_2_WHITE_DOWN; - const real_t p_affine_const_1_1_WHITE_DOWN = tmp_coords_jac_11_WHITE_DOWN + tmp_coords_jac_5_WHITE_DOWN; - const real_t p_affine_const_1_2_WHITE_DOWN = tmp_coords_jac_12_WHITE_DOWN + tmp_coords_jac_8_WHITE_DOWN; - const real_t p_affine_const_2_0_WHITE_DOWN = macro_vertex_coord_id_0comp0 + tmp_coords_jac_10_WHITE_DOWN + tmp_coords_jac_1_WHITE_DOWN; - const real_t p_affine_const_2_1_WHITE_DOWN = macro_vertex_coord_id_0comp1 + tmp_coords_jac_11_WHITE_DOWN + tmp_coords_jac_4_WHITE_DOWN; - const real_t p_affine_const_2_2_WHITE_DOWN = macro_vertex_coord_id_0comp2 + tmp_coords_jac_12_WHITE_DOWN + tmp_coords_jac_7_WHITE_DOWN; - const real_t p_affine_const_3_0_WHITE_DOWN = tmp_coords_jac_10_WHITE_DOWN + tmp_coords_jac_3_WHITE_DOWN; - const real_t p_affine_const_3_1_WHITE_DOWN = tmp_coords_jac_11_WHITE_DOWN + tmp_coords_jac_6_WHITE_DOWN; - const real_t p_affine_const_3_2_WHITE_DOWN = tmp_coords_jac_12_WHITE_DOWN + tmp_coords_jac_9_WHITE_DOWN; - const real_t jac_affine_0_0_WHITE_DOWN = -p_affine_const_0_0_WHITE_DOWN + p_affine_const_1_0_WHITE_DOWN; - const real_t jac_affine_0_1_WHITE_DOWN = -p_affine_const_0_0_WHITE_DOWN + p_affine_const_2_0_WHITE_DOWN; - const real_t jac_affine_0_2_WHITE_DOWN = -p_affine_const_0_0_WHITE_DOWN + p_affine_const_3_0_WHITE_DOWN; - const real_t jac_affine_1_0_WHITE_DOWN = -p_affine_const_0_1_WHITE_DOWN + p_affine_const_1_1_WHITE_DOWN; - const real_t jac_affine_1_1_WHITE_DOWN = -p_affine_const_0_1_WHITE_DOWN + p_affine_const_2_1_WHITE_DOWN; - const real_t tmp_coords_jac_17_WHITE_DOWN = jac_affine_0_2_WHITE_DOWN*jac_affine_1_1_WHITE_DOWN; - const real_t jac_affine_1_2_WHITE_DOWN = -p_affine_const_0_1_WHITE_DOWN + p_affine_const_3_1_WHITE_DOWN; - const real_t tmp_coords_jac_15_WHITE_DOWN = jac_affine_0_1_WHITE_DOWN*jac_affine_1_2_WHITE_DOWN; - const real_t jac_affine_2_0_WHITE_DOWN = -p_affine_const_0_2_WHITE_DOWN + p_affine_const_1_2_WHITE_DOWN; - const real_t jac_affine_2_1_WHITE_DOWN = -p_affine_const_0_2_WHITE_DOWN + p_affine_const_2_2_WHITE_DOWN; - const real_t tmp_coords_jac_14_WHITE_DOWN = jac_affine_1_2_WHITE_DOWN*jac_affine_2_1_WHITE_DOWN; - const real_t jac_affine_2_2_WHITE_DOWN = -p_affine_const_0_2_WHITE_DOWN + p_affine_const_3_2_WHITE_DOWN; - const real_t tmp_coords_jac_13_WHITE_DOWN = jac_affine_1_1_WHITE_DOWN*jac_affine_2_2_WHITE_DOWN; - const real_t tmp_coords_jac_16_WHITE_DOWN = jac_affine_0_1_WHITE_DOWN*jac_affine_2_2_WHITE_DOWN; - const real_t tmp_coords_jac_18_WHITE_DOWN = 1.0 / (jac_affine_0_0_WHITE_DOWN*tmp_coords_jac_13_WHITE_DOWN - jac_affine_0_0_WHITE_DOWN*tmp_coords_jac_14_WHITE_DOWN + jac_affine_0_2_WHITE_DOWN*jac_affine_1_0_WHITE_DOWN*jac_affine_2_1_WHITE_DOWN - jac_affine_1_0_WHITE_DOWN*tmp_coords_jac_16_WHITE_DOWN + jac_affine_2_0_WHITE_DOWN*tmp_coords_jac_15_WHITE_DOWN - jac_affine_2_0_WHITE_DOWN*tmp_coords_jac_17_WHITE_DOWN); - const real_t jac_affine_inv_0_0_WHITE_DOWN = tmp_coords_jac_18_WHITE_DOWN*(tmp_coords_jac_13_WHITE_DOWN - tmp_coords_jac_14_WHITE_DOWN); - const real_t jac_affine_inv_0_1_WHITE_DOWN = tmp_coords_jac_18_WHITE_DOWN*(jac_affine_0_2_WHITE_DOWN*jac_affine_2_1_WHITE_DOWN - tmp_coords_jac_16_WHITE_DOWN); - const real_t jac_affine_inv_0_2_WHITE_DOWN = tmp_coords_jac_18_WHITE_DOWN*(tmp_coords_jac_15_WHITE_DOWN - tmp_coords_jac_17_WHITE_DOWN); - const real_t jac_affine_inv_1_0_WHITE_DOWN = tmp_coords_jac_18_WHITE_DOWN*(-jac_affine_1_0_WHITE_DOWN*jac_affine_2_2_WHITE_DOWN + jac_affine_1_2_WHITE_DOWN*jac_affine_2_0_WHITE_DOWN); - const real_t jac_affine_inv_1_1_WHITE_DOWN = tmp_coords_jac_18_WHITE_DOWN*(jac_affine_0_0_WHITE_DOWN*jac_affine_2_2_WHITE_DOWN - jac_affine_0_2_WHITE_DOWN*jac_affine_2_0_WHITE_DOWN); - const real_t jac_affine_inv_1_2_WHITE_DOWN = tmp_coords_jac_18_WHITE_DOWN*(-jac_affine_0_0_WHITE_DOWN*jac_affine_1_2_WHITE_DOWN + jac_affine_0_2_WHITE_DOWN*jac_affine_1_0_WHITE_DOWN); - const real_t jac_affine_inv_2_0_WHITE_DOWN = tmp_coords_jac_18_WHITE_DOWN*(jac_affine_1_0_WHITE_DOWN*jac_affine_2_1_WHITE_DOWN - jac_affine_1_1_WHITE_DOWN*jac_affine_2_0_WHITE_DOWN); - const real_t jac_affine_inv_2_1_WHITE_DOWN = tmp_coords_jac_18_WHITE_DOWN*(-jac_affine_0_0_WHITE_DOWN*jac_affine_2_1_WHITE_DOWN + jac_affine_0_1_WHITE_DOWN*jac_affine_2_0_WHITE_DOWN); - const real_t jac_affine_inv_2_2_WHITE_DOWN = tmp_coords_jac_18_WHITE_DOWN*(jac_affine_0_0_WHITE_DOWN*jac_affine_1_1_WHITE_DOWN - jac_affine_0_1_WHITE_DOWN*jac_affine_1_0_WHITE_DOWN); for (int64_t ctr_2 = 0; ctr_2 < micro_edges_per_macro_edge; ctr_2 += 1) for (int64_t ctr_1 = 0; ctr_1 < -ctr_2 + micro_edges_per_macro_edge; ctr_1 += 1) for (int64_t ctr_0 = 0; ctr_0 < -ctr_1 - ctr_2 + micro_edges_per_macro_edge - 2; ctr_0 += 1) { - const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*((real_t)(ctr_1 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*((real_t)(ctr_2)); - const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*((real_t)(ctr_1 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*((real_t)(ctr_2)); - const real_t p_affine_0_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*((real_t)(ctr_1 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*((real_t)(ctr_2)); - const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*((real_t)(ctr_1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*((real_t)(ctr_2 + 1)); - const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*((real_t)(ctr_1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*((real_t)(ctr_2 + 1)); - const real_t p_affine_1_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*((real_t)(ctr_1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*((real_t)(ctr_2 + 1)); - const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*((real_t)(ctr_1 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*((real_t)(ctr_2 + 1)); - const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*((real_t)(ctr_1 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*((real_t)(ctr_2 + 1)); - const real_t p_affine_2_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*((real_t)(ctr_1 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*((real_t)(ctr_2 + 1)); - const real_t p_affine_3_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*((real_t)(ctr_1 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*((real_t)(ctr_2 + 1)); - const real_t p_affine_3_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*((real_t)(ctr_1 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*((real_t)(ctr_2 + 1)); - const real_t p_affine_3_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*((real_t)(ctr_1 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*((real_t)(ctr_2 + 1)); + + const int64_t phantom_ctr_0 = ctr_0; + real_t _data_float_loop_ctr_array_dim_0[4]; + _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0; + _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1; + _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2; + _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3; + real_t _data_float_loop_ctr_array_dim_1[4]; + _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1; + real_t _data_float_loop_ctr_array_dim_2[4]; + _data_float_loop_ctr_array_dim_2[0] = (real_t) ctr_2; + _data_float_loop_ctr_array_dim_2[1] = (real_t) ctr_2; + _data_float_loop_ctr_array_dim_2[2] = (real_t) ctr_2; + _data_float_loop_ctr_array_dim_2[3] = (real_t) ctr_2; + + const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]; + const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]; + const real_t p_affine_0_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]; + const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_1_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_2_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_3_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_3_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_3_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0; const real_t rho_dof_0 = _data_rhoVertex[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1]; const real_t rho_dof_1 = _data_rhoVertex[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1]; const real_t rho_dof_2 = _data_rhoVertex[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))]; @@ -1078,7 +1122,7 @@ void P2VectorToP1ElementwiseGradRhoByRhoDotU::toMatrix_macro_3D( idx_t * RESTRIC const real_t tmp_qloop_29 = -tmp_qloop_28 + tmp_qloop_6 - tmp_qloop_7 - tmp_qloop_9; const real_t tmp_qloop_30 = tmp_qloop_0*4.0; const real_t tmp_qloop_31 = -tmp_qloop_11 - tmp_qloop_30 + tmp_qloop_8; - const real_t tmp_qloop_32 = 1.0 / (rho_dof_0*tmp_qloop_12 + rho_dof_1*tmp_qloop_24 + rho_dof_2*tmp_qloop_25 + rho_dof_3*tmp_qloop_26 + rho_dof_4*tmp_qloop_7 + rho_dof_5*tmp_qloop_10 + rho_dof_6*tmp_qloop_9 + rho_dof_7*tmp_qloop_27 + rho_dof_8*tmp_qloop_29 + rho_dof_9*tmp_qloop_31)*_data_q_w[q]; + const real_t tmp_qloop_32 = abs_det_jac_affine_WHITE_DOWN*1.0 / (rho_dof_0*tmp_qloop_12 + rho_dof_1*tmp_qloop_24 + rho_dof_2*tmp_qloop_25 + rho_dof_3*tmp_qloop_26 + rho_dof_4*tmp_qloop_7 + rho_dof_5*tmp_qloop_10 + rho_dof_6*tmp_qloop_9 + rho_dof_7*tmp_qloop_27 + rho_dof_8*tmp_qloop_29 + rho_dof_9*tmp_qloop_31)*_data_q_w[q]; const real_t tmp_qloop_33 = tmp_qloop_32*(1.0 - _data_q_p_0[q] - _data_q_p_1[q] - _data_q_p_2[q]); const real_t tmp_qloop_34 = tmp_qloop_23*tmp_qloop_33; const real_t tmp_qloop_35 = jac_affine_inv_0_1_WHITE_DOWN*tmp_qloop_18 + jac_affine_inv_1_1_WHITE_DOWN*tmp_qloop_21 + jac_affine_inv_2_1_WHITE_DOWN*tmp_qloop_22; @@ -1637,67 +1681,87 @@ void P2VectorToP1ElementwiseGradRhoByRhoDotU::toMatrix_macro_3D( idx_t * RESTRIC mat->addValues( _data_rowIdx, _data_colIdx, _data_mat ); } } + const real_t tmp_coords_jac_0_BLUE_UP = 1.0 / (micro_edges_per_macro_edge_float)*1.0; + const real_t tmp_coords_jac_1_BLUE_UP = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_BLUE_UP*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0); + const real_t tmp_coords_jac_2_BLUE_UP = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_BLUE_UP*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1); + const real_t tmp_coords_jac_3_BLUE_UP = macro_vertex_coord_id_0comp2 + tmp_coords_jac_0_BLUE_UP*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2); + const real_t tmp_coords_jac_4_BLUE_UP = tmp_coords_jac_0_BLUE_UP*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0); + const real_t tmp_coords_jac_5_BLUE_UP = tmp_coords_jac_0_BLUE_UP*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1); + const real_t tmp_coords_jac_6_BLUE_UP = tmp_coords_jac_0_BLUE_UP*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2); + const real_t p_affine_const_0_0_BLUE_UP = tmp_coords_jac_1_BLUE_UP; + const real_t p_affine_const_0_1_BLUE_UP = tmp_coords_jac_2_BLUE_UP; + const real_t p_affine_const_0_2_BLUE_UP = tmp_coords_jac_3_BLUE_UP; + const real_t p_affine_const_1_0_BLUE_UP = macro_vertex_coord_id_0comp0 + tmp_coords_jac_4_BLUE_UP; + const real_t p_affine_const_1_1_BLUE_UP = macro_vertex_coord_id_0comp1 + tmp_coords_jac_5_BLUE_UP; + const real_t p_affine_const_1_2_BLUE_UP = macro_vertex_coord_id_0comp2 + tmp_coords_jac_6_BLUE_UP; + const real_t p_affine_const_2_0_BLUE_UP = tmp_coords_jac_1_BLUE_UP + tmp_coords_jac_4_BLUE_UP; + const real_t p_affine_const_2_1_BLUE_UP = tmp_coords_jac_2_BLUE_UP + tmp_coords_jac_5_BLUE_UP; + const real_t p_affine_const_2_2_BLUE_UP = tmp_coords_jac_3_BLUE_UP + tmp_coords_jac_6_BLUE_UP; + const real_t p_affine_const_3_0_BLUE_UP = tmp_coords_jac_0_BLUE_UP*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0) + tmp_coords_jac_1_BLUE_UP; + const real_t p_affine_const_3_1_BLUE_UP = tmp_coords_jac_0_BLUE_UP*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1) + tmp_coords_jac_2_BLUE_UP; + const real_t p_affine_const_3_2_BLUE_UP = tmp_coords_jac_0_BLUE_UP*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2) + tmp_coords_jac_3_BLUE_UP; + const real_t jac_affine_0_0_BLUE_UP = -p_affine_const_0_0_BLUE_UP + p_affine_const_1_0_BLUE_UP; + const real_t jac_affine_0_1_BLUE_UP = -p_affine_const_0_0_BLUE_UP + p_affine_const_2_0_BLUE_UP; + const real_t jac_affine_0_2_BLUE_UP = -p_affine_const_0_0_BLUE_UP + p_affine_const_3_0_BLUE_UP; + const real_t jac_affine_1_0_BLUE_UP = -p_affine_const_0_1_BLUE_UP + p_affine_const_1_1_BLUE_UP; + const real_t jac_affine_1_1_BLUE_UP = -p_affine_const_0_1_BLUE_UP + p_affine_const_2_1_BLUE_UP; + const real_t tmp_coords_jac_11_BLUE_UP = jac_affine_0_2_BLUE_UP*jac_affine_1_1_BLUE_UP; + const real_t jac_affine_1_2_BLUE_UP = -p_affine_const_0_1_BLUE_UP + p_affine_const_3_1_BLUE_UP; + const real_t tmp_coords_jac_9_BLUE_UP = jac_affine_0_1_BLUE_UP*jac_affine_1_2_BLUE_UP; + const real_t jac_affine_2_0_BLUE_UP = -p_affine_const_0_2_BLUE_UP + p_affine_const_1_2_BLUE_UP; + const real_t jac_affine_2_1_BLUE_UP = -p_affine_const_0_2_BLUE_UP + p_affine_const_2_2_BLUE_UP; + const real_t tmp_coords_jac_8_BLUE_UP = jac_affine_1_2_BLUE_UP*jac_affine_2_1_BLUE_UP; + const real_t jac_affine_2_2_BLUE_UP = -p_affine_const_0_2_BLUE_UP + p_affine_const_3_2_BLUE_UP; + const real_t tmp_coords_jac_7_BLUE_UP = jac_affine_1_1_BLUE_UP*jac_affine_2_2_BLUE_UP; + const real_t tmp_coords_jac_10_BLUE_UP = jac_affine_0_1_BLUE_UP*jac_affine_2_2_BLUE_UP; + const real_t tmp_coords_jac_12_BLUE_UP = jac_affine_0_0_BLUE_UP*tmp_coords_jac_7_BLUE_UP - jac_affine_0_0_BLUE_UP*tmp_coords_jac_8_BLUE_UP + jac_affine_0_2_BLUE_UP*jac_affine_1_0_BLUE_UP*jac_affine_2_1_BLUE_UP - jac_affine_1_0_BLUE_UP*tmp_coords_jac_10_BLUE_UP - jac_affine_2_0_BLUE_UP*tmp_coords_jac_11_BLUE_UP + jac_affine_2_0_BLUE_UP*tmp_coords_jac_9_BLUE_UP; + const real_t tmp_coords_jac_13_BLUE_UP = 1.0 / (tmp_coords_jac_12_BLUE_UP); + const real_t jac_affine_inv_0_0_BLUE_UP = tmp_coords_jac_13_BLUE_UP*(tmp_coords_jac_7_BLUE_UP - tmp_coords_jac_8_BLUE_UP); + const real_t jac_affine_inv_0_1_BLUE_UP = tmp_coords_jac_13_BLUE_UP*(jac_affine_0_2_BLUE_UP*jac_affine_2_1_BLUE_UP - tmp_coords_jac_10_BLUE_UP); + const real_t jac_affine_inv_0_2_BLUE_UP = tmp_coords_jac_13_BLUE_UP*(-tmp_coords_jac_11_BLUE_UP + tmp_coords_jac_9_BLUE_UP); + const real_t jac_affine_inv_1_0_BLUE_UP = tmp_coords_jac_13_BLUE_UP*(-jac_affine_1_0_BLUE_UP*jac_affine_2_2_BLUE_UP + jac_affine_1_2_BLUE_UP*jac_affine_2_0_BLUE_UP); + const real_t jac_affine_inv_1_1_BLUE_UP = tmp_coords_jac_13_BLUE_UP*(jac_affine_0_0_BLUE_UP*jac_affine_2_2_BLUE_UP - jac_affine_0_2_BLUE_UP*jac_affine_2_0_BLUE_UP); + const real_t jac_affine_inv_1_2_BLUE_UP = tmp_coords_jac_13_BLUE_UP*(-jac_affine_0_0_BLUE_UP*jac_affine_1_2_BLUE_UP + jac_affine_0_2_BLUE_UP*jac_affine_1_0_BLUE_UP); + const real_t jac_affine_inv_2_0_BLUE_UP = tmp_coords_jac_13_BLUE_UP*(jac_affine_1_0_BLUE_UP*jac_affine_2_1_BLUE_UP - jac_affine_1_1_BLUE_UP*jac_affine_2_0_BLUE_UP); + const real_t jac_affine_inv_2_1_BLUE_UP = tmp_coords_jac_13_BLUE_UP*(-jac_affine_0_0_BLUE_UP*jac_affine_2_1_BLUE_UP + jac_affine_0_1_BLUE_UP*jac_affine_2_0_BLUE_UP); + const real_t jac_affine_inv_2_2_BLUE_UP = tmp_coords_jac_13_BLUE_UP*(jac_affine_0_0_BLUE_UP*jac_affine_1_1_BLUE_UP - jac_affine_0_1_BLUE_UP*jac_affine_1_0_BLUE_UP); + const real_t abs_det_jac_affine_BLUE_UP = abs(tmp_coords_jac_12_BLUE_UP); { /* CellType.BLUE_UP */ - const real_t tmp_coords_jac_0_BLUE_UP = 1.0 / (micro_edges_per_macro_edge_float)*1.0; - const real_t tmp_coords_jac_1_BLUE_UP = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_BLUE_UP*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0); - const real_t tmp_coords_jac_2_BLUE_UP = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_BLUE_UP*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1); - const real_t tmp_coords_jac_3_BLUE_UP = macro_vertex_coord_id_0comp2 + tmp_coords_jac_0_BLUE_UP*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2); - const real_t tmp_coords_jac_4_BLUE_UP = tmp_coords_jac_0_BLUE_UP*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0); - const real_t tmp_coords_jac_5_BLUE_UP = tmp_coords_jac_0_BLUE_UP*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1); - const real_t tmp_coords_jac_6_BLUE_UP = tmp_coords_jac_0_BLUE_UP*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2); - const real_t p_affine_const_0_0_BLUE_UP = tmp_coords_jac_1_BLUE_UP; - const real_t p_affine_const_0_1_BLUE_UP = tmp_coords_jac_2_BLUE_UP; - const real_t p_affine_const_0_2_BLUE_UP = tmp_coords_jac_3_BLUE_UP; - const real_t p_affine_const_1_0_BLUE_UP = macro_vertex_coord_id_0comp0 + tmp_coords_jac_4_BLUE_UP; - const real_t p_affine_const_1_1_BLUE_UP = macro_vertex_coord_id_0comp1 + tmp_coords_jac_5_BLUE_UP; - const real_t p_affine_const_1_2_BLUE_UP = macro_vertex_coord_id_0comp2 + tmp_coords_jac_6_BLUE_UP; - const real_t p_affine_const_2_0_BLUE_UP = tmp_coords_jac_1_BLUE_UP + tmp_coords_jac_4_BLUE_UP; - const real_t p_affine_const_2_1_BLUE_UP = tmp_coords_jac_2_BLUE_UP + tmp_coords_jac_5_BLUE_UP; - const real_t p_affine_const_2_2_BLUE_UP = tmp_coords_jac_3_BLUE_UP + tmp_coords_jac_6_BLUE_UP; - const real_t p_affine_const_3_0_BLUE_UP = tmp_coords_jac_0_BLUE_UP*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0) + tmp_coords_jac_1_BLUE_UP; - const real_t p_affine_const_3_1_BLUE_UP = tmp_coords_jac_0_BLUE_UP*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1) + tmp_coords_jac_2_BLUE_UP; - const real_t p_affine_const_3_2_BLUE_UP = tmp_coords_jac_0_BLUE_UP*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2) + tmp_coords_jac_3_BLUE_UP; - const real_t jac_affine_0_0_BLUE_UP = -p_affine_const_0_0_BLUE_UP + p_affine_const_1_0_BLUE_UP; - const real_t jac_affine_0_1_BLUE_UP = -p_affine_const_0_0_BLUE_UP + p_affine_const_2_0_BLUE_UP; - const real_t jac_affine_0_2_BLUE_UP = -p_affine_const_0_0_BLUE_UP + p_affine_const_3_0_BLUE_UP; - const real_t jac_affine_1_0_BLUE_UP = -p_affine_const_0_1_BLUE_UP + p_affine_const_1_1_BLUE_UP; - const real_t jac_affine_1_1_BLUE_UP = -p_affine_const_0_1_BLUE_UP + p_affine_const_2_1_BLUE_UP; - const real_t tmp_coords_jac_11_BLUE_UP = jac_affine_0_2_BLUE_UP*jac_affine_1_1_BLUE_UP; - const real_t jac_affine_1_2_BLUE_UP = -p_affine_const_0_1_BLUE_UP + p_affine_const_3_1_BLUE_UP; - const real_t tmp_coords_jac_9_BLUE_UP = jac_affine_0_1_BLUE_UP*jac_affine_1_2_BLUE_UP; - const real_t jac_affine_2_0_BLUE_UP = -p_affine_const_0_2_BLUE_UP + p_affine_const_1_2_BLUE_UP; - const real_t jac_affine_2_1_BLUE_UP = -p_affine_const_0_2_BLUE_UP + p_affine_const_2_2_BLUE_UP; - const real_t tmp_coords_jac_8_BLUE_UP = jac_affine_1_2_BLUE_UP*jac_affine_2_1_BLUE_UP; - const real_t jac_affine_2_2_BLUE_UP = -p_affine_const_0_2_BLUE_UP + p_affine_const_3_2_BLUE_UP; - const real_t tmp_coords_jac_7_BLUE_UP = jac_affine_1_1_BLUE_UP*jac_affine_2_2_BLUE_UP; - const real_t tmp_coords_jac_10_BLUE_UP = jac_affine_0_1_BLUE_UP*jac_affine_2_2_BLUE_UP; - const real_t tmp_coords_jac_12_BLUE_UP = 1.0 / (jac_affine_0_0_BLUE_UP*tmp_coords_jac_7_BLUE_UP - jac_affine_0_0_BLUE_UP*tmp_coords_jac_8_BLUE_UP + jac_affine_0_2_BLUE_UP*jac_affine_1_0_BLUE_UP*jac_affine_2_1_BLUE_UP - jac_affine_1_0_BLUE_UP*tmp_coords_jac_10_BLUE_UP - jac_affine_2_0_BLUE_UP*tmp_coords_jac_11_BLUE_UP + jac_affine_2_0_BLUE_UP*tmp_coords_jac_9_BLUE_UP); - const real_t jac_affine_inv_0_0_BLUE_UP = tmp_coords_jac_12_BLUE_UP*(tmp_coords_jac_7_BLUE_UP - tmp_coords_jac_8_BLUE_UP); - const real_t jac_affine_inv_0_1_BLUE_UP = tmp_coords_jac_12_BLUE_UP*(jac_affine_0_2_BLUE_UP*jac_affine_2_1_BLUE_UP - tmp_coords_jac_10_BLUE_UP); - const real_t jac_affine_inv_0_2_BLUE_UP = tmp_coords_jac_12_BLUE_UP*(-tmp_coords_jac_11_BLUE_UP + tmp_coords_jac_9_BLUE_UP); - const real_t jac_affine_inv_1_0_BLUE_UP = tmp_coords_jac_12_BLUE_UP*(-jac_affine_1_0_BLUE_UP*jac_affine_2_2_BLUE_UP + jac_affine_1_2_BLUE_UP*jac_affine_2_0_BLUE_UP); - const real_t jac_affine_inv_1_1_BLUE_UP = tmp_coords_jac_12_BLUE_UP*(jac_affine_0_0_BLUE_UP*jac_affine_2_2_BLUE_UP - jac_affine_0_2_BLUE_UP*jac_affine_2_0_BLUE_UP); - const real_t jac_affine_inv_1_2_BLUE_UP = tmp_coords_jac_12_BLUE_UP*(-jac_affine_0_0_BLUE_UP*jac_affine_1_2_BLUE_UP + jac_affine_0_2_BLUE_UP*jac_affine_1_0_BLUE_UP); - const real_t jac_affine_inv_2_0_BLUE_UP = tmp_coords_jac_12_BLUE_UP*(jac_affine_1_0_BLUE_UP*jac_affine_2_1_BLUE_UP - jac_affine_1_1_BLUE_UP*jac_affine_2_0_BLUE_UP); - const real_t jac_affine_inv_2_1_BLUE_UP = tmp_coords_jac_12_BLUE_UP*(-jac_affine_0_0_BLUE_UP*jac_affine_2_1_BLUE_UP + jac_affine_0_1_BLUE_UP*jac_affine_2_0_BLUE_UP); - const real_t jac_affine_inv_2_2_BLUE_UP = tmp_coords_jac_12_BLUE_UP*(jac_affine_0_0_BLUE_UP*jac_affine_1_1_BLUE_UP - jac_affine_0_1_BLUE_UP*jac_affine_1_0_BLUE_UP); for (int64_t ctr_2 = 0; ctr_2 < micro_edges_per_macro_edge; ctr_2 += 1) for (int64_t ctr_1 = 0; ctr_1 < -ctr_2 + micro_edges_per_macro_edge; ctr_1 += 1) for (int64_t ctr_0 = 0; ctr_0 < -ctr_1 - ctr_2 + micro_edges_per_macro_edge - 1; ctr_0 += 1) { - const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*((real_t)(ctr_1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*((real_t)(ctr_2)); - const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*((real_t)(ctr_1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*((real_t)(ctr_2)); - const real_t p_affine_0_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*((real_t)(ctr_1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*((real_t)(ctr_2)); - const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*((real_t)(ctr_1 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*((real_t)(ctr_2)); - const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*((real_t)(ctr_1 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*((real_t)(ctr_2)); - const real_t p_affine_1_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*((real_t)(ctr_1 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*((real_t)(ctr_2)); - const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*((real_t)(ctr_1 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*((real_t)(ctr_2)); - const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*((real_t)(ctr_1 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*((real_t)(ctr_2)); - const real_t p_affine_2_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*((real_t)(ctr_1 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*((real_t)(ctr_2)); - const real_t p_affine_3_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*((real_t)(ctr_1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*((real_t)(ctr_2 + 1)); - const real_t p_affine_3_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*((real_t)(ctr_1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*((real_t)(ctr_2 + 1)); - const real_t p_affine_3_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*((real_t)(ctr_1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*((real_t)(ctr_2 + 1)); + + const int64_t phantom_ctr_0 = ctr_0; + real_t _data_float_loop_ctr_array_dim_0[4]; + _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0; + _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1; + _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2; + _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3; + real_t _data_float_loop_ctr_array_dim_1[4]; + _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1; + real_t _data_float_loop_ctr_array_dim_2[4]; + _data_float_loop_ctr_array_dim_2[0] = (real_t) ctr_2; + _data_float_loop_ctr_array_dim_2[1] = (real_t) ctr_2; + _data_float_loop_ctr_array_dim_2[2] = (real_t) ctr_2; + _data_float_loop_ctr_array_dim_2[3] = (real_t) ctr_2; + + const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]; + const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]; + const real_t p_affine_0_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]; + const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]; + const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]; + const real_t p_affine_1_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]; + const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]; + const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]; + const real_t p_affine_2_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]; + const real_t p_affine_3_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_3_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_3_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0; const real_t rho_dof_0 = _data_rhoVertex[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1]; const real_t rho_dof_1 = _data_rhoVertex[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))]; const real_t rho_dof_2 = _data_rhoVertex[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1]; @@ -1862,7 +1926,7 @@ void P2VectorToP1ElementwiseGradRhoByRhoDotU::toMatrix_macro_3D( idx_t * RESTRIC const real_t tmp_qloop_29 = -tmp_qloop_28 + tmp_qloop_6 - tmp_qloop_7 - tmp_qloop_9; const real_t tmp_qloop_30 = tmp_qloop_0*4.0; const real_t tmp_qloop_31 = -tmp_qloop_11 - tmp_qloop_30 + tmp_qloop_8; - const real_t tmp_qloop_32 = 1.0 / (rho_dof_0*tmp_qloop_12 + rho_dof_1*tmp_qloop_24 + rho_dof_2*tmp_qloop_25 + rho_dof_3*tmp_qloop_26 + rho_dof_4*tmp_qloop_7 + rho_dof_5*tmp_qloop_10 + rho_dof_6*tmp_qloop_9 + rho_dof_7*tmp_qloop_27 + rho_dof_8*tmp_qloop_29 + rho_dof_9*tmp_qloop_31)*_data_q_w[q]; + const real_t tmp_qloop_32 = abs_det_jac_affine_BLUE_UP*1.0 / (rho_dof_0*tmp_qloop_12 + rho_dof_1*tmp_qloop_24 + rho_dof_2*tmp_qloop_25 + rho_dof_3*tmp_qloop_26 + rho_dof_4*tmp_qloop_7 + rho_dof_5*tmp_qloop_10 + rho_dof_6*tmp_qloop_9 + rho_dof_7*tmp_qloop_27 + rho_dof_8*tmp_qloop_29 + rho_dof_9*tmp_qloop_31)*_data_q_w[q]; const real_t tmp_qloop_33 = tmp_qloop_32*(1.0 - _data_q_p_0[q] - _data_q_p_1[q] - _data_q_p_2[q]); const real_t tmp_qloop_34 = tmp_qloop_23*tmp_qloop_33; const real_t tmp_qloop_35 = jac_affine_inv_0_1_BLUE_UP*tmp_qloop_18 + jac_affine_inv_1_1_BLUE_UP*tmp_qloop_21 + jac_affine_inv_2_1_BLUE_UP*tmp_qloop_22; @@ -2421,70 +2485,90 @@ void P2VectorToP1ElementwiseGradRhoByRhoDotU::toMatrix_macro_3D( idx_t * RESTRIC mat->addValues( _data_rowIdx, _data_colIdx, _data_mat ); } } + const real_t tmp_coords_jac_0_BLUE_DOWN = 1.0 / (micro_edges_per_macro_edge_float)*1.0; + const real_t tmp_coords_jac_1_BLUE_DOWN = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_BLUE_DOWN*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0); + const real_t tmp_coords_jac_2_BLUE_DOWN = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_BLUE_DOWN*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1); + const real_t tmp_coords_jac_3_BLUE_DOWN = macro_vertex_coord_id_0comp2 + tmp_coords_jac_0_BLUE_DOWN*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2); + const real_t tmp_coords_jac_4_BLUE_DOWN = tmp_coords_jac_0_BLUE_DOWN*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0); + const real_t tmp_coords_jac_5_BLUE_DOWN = macro_vertex_coord_id_0comp0 + tmp_coords_jac_4_BLUE_DOWN; + const real_t tmp_coords_jac_6_BLUE_DOWN = tmp_coords_jac_0_BLUE_DOWN*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1); + const real_t tmp_coords_jac_7_BLUE_DOWN = macro_vertex_coord_id_0comp1 + tmp_coords_jac_6_BLUE_DOWN; + const real_t tmp_coords_jac_8_BLUE_DOWN = tmp_coords_jac_0_BLUE_DOWN*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2); + const real_t tmp_coords_jac_9_BLUE_DOWN = macro_vertex_coord_id_0comp2 + tmp_coords_jac_8_BLUE_DOWN; + const real_t p_affine_const_0_0_BLUE_DOWN = tmp_coords_jac_1_BLUE_DOWN; + const real_t p_affine_const_0_1_BLUE_DOWN = tmp_coords_jac_2_BLUE_DOWN; + const real_t p_affine_const_0_2_BLUE_DOWN = tmp_coords_jac_3_BLUE_DOWN; + const real_t p_affine_const_1_0_BLUE_DOWN = tmp_coords_jac_5_BLUE_DOWN; + const real_t p_affine_const_1_1_BLUE_DOWN = tmp_coords_jac_7_BLUE_DOWN; + const real_t p_affine_const_1_2_BLUE_DOWN = tmp_coords_jac_9_BLUE_DOWN; + const real_t p_affine_const_2_0_BLUE_DOWN = tmp_coords_jac_0_BLUE_DOWN*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0) + tmp_coords_jac_5_BLUE_DOWN; + const real_t p_affine_const_2_1_BLUE_DOWN = tmp_coords_jac_0_BLUE_DOWN*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1) + tmp_coords_jac_7_BLUE_DOWN; + const real_t p_affine_const_2_2_BLUE_DOWN = tmp_coords_jac_0_BLUE_DOWN*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2) + tmp_coords_jac_9_BLUE_DOWN; + const real_t p_affine_const_3_0_BLUE_DOWN = tmp_coords_jac_1_BLUE_DOWN + tmp_coords_jac_4_BLUE_DOWN; + const real_t p_affine_const_3_1_BLUE_DOWN = tmp_coords_jac_2_BLUE_DOWN + tmp_coords_jac_6_BLUE_DOWN; + const real_t p_affine_const_3_2_BLUE_DOWN = tmp_coords_jac_3_BLUE_DOWN + tmp_coords_jac_8_BLUE_DOWN; + const real_t jac_affine_0_0_BLUE_DOWN = -p_affine_const_0_0_BLUE_DOWN + p_affine_const_1_0_BLUE_DOWN; + const real_t jac_affine_0_1_BLUE_DOWN = -p_affine_const_0_0_BLUE_DOWN + p_affine_const_2_0_BLUE_DOWN; + const real_t jac_affine_0_2_BLUE_DOWN = -p_affine_const_0_0_BLUE_DOWN + p_affine_const_3_0_BLUE_DOWN; + const real_t jac_affine_1_0_BLUE_DOWN = -p_affine_const_0_1_BLUE_DOWN + p_affine_const_1_1_BLUE_DOWN; + const real_t jac_affine_1_1_BLUE_DOWN = -p_affine_const_0_1_BLUE_DOWN + p_affine_const_2_1_BLUE_DOWN; + const real_t tmp_coords_jac_14_BLUE_DOWN = jac_affine_0_2_BLUE_DOWN*jac_affine_1_1_BLUE_DOWN; + const real_t jac_affine_1_2_BLUE_DOWN = -p_affine_const_0_1_BLUE_DOWN + p_affine_const_3_1_BLUE_DOWN; + const real_t tmp_coords_jac_12_BLUE_DOWN = jac_affine_0_1_BLUE_DOWN*jac_affine_1_2_BLUE_DOWN; + const real_t jac_affine_2_0_BLUE_DOWN = -p_affine_const_0_2_BLUE_DOWN + p_affine_const_1_2_BLUE_DOWN; + const real_t jac_affine_2_1_BLUE_DOWN = -p_affine_const_0_2_BLUE_DOWN + p_affine_const_2_2_BLUE_DOWN; + const real_t tmp_coords_jac_11_BLUE_DOWN = jac_affine_1_2_BLUE_DOWN*jac_affine_2_1_BLUE_DOWN; + const real_t jac_affine_2_2_BLUE_DOWN = -p_affine_const_0_2_BLUE_DOWN + p_affine_const_3_2_BLUE_DOWN; + const real_t tmp_coords_jac_10_BLUE_DOWN = jac_affine_1_1_BLUE_DOWN*jac_affine_2_2_BLUE_DOWN; + const real_t tmp_coords_jac_13_BLUE_DOWN = jac_affine_0_1_BLUE_DOWN*jac_affine_2_2_BLUE_DOWN; + const real_t tmp_coords_jac_15_BLUE_DOWN = jac_affine_0_0_BLUE_DOWN*tmp_coords_jac_10_BLUE_DOWN - jac_affine_0_0_BLUE_DOWN*tmp_coords_jac_11_BLUE_DOWN + jac_affine_0_2_BLUE_DOWN*jac_affine_1_0_BLUE_DOWN*jac_affine_2_1_BLUE_DOWN - jac_affine_1_0_BLUE_DOWN*tmp_coords_jac_13_BLUE_DOWN + jac_affine_2_0_BLUE_DOWN*tmp_coords_jac_12_BLUE_DOWN - jac_affine_2_0_BLUE_DOWN*tmp_coords_jac_14_BLUE_DOWN; + const real_t tmp_coords_jac_16_BLUE_DOWN = 1.0 / (tmp_coords_jac_15_BLUE_DOWN); + const real_t jac_affine_inv_0_0_BLUE_DOWN = tmp_coords_jac_16_BLUE_DOWN*(tmp_coords_jac_10_BLUE_DOWN - tmp_coords_jac_11_BLUE_DOWN); + const real_t jac_affine_inv_0_1_BLUE_DOWN = tmp_coords_jac_16_BLUE_DOWN*(jac_affine_0_2_BLUE_DOWN*jac_affine_2_1_BLUE_DOWN - tmp_coords_jac_13_BLUE_DOWN); + const real_t jac_affine_inv_0_2_BLUE_DOWN = tmp_coords_jac_16_BLUE_DOWN*(tmp_coords_jac_12_BLUE_DOWN - tmp_coords_jac_14_BLUE_DOWN); + const real_t jac_affine_inv_1_0_BLUE_DOWN = tmp_coords_jac_16_BLUE_DOWN*(-jac_affine_1_0_BLUE_DOWN*jac_affine_2_2_BLUE_DOWN + jac_affine_1_2_BLUE_DOWN*jac_affine_2_0_BLUE_DOWN); + const real_t jac_affine_inv_1_1_BLUE_DOWN = tmp_coords_jac_16_BLUE_DOWN*(jac_affine_0_0_BLUE_DOWN*jac_affine_2_2_BLUE_DOWN - jac_affine_0_2_BLUE_DOWN*jac_affine_2_0_BLUE_DOWN); + const real_t jac_affine_inv_1_2_BLUE_DOWN = tmp_coords_jac_16_BLUE_DOWN*(-jac_affine_0_0_BLUE_DOWN*jac_affine_1_2_BLUE_DOWN + jac_affine_0_2_BLUE_DOWN*jac_affine_1_0_BLUE_DOWN); + const real_t jac_affine_inv_2_0_BLUE_DOWN = tmp_coords_jac_16_BLUE_DOWN*(jac_affine_1_0_BLUE_DOWN*jac_affine_2_1_BLUE_DOWN - jac_affine_1_1_BLUE_DOWN*jac_affine_2_0_BLUE_DOWN); + const real_t jac_affine_inv_2_1_BLUE_DOWN = tmp_coords_jac_16_BLUE_DOWN*(-jac_affine_0_0_BLUE_DOWN*jac_affine_2_1_BLUE_DOWN + jac_affine_0_1_BLUE_DOWN*jac_affine_2_0_BLUE_DOWN); + const real_t jac_affine_inv_2_2_BLUE_DOWN = tmp_coords_jac_16_BLUE_DOWN*(jac_affine_0_0_BLUE_DOWN*jac_affine_1_1_BLUE_DOWN - jac_affine_0_1_BLUE_DOWN*jac_affine_1_0_BLUE_DOWN); + const real_t abs_det_jac_affine_BLUE_DOWN = abs(tmp_coords_jac_15_BLUE_DOWN); { /* CellType.BLUE_DOWN */ - const real_t tmp_coords_jac_0_BLUE_DOWN = 1.0 / (micro_edges_per_macro_edge_float)*1.0; - const real_t tmp_coords_jac_1_BLUE_DOWN = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_BLUE_DOWN*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0); - const real_t tmp_coords_jac_2_BLUE_DOWN = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_BLUE_DOWN*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1); - const real_t tmp_coords_jac_3_BLUE_DOWN = macro_vertex_coord_id_0comp2 + tmp_coords_jac_0_BLUE_DOWN*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2); - const real_t tmp_coords_jac_4_BLUE_DOWN = tmp_coords_jac_0_BLUE_DOWN*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0); - const real_t tmp_coords_jac_5_BLUE_DOWN = macro_vertex_coord_id_0comp0 + tmp_coords_jac_4_BLUE_DOWN; - const real_t tmp_coords_jac_6_BLUE_DOWN = tmp_coords_jac_0_BLUE_DOWN*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1); - const real_t tmp_coords_jac_7_BLUE_DOWN = macro_vertex_coord_id_0comp1 + tmp_coords_jac_6_BLUE_DOWN; - const real_t tmp_coords_jac_8_BLUE_DOWN = tmp_coords_jac_0_BLUE_DOWN*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2); - const real_t tmp_coords_jac_9_BLUE_DOWN = macro_vertex_coord_id_0comp2 + tmp_coords_jac_8_BLUE_DOWN; - const real_t p_affine_const_0_0_BLUE_DOWN = tmp_coords_jac_1_BLUE_DOWN; - const real_t p_affine_const_0_1_BLUE_DOWN = tmp_coords_jac_2_BLUE_DOWN; - const real_t p_affine_const_0_2_BLUE_DOWN = tmp_coords_jac_3_BLUE_DOWN; - const real_t p_affine_const_1_0_BLUE_DOWN = tmp_coords_jac_5_BLUE_DOWN; - const real_t p_affine_const_1_1_BLUE_DOWN = tmp_coords_jac_7_BLUE_DOWN; - const real_t p_affine_const_1_2_BLUE_DOWN = tmp_coords_jac_9_BLUE_DOWN; - const real_t p_affine_const_2_0_BLUE_DOWN = tmp_coords_jac_0_BLUE_DOWN*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0) + tmp_coords_jac_5_BLUE_DOWN; - const real_t p_affine_const_2_1_BLUE_DOWN = tmp_coords_jac_0_BLUE_DOWN*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1) + tmp_coords_jac_7_BLUE_DOWN; - const real_t p_affine_const_2_2_BLUE_DOWN = tmp_coords_jac_0_BLUE_DOWN*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2) + tmp_coords_jac_9_BLUE_DOWN; - const real_t p_affine_const_3_0_BLUE_DOWN = tmp_coords_jac_1_BLUE_DOWN + tmp_coords_jac_4_BLUE_DOWN; - const real_t p_affine_const_3_1_BLUE_DOWN = tmp_coords_jac_2_BLUE_DOWN + tmp_coords_jac_6_BLUE_DOWN; - const real_t p_affine_const_3_2_BLUE_DOWN = tmp_coords_jac_3_BLUE_DOWN + tmp_coords_jac_8_BLUE_DOWN; - const real_t jac_affine_0_0_BLUE_DOWN = -p_affine_const_0_0_BLUE_DOWN + p_affine_const_1_0_BLUE_DOWN; - const real_t jac_affine_0_1_BLUE_DOWN = -p_affine_const_0_0_BLUE_DOWN + p_affine_const_2_0_BLUE_DOWN; - const real_t jac_affine_0_2_BLUE_DOWN = -p_affine_const_0_0_BLUE_DOWN + p_affine_const_3_0_BLUE_DOWN; - const real_t jac_affine_1_0_BLUE_DOWN = -p_affine_const_0_1_BLUE_DOWN + p_affine_const_1_1_BLUE_DOWN; - const real_t jac_affine_1_1_BLUE_DOWN = -p_affine_const_0_1_BLUE_DOWN + p_affine_const_2_1_BLUE_DOWN; - const real_t tmp_coords_jac_14_BLUE_DOWN = jac_affine_0_2_BLUE_DOWN*jac_affine_1_1_BLUE_DOWN; - const real_t jac_affine_1_2_BLUE_DOWN = -p_affine_const_0_1_BLUE_DOWN + p_affine_const_3_1_BLUE_DOWN; - const real_t tmp_coords_jac_12_BLUE_DOWN = jac_affine_0_1_BLUE_DOWN*jac_affine_1_2_BLUE_DOWN; - const real_t jac_affine_2_0_BLUE_DOWN = -p_affine_const_0_2_BLUE_DOWN + p_affine_const_1_2_BLUE_DOWN; - const real_t jac_affine_2_1_BLUE_DOWN = -p_affine_const_0_2_BLUE_DOWN + p_affine_const_2_2_BLUE_DOWN; - const real_t tmp_coords_jac_11_BLUE_DOWN = jac_affine_1_2_BLUE_DOWN*jac_affine_2_1_BLUE_DOWN; - const real_t jac_affine_2_2_BLUE_DOWN = -p_affine_const_0_2_BLUE_DOWN + p_affine_const_3_2_BLUE_DOWN; - const real_t tmp_coords_jac_10_BLUE_DOWN = jac_affine_1_1_BLUE_DOWN*jac_affine_2_2_BLUE_DOWN; - const real_t tmp_coords_jac_13_BLUE_DOWN = jac_affine_0_1_BLUE_DOWN*jac_affine_2_2_BLUE_DOWN; - const real_t tmp_coords_jac_15_BLUE_DOWN = 1.0 / (jac_affine_0_0_BLUE_DOWN*tmp_coords_jac_10_BLUE_DOWN - jac_affine_0_0_BLUE_DOWN*tmp_coords_jac_11_BLUE_DOWN + jac_affine_0_2_BLUE_DOWN*jac_affine_1_0_BLUE_DOWN*jac_affine_2_1_BLUE_DOWN - jac_affine_1_0_BLUE_DOWN*tmp_coords_jac_13_BLUE_DOWN + jac_affine_2_0_BLUE_DOWN*tmp_coords_jac_12_BLUE_DOWN - jac_affine_2_0_BLUE_DOWN*tmp_coords_jac_14_BLUE_DOWN); - const real_t jac_affine_inv_0_0_BLUE_DOWN = tmp_coords_jac_15_BLUE_DOWN*(tmp_coords_jac_10_BLUE_DOWN - tmp_coords_jac_11_BLUE_DOWN); - const real_t jac_affine_inv_0_1_BLUE_DOWN = tmp_coords_jac_15_BLUE_DOWN*(jac_affine_0_2_BLUE_DOWN*jac_affine_2_1_BLUE_DOWN - tmp_coords_jac_13_BLUE_DOWN); - const real_t jac_affine_inv_0_2_BLUE_DOWN = tmp_coords_jac_15_BLUE_DOWN*(tmp_coords_jac_12_BLUE_DOWN - tmp_coords_jac_14_BLUE_DOWN); - const real_t jac_affine_inv_1_0_BLUE_DOWN = tmp_coords_jac_15_BLUE_DOWN*(-jac_affine_1_0_BLUE_DOWN*jac_affine_2_2_BLUE_DOWN + jac_affine_1_2_BLUE_DOWN*jac_affine_2_0_BLUE_DOWN); - const real_t jac_affine_inv_1_1_BLUE_DOWN = tmp_coords_jac_15_BLUE_DOWN*(jac_affine_0_0_BLUE_DOWN*jac_affine_2_2_BLUE_DOWN - jac_affine_0_2_BLUE_DOWN*jac_affine_2_0_BLUE_DOWN); - const real_t jac_affine_inv_1_2_BLUE_DOWN = tmp_coords_jac_15_BLUE_DOWN*(-jac_affine_0_0_BLUE_DOWN*jac_affine_1_2_BLUE_DOWN + jac_affine_0_2_BLUE_DOWN*jac_affine_1_0_BLUE_DOWN); - const real_t jac_affine_inv_2_0_BLUE_DOWN = tmp_coords_jac_15_BLUE_DOWN*(jac_affine_1_0_BLUE_DOWN*jac_affine_2_1_BLUE_DOWN - jac_affine_1_1_BLUE_DOWN*jac_affine_2_0_BLUE_DOWN); - const real_t jac_affine_inv_2_1_BLUE_DOWN = tmp_coords_jac_15_BLUE_DOWN*(-jac_affine_0_0_BLUE_DOWN*jac_affine_2_1_BLUE_DOWN + jac_affine_0_1_BLUE_DOWN*jac_affine_2_0_BLUE_DOWN); - const real_t jac_affine_inv_2_2_BLUE_DOWN = tmp_coords_jac_15_BLUE_DOWN*(jac_affine_0_0_BLUE_DOWN*jac_affine_1_1_BLUE_DOWN - jac_affine_0_1_BLUE_DOWN*jac_affine_1_0_BLUE_DOWN); for (int64_t ctr_2 = 0; ctr_2 < micro_edges_per_macro_edge; ctr_2 += 1) for (int64_t ctr_1 = 0; ctr_1 < -ctr_2 + micro_edges_per_macro_edge; ctr_1 += 1) for (int64_t ctr_0 = 0; ctr_0 < -ctr_1 - ctr_2 + micro_edges_per_macro_edge - 1; ctr_0 += 1) { - const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*((real_t)(ctr_1 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*((real_t)(ctr_2)); - const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*((real_t)(ctr_1 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*((real_t)(ctr_2)); - const real_t p_affine_0_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*((real_t)(ctr_1 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*((real_t)(ctr_2)); - const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*((real_t)(ctr_1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*((real_t)(ctr_2 + 1)); - const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*((real_t)(ctr_1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*((real_t)(ctr_2 + 1)); - const real_t p_affine_1_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*((real_t)(ctr_1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*((real_t)(ctr_2 + 1)); - const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*((real_t)(ctr_1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*((real_t)(ctr_2 + 1)); - const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*((real_t)(ctr_1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*((real_t)(ctr_2 + 1)); - const real_t p_affine_2_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*((real_t)(ctr_1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*((real_t)(ctr_2 + 1)); - const real_t p_affine_3_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*((real_t)(ctr_1 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*((real_t)(ctr_2 + 1)); - const real_t p_affine_3_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*((real_t)(ctr_1 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*((real_t)(ctr_2 + 1)); - const real_t p_affine_3_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*((real_t)(ctr_1 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*((real_t)(ctr_2 + 1)); + + const int64_t phantom_ctr_0 = ctr_0; + real_t _data_float_loop_ctr_array_dim_0[4]; + _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0; + _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1; + _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2; + _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3; + real_t _data_float_loop_ctr_array_dim_1[4]; + _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1; + real_t _data_float_loop_ctr_array_dim_2[4]; + _data_float_loop_ctr_array_dim_2[0] = (real_t) ctr_2; + _data_float_loop_ctr_array_dim_2[1] = (real_t) ctr_2; + _data_float_loop_ctr_array_dim_2[2] = (real_t) ctr_2; + _data_float_loop_ctr_array_dim_2[3] = (real_t) ctr_2; + + const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]; + const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]; + const real_t p_affine_0_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]; + const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_1_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_2_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_3_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_3_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_3_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0; const real_t rho_dof_0 = _data_rhoVertex[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))]; const real_t rho_dof_1 = _data_rhoVertex[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))]; const real_t rho_dof_2 = _data_rhoVertex[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1]; @@ -2649,7 +2733,7 @@ void P2VectorToP1ElementwiseGradRhoByRhoDotU::toMatrix_macro_3D( idx_t * RESTRIC const real_t tmp_qloop_29 = -tmp_qloop_28 + tmp_qloop_6 - tmp_qloop_7 - tmp_qloop_9; const real_t tmp_qloop_30 = tmp_qloop_0*4.0; const real_t tmp_qloop_31 = -tmp_qloop_11 - tmp_qloop_30 + tmp_qloop_8; - const real_t tmp_qloop_32 = 1.0 / (rho_dof_0*tmp_qloop_12 + rho_dof_1*tmp_qloop_24 + rho_dof_2*tmp_qloop_25 + rho_dof_3*tmp_qloop_26 + rho_dof_4*tmp_qloop_7 + rho_dof_5*tmp_qloop_10 + rho_dof_6*tmp_qloop_9 + rho_dof_7*tmp_qloop_27 + rho_dof_8*tmp_qloop_29 + rho_dof_9*tmp_qloop_31)*_data_q_w[q]; + const real_t tmp_qloop_32 = abs_det_jac_affine_BLUE_DOWN*1.0 / (rho_dof_0*tmp_qloop_12 + rho_dof_1*tmp_qloop_24 + rho_dof_2*tmp_qloop_25 + rho_dof_3*tmp_qloop_26 + rho_dof_4*tmp_qloop_7 + rho_dof_5*tmp_qloop_10 + rho_dof_6*tmp_qloop_9 + rho_dof_7*tmp_qloop_27 + rho_dof_8*tmp_qloop_29 + rho_dof_9*tmp_qloop_31)*_data_q_w[q]; const real_t tmp_qloop_33 = tmp_qloop_32*(1.0 - _data_q_p_0[q] - _data_q_p_1[q] - _data_q_p_2[q]); const real_t tmp_qloop_34 = tmp_qloop_23*tmp_qloop_33; const real_t tmp_qloop_35 = jac_affine_inv_0_1_BLUE_DOWN*tmp_qloop_18 + jac_affine_inv_1_1_BLUE_DOWN*tmp_qloop_21 + jac_affine_inv_2_1_BLUE_DOWN*tmp_qloop_22; @@ -3208,67 +3292,87 @@ void P2VectorToP1ElementwiseGradRhoByRhoDotU::toMatrix_macro_3D( idx_t * RESTRIC mat->addValues( _data_rowIdx, _data_colIdx, _data_mat ); } } + const real_t tmp_coords_jac_0_GREEN_UP = 1.0 / (micro_edges_per_macro_edge_float)*1.0; + const real_t tmp_coords_jac_1_GREEN_UP = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_GREEN_UP*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0); + const real_t tmp_coords_jac_2_GREEN_UP = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_GREEN_UP*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1); + const real_t tmp_coords_jac_3_GREEN_UP = macro_vertex_coord_id_0comp2 + tmp_coords_jac_0_GREEN_UP*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2); + const real_t tmp_coords_jac_4_GREEN_UP = tmp_coords_jac_0_GREEN_UP*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0); + const real_t tmp_coords_jac_5_GREEN_UP = tmp_coords_jac_0_GREEN_UP*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1); + const real_t tmp_coords_jac_6_GREEN_UP = tmp_coords_jac_0_GREEN_UP*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2); + const real_t p_affine_const_0_0_GREEN_UP = tmp_coords_jac_1_GREEN_UP; + const real_t p_affine_const_0_1_GREEN_UP = tmp_coords_jac_2_GREEN_UP; + const real_t p_affine_const_0_2_GREEN_UP = tmp_coords_jac_3_GREEN_UP; + const real_t p_affine_const_1_0_GREEN_UP = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_GREEN_UP*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0); + const real_t p_affine_const_1_1_GREEN_UP = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_GREEN_UP*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1); + const real_t p_affine_const_1_2_GREEN_UP = macro_vertex_coord_id_0comp2 + tmp_coords_jac_0_GREEN_UP*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2); + const real_t p_affine_const_2_0_GREEN_UP = macro_vertex_coord_id_0comp0 + tmp_coords_jac_4_GREEN_UP; + const real_t p_affine_const_2_1_GREEN_UP = macro_vertex_coord_id_0comp1 + tmp_coords_jac_5_GREEN_UP; + const real_t p_affine_const_2_2_GREEN_UP = macro_vertex_coord_id_0comp2 + tmp_coords_jac_6_GREEN_UP; + const real_t p_affine_const_3_0_GREEN_UP = tmp_coords_jac_1_GREEN_UP + tmp_coords_jac_4_GREEN_UP; + const real_t p_affine_const_3_1_GREEN_UP = tmp_coords_jac_2_GREEN_UP + tmp_coords_jac_5_GREEN_UP; + const real_t p_affine_const_3_2_GREEN_UP = tmp_coords_jac_3_GREEN_UP + tmp_coords_jac_6_GREEN_UP; + const real_t jac_affine_0_0_GREEN_UP = -p_affine_const_0_0_GREEN_UP + p_affine_const_1_0_GREEN_UP; + const real_t jac_affine_0_1_GREEN_UP = -p_affine_const_0_0_GREEN_UP + p_affine_const_2_0_GREEN_UP; + const real_t jac_affine_0_2_GREEN_UP = -p_affine_const_0_0_GREEN_UP + p_affine_const_3_0_GREEN_UP; + const real_t jac_affine_1_0_GREEN_UP = -p_affine_const_0_1_GREEN_UP + p_affine_const_1_1_GREEN_UP; + const real_t jac_affine_1_1_GREEN_UP = -p_affine_const_0_1_GREEN_UP + p_affine_const_2_1_GREEN_UP; + const real_t tmp_coords_jac_11_GREEN_UP = jac_affine_0_2_GREEN_UP*jac_affine_1_1_GREEN_UP; + const real_t jac_affine_1_2_GREEN_UP = -p_affine_const_0_1_GREEN_UP + p_affine_const_3_1_GREEN_UP; + const real_t tmp_coords_jac_9_GREEN_UP = jac_affine_0_1_GREEN_UP*jac_affine_1_2_GREEN_UP; + const real_t jac_affine_2_0_GREEN_UP = -p_affine_const_0_2_GREEN_UP + p_affine_const_1_2_GREEN_UP; + const real_t jac_affine_2_1_GREEN_UP = -p_affine_const_0_2_GREEN_UP + p_affine_const_2_2_GREEN_UP; + const real_t tmp_coords_jac_8_GREEN_UP = jac_affine_1_2_GREEN_UP*jac_affine_2_1_GREEN_UP; + const real_t jac_affine_2_2_GREEN_UP = -p_affine_const_0_2_GREEN_UP + p_affine_const_3_2_GREEN_UP; + const real_t tmp_coords_jac_7_GREEN_UP = jac_affine_1_1_GREEN_UP*jac_affine_2_2_GREEN_UP; + const real_t tmp_coords_jac_10_GREEN_UP = jac_affine_0_1_GREEN_UP*jac_affine_2_2_GREEN_UP; + const real_t tmp_coords_jac_12_GREEN_UP = jac_affine_0_0_GREEN_UP*tmp_coords_jac_7_GREEN_UP - jac_affine_0_0_GREEN_UP*tmp_coords_jac_8_GREEN_UP + jac_affine_0_2_GREEN_UP*jac_affine_1_0_GREEN_UP*jac_affine_2_1_GREEN_UP - jac_affine_1_0_GREEN_UP*tmp_coords_jac_10_GREEN_UP - jac_affine_2_0_GREEN_UP*tmp_coords_jac_11_GREEN_UP + jac_affine_2_0_GREEN_UP*tmp_coords_jac_9_GREEN_UP; + const real_t tmp_coords_jac_13_GREEN_UP = 1.0 / (tmp_coords_jac_12_GREEN_UP); + const real_t jac_affine_inv_0_0_GREEN_UP = tmp_coords_jac_13_GREEN_UP*(tmp_coords_jac_7_GREEN_UP - tmp_coords_jac_8_GREEN_UP); + const real_t jac_affine_inv_0_1_GREEN_UP = tmp_coords_jac_13_GREEN_UP*(jac_affine_0_2_GREEN_UP*jac_affine_2_1_GREEN_UP - tmp_coords_jac_10_GREEN_UP); + const real_t jac_affine_inv_0_2_GREEN_UP = tmp_coords_jac_13_GREEN_UP*(-tmp_coords_jac_11_GREEN_UP + tmp_coords_jac_9_GREEN_UP); + const real_t jac_affine_inv_1_0_GREEN_UP = tmp_coords_jac_13_GREEN_UP*(-jac_affine_1_0_GREEN_UP*jac_affine_2_2_GREEN_UP + jac_affine_1_2_GREEN_UP*jac_affine_2_0_GREEN_UP); + const real_t jac_affine_inv_1_1_GREEN_UP = tmp_coords_jac_13_GREEN_UP*(jac_affine_0_0_GREEN_UP*jac_affine_2_2_GREEN_UP - jac_affine_0_2_GREEN_UP*jac_affine_2_0_GREEN_UP); + const real_t jac_affine_inv_1_2_GREEN_UP = tmp_coords_jac_13_GREEN_UP*(-jac_affine_0_0_GREEN_UP*jac_affine_1_2_GREEN_UP + jac_affine_0_2_GREEN_UP*jac_affine_1_0_GREEN_UP); + const real_t jac_affine_inv_2_0_GREEN_UP = tmp_coords_jac_13_GREEN_UP*(jac_affine_1_0_GREEN_UP*jac_affine_2_1_GREEN_UP - jac_affine_1_1_GREEN_UP*jac_affine_2_0_GREEN_UP); + const real_t jac_affine_inv_2_1_GREEN_UP = tmp_coords_jac_13_GREEN_UP*(-jac_affine_0_0_GREEN_UP*jac_affine_2_1_GREEN_UP + jac_affine_0_1_GREEN_UP*jac_affine_2_0_GREEN_UP); + const real_t jac_affine_inv_2_2_GREEN_UP = tmp_coords_jac_13_GREEN_UP*(jac_affine_0_0_GREEN_UP*jac_affine_1_1_GREEN_UP - jac_affine_0_1_GREEN_UP*jac_affine_1_0_GREEN_UP); + const real_t abs_det_jac_affine_GREEN_UP = abs(tmp_coords_jac_12_GREEN_UP); { /* CellType.GREEN_UP */ - const real_t tmp_coords_jac_0_GREEN_UP = 1.0 / (micro_edges_per_macro_edge_float)*1.0; - const real_t tmp_coords_jac_1_GREEN_UP = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_GREEN_UP*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0); - const real_t tmp_coords_jac_2_GREEN_UP = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_GREEN_UP*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1); - const real_t tmp_coords_jac_3_GREEN_UP = macro_vertex_coord_id_0comp2 + tmp_coords_jac_0_GREEN_UP*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2); - const real_t tmp_coords_jac_4_GREEN_UP = tmp_coords_jac_0_GREEN_UP*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0); - const real_t tmp_coords_jac_5_GREEN_UP = tmp_coords_jac_0_GREEN_UP*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1); - const real_t tmp_coords_jac_6_GREEN_UP = tmp_coords_jac_0_GREEN_UP*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2); - const real_t p_affine_const_0_0_GREEN_UP = tmp_coords_jac_1_GREEN_UP; - const real_t p_affine_const_0_1_GREEN_UP = tmp_coords_jac_2_GREEN_UP; - const real_t p_affine_const_0_2_GREEN_UP = tmp_coords_jac_3_GREEN_UP; - const real_t p_affine_const_1_0_GREEN_UP = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_GREEN_UP*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0); - const real_t p_affine_const_1_1_GREEN_UP = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_GREEN_UP*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1); - const real_t p_affine_const_1_2_GREEN_UP = macro_vertex_coord_id_0comp2 + tmp_coords_jac_0_GREEN_UP*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2); - const real_t p_affine_const_2_0_GREEN_UP = macro_vertex_coord_id_0comp0 + tmp_coords_jac_4_GREEN_UP; - const real_t p_affine_const_2_1_GREEN_UP = macro_vertex_coord_id_0comp1 + tmp_coords_jac_5_GREEN_UP; - const real_t p_affine_const_2_2_GREEN_UP = macro_vertex_coord_id_0comp2 + tmp_coords_jac_6_GREEN_UP; - const real_t p_affine_const_3_0_GREEN_UP = tmp_coords_jac_1_GREEN_UP + tmp_coords_jac_4_GREEN_UP; - const real_t p_affine_const_3_1_GREEN_UP = tmp_coords_jac_2_GREEN_UP + tmp_coords_jac_5_GREEN_UP; - const real_t p_affine_const_3_2_GREEN_UP = tmp_coords_jac_3_GREEN_UP + tmp_coords_jac_6_GREEN_UP; - const real_t jac_affine_0_0_GREEN_UP = -p_affine_const_0_0_GREEN_UP + p_affine_const_1_0_GREEN_UP; - const real_t jac_affine_0_1_GREEN_UP = -p_affine_const_0_0_GREEN_UP + p_affine_const_2_0_GREEN_UP; - const real_t jac_affine_0_2_GREEN_UP = -p_affine_const_0_0_GREEN_UP + p_affine_const_3_0_GREEN_UP; - const real_t jac_affine_1_0_GREEN_UP = -p_affine_const_0_1_GREEN_UP + p_affine_const_1_1_GREEN_UP; - const real_t jac_affine_1_1_GREEN_UP = -p_affine_const_0_1_GREEN_UP + p_affine_const_2_1_GREEN_UP; - const real_t tmp_coords_jac_11_GREEN_UP = jac_affine_0_2_GREEN_UP*jac_affine_1_1_GREEN_UP; - const real_t jac_affine_1_2_GREEN_UP = -p_affine_const_0_1_GREEN_UP + p_affine_const_3_1_GREEN_UP; - const real_t tmp_coords_jac_9_GREEN_UP = jac_affine_0_1_GREEN_UP*jac_affine_1_2_GREEN_UP; - const real_t jac_affine_2_0_GREEN_UP = -p_affine_const_0_2_GREEN_UP + p_affine_const_1_2_GREEN_UP; - const real_t jac_affine_2_1_GREEN_UP = -p_affine_const_0_2_GREEN_UP + p_affine_const_2_2_GREEN_UP; - const real_t tmp_coords_jac_8_GREEN_UP = jac_affine_1_2_GREEN_UP*jac_affine_2_1_GREEN_UP; - const real_t jac_affine_2_2_GREEN_UP = -p_affine_const_0_2_GREEN_UP + p_affine_const_3_2_GREEN_UP; - const real_t tmp_coords_jac_7_GREEN_UP = jac_affine_1_1_GREEN_UP*jac_affine_2_2_GREEN_UP; - const real_t tmp_coords_jac_10_GREEN_UP = jac_affine_0_1_GREEN_UP*jac_affine_2_2_GREEN_UP; - const real_t tmp_coords_jac_12_GREEN_UP = 1.0 / (jac_affine_0_0_GREEN_UP*tmp_coords_jac_7_GREEN_UP - jac_affine_0_0_GREEN_UP*tmp_coords_jac_8_GREEN_UP + jac_affine_0_2_GREEN_UP*jac_affine_1_0_GREEN_UP*jac_affine_2_1_GREEN_UP - jac_affine_1_0_GREEN_UP*tmp_coords_jac_10_GREEN_UP - jac_affine_2_0_GREEN_UP*tmp_coords_jac_11_GREEN_UP + jac_affine_2_0_GREEN_UP*tmp_coords_jac_9_GREEN_UP); - const real_t jac_affine_inv_0_0_GREEN_UP = tmp_coords_jac_12_GREEN_UP*(tmp_coords_jac_7_GREEN_UP - tmp_coords_jac_8_GREEN_UP); - const real_t jac_affine_inv_0_1_GREEN_UP = tmp_coords_jac_12_GREEN_UP*(jac_affine_0_2_GREEN_UP*jac_affine_2_1_GREEN_UP - tmp_coords_jac_10_GREEN_UP); - const real_t jac_affine_inv_0_2_GREEN_UP = tmp_coords_jac_12_GREEN_UP*(-tmp_coords_jac_11_GREEN_UP + tmp_coords_jac_9_GREEN_UP); - const real_t jac_affine_inv_1_0_GREEN_UP = tmp_coords_jac_12_GREEN_UP*(-jac_affine_1_0_GREEN_UP*jac_affine_2_2_GREEN_UP + jac_affine_1_2_GREEN_UP*jac_affine_2_0_GREEN_UP); - const real_t jac_affine_inv_1_1_GREEN_UP = tmp_coords_jac_12_GREEN_UP*(jac_affine_0_0_GREEN_UP*jac_affine_2_2_GREEN_UP - jac_affine_0_2_GREEN_UP*jac_affine_2_0_GREEN_UP); - const real_t jac_affine_inv_1_2_GREEN_UP = tmp_coords_jac_12_GREEN_UP*(-jac_affine_0_0_GREEN_UP*jac_affine_1_2_GREEN_UP + jac_affine_0_2_GREEN_UP*jac_affine_1_0_GREEN_UP); - const real_t jac_affine_inv_2_0_GREEN_UP = tmp_coords_jac_12_GREEN_UP*(jac_affine_1_0_GREEN_UP*jac_affine_2_1_GREEN_UP - jac_affine_1_1_GREEN_UP*jac_affine_2_0_GREEN_UP); - const real_t jac_affine_inv_2_1_GREEN_UP = tmp_coords_jac_12_GREEN_UP*(-jac_affine_0_0_GREEN_UP*jac_affine_2_1_GREEN_UP + jac_affine_0_1_GREEN_UP*jac_affine_2_0_GREEN_UP); - const real_t jac_affine_inv_2_2_GREEN_UP = tmp_coords_jac_12_GREEN_UP*(jac_affine_0_0_GREEN_UP*jac_affine_1_1_GREEN_UP - jac_affine_0_1_GREEN_UP*jac_affine_1_0_GREEN_UP); for (int64_t ctr_2 = 0; ctr_2 < micro_edges_per_macro_edge; ctr_2 += 1) for (int64_t ctr_1 = 0; ctr_1 < -ctr_2 + micro_edges_per_macro_edge; ctr_1 += 1) for (int64_t ctr_0 = 0; ctr_0 < -ctr_1 - ctr_2 + micro_edges_per_macro_edge - 1; ctr_0 += 1) { - const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*((real_t)(ctr_1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*((real_t)(ctr_2)); - const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*((real_t)(ctr_1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*((real_t)(ctr_2)); - const real_t p_affine_0_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*((real_t)(ctr_1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*((real_t)(ctr_2)); - const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*((real_t)(ctr_1 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*((real_t)(ctr_2)); - const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*((real_t)(ctr_1 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*((real_t)(ctr_2)); - const real_t p_affine_1_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*((real_t)(ctr_1 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*((real_t)(ctr_2)); - const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*((real_t)(ctr_1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*((real_t)(ctr_2 + 1)); - const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*((real_t)(ctr_1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*((real_t)(ctr_2 + 1)); - const real_t p_affine_2_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*((real_t)(ctr_1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*((real_t)(ctr_2 + 1)); - const real_t p_affine_3_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*((real_t)(ctr_1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*((real_t)(ctr_2 + 1)); - const real_t p_affine_3_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*((real_t)(ctr_1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*((real_t)(ctr_2 + 1)); - const real_t p_affine_3_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*((real_t)(ctr_1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*((real_t)(ctr_2 + 1)); + + const int64_t phantom_ctr_0 = ctr_0; + real_t _data_float_loop_ctr_array_dim_0[4]; + _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0; + _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1; + _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2; + _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3; + real_t _data_float_loop_ctr_array_dim_1[4]; + _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1; + real_t _data_float_loop_ctr_array_dim_2[4]; + _data_float_loop_ctr_array_dim_2[0] = (real_t) ctr_2; + _data_float_loop_ctr_array_dim_2[1] = (real_t) ctr_2; + _data_float_loop_ctr_array_dim_2[2] = (real_t) ctr_2; + _data_float_loop_ctr_array_dim_2[3] = (real_t) ctr_2; + + const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]; + const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]; + const real_t p_affine_0_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]; + const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]; + const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]; + const real_t p_affine_1_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]; + const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_2_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_3_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_3_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_3_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0; const real_t rho_dof_0 = _data_rhoVertex[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1]; const real_t rho_dof_1 = _data_rhoVertex[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))]; const real_t rho_dof_2 = _data_rhoVertex[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))]; @@ -3433,7 +3537,7 @@ void P2VectorToP1ElementwiseGradRhoByRhoDotU::toMatrix_macro_3D( idx_t * RESTRIC const real_t tmp_qloop_29 = -tmp_qloop_28 + tmp_qloop_6 - tmp_qloop_7 - tmp_qloop_9; const real_t tmp_qloop_30 = tmp_qloop_0*4.0; const real_t tmp_qloop_31 = -tmp_qloop_11 - tmp_qloop_30 + tmp_qloop_8; - const real_t tmp_qloop_32 = 1.0 / (rho_dof_0*tmp_qloop_12 + rho_dof_1*tmp_qloop_24 + rho_dof_2*tmp_qloop_25 + rho_dof_3*tmp_qloop_26 + rho_dof_4*tmp_qloop_7 + rho_dof_5*tmp_qloop_10 + rho_dof_6*tmp_qloop_9 + rho_dof_7*tmp_qloop_27 + rho_dof_8*tmp_qloop_29 + rho_dof_9*tmp_qloop_31)*_data_q_w[q]; + const real_t tmp_qloop_32 = abs_det_jac_affine_GREEN_UP*1.0 / (rho_dof_0*tmp_qloop_12 + rho_dof_1*tmp_qloop_24 + rho_dof_2*tmp_qloop_25 + rho_dof_3*tmp_qloop_26 + rho_dof_4*tmp_qloop_7 + rho_dof_5*tmp_qloop_10 + rho_dof_6*tmp_qloop_9 + rho_dof_7*tmp_qloop_27 + rho_dof_8*tmp_qloop_29 + rho_dof_9*tmp_qloop_31)*_data_q_w[q]; const real_t tmp_qloop_33 = tmp_qloop_32*(1.0 - _data_q_p_0[q] - _data_q_p_1[q] - _data_q_p_2[q]); const real_t tmp_qloop_34 = tmp_qloop_23*tmp_qloop_33; const real_t tmp_qloop_35 = jac_affine_inv_0_1_GREEN_UP*tmp_qloop_18 + jac_affine_inv_1_1_GREEN_UP*tmp_qloop_21 + jac_affine_inv_2_1_GREEN_UP*tmp_qloop_22; @@ -3992,70 +4096,90 @@ void P2VectorToP1ElementwiseGradRhoByRhoDotU::toMatrix_macro_3D( idx_t * RESTRIC mat->addValues( _data_rowIdx, _data_colIdx, _data_mat ); } } + const real_t tmp_coords_jac_0_GREEN_DOWN = 1.0 / (micro_edges_per_macro_edge_float)*1.0; + const real_t tmp_coords_jac_1_GREEN_DOWN = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_GREEN_DOWN*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0); + const real_t tmp_coords_jac_2_GREEN_DOWN = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_GREEN_DOWN*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1); + const real_t tmp_coords_jac_3_GREEN_DOWN = macro_vertex_coord_id_0comp2 + tmp_coords_jac_0_GREEN_DOWN*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2); + const real_t tmp_coords_jac_4_GREEN_DOWN = tmp_coords_jac_0_GREEN_DOWN*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0); + const real_t tmp_coords_jac_5_GREEN_DOWN = tmp_coords_jac_0_GREEN_DOWN*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1); + const real_t tmp_coords_jac_6_GREEN_DOWN = tmp_coords_jac_0_GREEN_DOWN*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2); + const real_t tmp_coords_jac_7_GREEN_DOWN = tmp_coords_jac_0_GREEN_DOWN*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0); + const real_t tmp_coords_jac_8_GREEN_DOWN = tmp_coords_jac_0_GREEN_DOWN*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1); + const real_t tmp_coords_jac_9_GREEN_DOWN = tmp_coords_jac_0_GREEN_DOWN*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2); + const real_t p_affine_const_0_0_GREEN_DOWN = tmp_coords_jac_1_GREEN_DOWN; + const real_t p_affine_const_0_1_GREEN_DOWN = tmp_coords_jac_2_GREEN_DOWN; + const real_t p_affine_const_0_2_GREEN_DOWN = tmp_coords_jac_3_GREEN_DOWN; + const real_t p_affine_const_1_0_GREEN_DOWN = tmp_coords_jac_1_GREEN_DOWN + tmp_coords_jac_4_GREEN_DOWN; + const real_t p_affine_const_1_1_GREEN_DOWN = tmp_coords_jac_2_GREEN_DOWN + tmp_coords_jac_5_GREEN_DOWN; + const real_t p_affine_const_1_2_GREEN_DOWN = tmp_coords_jac_3_GREEN_DOWN + tmp_coords_jac_6_GREEN_DOWN; + const real_t p_affine_const_2_0_GREEN_DOWN = macro_vertex_coord_id_0comp0 + tmp_coords_jac_4_GREEN_DOWN + tmp_coords_jac_7_GREEN_DOWN; + const real_t p_affine_const_2_1_GREEN_DOWN = macro_vertex_coord_id_0comp1 + tmp_coords_jac_5_GREEN_DOWN + tmp_coords_jac_8_GREEN_DOWN; + const real_t p_affine_const_2_2_GREEN_DOWN = macro_vertex_coord_id_0comp2 + tmp_coords_jac_6_GREEN_DOWN + tmp_coords_jac_9_GREEN_DOWN; + const real_t p_affine_const_3_0_GREEN_DOWN = tmp_coords_jac_1_GREEN_DOWN + tmp_coords_jac_7_GREEN_DOWN; + const real_t p_affine_const_3_1_GREEN_DOWN = tmp_coords_jac_2_GREEN_DOWN + tmp_coords_jac_8_GREEN_DOWN; + const real_t p_affine_const_3_2_GREEN_DOWN = tmp_coords_jac_3_GREEN_DOWN + tmp_coords_jac_9_GREEN_DOWN; + const real_t jac_affine_0_0_GREEN_DOWN = -p_affine_const_0_0_GREEN_DOWN + p_affine_const_1_0_GREEN_DOWN; + const real_t jac_affine_0_1_GREEN_DOWN = -p_affine_const_0_0_GREEN_DOWN + p_affine_const_2_0_GREEN_DOWN; + const real_t jac_affine_0_2_GREEN_DOWN = -p_affine_const_0_0_GREEN_DOWN + p_affine_const_3_0_GREEN_DOWN; + const real_t jac_affine_1_0_GREEN_DOWN = -p_affine_const_0_1_GREEN_DOWN + p_affine_const_1_1_GREEN_DOWN; + const real_t jac_affine_1_1_GREEN_DOWN = -p_affine_const_0_1_GREEN_DOWN + p_affine_const_2_1_GREEN_DOWN; + const real_t tmp_coords_jac_14_GREEN_DOWN = jac_affine_0_2_GREEN_DOWN*jac_affine_1_1_GREEN_DOWN; + const real_t jac_affine_1_2_GREEN_DOWN = -p_affine_const_0_1_GREEN_DOWN + p_affine_const_3_1_GREEN_DOWN; + const real_t tmp_coords_jac_12_GREEN_DOWN = jac_affine_0_1_GREEN_DOWN*jac_affine_1_2_GREEN_DOWN; + const real_t jac_affine_2_0_GREEN_DOWN = -p_affine_const_0_2_GREEN_DOWN + p_affine_const_1_2_GREEN_DOWN; + const real_t jac_affine_2_1_GREEN_DOWN = -p_affine_const_0_2_GREEN_DOWN + p_affine_const_2_2_GREEN_DOWN; + const real_t tmp_coords_jac_11_GREEN_DOWN = jac_affine_1_2_GREEN_DOWN*jac_affine_2_1_GREEN_DOWN; + const real_t jac_affine_2_2_GREEN_DOWN = -p_affine_const_0_2_GREEN_DOWN + p_affine_const_3_2_GREEN_DOWN; + const real_t tmp_coords_jac_10_GREEN_DOWN = jac_affine_1_1_GREEN_DOWN*jac_affine_2_2_GREEN_DOWN; + const real_t tmp_coords_jac_13_GREEN_DOWN = jac_affine_0_1_GREEN_DOWN*jac_affine_2_2_GREEN_DOWN; + const real_t tmp_coords_jac_15_GREEN_DOWN = jac_affine_0_0_GREEN_DOWN*tmp_coords_jac_10_GREEN_DOWN - jac_affine_0_0_GREEN_DOWN*tmp_coords_jac_11_GREEN_DOWN + jac_affine_0_2_GREEN_DOWN*jac_affine_1_0_GREEN_DOWN*jac_affine_2_1_GREEN_DOWN - jac_affine_1_0_GREEN_DOWN*tmp_coords_jac_13_GREEN_DOWN + jac_affine_2_0_GREEN_DOWN*tmp_coords_jac_12_GREEN_DOWN - jac_affine_2_0_GREEN_DOWN*tmp_coords_jac_14_GREEN_DOWN; + const real_t tmp_coords_jac_16_GREEN_DOWN = 1.0 / (tmp_coords_jac_15_GREEN_DOWN); + const real_t jac_affine_inv_0_0_GREEN_DOWN = tmp_coords_jac_16_GREEN_DOWN*(tmp_coords_jac_10_GREEN_DOWN - tmp_coords_jac_11_GREEN_DOWN); + const real_t jac_affine_inv_0_1_GREEN_DOWN = tmp_coords_jac_16_GREEN_DOWN*(jac_affine_0_2_GREEN_DOWN*jac_affine_2_1_GREEN_DOWN - tmp_coords_jac_13_GREEN_DOWN); + const real_t jac_affine_inv_0_2_GREEN_DOWN = tmp_coords_jac_16_GREEN_DOWN*(tmp_coords_jac_12_GREEN_DOWN - tmp_coords_jac_14_GREEN_DOWN); + const real_t jac_affine_inv_1_0_GREEN_DOWN = tmp_coords_jac_16_GREEN_DOWN*(-jac_affine_1_0_GREEN_DOWN*jac_affine_2_2_GREEN_DOWN + jac_affine_1_2_GREEN_DOWN*jac_affine_2_0_GREEN_DOWN); + const real_t jac_affine_inv_1_1_GREEN_DOWN = tmp_coords_jac_16_GREEN_DOWN*(jac_affine_0_0_GREEN_DOWN*jac_affine_2_2_GREEN_DOWN - jac_affine_0_2_GREEN_DOWN*jac_affine_2_0_GREEN_DOWN); + const real_t jac_affine_inv_1_2_GREEN_DOWN = tmp_coords_jac_16_GREEN_DOWN*(-jac_affine_0_0_GREEN_DOWN*jac_affine_1_2_GREEN_DOWN + jac_affine_0_2_GREEN_DOWN*jac_affine_1_0_GREEN_DOWN); + const real_t jac_affine_inv_2_0_GREEN_DOWN = tmp_coords_jac_16_GREEN_DOWN*(jac_affine_1_0_GREEN_DOWN*jac_affine_2_1_GREEN_DOWN - jac_affine_1_1_GREEN_DOWN*jac_affine_2_0_GREEN_DOWN); + const real_t jac_affine_inv_2_1_GREEN_DOWN = tmp_coords_jac_16_GREEN_DOWN*(-jac_affine_0_0_GREEN_DOWN*jac_affine_2_1_GREEN_DOWN + jac_affine_0_1_GREEN_DOWN*jac_affine_2_0_GREEN_DOWN); + const real_t jac_affine_inv_2_2_GREEN_DOWN = tmp_coords_jac_16_GREEN_DOWN*(jac_affine_0_0_GREEN_DOWN*jac_affine_1_1_GREEN_DOWN - jac_affine_0_1_GREEN_DOWN*jac_affine_1_0_GREEN_DOWN); + const real_t abs_det_jac_affine_GREEN_DOWN = abs(tmp_coords_jac_15_GREEN_DOWN); { /* CellType.GREEN_DOWN */ - const real_t tmp_coords_jac_0_GREEN_DOWN = 1.0 / (micro_edges_per_macro_edge_float)*1.0; - const real_t tmp_coords_jac_1_GREEN_DOWN = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_GREEN_DOWN*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0); - const real_t tmp_coords_jac_2_GREEN_DOWN = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_GREEN_DOWN*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1); - const real_t tmp_coords_jac_3_GREEN_DOWN = macro_vertex_coord_id_0comp2 + tmp_coords_jac_0_GREEN_DOWN*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2); - const real_t tmp_coords_jac_4_GREEN_DOWN = tmp_coords_jac_0_GREEN_DOWN*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0); - const real_t tmp_coords_jac_5_GREEN_DOWN = tmp_coords_jac_0_GREEN_DOWN*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1); - const real_t tmp_coords_jac_6_GREEN_DOWN = tmp_coords_jac_0_GREEN_DOWN*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2); - const real_t tmp_coords_jac_7_GREEN_DOWN = tmp_coords_jac_0_GREEN_DOWN*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0); - const real_t tmp_coords_jac_8_GREEN_DOWN = tmp_coords_jac_0_GREEN_DOWN*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1); - const real_t tmp_coords_jac_9_GREEN_DOWN = tmp_coords_jac_0_GREEN_DOWN*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2); - const real_t p_affine_const_0_0_GREEN_DOWN = tmp_coords_jac_1_GREEN_DOWN; - const real_t p_affine_const_0_1_GREEN_DOWN = tmp_coords_jac_2_GREEN_DOWN; - const real_t p_affine_const_0_2_GREEN_DOWN = tmp_coords_jac_3_GREEN_DOWN; - const real_t p_affine_const_1_0_GREEN_DOWN = tmp_coords_jac_1_GREEN_DOWN + tmp_coords_jac_4_GREEN_DOWN; - const real_t p_affine_const_1_1_GREEN_DOWN = tmp_coords_jac_2_GREEN_DOWN + tmp_coords_jac_5_GREEN_DOWN; - const real_t p_affine_const_1_2_GREEN_DOWN = tmp_coords_jac_3_GREEN_DOWN + tmp_coords_jac_6_GREEN_DOWN; - const real_t p_affine_const_2_0_GREEN_DOWN = macro_vertex_coord_id_0comp0 + tmp_coords_jac_4_GREEN_DOWN + tmp_coords_jac_7_GREEN_DOWN; - const real_t p_affine_const_2_1_GREEN_DOWN = macro_vertex_coord_id_0comp1 + tmp_coords_jac_5_GREEN_DOWN + tmp_coords_jac_8_GREEN_DOWN; - const real_t p_affine_const_2_2_GREEN_DOWN = macro_vertex_coord_id_0comp2 + tmp_coords_jac_6_GREEN_DOWN + tmp_coords_jac_9_GREEN_DOWN; - const real_t p_affine_const_3_0_GREEN_DOWN = tmp_coords_jac_1_GREEN_DOWN + tmp_coords_jac_7_GREEN_DOWN; - const real_t p_affine_const_3_1_GREEN_DOWN = tmp_coords_jac_2_GREEN_DOWN + tmp_coords_jac_8_GREEN_DOWN; - const real_t p_affine_const_3_2_GREEN_DOWN = tmp_coords_jac_3_GREEN_DOWN + tmp_coords_jac_9_GREEN_DOWN; - const real_t jac_affine_0_0_GREEN_DOWN = -p_affine_const_0_0_GREEN_DOWN + p_affine_const_1_0_GREEN_DOWN; - const real_t jac_affine_0_1_GREEN_DOWN = -p_affine_const_0_0_GREEN_DOWN + p_affine_const_2_0_GREEN_DOWN; - const real_t jac_affine_0_2_GREEN_DOWN = -p_affine_const_0_0_GREEN_DOWN + p_affine_const_3_0_GREEN_DOWN; - const real_t jac_affine_1_0_GREEN_DOWN = -p_affine_const_0_1_GREEN_DOWN + p_affine_const_1_1_GREEN_DOWN; - const real_t jac_affine_1_1_GREEN_DOWN = -p_affine_const_0_1_GREEN_DOWN + p_affine_const_2_1_GREEN_DOWN; - const real_t tmp_coords_jac_14_GREEN_DOWN = jac_affine_0_2_GREEN_DOWN*jac_affine_1_1_GREEN_DOWN; - const real_t jac_affine_1_2_GREEN_DOWN = -p_affine_const_0_1_GREEN_DOWN + p_affine_const_3_1_GREEN_DOWN; - const real_t tmp_coords_jac_12_GREEN_DOWN = jac_affine_0_1_GREEN_DOWN*jac_affine_1_2_GREEN_DOWN; - const real_t jac_affine_2_0_GREEN_DOWN = -p_affine_const_0_2_GREEN_DOWN + p_affine_const_1_2_GREEN_DOWN; - const real_t jac_affine_2_1_GREEN_DOWN = -p_affine_const_0_2_GREEN_DOWN + p_affine_const_2_2_GREEN_DOWN; - const real_t tmp_coords_jac_11_GREEN_DOWN = jac_affine_1_2_GREEN_DOWN*jac_affine_2_1_GREEN_DOWN; - const real_t jac_affine_2_2_GREEN_DOWN = -p_affine_const_0_2_GREEN_DOWN + p_affine_const_3_2_GREEN_DOWN; - const real_t tmp_coords_jac_10_GREEN_DOWN = jac_affine_1_1_GREEN_DOWN*jac_affine_2_2_GREEN_DOWN; - const real_t tmp_coords_jac_13_GREEN_DOWN = jac_affine_0_1_GREEN_DOWN*jac_affine_2_2_GREEN_DOWN; - const real_t tmp_coords_jac_15_GREEN_DOWN = 1.0 / (jac_affine_0_0_GREEN_DOWN*tmp_coords_jac_10_GREEN_DOWN - jac_affine_0_0_GREEN_DOWN*tmp_coords_jac_11_GREEN_DOWN + jac_affine_0_2_GREEN_DOWN*jac_affine_1_0_GREEN_DOWN*jac_affine_2_1_GREEN_DOWN - jac_affine_1_0_GREEN_DOWN*tmp_coords_jac_13_GREEN_DOWN + jac_affine_2_0_GREEN_DOWN*tmp_coords_jac_12_GREEN_DOWN - jac_affine_2_0_GREEN_DOWN*tmp_coords_jac_14_GREEN_DOWN); - const real_t jac_affine_inv_0_0_GREEN_DOWN = tmp_coords_jac_15_GREEN_DOWN*(tmp_coords_jac_10_GREEN_DOWN - tmp_coords_jac_11_GREEN_DOWN); - const real_t jac_affine_inv_0_1_GREEN_DOWN = tmp_coords_jac_15_GREEN_DOWN*(jac_affine_0_2_GREEN_DOWN*jac_affine_2_1_GREEN_DOWN - tmp_coords_jac_13_GREEN_DOWN); - const real_t jac_affine_inv_0_2_GREEN_DOWN = tmp_coords_jac_15_GREEN_DOWN*(tmp_coords_jac_12_GREEN_DOWN - tmp_coords_jac_14_GREEN_DOWN); - const real_t jac_affine_inv_1_0_GREEN_DOWN = tmp_coords_jac_15_GREEN_DOWN*(-jac_affine_1_0_GREEN_DOWN*jac_affine_2_2_GREEN_DOWN + jac_affine_1_2_GREEN_DOWN*jac_affine_2_0_GREEN_DOWN); - const real_t jac_affine_inv_1_1_GREEN_DOWN = tmp_coords_jac_15_GREEN_DOWN*(jac_affine_0_0_GREEN_DOWN*jac_affine_2_2_GREEN_DOWN - jac_affine_0_2_GREEN_DOWN*jac_affine_2_0_GREEN_DOWN); - const real_t jac_affine_inv_1_2_GREEN_DOWN = tmp_coords_jac_15_GREEN_DOWN*(-jac_affine_0_0_GREEN_DOWN*jac_affine_1_2_GREEN_DOWN + jac_affine_0_2_GREEN_DOWN*jac_affine_1_0_GREEN_DOWN); - const real_t jac_affine_inv_2_0_GREEN_DOWN = tmp_coords_jac_15_GREEN_DOWN*(jac_affine_1_0_GREEN_DOWN*jac_affine_2_1_GREEN_DOWN - jac_affine_1_1_GREEN_DOWN*jac_affine_2_0_GREEN_DOWN); - const real_t jac_affine_inv_2_1_GREEN_DOWN = tmp_coords_jac_15_GREEN_DOWN*(-jac_affine_0_0_GREEN_DOWN*jac_affine_2_1_GREEN_DOWN + jac_affine_0_1_GREEN_DOWN*jac_affine_2_0_GREEN_DOWN); - const real_t jac_affine_inv_2_2_GREEN_DOWN = tmp_coords_jac_15_GREEN_DOWN*(jac_affine_0_0_GREEN_DOWN*jac_affine_1_1_GREEN_DOWN - jac_affine_0_1_GREEN_DOWN*jac_affine_1_0_GREEN_DOWN); for (int64_t ctr_2 = 0; ctr_2 < micro_edges_per_macro_edge; ctr_2 += 1) for (int64_t ctr_1 = 0; ctr_1 < -ctr_2 + micro_edges_per_macro_edge; ctr_1 += 1) for (int64_t ctr_0 = 0; ctr_0 < -ctr_1 - ctr_2 + micro_edges_per_macro_edge - 1; ctr_0 += 1) { - const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*((real_t)(ctr_1 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*((real_t)(ctr_2)); - const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*((real_t)(ctr_1 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*((real_t)(ctr_2)); - const real_t p_affine_0_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*((real_t)(ctr_1 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*((real_t)(ctr_2)); - const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*((real_t)(ctr_1 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*((real_t)(ctr_2)); - const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*((real_t)(ctr_1 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*((real_t)(ctr_2)); - const real_t p_affine_1_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*((real_t)(ctr_1 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*((real_t)(ctr_2)); - const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*((real_t)(ctr_1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*((real_t)(ctr_2 + 1)); - const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*((real_t)(ctr_1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*((real_t)(ctr_2 + 1)); - const real_t p_affine_2_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*((real_t)(ctr_1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*((real_t)(ctr_2 + 1)); - const real_t p_affine_3_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*((real_t)(ctr_1 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*((real_t)(ctr_2 + 1)); - const real_t p_affine_3_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*((real_t)(ctr_1 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*((real_t)(ctr_2 + 1)); - const real_t p_affine_3_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*((real_t)(ctr_1 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*((real_t)(ctr_2 + 1)); + + const int64_t phantom_ctr_0 = ctr_0; + real_t _data_float_loop_ctr_array_dim_0[4]; + _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0; + _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1; + _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2; + _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3; + real_t _data_float_loop_ctr_array_dim_1[4]; + _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1; + real_t _data_float_loop_ctr_array_dim_2[4]; + _data_float_loop_ctr_array_dim_2[0] = (real_t) ctr_2; + _data_float_loop_ctr_array_dim_2[1] = (real_t) ctr_2; + _data_float_loop_ctr_array_dim_2[2] = (real_t) ctr_2; + _data_float_loop_ctr_array_dim_2[3] = (real_t) ctr_2; + + const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]; + const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]; + const real_t p_affine_0_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]; + const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]; + const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]; + const real_t p_affine_1_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]; + const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_2_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_3_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_3_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_3_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0; const real_t rho_dof_0 = _data_rhoVertex[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))]; const real_t rho_dof_1 = _data_rhoVertex[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1]; const real_t rho_dof_2 = _data_rhoVertex[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1]; @@ -4220,7 +4344,7 @@ void P2VectorToP1ElementwiseGradRhoByRhoDotU::toMatrix_macro_3D( idx_t * RESTRIC const real_t tmp_qloop_29 = -tmp_qloop_28 + tmp_qloop_6 - tmp_qloop_7 - tmp_qloop_9; const real_t tmp_qloop_30 = tmp_qloop_0*4.0; const real_t tmp_qloop_31 = -tmp_qloop_11 - tmp_qloop_30 + tmp_qloop_8; - const real_t tmp_qloop_32 = 1.0 / (rho_dof_0*tmp_qloop_12 + rho_dof_1*tmp_qloop_24 + rho_dof_2*tmp_qloop_25 + rho_dof_3*tmp_qloop_26 + rho_dof_4*tmp_qloop_7 + rho_dof_5*tmp_qloop_10 + rho_dof_6*tmp_qloop_9 + rho_dof_7*tmp_qloop_27 + rho_dof_8*tmp_qloop_29 + rho_dof_9*tmp_qloop_31)*_data_q_w[q]; + const real_t tmp_qloop_32 = abs_det_jac_affine_GREEN_DOWN*1.0 / (rho_dof_0*tmp_qloop_12 + rho_dof_1*tmp_qloop_24 + rho_dof_2*tmp_qloop_25 + rho_dof_3*tmp_qloop_26 + rho_dof_4*tmp_qloop_7 + rho_dof_5*tmp_qloop_10 + rho_dof_6*tmp_qloop_9 + rho_dof_7*tmp_qloop_27 + rho_dof_8*tmp_qloop_29 + rho_dof_9*tmp_qloop_31)*_data_q_w[q]; const real_t tmp_qloop_33 = tmp_qloop_32*(1.0 - _data_q_p_0[q] - _data_q_p_1[q] - _data_q_p_2[q]); const real_t tmp_qloop_34 = tmp_qloop_23*tmp_qloop_33; const real_t tmp_qloop_35 = jac_affine_inv_0_1_GREEN_DOWN*tmp_qloop_18 + jac_affine_inv_1_1_GREEN_DOWN*tmp_qloop_21 + jac_affine_inv_2_1_GREEN_DOWN*tmp_qloop_22; diff --git a/operators/gradient/CMakeLists.txt b/operators/gradient/CMakeLists.txt index 0c37a89aa82b7918a363ba6ece752e02096733c6..6d3cdf3408473a9cd079ffeee1b133f215eb9876 100644 --- a/operators/gradient/CMakeLists.txt +++ b/operators/gradient/CMakeLists.txt @@ -21,40 +21,40 @@ add_library( opgen-gradient if(HYTEG_BUILD_WITH_AVX AND WALBERLA_DOUBLE_ACCURACY) target_sources(opgen-gradient PRIVATE - avx/P1ToP2ElementwiseGradientAnnulusMap_0_0_apply_macro_2D.cpp - avx/P1ToP2ElementwiseGradientAnnulusMap_1_0_apply_macro_2D.cpp - avx/P1ToP2ElementwiseGradientIcosahedralShellMap_0_0_apply_macro_3D.cpp - avx/P1ToP2ElementwiseGradientIcosahedralShellMap_1_0_apply_macro_3D.cpp - avx/P1ToP2ElementwiseGradientIcosahedralShellMap_2_0_apply_macro_3D.cpp - avx/P1ToP2ElementwiseGradient_0_0_apply_macro_2D.cpp - avx/P1ToP2ElementwiseGradient_0_0_apply_macro_3D.cpp - avx/P1ToP2ElementwiseGradient_1_0_apply_macro_2D.cpp - avx/P1ToP2ElementwiseGradient_1_0_apply_macro_3D.cpp - avx/P1ToP2ElementwiseGradient_2_0_apply_macro_3D.cpp - noarch/P1ToP2ElementwiseGradientAnnulusMap_0_0_toMatrix_macro_2D.cpp - noarch/P1ToP2ElementwiseGradientAnnulusMap_1_0_toMatrix_macro_2D.cpp - noarch/P1ToP2ElementwiseGradientIcosahedralShellMap_0_0_toMatrix_macro_3D.cpp - noarch/P1ToP2ElementwiseGradientIcosahedralShellMap_1_0_toMatrix_macro_3D.cpp - noarch/P1ToP2ElementwiseGradientIcosahedralShellMap_2_0_toMatrix_macro_3D.cpp - noarch/P1ToP2ElementwiseGradient_0_0_toMatrix_macro_2D.cpp - noarch/P1ToP2ElementwiseGradient_0_0_toMatrix_macro_3D.cpp - noarch/P1ToP2ElementwiseGradient_1_0_toMatrix_macro_2D.cpp - noarch/P1ToP2ElementwiseGradient_1_0_toMatrix_macro_3D.cpp - noarch/P1ToP2ElementwiseGradient_2_0_toMatrix_macro_3D.cpp + avx/P1ToP2ElementwiseGradientAnnulusMap_0_0_apply_P1ToP2ElementwiseGradientAnnulusMap_0_0_macro_2D.cpp + avx/P1ToP2ElementwiseGradientAnnulusMap_1_0_apply_P1ToP2ElementwiseGradientAnnulusMap_1_0_macro_2D.cpp + avx/P1ToP2ElementwiseGradientIcosahedralShellMap_0_0_apply_P1ToP2ElementwiseGradientIcosahedralShellMap_0_0_macro_3D.cpp + avx/P1ToP2ElementwiseGradientIcosahedralShellMap_1_0_apply_P1ToP2ElementwiseGradientIcosahedralShellMap_1_0_macro_3D.cpp + avx/P1ToP2ElementwiseGradientIcosahedralShellMap_2_0_apply_P1ToP2ElementwiseGradientIcosahedralShellMap_2_0_macro_3D.cpp + avx/P1ToP2ElementwiseGradient_0_0_apply_P1ToP2ElementwiseGradient_0_0_macro_2D.cpp + avx/P1ToP2ElementwiseGradient_0_0_apply_P1ToP2ElementwiseGradient_0_0_macro_3D.cpp + avx/P1ToP2ElementwiseGradient_1_0_apply_P1ToP2ElementwiseGradient_1_0_macro_2D.cpp + avx/P1ToP2ElementwiseGradient_1_0_apply_P1ToP2ElementwiseGradient_1_0_macro_3D.cpp + avx/P1ToP2ElementwiseGradient_2_0_apply_P1ToP2ElementwiseGradient_2_0_macro_3D.cpp + noarch/P1ToP2ElementwiseGradientAnnulusMap_0_0_toMatrix_P1ToP2ElementwiseGradientAnnulusMap_0_0_macro_2D.cpp + noarch/P1ToP2ElementwiseGradientAnnulusMap_1_0_toMatrix_P1ToP2ElementwiseGradientAnnulusMap_1_0_macro_2D.cpp + noarch/P1ToP2ElementwiseGradientIcosahedralShellMap_0_0_toMatrix_P1ToP2ElementwiseGradientIcosahedralShellMap_0_0_macro_3D.cpp + noarch/P1ToP2ElementwiseGradientIcosahedralShellMap_1_0_toMatrix_P1ToP2ElementwiseGradientIcosahedralShellMap_1_0_macro_3D.cpp + noarch/P1ToP2ElementwiseGradientIcosahedralShellMap_2_0_toMatrix_P1ToP2ElementwiseGradientIcosahedralShellMap_2_0_macro_3D.cpp + noarch/P1ToP2ElementwiseGradient_0_0_toMatrix_P1ToP2ElementwiseGradient_0_0_macro_2D.cpp + noarch/P1ToP2ElementwiseGradient_0_0_toMatrix_P1ToP2ElementwiseGradient_0_0_macro_3D.cpp + noarch/P1ToP2ElementwiseGradient_1_0_toMatrix_P1ToP2ElementwiseGradient_1_0_macro_2D.cpp + noarch/P1ToP2ElementwiseGradient_1_0_toMatrix_P1ToP2ElementwiseGradient_1_0_macro_3D.cpp + noarch/P1ToP2ElementwiseGradient_2_0_toMatrix_P1ToP2ElementwiseGradient_2_0_macro_3D.cpp ) set_source_files_properties( - avx/P1ToP2ElementwiseGradientAnnulusMap_0_0_apply_macro_2D.cpp - avx/P1ToP2ElementwiseGradientAnnulusMap_1_0_apply_macro_2D.cpp - avx/P1ToP2ElementwiseGradientIcosahedralShellMap_0_0_apply_macro_3D.cpp - avx/P1ToP2ElementwiseGradientIcosahedralShellMap_1_0_apply_macro_3D.cpp - avx/P1ToP2ElementwiseGradientIcosahedralShellMap_2_0_apply_macro_3D.cpp - avx/P1ToP2ElementwiseGradient_0_0_apply_macro_2D.cpp - avx/P1ToP2ElementwiseGradient_0_0_apply_macro_3D.cpp - avx/P1ToP2ElementwiseGradient_1_0_apply_macro_2D.cpp - avx/P1ToP2ElementwiseGradient_1_0_apply_macro_3D.cpp - avx/P1ToP2ElementwiseGradient_2_0_apply_macro_3D.cpp + avx/P1ToP2ElementwiseGradientAnnulusMap_0_0_apply_P1ToP2ElementwiseGradientAnnulusMap_0_0_macro_2D.cpp + avx/P1ToP2ElementwiseGradientAnnulusMap_1_0_apply_P1ToP2ElementwiseGradientAnnulusMap_1_0_macro_2D.cpp + avx/P1ToP2ElementwiseGradientIcosahedralShellMap_0_0_apply_P1ToP2ElementwiseGradientIcosahedralShellMap_0_0_macro_3D.cpp + avx/P1ToP2ElementwiseGradientIcosahedralShellMap_1_0_apply_P1ToP2ElementwiseGradientIcosahedralShellMap_1_0_macro_3D.cpp + avx/P1ToP2ElementwiseGradientIcosahedralShellMap_2_0_apply_P1ToP2ElementwiseGradientIcosahedralShellMap_2_0_macro_3D.cpp + avx/P1ToP2ElementwiseGradient_0_0_apply_P1ToP2ElementwiseGradient_0_0_macro_2D.cpp + avx/P1ToP2ElementwiseGradient_0_0_apply_P1ToP2ElementwiseGradient_0_0_macro_3D.cpp + avx/P1ToP2ElementwiseGradient_1_0_apply_P1ToP2ElementwiseGradient_1_0_macro_2D.cpp + avx/P1ToP2ElementwiseGradient_1_0_apply_P1ToP2ElementwiseGradient_1_0_macro_3D.cpp + avx/P1ToP2ElementwiseGradient_2_0_apply_P1ToP2ElementwiseGradient_2_0_macro_3D.cpp PROPERTIES COMPILE_OPTIONS ${HYTEG_COMPILER_NATIVE_FLAGS} ) @@ -65,26 +65,26 @@ else() target_sources(opgen-gradient PRIVATE - noarch/P1ToP2ElementwiseGradientAnnulusMap_0_0_apply_macro_2D.cpp - noarch/P1ToP2ElementwiseGradientAnnulusMap_0_0_toMatrix_macro_2D.cpp - noarch/P1ToP2ElementwiseGradientAnnulusMap_1_0_apply_macro_2D.cpp - noarch/P1ToP2ElementwiseGradientAnnulusMap_1_0_toMatrix_macro_2D.cpp - noarch/P1ToP2ElementwiseGradientIcosahedralShellMap_0_0_apply_macro_3D.cpp - noarch/P1ToP2ElementwiseGradientIcosahedralShellMap_0_0_toMatrix_macro_3D.cpp - noarch/P1ToP2ElementwiseGradientIcosahedralShellMap_1_0_apply_macro_3D.cpp - noarch/P1ToP2ElementwiseGradientIcosahedralShellMap_1_0_toMatrix_macro_3D.cpp - noarch/P1ToP2ElementwiseGradientIcosahedralShellMap_2_0_apply_macro_3D.cpp - noarch/P1ToP2ElementwiseGradientIcosahedralShellMap_2_0_toMatrix_macro_3D.cpp - noarch/P1ToP2ElementwiseGradient_0_0_apply_macro_2D.cpp - noarch/P1ToP2ElementwiseGradient_0_0_apply_macro_3D.cpp - noarch/P1ToP2ElementwiseGradient_0_0_toMatrix_macro_2D.cpp - noarch/P1ToP2ElementwiseGradient_0_0_toMatrix_macro_3D.cpp - noarch/P1ToP2ElementwiseGradient_1_0_apply_macro_2D.cpp - noarch/P1ToP2ElementwiseGradient_1_0_apply_macro_3D.cpp - noarch/P1ToP2ElementwiseGradient_1_0_toMatrix_macro_2D.cpp - noarch/P1ToP2ElementwiseGradient_1_0_toMatrix_macro_3D.cpp - noarch/P1ToP2ElementwiseGradient_2_0_apply_macro_3D.cpp - noarch/P1ToP2ElementwiseGradient_2_0_toMatrix_macro_3D.cpp + noarch/P1ToP2ElementwiseGradientAnnulusMap_0_0_apply_P1ToP2ElementwiseGradientAnnulusMap_0_0_macro_2D.cpp + noarch/P1ToP2ElementwiseGradientAnnulusMap_0_0_toMatrix_P1ToP2ElementwiseGradientAnnulusMap_0_0_macro_2D.cpp + noarch/P1ToP2ElementwiseGradientAnnulusMap_1_0_apply_P1ToP2ElementwiseGradientAnnulusMap_1_0_macro_2D.cpp + noarch/P1ToP2ElementwiseGradientAnnulusMap_1_0_toMatrix_P1ToP2ElementwiseGradientAnnulusMap_1_0_macro_2D.cpp + noarch/P1ToP2ElementwiseGradientIcosahedralShellMap_0_0_apply_P1ToP2ElementwiseGradientIcosahedralShellMap_0_0_macro_3D.cpp + noarch/P1ToP2ElementwiseGradientIcosahedralShellMap_0_0_toMatrix_P1ToP2ElementwiseGradientIcosahedralShellMap_0_0_macro_3D.cpp + noarch/P1ToP2ElementwiseGradientIcosahedralShellMap_1_0_apply_P1ToP2ElementwiseGradientIcosahedralShellMap_1_0_macro_3D.cpp + noarch/P1ToP2ElementwiseGradientIcosahedralShellMap_1_0_toMatrix_P1ToP2ElementwiseGradientIcosahedralShellMap_1_0_macro_3D.cpp + noarch/P1ToP2ElementwiseGradientIcosahedralShellMap_2_0_apply_P1ToP2ElementwiseGradientIcosahedralShellMap_2_0_macro_3D.cpp + noarch/P1ToP2ElementwiseGradientIcosahedralShellMap_2_0_toMatrix_P1ToP2ElementwiseGradientIcosahedralShellMap_2_0_macro_3D.cpp + noarch/P1ToP2ElementwiseGradient_0_0_apply_P1ToP2ElementwiseGradient_0_0_macro_2D.cpp + noarch/P1ToP2ElementwiseGradient_0_0_apply_P1ToP2ElementwiseGradient_0_0_macro_3D.cpp + noarch/P1ToP2ElementwiseGradient_0_0_toMatrix_P1ToP2ElementwiseGradient_0_0_macro_2D.cpp + noarch/P1ToP2ElementwiseGradient_0_0_toMatrix_P1ToP2ElementwiseGradient_0_0_macro_3D.cpp + noarch/P1ToP2ElementwiseGradient_1_0_apply_P1ToP2ElementwiseGradient_1_0_macro_2D.cpp + noarch/P1ToP2ElementwiseGradient_1_0_apply_P1ToP2ElementwiseGradient_1_0_macro_3D.cpp + noarch/P1ToP2ElementwiseGradient_1_0_toMatrix_P1ToP2ElementwiseGradient_1_0_macro_2D.cpp + noarch/P1ToP2ElementwiseGradient_1_0_toMatrix_P1ToP2ElementwiseGradient_1_0_macro_3D.cpp + noarch/P1ToP2ElementwiseGradient_2_0_apply_P1ToP2ElementwiseGradient_2_0_macro_3D.cpp + noarch/P1ToP2ElementwiseGradient_2_0_toMatrix_P1ToP2ElementwiseGradient_2_0_macro_3D.cpp ) endif() diff --git a/operators/gradient/P1ToP2ElementwiseGradientAnnulusMap_0_0.cpp b/operators/gradient/P1ToP2ElementwiseGradientAnnulusMap_0_0.cpp index c7b6e5e1ad35e53be12053ffe2fdfd2397bdab10..b124e41b5703a564ca3bba3a5233ecc9fddb1e70 100644 --- a/operators/gradient/P1ToP2ElementwiseGradientAnnulusMap_0_0.cpp +++ b/operators/gradient/P1ToP2ElementwiseGradientAnnulusMap_0_0.cpp @@ -144,7 +144,7 @@ void P1ToP2ElementwiseGradientAnnulusMap_0_0::apply( const P1Function< real_t >& this->timingTree_->start( "kernel" ); - apply_macro_2D( + apply_P1ToP2ElementwiseGradientAnnulusMap_0_0_macro_2D( _data_dstEdge, _data_dstVertex, @@ -165,6 +165,7 @@ void P1ToP2ElementwiseGradientAnnulusMap_0_0::apply( const P1Function< real_t >& refVertex_1, thrVertex_0, thrVertex_1 ); + this->timingTree_->stop( "kernel" ); } @@ -243,7 +244,7 @@ void P1ToP2ElementwiseGradientAnnulusMap_0_0::toMatrix( const std::shared_ptr< S this->timingTree_->start( "kernel" ); - toMatrix_macro_2D( + toMatrix_P1ToP2ElementwiseGradientAnnulusMap_0_0_macro_2D( _data_dstEdge, _data_dstVertex, @@ -265,6 +266,7 @@ void P1ToP2ElementwiseGradientAnnulusMap_0_0::toMatrix( const std::shared_ptr< S refVertex_1, thrVertex_0, thrVertex_1 ); + this->timingTree_->stop( "kernel" ); } } diff --git a/operators/gradient/P1ToP2ElementwiseGradientAnnulusMap_0_0.hpp b/operators/gradient/P1ToP2ElementwiseGradientAnnulusMap_0_0.hpp index d822b1adc7bf177eb42aad363721535c100d9cd7..5518ed9048e024b2ea8899fa5e11497ea4cbaa90 100644 --- a/operators/gradient/P1ToP2ElementwiseGradientAnnulusMap_0_0.hpp +++ b/operators/gradient/P1ToP2ElementwiseGradientAnnulusMap_0_0.hpp @@ -29,6 +29,7 @@ #include "core/DataTypes.h" #include "hyteg/LikwidWrapper.hpp" +#include "hyteg/boundary/BoundaryConditions.hpp" #include "hyteg/communication/Syncing.hpp" #include "hyteg/edgedofspace/EdgeDoFMacroCell.hpp" #include "hyteg/geometry/AnnulusMap.hpp" @@ -37,6 +38,7 @@ #include "hyteg/p2functionspace/P2Function.hpp" #include "hyteg/primitivestorage/PrimitiveStorage.hpp" #include "hyteg/sparseassembly/SparseMatrixProxy.hpp" +#include "hyteg/types/types.hpp" #define FUNC_PREFIX @@ -77,57 +79,66 @@ class P1ToP2ElementwiseGradientAnnulusMap_0_0 : public Operator< P1Function< rea protected: private: - /// Kernel type: apply + /// Integral: P1ToP2ElementwiseGradientAnnulusMap_0_0 + /// - volume element: triangle, dim: 2, vertices: 3, spacedim: 2 + /// - kernel type: apply + /// - loop strategy: SAWTOOTH /// - quadrature rule: Dunavant 2 | points: 3, degree: 2 + /// - blending map: AnnulusMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 220 330 17 12 3 0 0 1 - void apply_macro_2D( real_t* RESTRICT _data_dstEdge, - real_t* RESTRICT _data_dstVertex, - real_t* RESTRICT _data_src, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float, - real_t radRayVertex, - real_t radRefVertex, - real_t rayVertex_0, - real_t rayVertex_1, - real_t refVertex_0, - real_t refVertex_1, - real_t thrVertex_0, - real_t thrVertex_1 ) const; - /// Kernel type: toMatrix + void apply_P1ToP2ElementwiseGradientAnnulusMap_0_0_macro_2D( real_t* RESTRICT _data_dstEdge, + real_t* RESTRICT _data_dstVertex, + real_t* RESTRICT _data_src, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float, + real_t radRayVertex, + real_t radRefVertex, + real_t rayVertex_0, + real_t rayVertex_1, + real_t refVertex_0, + real_t refVertex_1, + real_t thrVertex_0, + real_t thrVertex_1 ) const; + + /// Integral: P1ToP2ElementwiseGradientAnnulusMap_0_0 + /// - volume element: triangle, dim: 2, vertices: 3, spacedim: 2 + /// - kernel type: toMatrix + /// - loop strategy: SAWTOOTH /// - quadrature rule: Dunavant 2 | points: 3, degree: 2 + /// - blending map: AnnulusMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 202 312 17 12 3 0 0 4 - void toMatrix_macro_2D( idx_t* RESTRICT _data_dstEdge, - idx_t* RESTRICT _data_dstVertex, - idx_t* RESTRICT _data_src, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - std::shared_ptr< SparseMatrixProxy > mat, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float, - real_t radRayVertex, - real_t radRefVertex, - real_t rayVertex_0, - real_t rayVertex_1, - real_t refVertex_0, - real_t refVertex_1, - real_t thrVertex_0, - real_t thrVertex_1 ) const; + void toMatrix_P1ToP2ElementwiseGradientAnnulusMap_0_0_macro_2D( idx_t* RESTRICT _data_dstEdge, + idx_t* RESTRICT _data_dstVertex, + idx_t* RESTRICT _data_src, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + std::shared_ptr< SparseMatrixProxy > mat, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float, + real_t radRayVertex, + real_t radRefVertex, + real_t rayVertex_0, + real_t rayVertex_1, + real_t refVertex_0, + real_t refVertex_1, + real_t thrVertex_0, + real_t thrVertex_1 ) const; }; } // namespace operatorgeneration diff --git a/operators/gradient/P1ToP2ElementwiseGradientAnnulusMap_1_0.cpp b/operators/gradient/P1ToP2ElementwiseGradientAnnulusMap_1_0.cpp index e1ee2e393ec048076f559d316defbc2d5ac429fb..08305061fd8d9ad8c3ae633367721554741f1d8d 100644 --- a/operators/gradient/P1ToP2ElementwiseGradientAnnulusMap_1_0.cpp +++ b/operators/gradient/P1ToP2ElementwiseGradientAnnulusMap_1_0.cpp @@ -144,7 +144,7 @@ void P1ToP2ElementwiseGradientAnnulusMap_1_0::apply( const P1Function< real_t >& this->timingTree_->start( "kernel" ); - apply_macro_2D( + apply_P1ToP2ElementwiseGradientAnnulusMap_1_0_macro_2D( _data_dstEdge, _data_dstVertex, @@ -165,6 +165,7 @@ void P1ToP2ElementwiseGradientAnnulusMap_1_0::apply( const P1Function< real_t >& refVertex_1, thrVertex_0, thrVertex_1 ); + this->timingTree_->stop( "kernel" ); } @@ -243,7 +244,7 @@ void P1ToP2ElementwiseGradientAnnulusMap_1_0::toMatrix( const std::shared_ptr< S this->timingTree_->start( "kernel" ); - toMatrix_macro_2D( + toMatrix_P1ToP2ElementwiseGradientAnnulusMap_1_0_macro_2D( _data_dstEdge, _data_dstVertex, @@ -265,6 +266,7 @@ void P1ToP2ElementwiseGradientAnnulusMap_1_0::toMatrix( const std::shared_ptr< S refVertex_1, thrVertex_0, thrVertex_1 ); + this->timingTree_->stop( "kernel" ); } } diff --git a/operators/gradient/P1ToP2ElementwiseGradientAnnulusMap_1_0.hpp b/operators/gradient/P1ToP2ElementwiseGradientAnnulusMap_1_0.hpp index 1c03c47cce033ba6ae3d96c5cb7afb95c77125e9..eeb840ed6d17ea1df5ffeb7b6ae4e9738d7694f0 100644 --- a/operators/gradient/P1ToP2ElementwiseGradientAnnulusMap_1_0.hpp +++ b/operators/gradient/P1ToP2ElementwiseGradientAnnulusMap_1_0.hpp @@ -29,6 +29,7 @@ #include "core/DataTypes.h" #include "hyteg/LikwidWrapper.hpp" +#include "hyteg/boundary/BoundaryConditions.hpp" #include "hyteg/communication/Syncing.hpp" #include "hyteg/edgedofspace/EdgeDoFMacroCell.hpp" #include "hyteg/geometry/AnnulusMap.hpp" @@ -37,6 +38,7 @@ #include "hyteg/p2functionspace/P2Function.hpp" #include "hyteg/primitivestorage/PrimitiveStorage.hpp" #include "hyteg/sparseassembly/SparseMatrixProxy.hpp" +#include "hyteg/types/types.hpp" #define FUNC_PREFIX @@ -77,57 +79,66 @@ class P1ToP2ElementwiseGradientAnnulusMap_1_0 : public Operator< P1Function< rea protected: private: - /// Kernel type: apply + /// Integral: P1ToP2ElementwiseGradientAnnulusMap_1_0 + /// - volume element: triangle, dim: 2, vertices: 3, spacedim: 2 + /// - kernel type: apply + /// - loop strategy: SAWTOOTH /// - quadrature rule: Dunavant 2 | points: 3, degree: 2 + /// - blending map: AnnulusMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 220 330 17 12 3 0 0 1 - void apply_macro_2D( real_t* RESTRICT _data_dstEdge, - real_t* RESTRICT _data_dstVertex, - real_t* RESTRICT _data_src, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float, - real_t radRayVertex, - real_t radRefVertex, - real_t rayVertex_0, - real_t rayVertex_1, - real_t refVertex_0, - real_t refVertex_1, - real_t thrVertex_0, - real_t thrVertex_1 ) const; - /// Kernel type: toMatrix + void apply_P1ToP2ElementwiseGradientAnnulusMap_1_0_macro_2D( real_t* RESTRICT _data_dstEdge, + real_t* RESTRICT _data_dstVertex, + real_t* RESTRICT _data_src, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float, + real_t radRayVertex, + real_t radRefVertex, + real_t rayVertex_0, + real_t rayVertex_1, + real_t refVertex_0, + real_t refVertex_1, + real_t thrVertex_0, + real_t thrVertex_1 ) const; + + /// Integral: P1ToP2ElementwiseGradientAnnulusMap_1_0 + /// - volume element: triangle, dim: 2, vertices: 3, spacedim: 2 + /// - kernel type: toMatrix + /// - loop strategy: SAWTOOTH /// - quadrature rule: Dunavant 2 | points: 3, degree: 2 + /// - blending map: AnnulusMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 202 312 17 12 3 0 0 4 - void toMatrix_macro_2D( idx_t* RESTRICT _data_dstEdge, - idx_t* RESTRICT _data_dstVertex, - idx_t* RESTRICT _data_src, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - std::shared_ptr< SparseMatrixProxy > mat, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float, - real_t radRayVertex, - real_t radRefVertex, - real_t rayVertex_0, - real_t rayVertex_1, - real_t refVertex_0, - real_t refVertex_1, - real_t thrVertex_0, - real_t thrVertex_1 ) const; + void toMatrix_P1ToP2ElementwiseGradientAnnulusMap_1_0_macro_2D( idx_t* RESTRICT _data_dstEdge, + idx_t* RESTRICT _data_dstVertex, + idx_t* RESTRICT _data_src, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + std::shared_ptr< SparseMatrixProxy > mat, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float, + real_t radRayVertex, + real_t radRefVertex, + real_t rayVertex_0, + real_t rayVertex_1, + real_t refVertex_0, + real_t refVertex_1, + real_t thrVertex_0, + real_t thrVertex_1 ) const; }; } // namespace operatorgeneration diff --git a/operators/gradient/P1ToP2ElementwiseGradientIcosahedralShellMap_0_0.cpp b/operators/gradient/P1ToP2ElementwiseGradientIcosahedralShellMap_0_0.cpp index 53aed4cfaee41c4659956ffae71b5f175ed5e986..d4d125c3d7a01d4e8aa3dc651ef802cb5742178c 100644 --- a/operators/gradient/P1ToP2ElementwiseGradientIcosahedralShellMap_0_0.cpp +++ b/operators/gradient/P1ToP2ElementwiseGradientIcosahedralShellMap_0_0.cpp @@ -146,7 +146,7 @@ void P1ToP2ElementwiseGradientIcosahedralShellMap_0_0::apply( const P1Function< this->timingTree_->start( "kernel" ); - apply_macro_3D( + apply_P1ToP2ElementwiseGradientIcosahedralShellMap_0_0_macro_3D( _data_dstEdge, _data_dstVertex, @@ -179,6 +179,7 @@ void P1ToP2ElementwiseGradientIcosahedralShellMap_0_0::apply( const P1Function< thrVertex_0, thrVertex_1, thrVertex_2 ); + this->timingTree_->stop( "kernel" ); } @@ -269,7 +270,7 @@ void P1ToP2ElementwiseGradientIcosahedralShellMap_0_0::toMatrix( const std::shar this->timingTree_->start( "kernel" ); - toMatrix_macro_3D( + toMatrix_P1ToP2ElementwiseGradientIcosahedralShellMap_0_0_macro_3D( _data_dstEdge, _data_dstVertex, @@ -303,6 +304,7 @@ void P1ToP2ElementwiseGradientIcosahedralShellMap_0_0::toMatrix( const std::shar thrVertex_0, thrVertex_1, thrVertex_2 ); + this->timingTree_->stop( "kernel" ); } } diff --git a/operators/gradient/P1ToP2ElementwiseGradientIcosahedralShellMap_0_0.hpp b/operators/gradient/P1ToP2ElementwiseGradientIcosahedralShellMap_0_0.hpp index 78819b72b0eefac59b6c11edd6c0c305395fcf3f..16490ad8a616f813444ef98cd13e1e6d08fda2a4 100644 --- a/operators/gradient/P1ToP2ElementwiseGradientIcosahedralShellMap_0_0.hpp +++ b/operators/gradient/P1ToP2ElementwiseGradientIcosahedralShellMap_0_0.hpp @@ -29,6 +29,7 @@ #include "core/DataTypes.h" #include "hyteg/LikwidWrapper.hpp" +#include "hyteg/boundary/BoundaryConditions.hpp" #include "hyteg/communication/Syncing.hpp" #include "hyteg/edgedofspace/EdgeDoFMacroCell.hpp" #include "hyteg/geometry/IcosahedralShellMap.hpp" @@ -37,6 +38,7 @@ #include "hyteg/p2functionspace/P2Function.hpp" #include "hyteg/primitivestorage/PrimitiveStorage.hpp" #include "hyteg/sparseassembly/SparseMatrixProxy.hpp" +#include "hyteg/types/types.hpp" #define FUNC_PREFIX @@ -77,81 +79,90 @@ class P1ToP2ElementwiseGradientIcosahedralShellMap_0_0 : public Operator< P1Func protected: private: - /// Kernel type: apply + /// Integral: P1ToP2ElementwiseGradientIcosahedralShellMap_0_0 + /// - volume element: tetrahedron, dim: 3, vertices: 4, spacedim: 3 + /// - kernel type: apply + /// - loop strategy: SAWTOOTH /// - quadrature rule: Hammer-Marlowe-Stroud 1 | points: 4, degree: 2 + /// - blending map: IcosahedralShellMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 540 783 46 4 4 0 0 1 - void apply_macro_3D( real_t* RESTRICT _data_dstEdge, - real_t* RESTRICT _data_dstVertex, - real_t* RESTRICT _data_src, - real_t forVertex_0, - real_t forVertex_1, - real_t forVertex_2, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_0comp2, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_1comp2, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - real_t macro_vertex_coord_id_2comp2, - real_t macro_vertex_coord_id_3comp0, - real_t macro_vertex_coord_id_3comp1, - real_t macro_vertex_coord_id_3comp2, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float, - real_t radRayVertex, - real_t radRefVertex, - real_t rayVertex_0, - real_t rayVertex_1, - real_t rayVertex_2, - real_t refVertex_0, - real_t refVertex_1, - real_t refVertex_2, - real_t thrVertex_0, - real_t thrVertex_1, - real_t thrVertex_2 ) const; - /// Kernel type: toMatrix + void apply_P1ToP2ElementwiseGradientIcosahedralShellMap_0_0_macro_3D( real_t* RESTRICT _data_dstEdge, + real_t* RESTRICT _data_dstVertex, + real_t* RESTRICT _data_src, + real_t forVertex_0, + real_t forVertex_1, + real_t forVertex_2, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_0comp2, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_1comp2, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + real_t macro_vertex_coord_id_2comp2, + real_t macro_vertex_coord_id_3comp0, + real_t macro_vertex_coord_id_3comp1, + real_t macro_vertex_coord_id_3comp2, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float, + real_t radRayVertex, + real_t radRefVertex, + real_t rayVertex_0, + real_t rayVertex_1, + real_t rayVertex_2, + real_t refVertex_0, + real_t refVertex_1, + real_t refVertex_2, + real_t thrVertex_0, + real_t thrVertex_1, + real_t thrVertex_2 ) const; + + /// Integral: P1ToP2ElementwiseGradientIcosahedralShellMap_0_0 + /// - volume element: tetrahedron, dim: 3, vertices: 4, spacedim: 3 + /// - kernel type: toMatrix + /// - loop strategy: SAWTOOTH /// - quadrature rule: Hammer-Marlowe-Stroud 1 | points: 4, degree: 2 + /// - blending map: IcosahedralShellMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 500 743 46 4 4 0 0 4 - void toMatrix_macro_3D( idx_t* RESTRICT _data_dstEdge, - idx_t* RESTRICT _data_dstVertex, - idx_t* RESTRICT _data_src, - real_t forVertex_0, - real_t forVertex_1, - real_t forVertex_2, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_0comp2, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_1comp2, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - real_t macro_vertex_coord_id_2comp2, - real_t macro_vertex_coord_id_3comp0, - real_t macro_vertex_coord_id_3comp1, - real_t macro_vertex_coord_id_3comp2, - std::shared_ptr< SparseMatrixProxy > mat, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float, - real_t radRayVertex, - real_t radRefVertex, - real_t rayVertex_0, - real_t rayVertex_1, - real_t rayVertex_2, - real_t refVertex_0, - real_t refVertex_1, - real_t refVertex_2, - real_t thrVertex_0, - real_t thrVertex_1, - real_t thrVertex_2 ) const; + void toMatrix_P1ToP2ElementwiseGradientIcosahedralShellMap_0_0_macro_3D( idx_t* RESTRICT _data_dstEdge, + idx_t* RESTRICT _data_dstVertex, + idx_t* RESTRICT _data_src, + real_t forVertex_0, + real_t forVertex_1, + real_t forVertex_2, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_0comp2, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_1comp2, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + real_t macro_vertex_coord_id_2comp2, + real_t macro_vertex_coord_id_3comp0, + real_t macro_vertex_coord_id_3comp1, + real_t macro_vertex_coord_id_3comp2, + std::shared_ptr< SparseMatrixProxy > mat, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float, + real_t radRayVertex, + real_t radRefVertex, + real_t rayVertex_0, + real_t rayVertex_1, + real_t rayVertex_2, + real_t refVertex_0, + real_t refVertex_1, + real_t refVertex_2, + real_t thrVertex_0, + real_t thrVertex_1, + real_t thrVertex_2 ) const; }; } // namespace operatorgeneration diff --git a/operators/gradient/P1ToP2ElementwiseGradientIcosahedralShellMap_1_0.cpp b/operators/gradient/P1ToP2ElementwiseGradientIcosahedralShellMap_1_0.cpp index 54ec309dfa1ca15fdb69945e0283679fb1976104..3712665543a4b249e56c6c6c08a0779e3f94cf41 100644 --- a/operators/gradient/P1ToP2ElementwiseGradientIcosahedralShellMap_1_0.cpp +++ b/operators/gradient/P1ToP2ElementwiseGradientIcosahedralShellMap_1_0.cpp @@ -146,7 +146,7 @@ void P1ToP2ElementwiseGradientIcosahedralShellMap_1_0::apply( const P1Function< this->timingTree_->start( "kernel" ); - apply_macro_3D( + apply_P1ToP2ElementwiseGradientIcosahedralShellMap_1_0_macro_3D( _data_dstEdge, _data_dstVertex, @@ -179,6 +179,7 @@ void P1ToP2ElementwiseGradientIcosahedralShellMap_1_0::apply( const P1Function< thrVertex_0, thrVertex_1, thrVertex_2 ); + this->timingTree_->stop( "kernel" ); } @@ -269,7 +270,7 @@ void P1ToP2ElementwiseGradientIcosahedralShellMap_1_0::toMatrix( const std::shar this->timingTree_->start( "kernel" ); - toMatrix_macro_3D( + toMatrix_P1ToP2ElementwiseGradientIcosahedralShellMap_1_0_macro_3D( _data_dstEdge, _data_dstVertex, @@ -303,6 +304,7 @@ void P1ToP2ElementwiseGradientIcosahedralShellMap_1_0::toMatrix( const std::shar thrVertex_0, thrVertex_1, thrVertex_2 ); + this->timingTree_->stop( "kernel" ); } } diff --git a/operators/gradient/P1ToP2ElementwiseGradientIcosahedralShellMap_1_0.hpp b/operators/gradient/P1ToP2ElementwiseGradientIcosahedralShellMap_1_0.hpp index 561a431e47ee80d9cbc9136ef67a28fc0f3cac1a..7be898babb3372f3022168bbf7f937ff88c1e63f 100644 --- a/operators/gradient/P1ToP2ElementwiseGradientIcosahedralShellMap_1_0.hpp +++ b/operators/gradient/P1ToP2ElementwiseGradientIcosahedralShellMap_1_0.hpp @@ -29,6 +29,7 @@ #include "core/DataTypes.h" #include "hyteg/LikwidWrapper.hpp" +#include "hyteg/boundary/BoundaryConditions.hpp" #include "hyteg/communication/Syncing.hpp" #include "hyteg/edgedofspace/EdgeDoFMacroCell.hpp" #include "hyteg/geometry/IcosahedralShellMap.hpp" @@ -37,6 +38,7 @@ #include "hyteg/p2functionspace/P2Function.hpp" #include "hyteg/primitivestorage/PrimitiveStorage.hpp" #include "hyteg/sparseassembly/SparseMatrixProxy.hpp" +#include "hyteg/types/types.hpp" #define FUNC_PREFIX @@ -77,81 +79,90 @@ class P1ToP2ElementwiseGradientIcosahedralShellMap_1_0 : public Operator< P1Func protected: private: - /// Kernel type: apply + /// Integral: P1ToP2ElementwiseGradientIcosahedralShellMap_1_0 + /// - volume element: tetrahedron, dim: 3, vertices: 4, spacedim: 3 + /// - kernel type: apply + /// - loop strategy: SAWTOOTH /// - quadrature rule: Hammer-Marlowe-Stroud 1 | points: 4, degree: 2 + /// - blending map: IcosahedralShellMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 540 783 46 4 4 0 0 1 - void apply_macro_3D( real_t* RESTRICT _data_dstEdge, - real_t* RESTRICT _data_dstVertex, - real_t* RESTRICT _data_src, - real_t forVertex_0, - real_t forVertex_1, - real_t forVertex_2, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_0comp2, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_1comp2, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - real_t macro_vertex_coord_id_2comp2, - real_t macro_vertex_coord_id_3comp0, - real_t macro_vertex_coord_id_3comp1, - real_t macro_vertex_coord_id_3comp2, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float, - real_t radRayVertex, - real_t radRefVertex, - real_t rayVertex_0, - real_t rayVertex_1, - real_t rayVertex_2, - real_t refVertex_0, - real_t refVertex_1, - real_t refVertex_2, - real_t thrVertex_0, - real_t thrVertex_1, - real_t thrVertex_2 ) const; - /// Kernel type: toMatrix + void apply_P1ToP2ElementwiseGradientIcosahedralShellMap_1_0_macro_3D( real_t* RESTRICT _data_dstEdge, + real_t* RESTRICT _data_dstVertex, + real_t* RESTRICT _data_src, + real_t forVertex_0, + real_t forVertex_1, + real_t forVertex_2, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_0comp2, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_1comp2, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + real_t macro_vertex_coord_id_2comp2, + real_t macro_vertex_coord_id_3comp0, + real_t macro_vertex_coord_id_3comp1, + real_t macro_vertex_coord_id_3comp2, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float, + real_t radRayVertex, + real_t radRefVertex, + real_t rayVertex_0, + real_t rayVertex_1, + real_t rayVertex_2, + real_t refVertex_0, + real_t refVertex_1, + real_t refVertex_2, + real_t thrVertex_0, + real_t thrVertex_1, + real_t thrVertex_2 ) const; + + /// Integral: P1ToP2ElementwiseGradientIcosahedralShellMap_1_0 + /// - volume element: tetrahedron, dim: 3, vertices: 4, spacedim: 3 + /// - kernel type: toMatrix + /// - loop strategy: SAWTOOTH /// - quadrature rule: Hammer-Marlowe-Stroud 1 | points: 4, degree: 2 + /// - blending map: IcosahedralShellMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 500 743 46 4 4 0 0 4 - void toMatrix_macro_3D( idx_t* RESTRICT _data_dstEdge, - idx_t* RESTRICT _data_dstVertex, - idx_t* RESTRICT _data_src, - real_t forVertex_0, - real_t forVertex_1, - real_t forVertex_2, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_0comp2, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_1comp2, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - real_t macro_vertex_coord_id_2comp2, - real_t macro_vertex_coord_id_3comp0, - real_t macro_vertex_coord_id_3comp1, - real_t macro_vertex_coord_id_3comp2, - std::shared_ptr< SparseMatrixProxy > mat, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float, - real_t radRayVertex, - real_t radRefVertex, - real_t rayVertex_0, - real_t rayVertex_1, - real_t rayVertex_2, - real_t refVertex_0, - real_t refVertex_1, - real_t refVertex_2, - real_t thrVertex_0, - real_t thrVertex_1, - real_t thrVertex_2 ) const; + void toMatrix_P1ToP2ElementwiseGradientIcosahedralShellMap_1_0_macro_3D( idx_t* RESTRICT _data_dstEdge, + idx_t* RESTRICT _data_dstVertex, + idx_t* RESTRICT _data_src, + real_t forVertex_0, + real_t forVertex_1, + real_t forVertex_2, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_0comp2, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_1comp2, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + real_t macro_vertex_coord_id_2comp2, + real_t macro_vertex_coord_id_3comp0, + real_t macro_vertex_coord_id_3comp1, + real_t macro_vertex_coord_id_3comp2, + std::shared_ptr< SparseMatrixProxy > mat, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float, + real_t radRayVertex, + real_t radRefVertex, + real_t rayVertex_0, + real_t rayVertex_1, + real_t rayVertex_2, + real_t refVertex_0, + real_t refVertex_1, + real_t refVertex_2, + real_t thrVertex_0, + real_t thrVertex_1, + real_t thrVertex_2 ) const; }; } // namespace operatorgeneration diff --git a/operators/gradient/P1ToP2ElementwiseGradientIcosahedralShellMap_2_0.cpp b/operators/gradient/P1ToP2ElementwiseGradientIcosahedralShellMap_2_0.cpp index 095574db78a2f0988060b562c749a526b9337769..16dad7717654148b54223c01e13fdef254d1d947 100644 --- a/operators/gradient/P1ToP2ElementwiseGradientIcosahedralShellMap_2_0.cpp +++ b/operators/gradient/P1ToP2ElementwiseGradientIcosahedralShellMap_2_0.cpp @@ -146,7 +146,7 @@ void P1ToP2ElementwiseGradientIcosahedralShellMap_2_0::apply( const P1Function< this->timingTree_->start( "kernel" ); - apply_macro_3D( + apply_P1ToP2ElementwiseGradientIcosahedralShellMap_2_0_macro_3D( _data_dstEdge, _data_dstVertex, @@ -179,6 +179,7 @@ void P1ToP2ElementwiseGradientIcosahedralShellMap_2_0::apply( const P1Function< thrVertex_0, thrVertex_1, thrVertex_2 ); + this->timingTree_->stop( "kernel" ); } @@ -269,7 +270,7 @@ void P1ToP2ElementwiseGradientIcosahedralShellMap_2_0::toMatrix( const std::shar this->timingTree_->start( "kernel" ); - toMatrix_macro_3D( + toMatrix_P1ToP2ElementwiseGradientIcosahedralShellMap_2_0_macro_3D( _data_dstEdge, _data_dstVertex, @@ -303,6 +304,7 @@ void P1ToP2ElementwiseGradientIcosahedralShellMap_2_0::toMatrix( const std::shar thrVertex_0, thrVertex_1, thrVertex_2 ); + this->timingTree_->stop( "kernel" ); } } diff --git a/operators/gradient/P1ToP2ElementwiseGradientIcosahedralShellMap_2_0.hpp b/operators/gradient/P1ToP2ElementwiseGradientIcosahedralShellMap_2_0.hpp index 10e81880801930734db21f73186f7ed543517972..6d0336ae4af169f5180fd4625b6464e228332c86 100644 --- a/operators/gradient/P1ToP2ElementwiseGradientIcosahedralShellMap_2_0.hpp +++ b/operators/gradient/P1ToP2ElementwiseGradientIcosahedralShellMap_2_0.hpp @@ -29,6 +29,7 @@ #include "core/DataTypes.h" #include "hyteg/LikwidWrapper.hpp" +#include "hyteg/boundary/BoundaryConditions.hpp" #include "hyteg/communication/Syncing.hpp" #include "hyteg/edgedofspace/EdgeDoFMacroCell.hpp" #include "hyteg/geometry/IcosahedralShellMap.hpp" @@ -37,6 +38,7 @@ #include "hyteg/p2functionspace/P2Function.hpp" #include "hyteg/primitivestorage/PrimitiveStorage.hpp" #include "hyteg/sparseassembly/SparseMatrixProxy.hpp" +#include "hyteg/types/types.hpp" #define FUNC_PREFIX @@ -77,81 +79,90 @@ class P1ToP2ElementwiseGradientIcosahedralShellMap_2_0 : public Operator< P1Func protected: private: - /// Kernel type: apply + /// Integral: P1ToP2ElementwiseGradientIcosahedralShellMap_2_0 + /// - volume element: tetrahedron, dim: 3, vertices: 4, spacedim: 3 + /// - kernel type: apply + /// - loop strategy: SAWTOOTH /// - quadrature rule: Hammer-Marlowe-Stroud 1 | points: 4, degree: 2 + /// - blending map: IcosahedralShellMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 540 783 46 4 4 0 0 1 - void apply_macro_3D( real_t* RESTRICT _data_dstEdge, - real_t* RESTRICT _data_dstVertex, - real_t* RESTRICT _data_src, - real_t forVertex_0, - real_t forVertex_1, - real_t forVertex_2, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_0comp2, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_1comp2, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - real_t macro_vertex_coord_id_2comp2, - real_t macro_vertex_coord_id_3comp0, - real_t macro_vertex_coord_id_3comp1, - real_t macro_vertex_coord_id_3comp2, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float, - real_t radRayVertex, - real_t radRefVertex, - real_t rayVertex_0, - real_t rayVertex_1, - real_t rayVertex_2, - real_t refVertex_0, - real_t refVertex_1, - real_t refVertex_2, - real_t thrVertex_0, - real_t thrVertex_1, - real_t thrVertex_2 ) const; - /// Kernel type: toMatrix + void apply_P1ToP2ElementwiseGradientIcosahedralShellMap_2_0_macro_3D( real_t* RESTRICT _data_dstEdge, + real_t* RESTRICT _data_dstVertex, + real_t* RESTRICT _data_src, + real_t forVertex_0, + real_t forVertex_1, + real_t forVertex_2, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_0comp2, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_1comp2, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + real_t macro_vertex_coord_id_2comp2, + real_t macro_vertex_coord_id_3comp0, + real_t macro_vertex_coord_id_3comp1, + real_t macro_vertex_coord_id_3comp2, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float, + real_t radRayVertex, + real_t radRefVertex, + real_t rayVertex_0, + real_t rayVertex_1, + real_t rayVertex_2, + real_t refVertex_0, + real_t refVertex_1, + real_t refVertex_2, + real_t thrVertex_0, + real_t thrVertex_1, + real_t thrVertex_2 ) const; + + /// Integral: P1ToP2ElementwiseGradientIcosahedralShellMap_2_0 + /// - volume element: tetrahedron, dim: 3, vertices: 4, spacedim: 3 + /// - kernel type: toMatrix + /// - loop strategy: SAWTOOTH /// - quadrature rule: Hammer-Marlowe-Stroud 1 | points: 4, degree: 2 + /// - blending map: IcosahedralShellMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 500 743 46 4 4 0 0 4 - void toMatrix_macro_3D( idx_t* RESTRICT _data_dstEdge, - idx_t* RESTRICT _data_dstVertex, - idx_t* RESTRICT _data_src, - real_t forVertex_0, - real_t forVertex_1, - real_t forVertex_2, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_0comp2, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_1comp2, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - real_t macro_vertex_coord_id_2comp2, - real_t macro_vertex_coord_id_3comp0, - real_t macro_vertex_coord_id_3comp1, - real_t macro_vertex_coord_id_3comp2, - std::shared_ptr< SparseMatrixProxy > mat, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float, - real_t radRayVertex, - real_t radRefVertex, - real_t rayVertex_0, - real_t rayVertex_1, - real_t rayVertex_2, - real_t refVertex_0, - real_t refVertex_1, - real_t refVertex_2, - real_t thrVertex_0, - real_t thrVertex_1, - real_t thrVertex_2 ) const; + void toMatrix_P1ToP2ElementwiseGradientIcosahedralShellMap_2_0_macro_3D( idx_t* RESTRICT _data_dstEdge, + idx_t* RESTRICT _data_dstVertex, + idx_t* RESTRICT _data_src, + real_t forVertex_0, + real_t forVertex_1, + real_t forVertex_2, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_0comp2, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_1comp2, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + real_t macro_vertex_coord_id_2comp2, + real_t macro_vertex_coord_id_3comp0, + real_t macro_vertex_coord_id_3comp1, + real_t macro_vertex_coord_id_3comp2, + std::shared_ptr< SparseMatrixProxy > mat, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float, + real_t radRayVertex, + real_t radRefVertex, + real_t rayVertex_0, + real_t rayVertex_1, + real_t rayVertex_2, + real_t refVertex_0, + real_t refVertex_1, + real_t refVertex_2, + real_t thrVertex_0, + real_t thrVertex_1, + real_t thrVertex_2 ) const; }; } // namespace operatorgeneration diff --git a/operators/gradient/P1ToP2ElementwiseGradient_0_0.cpp b/operators/gradient/P1ToP2ElementwiseGradient_0_0.cpp index afce695272d0a775ef7bec0f1d528a3a112be44a..481eb1dd61ae4e8611690100563ec59ff29e7729 100644 --- a/operators/gradient/P1ToP2ElementwiseGradient_0_0.cpp +++ b/operators/gradient/P1ToP2ElementwiseGradient_0_0.cpp @@ -128,7 +128,7 @@ void P1ToP2ElementwiseGradient_0_0::apply( const P1Function< real_t >& src, this->timingTree_->start( "kernel" ); - apply_macro_3D( + apply_P1ToP2ElementwiseGradient_0_0_macro_3D( _data_dstEdge, _data_dstVertex, @@ -147,6 +147,7 @@ void P1ToP2ElementwiseGradient_0_0::apply( const P1Function< real_t >& src, macro_vertex_coord_id_3comp2, micro_edges_per_macro_edge, micro_edges_per_macro_edge_float ); + this->timingTree_->stop( "kernel" ); } @@ -213,7 +214,7 @@ void P1ToP2ElementwiseGradient_0_0::apply( const P1Function< real_t >& src, this->timingTree_->start( "kernel" ); - apply_macro_2D( + apply_P1ToP2ElementwiseGradient_0_0_macro_2D( _data_dstEdge, _data_dstVertex, @@ -226,6 +227,7 @@ void P1ToP2ElementwiseGradient_0_0::apply( const P1Function< real_t >& src, macro_vertex_coord_id_2comp1, micro_edges_per_macro_edge, micro_edges_per_macro_edge_float ); + this->timingTree_->stop( "kernel" ); } @@ -291,7 +293,7 @@ void P1ToP2ElementwiseGradient_0_0::toMatrix( const std::shared_ptr< SparseMatri this->timingTree_->start( "kernel" ); - toMatrix_macro_3D( + toMatrix_P1ToP2ElementwiseGradient_0_0_macro_3D( _data_dstEdge, _data_dstVertex, @@ -311,6 +313,7 @@ void P1ToP2ElementwiseGradient_0_0::toMatrix( const std::shared_ptr< SparseMatri mat, micro_edges_per_macro_edge, micro_edges_per_macro_edge_float ); + this->timingTree_->stop( "kernel" ); } } @@ -340,7 +343,7 @@ void P1ToP2ElementwiseGradient_0_0::toMatrix( const std::shared_ptr< SparseMatri this->timingTree_->start( "kernel" ); - toMatrix_macro_2D( + toMatrix_P1ToP2ElementwiseGradient_0_0_macro_2D( _data_dstEdge, _data_dstVertex, @@ -354,6 +357,7 @@ void P1ToP2ElementwiseGradient_0_0::toMatrix( const std::shared_ptr< SparseMatri mat, micro_edges_per_macro_edge, micro_edges_per_macro_edge_float ); + this->timingTree_->stop( "kernel" ); } } diff --git a/operators/gradient/P1ToP2ElementwiseGradient_0_0.hpp b/operators/gradient/P1ToP2ElementwiseGradient_0_0.hpp index b7dee5e8c62804d8edbb89f627d4dc5e4ccd68cc..f0c11fa680093f50c99753f9f06489ba53cb5934 100644 --- a/operators/gradient/P1ToP2ElementwiseGradient_0_0.hpp +++ b/operators/gradient/P1ToP2ElementwiseGradient_0_0.hpp @@ -29,6 +29,7 @@ #include "core/DataTypes.h" #include "hyteg/LikwidWrapper.hpp" +#include "hyteg/boundary/BoundaryConditions.hpp" #include "hyteg/communication/Syncing.hpp" #include "hyteg/edgedofspace/EdgeDoFMacroCell.hpp" #include "hyteg/operators/Operator.hpp" @@ -36,6 +37,7 @@ #include "hyteg/p2functionspace/P2Function.hpp" #include "hyteg/primitivestorage/PrimitiveStorage.hpp" #include "hyteg/sparseassembly/SparseMatrixProxy.hpp" +#include "hyteg/types/types.hpp" #define FUNC_PREFIX @@ -74,88 +76,107 @@ class P1ToP2ElementwiseGradient_0_0 : public Operator< P1Function< real_t >, P2F protected: private: - /// Kernel type: apply + /// Integral: P1ToP2ElementwiseGradient_0_0 + /// - volume element: triangle, dim: 2, vertices: 3, spacedim: 2 + /// - kernel type: apply + /// - loop strategy: CUBES /// - quadrature rule: Dunavant 2 | points: 3, degree: 2 + /// - blending map: IdentityMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 116 144 12 0 0 0 0 1 - void apply_macro_2D( real_t* RESTRICT _data_dstEdge, - real_t* RESTRICT _data_dstVertex, - real_t* RESTRICT _data_src, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float ) const; - /// Kernel type: apply + void apply_P1ToP2ElementwiseGradient_0_0_macro_2D( real_t* RESTRICT _data_dstEdge, + real_t* RESTRICT _data_dstVertex, + real_t* RESTRICT _data_src, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float ) const; + + /// Integral: P1ToP2ElementwiseGradient_0_0 + /// - volume element: tetrahedron, dim: 3, vertices: 4, spacedim: 3 + /// - kernel type: apply + /// - loop strategy: CUBES /// - quadrature rule: Hammer-Marlowe-Stroud 1 | points: 4, degree: 2 + /// - blending map: IdentityMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 345 380 36 0 0 0 0 1 - void apply_macro_3D( real_t* RESTRICT _data_dstEdge, - real_t* RESTRICT _data_dstVertex, - real_t* RESTRICT _data_src, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_0comp2, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_1comp2, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - real_t macro_vertex_coord_id_2comp2, - real_t macro_vertex_coord_id_3comp0, - real_t macro_vertex_coord_id_3comp1, - real_t macro_vertex_coord_id_3comp2, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float ) const; - /// Kernel type: toMatrix + void apply_P1ToP2ElementwiseGradient_0_0_macro_3D( real_t* RESTRICT _data_dstEdge, + real_t* RESTRICT _data_dstVertex, + real_t* RESTRICT _data_src, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_0comp2, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_1comp2, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + real_t macro_vertex_coord_id_2comp2, + real_t macro_vertex_coord_id_3comp0, + real_t macro_vertex_coord_id_3comp1, + real_t macro_vertex_coord_id_3comp2, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float ) const; + + /// Integral: P1ToP2ElementwiseGradient_0_0 + /// - volume element: triangle, dim: 2, vertices: 3, spacedim: 2 + /// - kernel type: toMatrix + /// - loop strategy: CUBES /// - quadrature rule: Dunavant 2 | points: 3, degree: 2 + /// - blending map: IdentityMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 98 126 12 0 0 0 0 4 - void toMatrix_macro_2D( idx_t* RESTRICT _data_dstEdge, - idx_t* RESTRICT _data_dstVertex, - idx_t* RESTRICT _data_src, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - std::shared_ptr< SparseMatrixProxy > mat, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float ) const; - /// Kernel type: toMatrix + void toMatrix_P1ToP2ElementwiseGradient_0_0_macro_2D( idx_t* RESTRICT _data_dstEdge, + idx_t* RESTRICT _data_dstVertex, + idx_t* RESTRICT _data_src, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + std::shared_ptr< SparseMatrixProxy > mat, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float ) const; + + /// Integral: P1ToP2ElementwiseGradient_0_0 + /// - volume element: tetrahedron, dim: 3, vertices: 4, spacedim: 3 + /// - kernel type: toMatrix + /// - loop strategy: CUBES /// - quadrature rule: Hammer-Marlowe-Stroud 1 | points: 4, degree: 2 + /// - blending map: IdentityMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 305 340 36 0 0 0 0 4 - void toMatrix_macro_3D( idx_t* RESTRICT _data_dstEdge, - idx_t* RESTRICT _data_dstVertex, - idx_t* RESTRICT _data_src, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_0comp2, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_1comp2, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - real_t macro_vertex_coord_id_2comp2, - real_t macro_vertex_coord_id_3comp0, - real_t macro_vertex_coord_id_3comp1, - real_t macro_vertex_coord_id_3comp2, - std::shared_ptr< SparseMatrixProxy > mat, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float ) const; + void toMatrix_P1ToP2ElementwiseGradient_0_0_macro_3D( idx_t* RESTRICT _data_dstEdge, + idx_t* RESTRICT _data_dstVertex, + idx_t* RESTRICT _data_src, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_0comp2, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_1comp2, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + real_t macro_vertex_coord_id_2comp2, + real_t macro_vertex_coord_id_3comp0, + real_t macro_vertex_coord_id_3comp1, + real_t macro_vertex_coord_id_3comp2, + std::shared_ptr< SparseMatrixProxy > mat, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float ) const; }; } // namespace operatorgeneration diff --git a/operators/gradient/P1ToP2ElementwiseGradient_1_0.cpp b/operators/gradient/P1ToP2ElementwiseGradient_1_0.cpp index 9f65cb1191fdaf598875555442b97a8583f2e8b4..c60d61764a39b092769cca0ab226de413619d816 100644 --- a/operators/gradient/P1ToP2ElementwiseGradient_1_0.cpp +++ b/operators/gradient/P1ToP2ElementwiseGradient_1_0.cpp @@ -128,7 +128,7 @@ void P1ToP2ElementwiseGradient_1_0::apply( const P1Function< real_t >& src, this->timingTree_->start( "kernel" ); - apply_macro_3D( + apply_P1ToP2ElementwiseGradient_1_0_macro_3D( _data_dstEdge, _data_dstVertex, @@ -147,6 +147,7 @@ void P1ToP2ElementwiseGradient_1_0::apply( const P1Function< real_t >& src, macro_vertex_coord_id_3comp2, micro_edges_per_macro_edge, micro_edges_per_macro_edge_float ); + this->timingTree_->stop( "kernel" ); } @@ -213,7 +214,7 @@ void P1ToP2ElementwiseGradient_1_0::apply( const P1Function< real_t >& src, this->timingTree_->start( "kernel" ); - apply_macro_2D( + apply_P1ToP2ElementwiseGradient_1_0_macro_2D( _data_dstEdge, _data_dstVertex, @@ -226,6 +227,7 @@ void P1ToP2ElementwiseGradient_1_0::apply( const P1Function< real_t >& src, macro_vertex_coord_id_2comp1, micro_edges_per_macro_edge, micro_edges_per_macro_edge_float ); + this->timingTree_->stop( "kernel" ); } @@ -291,7 +293,7 @@ void P1ToP2ElementwiseGradient_1_0::toMatrix( const std::shared_ptr< SparseMatri this->timingTree_->start( "kernel" ); - toMatrix_macro_3D( + toMatrix_P1ToP2ElementwiseGradient_1_0_macro_3D( _data_dstEdge, _data_dstVertex, @@ -311,6 +313,7 @@ void P1ToP2ElementwiseGradient_1_0::toMatrix( const std::shared_ptr< SparseMatri mat, micro_edges_per_macro_edge, micro_edges_per_macro_edge_float ); + this->timingTree_->stop( "kernel" ); } } @@ -340,7 +343,7 @@ void P1ToP2ElementwiseGradient_1_0::toMatrix( const std::shared_ptr< SparseMatri this->timingTree_->start( "kernel" ); - toMatrix_macro_2D( + toMatrix_P1ToP2ElementwiseGradient_1_0_macro_2D( _data_dstEdge, _data_dstVertex, @@ -354,6 +357,7 @@ void P1ToP2ElementwiseGradient_1_0::toMatrix( const std::shared_ptr< SparseMatri mat, micro_edges_per_macro_edge, micro_edges_per_macro_edge_float ); + this->timingTree_->stop( "kernel" ); } } diff --git a/operators/gradient/P1ToP2ElementwiseGradient_1_0.hpp b/operators/gradient/P1ToP2ElementwiseGradient_1_0.hpp index ee0af3124612c3e3e69b1c4ef5f41f41edf129c3..8f9767541aa0fe5ffcd903b636bfe123506d776d 100644 --- a/operators/gradient/P1ToP2ElementwiseGradient_1_0.hpp +++ b/operators/gradient/P1ToP2ElementwiseGradient_1_0.hpp @@ -29,6 +29,7 @@ #include "core/DataTypes.h" #include "hyteg/LikwidWrapper.hpp" +#include "hyteg/boundary/BoundaryConditions.hpp" #include "hyteg/communication/Syncing.hpp" #include "hyteg/edgedofspace/EdgeDoFMacroCell.hpp" #include "hyteg/operators/Operator.hpp" @@ -36,6 +37,7 @@ #include "hyteg/p2functionspace/P2Function.hpp" #include "hyteg/primitivestorage/PrimitiveStorage.hpp" #include "hyteg/sparseassembly/SparseMatrixProxy.hpp" +#include "hyteg/types/types.hpp" #define FUNC_PREFIX @@ -74,88 +76,107 @@ class P1ToP2ElementwiseGradient_1_0 : public Operator< P1Function< real_t >, P2F protected: private: - /// Kernel type: apply + /// Integral: P1ToP2ElementwiseGradient_1_0 + /// - volume element: triangle, dim: 2, vertices: 3, spacedim: 2 + /// - kernel type: apply + /// - loop strategy: CUBES /// - quadrature rule: Dunavant 2 | points: 3, degree: 2 + /// - blending map: IdentityMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 116 144 12 0 0 0 0 1 - void apply_macro_2D( real_t* RESTRICT _data_dstEdge, - real_t* RESTRICT _data_dstVertex, - real_t* RESTRICT _data_src, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float ) const; - /// Kernel type: apply + void apply_P1ToP2ElementwiseGradient_1_0_macro_2D( real_t* RESTRICT _data_dstEdge, + real_t* RESTRICT _data_dstVertex, + real_t* RESTRICT _data_src, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float ) const; + + /// Integral: P1ToP2ElementwiseGradient_1_0 + /// - volume element: tetrahedron, dim: 3, vertices: 4, spacedim: 3 + /// - kernel type: apply + /// - loop strategy: CUBES /// - quadrature rule: Hammer-Marlowe-Stroud 1 | points: 4, degree: 2 + /// - blending map: IdentityMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 345 380 36 0 0 0 0 1 - void apply_macro_3D( real_t* RESTRICT _data_dstEdge, - real_t* RESTRICT _data_dstVertex, - real_t* RESTRICT _data_src, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_0comp2, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_1comp2, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - real_t macro_vertex_coord_id_2comp2, - real_t macro_vertex_coord_id_3comp0, - real_t macro_vertex_coord_id_3comp1, - real_t macro_vertex_coord_id_3comp2, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float ) const; - /// Kernel type: toMatrix + void apply_P1ToP2ElementwiseGradient_1_0_macro_3D( real_t* RESTRICT _data_dstEdge, + real_t* RESTRICT _data_dstVertex, + real_t* RESTRICT _data_src, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_0comp2, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_1comp2, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + real_t macro_vertex_coord_id_2comp2, + real_t macro_vertex_coord_id_3comp0, + real_t macro_vertex_coord_id_3comp1, + real_t macro_vertex_coord_id_3comp2, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float ) const; + + /// Integral: P1ToP2ElementwiseGradient_1_0 + /// - volume element: triangle, dim: 2, vertices: 3, spacedim: 2 + /// - kernel type: toMatrix + /// - loop strategy: CUBES /// - quadrature rule: Dunavant 2 | points: 3, degree: 2 + /// - blending map: IdentityMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 98 126 12 0 0 0 0 4 - void toMatrix_macro_2D( idx_t* RESTRICT _data_dstEdge, - idx_t* RESTRICT _data_dstVertex, - idx_t* RESTRICT _data_src, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - std::shared_ptr< SparseMatrixProxy > mat, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float ) const; - /// Kernel type: toMatrix + void toMatrix_P1ToP2ElementwiseGradient_1_0_macro_2D( idx_t* RESTRICT _data_dstEdge, + idx_t* RESTRICT _data_dstVertex, + idx_t* RESTRICT _data_src, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + std::shared_ptr< SparseMatrixProxy > mat, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float ) const; + + /// Integral: P1ToP2ElementwiseGradient_1_0 + /// - volume element: tetrahedron, dim: 3, vertices: 4, spacedim: 3 + /// - kernel type: toMatrix + /// - loop strategy: CUBES /// - quadrature rule: Hammer-Marlowe-Stroud 1 | points: 4, degree: 2 + /// - blending map: IdentityMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 305 340 36 0 0 0 0 4 - void toMatrix_macro_3D( idx_t* RESTRICT _data_dstEdge, - idx_t* RESTRICT _data_dstVertex, - idx_t* RESTRICT _data_src, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_0comp2, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_1comp2, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - real_t macro_vertex_coord_id_2comp2, - real_t macro_vertex_coord_id_3comp0, - real_t macro_vertex_coord_id_3comp1, - real_t macro_vertex_coord_id_3comp2, - std::shared_ptr< SparseMatrixProxy > mat, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float ) const; + void toMatrix_P1ToP2ElementwiseGradient_1_0_macro_3D( idx_t* RESTRICT _data_dstEdge, + idx_t* RESTRICT _data_dstVertex, + idx_t* RESTRICT _data_src, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_0comp2, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_1comp2, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + real_t macro_vertex_coord_id_2comp2, + real_t macro_vertex_coord_id_3comp0, + real_t macro_vertex_coord_id_3comp1, + real_t macro_vertex_coord_id_3comp2, + std::shared_ptr< SparseMatrixProxy > mat, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float ) const; }; } // namespace operatorgeneration diff --git a/operators/gradient/P1ToP2ElementwiseGradient_2_0.cpp b/operators/gradient/P1ToP2ElementwiseGradient_2_0.cpp index ffa952cc41ca55fd0229942be9076a6ee6f5531e..80d01f0cb3654321111a1e1d99451edb845b7706 100644 --- a/operators/gradient/P1ToP2ElementwiseGradient_2_0.cpp +++ b/operators/gradient/P1ToP2ElementwiseGradient_2_0.cpp @@ -128,7 +128,7 @@ void P1ToP2ElementwiseGradient_2_0::apply( const P1Function< real_t >& src, this->timingTree_->start( "kernel" ); - apply_macro_3D( + apply_P1ToP2ElementwiseGradient_2_0_macro_3D( _data_dstEdge, _data_dstVertex, @@ -147,6 +147,7 @@ void P1ToP2ElementwiseGradient_2_0::apply( const P1Function< real_t >& src, macro_vertex_coord_id_3comp2, micro_edges_per_macro_edge, micro_edges_per_macro_edge_float ); + this->timingTree_->stop( "kernel" ); } @@ -220,7 +221,7 @@ void P1ToP2ElementwiseGradient_2_0::toMatrix( const std::shared_ptr< SparseMatri this->timingTree_->start( "kernel" ); - toMatrix_macro_3D( + toMatrix_P1ToP2ElementwiseGradient_2_0_macro_3D( _data_dstEdge, _data_dstVertex, @@ -240,6 +241,7 @@ void P1ToP2ElementwiseGradient_2_0::toMatrix( const std::shared_ptr< SparseMatri mat, micro_edges_per_macro_edge, micro_edges_per_macro_edge_float ); + this->timingTree_->stop( "kernel" ); } } diff --git a/operators/gradient/P1ToP2ElementwiseGradient_2_0.hpp b/operators/gradient/P1ToP2ElementwiseGradient_2_0.hpp index 7f51887193c65a9c5d430e3dac4a223a2212fb9d..810b24c2a56703be3ebf8b0c0b23934f49f7839f 100644 --- a/operators/gradient/P1ToP2ElementwiseGradient_2_0.hpp +++ b/operators/gradient/P1ToP2ElementwiseGradient_2_0.hpp @@ -29,6 +29,7 @@ #include "core/DataTypes.h" #include "hyteg/LikwidWrapper.hpp" +#include "hyteg/boundary/BoundaryConditions.hpp" #include "hyteg/communication/Syncing.hpp" #include "hyteg/edgedofspace/EdgeDoFMacroCell.hpp" #include "hyteg/operators/Operator.hpp" @@ -36,6 +37,7 @@ #include "hyteg/p2functionspace/P2Function.hpp" #include "hyteg/primitivestorage/PrimitiveStorage.hpp" #include "hyteg/sparseassembly/SparseMatrixProxy.hpp" +#include "hyteg/types/types.hpp" #define FUNC_PREFIX @@ -74,53 +76,62 @@ class P1ToP2ElementwiseGradient_2_0 : public Operator< P1Function< real_t >, P2F protected: private: - /// Kernel type: apply + /// Integral: P1ToP2ElementwiseGradient_2_0 + /// - volume element: tetrahedron, dim: 3, vertices: 4, spacedim: 3 + /// - kernel type: apply + /// - loop strategy: CUBES /// - quadrature rule: Hammer-Marlowe-Stroud 1 | points: 4, degree: 2 + /// - blending map: IdentityMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 345 380 36 0 0 0 0 1 - void apply_macro_3D( real_t* RESTRICT _data_dstEdge, - real_t* RESTRICT _data_dstVertex, - real_t* RESTRICT _data_src, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_0comp2, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_1comp2, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - real_t macro_vertex_coord_id_2comp2, - real_t macro_vertex_coord_id_3comp0, - real_t macro_vertex_coord_id_3comp1, - real_t macro_vertex_coord_id_3comp2, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float ) const; - /// Kernel type: toMatrix + void apply_P1ToP2ElementwiseGradient_2_0_macro_3D( real_t* RESTRICT _data_dstEdge, + real_t* RESTRICT _data_dstVertex, + real_t* RESTRICT _data_src, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_0comp2, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_1comp2, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + real_t macro_vertex_coord_id_2comp2, + real_t macro_vertex_coord_id_3comp0, + real_t macro_vertex_coord_id_3comp1, + real_t macro_vertex_coord_id_3comp2, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float ) const; + + /// Integral: P1ToP2ElementwiseGradient_2_0 + /// - volume element: tetrahedron, dim: 3, vertices: 4, spacedim: 3 + /// - kernel type: toMatrix + /// - loop strategy: CUBES /// - quadrature rule: Hammer-Marlowe-Stroud 1 | points: 4, degree: 2 + /// - blending map: IdentityMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 305 340 36 0 0 0 0 4 - void toMatrix_macro_3D( idx_t* RESTRICT _data_dstEdge, - idx_t* RESTRICT _data_dstVertex, - idx_t* RESTRICT _data_src, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_0comp2, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_1comp2, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - real_t macro_vertex_coord_id_2comp2, - real_t macro_vertex_coord_id_3comp0, - real_t macro_vertex_coord_id_3comp1, - real_t macro_vertex_coord_id_3comp2, - std::shared_ptr< SparseMatrixProxy > mat, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float ) const; + void toMatrix_P1ToP2ElementwiseGradient_2_0_macro_3D( idx_t* RESTRICT _data_dstEdge, + idx_t* RESTRICT _data_dstVertex, + idx_t* RESTRICT _data_src, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_0comp2, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_1comp2, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + real_t macro_vertex_coord_id_2comp2, + real_t macro_vertex_coord_id_3comp0, + real_t macro_vertex_coord_id_3comp1, + real_t macro_vertex_coord_id_3comp2, + std::shared_ptr< SparseMatrixProxy > mat, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float ) const; }; } // namespace operatorgeneration diff --git a/operators/gradient/avx/P1ToP2ElementwiseGradientAnnulusMap_0_0_apply_macro_2D.cpp b/operators/gradient/avx/P1ToP2ElementwiseGradientAnnulusMap_0_0_apply_P1ToP2ElementwiseGradientAnnulusMap_0_0_macro_2D.cpp similarity index 99% rename from operators/gradient/avx/P1ToP2ElementwiseGradientAnnulusMap_0_0_apply_macro_2D.cpp rename to operators/gradient/avx/P1ToP2ElementwiseGradientAnnulusMap_0_0_apply_P1ToP2ElementwiseGradientAnnulusMap_0_0_macro_2D.cpp index 56fce1cf627308c4c84cbf1021e369823148d6d4..68d512a999c32260ae45ca5239da0a71486676bd 100644 --- a/operators/gradient/avx/P1ToP2ElementwiseGradientAnnulusMap_0_0_apply_macro_2D.cpp +++ b/operators/gradient/avx/P1ToP2ElementwiseGradientAnnulusMap_0_0_apply_P1ToP2ElementwiseGradientAnnulusMap_0_0_macro_2D.cpp @@ -45,6 +45,10 @@ + + + + @@ -58,7 +62,7 @@ namespace hyteg { namespace operatorgeneration { -void P1ToP2ElementwiseGradientAnnulusMap_0_0::apply_macro_2D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_src, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const +void P1ToP2ElementwiseGradientAnnulusMap_0_0::apply_P1ToP2ElementwiseGradientAnnulusMap_0_0_macro_2D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_src, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const { { const real_t tmp_coords_jac_0_GRAY = 1.0 / (micro_edges_per_macro_edge_float)*1.0; diff --git a/operators/gradient/avx/P1ToP2ElementwiseGradientAnnulusMap_1_0_apply_macro_2D.cpp b/operators/gradient/avx/P1ToP2ElementwiseGradientAnnulusMap_1_0_apply_P1ToP2ElementwiseGradientAnnulusMap_1_0_macro_2D.cpp similarity index 99% rename from operators/gradient/avx/P1ToP2ElementwiseGradientAnnulusMap_1_0_apply_macro_2D.cpp rename to operators/gradient/avx/P1ToP2ElementwiseGradientAnnulusMap_1_0_apply_P1ToP2ElementwiseGradientAnnulusMap_1_0_macro_2D.cpp index 47d6b55f3900e73c309b09fed94f982a8278dd5e..ac6c2dca89e23bb27d1457bb7a9cb467a630b098 100644 --- a/operators/gradient/avx/P1ToP2ElementwiseGradientAnnulusMap_1_0_apply_macro_2D.cpp +++ b/operators/gradient/avx/P1ToP2ElementwiseGradientAnnulusMap_1_0_apply_P1ToP2ElementwiseGradientAnnulusMap_1_0_macro_2D.cpp @@ -45,6 +45,10 @@ + + + + @@ -58,7 +62,7 @@ namespace hyteg { namespace operatorgeneration { -void P1ToP2ElementwiseGradientAnnulusMap_1_0::apply_macro_2D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_src, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const +void P1ToP2ElementwiseGradientAnnulusMap_1_0::apply_P1ToP2ElementwiseGradientAnnulusMap_1_0_macro_2D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_src, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const { { const real_t tmp_coords_jac_0_GRAY = 1.0 / (micro_edges_per_macro_edge_float)*1.0; diff --git a/operators/gradient/avx/P1ToP2ElementwiseGradientIcosahedralShellMap_0_0_apply_macro_3D.cpp b/operators/gradient/avx/P1ToP2ElementwiseGradientIcosahedralShellMap_0_0_apply_P1ToP2ElementwiseGradientIcosahedralShellMap_0_0_macro_3D.cpp similarity index 99% rename from operators/gradient/avx/P1ToP2ElementwiseGradientIcosahedralShellMap_0_0_apply_macro_3D.cpp rename to operators/gradient/avx/P1ToP2ElementwiseGradientIcosahedralShellMap_0_0_apply_P1ToP2ElementwiseGradientIcosahedralShellMap_0_0_macro_3D.cpp index 657796e4af08fa099f45b44a8d6a0d3209fb5691..f08adf588b2d15bb7fad5210dc11850f57c1854a 100644 --- a/operators/gradient/avx/P1ToP2ElementwiseGradientIcosahedralShellMap_0_0_apply_macro_3D.cpp +++ b/operators/gradient/avx/P1ToP2ElementwiseGradientIcosahedralShellMap_0_0_apply_P1ToP2ElementwiseGradientIcosahedralShellMap_0_0_macro_3D.cpp @@ -45,6 +45,10 @@ + + + + @@ -58,7 +62,7 @@ namespace hyteg { namespace operatorgeneration { -void P1ToP2ElementwiseGradientIcosahedralShellMap_0_0::apply_macro_3D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_src, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const +void P1ToP2ElementwiseGradientIcosahedralShellMap_0_0::apply_P1ToP2ElementwiseGradientIcosahedralShellMap_0_0_macro_3D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_src, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const { { const real_t tmp_coords_jac_0_WHITE_UP = 1.0 / (micro_edges_per_macro_edge_float)*1.0; diff --git a/operators/gradient/avx/P1ToP2ElementwiseGradientIcosahedralShellMap_1_0_apply_macro_3D.cpp b/operators/gradient/avx/P1ToP2ElementwiseGradientIcosahedralShellMap_1_0_apply_P1ToP2ElementwiseGradientIcosahedralShellMap_1_0_macro_3D.cpp similarity index 99% rename from operators/gradient/avx/P1ToP2ElementwiseGradientIcosahedralShellMap_1_0_apply_macro_3D.cpp rename to operators/gradient/avx/P1ToP2ElementwiseGradientIcosahedralShellMap_1_0_apply_P1ToP2ElementwiseGradientIcosahedralShellMap_1_0_macro_3D.cpp index d94aa20006b4ccdb90d35aca786dab17d675b19c..2b92e6b7947c8cf910b28a133bece7527289d8ad 100644 --- a/operators/gradient/avx/P1ToP2ElementwiseGradientIcosahedralShellMap_1_0_apply_macro_3D.cpp +++ b/operators/gradient/avx/P1ToP2ElementwiseGradientIcosahedralShellMap_1_0_apply_P1ToP2ElementwiseGradientIcosahedralShellMap_1_0_macro_3D.cpp @@ -45,6 +45,10 @@ + + + + @@ -58,7 +62,7 @@ namespace hyteg { namespace operatorgeneration { -void P1ToP2ElementwiseGradientIcosahedralShellMap_1_0::apply_macro_3D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_src, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const +void P1ToP2ElementwiseGradientIcosahedralShellMap_1_0::apply_P1ToP2ElementwiseGradientIcosahedralShellMap_1_0_macro_3D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_src, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const { { const real_t tmp_coords_jac_0_WHITE_UP = 1.0 / (micro_edges_per_macro_edge_float)*1.0; diff --git a/operators/gradient/avx/P1ToP2ElementwiseGradientIcosahedralShellMap_2_0_apply_macro_3D.cpp b/operators/gradient/avx/P1ToP2ElementwiseGradientIcosahedralShellMap_2_0_apply_P1ToP2ElementwiseGradientIcosahedralShellMap_2_0_macro_3D.cpp similarity index 99% rename from operators/gradient/avx/P1ToP2ElementwiseGradientIcosahedralShellMap_2_0_apply_macro_3D.cpp rename to operators/gradient/avx/P1ToP2ElementwiseGradientIcosahedralShellMap_2_0_apply_P1ToP2ElementwiseGradientIcosahedralShellMap_2_0_macro_3D.cpp index 808dafe1d5fb0103b9fca56396d4f2ee4858ea18..f0e17ec34ce19bbbbdabf015daf9fc6fa0b9f206 100644 --- a/operators/gradient/avx/P1ToP2ElementwiseGradientIcosahedralShellMap_2_0_apply_macro_3D.cpp +++ b/operators/gradient/avx/P1ToP2ElementwiseGradientIcosahedralShellMap_2_0_apply_P1ToP2ElementwiseGradientIcosahedralShellMap_2_0_macro_3D.cpp @@ -45,6 +45,10 @@ + + + + @@ -58,7 +62,7 @@ namespace hyteg { namespace operatorgeneration { -void P1ToP2ElementwiseGradientIcosahedralShellMap_2_0::apply_macro_3D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_src, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const +void P1ToP2ElementwiseGradientIcosahedralShellMap_2_0::apply_P1ToP2ElementwiseGradientIcosahedralShellMap_2_0_macro_3D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_src, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const { { const real_t tmp_coords_jac_0_WHITE_UP = 1.0 / (micro_edges_per_macro_edge_float)*1.0; diff --git a/operators/gradient/avx/P1ToP2ElementwiseGradient_0_0_apply_macro_2D.cpp b/operators/gradient/avx/P1ToP2ElementwiseGradient_0_0_apply_P1ToP2ElementwiseGradient_0_0_macro_2D.cpp similarity index 99% rename from operators/gradient/avx/P1ToP2ElementwiseGradient_0_0_apply_macro_2D.cpp rename to operators/gradient/avx/P1ToP2ElementwiseGradient_0_0_apply_P1ToP2ElementwiseGradient_0_0_macro_2D.cpp index 2cea1726d12749de45035b7b5066831cde6a9910..0b4dbee5a08f29607dc469e86d841c243793a58d 100644 --- a/operators/gradient/avx/P1ToP2ElementwiseGradient_0_0_apply_macro_2D.cpp +++ b/operators/gradient/avx/P1ToP2ElementwiseGradient_0_0_apply_P1ToP2ElementwiseGradient_0_0_macro_2D.cpp @@ -43,6 +43,10 @@ + + + + @@ -56,7 +60,7 @@ namespace hyteg { namespace operatorgeneration { -void P1ToP2ElementwiseGradient_0_0::apply_macro_2D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_src, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P1ToP2ElementwiseGradient_0_0::apply_P1ToP2ElementwiseGradient_0_0_macro_2D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_src, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t tmp_coords_jac_0_BLUE = 1.0 / (micro_edges_per_macro_edge_float)*1.0; diff --git a/operators/gradient/avx/P1ToP2ElementwiseGradient_0_0_apply_macro_3D.cpp b/operators/gradient/avx/P1ToP2ElementwiseGradient_0_0_apply_P1ToP2ElementwiseGradient_0_0_macro_3D.cpp similarity index 99% rename from operators/gradient/avx/P1ToP2ElementwiseGradient_0_0_apply_macro_3D.cpp rename to operators/gradient/avx/P1ToP2ElementwiseGradient_0_0_apply_P1ToP2ElementwiseGradient_0_0_macro_3D.cpp index 38b98cf74375e2068f45c8138111b1ad49e354a5..25e439139f43f171d4393298191625908a8f3638 100644 --- a/operators/gradient/avx/P1ToP2ElementwiseGradient_0_0_apply_macro_3D.cpp +++ b/operators/gradient/avx/P1ToP2ElementwiseGradient_0_0_apply_P1ToP2ElementwiseGradient_0_0_macro_3D.cpp @@ -43,6 +43,10 @@ + + + + @@ -56,7 +60,7 @@ namespace hyteg { namespace operatorgeneration { -void P1ToP2ElementwiseGradient_0_0::apply_macro_3D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_src, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P1ToP2ElementwiseGradient_0_0::apply_P1ToP2ElementwiseGradient_0_0_macro_3D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_src, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t tmp_coords_jac_0_GREEN_DOWN = 1.0 / (micro_edges_per_macro_edge_float)*1.0; diff --git a/operators/gradient/avx/P1ToP2ElementwiseGradient_1_0_apply_macro_2D.cpp b/operators/gradient/avx/P1ToP2ElementwiseGradient_1_0_apply_P1ToP2ElementwiseGradient_1_0_macro_2D.cpp similarity index 99% rename from operators/gradient/avx/P1ToP2ElementwiseGradient_1_0_apply_macro_2D.cpp rename to operators/gradient/avx/P1ToP2ElementwiseGradient_1_0_apply_P1ToP2ElementwiseGradient_1_0_macro_2D.cpp index e446fff4fb08becc946f168db2bfd5fd9b94239f..1eb2891d9e5662f5fa7dc4d2f7783231cf11405a 100644 --- a/operators/gradient/avx/P1ToP2ElementwiseGradient_1_0_apply_macro_2D.cpp +++ b/operators/gradient/avx/P1ToP2ElementwiseGradient_1_0_apply_P1ToP2ElementwiseGradient_1_0_macro_2D.cpp @@ -43,6 +43,10 @@ + + + + @@ -56,7 +60,7 @@ namespace hyteg { namespace operatorgeneration { -void P1ToP2ElementwiseGradient_1_0::apply_macro_2D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_src, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P1ToP2ElementwiseGradient_1_0::apply_P1ToP2ElementwiseGradient_1_0_macro_2D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_src, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t tmp_coords_jac_0_BLUE = 1.0 / (micro_edges_per_macro_edge_float)*1.0; diff --git a/operators/gradient/avx/P1ToP2ElementwiseGradient_1_0_apply_macro_3D.cpp b/operators/gradient/avx/P1ToP2ElementwiseGradient_1_0_apply_P1ToP2ElementwiseGradient_1_0_macro_3D.cpp similarity index 99% rename from operators/gradient/avx/P1ToP2ElementwiseGradient_1_0_apply_macro_3D.cpp rename to operators/gradient/avx/P1ToP2ElementwiseGradient_1_0_apply_P1ToP2ElementwiseGradient_1_0_macro_3D.cpp index 3225debc59be0b7899b58abbfb9a698a45f30892..cef4dfa42bd1a4068566cbdaa22eeb73fffdc036 100644 --- a/operators/gradient/avx/P1ToP2ElementwiseGradient_1_0_apply_macro_3D.cpp +++ b/operators/gradient/avx/P1ToP2ElementwiseGradient_1_0_apply_P1ToP2ElementwiseGradient_1_0_macro_3D.cpp @@ -43,6 +43,10 @@ + + + + @@ -56,7 +60,7 @@ namespace hyteg { namespace operatorgeneration { -void P1ToP2ElementwiseGradient_1_0::apply_macro_3D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_src, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P1ToP2ElementwiseGradient_1_0::apply_P1ToP2ElementwiseGradient_1_0_macro_3D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_src, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t tmp_coords_jac_0_GREEN_DOWN = 1.0 / (micro_edges_per_macro_edge_float)*1.0; diff --git a/operators/gradient/avx/P1ToP2ElementwiseGradient_2_0_apply_macro_3D.cpp b/operators/gradient/avx/P1ToP2ElementwiseGradient_2_0_apply_P1ToP2ElementwiseGradient_2_0_macro_3D.cpp similarity index 99% rename from operators/gradient/avx/P1ToP2ElementwiseGradient_2_0_apply_macro_3D.cpp rename to operators/gradient/avx/P1ToP2ElementwiseGradient_2_0_apply_P1ToP2ElementwiseGradient_2_0_macro_3D.cpp index 25c56d9610b3f3184c508648b7fa41f85c023f91..042040bb9b563b240d585aec8f6ed331c9db3f8f 100644 --- a/operators/gradient/avx/P1ToP2ElementwiseGradient_2_0_apply_macro_3D.cpp +++ b/operators/gradient/avx/P1ToP2ElementwiseGradient_2_0_apply_P1ToP2ElementwiseGradient_2_0_macro_3D.cpp @@ -43,6 +43,10 @@ + + + + @@ -56,7 +60,7 @@ namespace hyteg { namespace operatorgeneration { -void P1ToP2ElementwiseGradient_2_0::apply_macro_3D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_src, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P1ToP2ElementwiseGradient_2_0::apply_P1ToP2ElementwiseGradient_2_0_macro_3D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_src, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t tmp_coords_jac_0_GREEN_DOWN = 1.0 / (micro_edges_per_macro_edge_float)*1.0; diff --git a/operators/gradient/noarch/P1ToP2ElementwiseGradientAnnulusMap_0_0_apply_macro_2D.cpp b/operators/gradient/noarch/P1ToP2ElementwiseGradientAnnulusMap_0_0_apply_P1ToP2ElementwiseGradientAnnulusMap_0_0_macro_2D.cpp similarity index 98% rename from operators/gradient/noarch/P1ToP2ElementwiseGradientAnnulusMap_0_0_apply_macro_2D.cpp rename to operators/gradient/noarch/P1ToP2ElementwiseGradientAnnulusMap_0_0_apply_P1ToP2ElementwiseGradientAnnulusMap_0_0_macro_2D.cpp index b4312145ddb3eba167e10cc91c82250e73426120..f935d21a624192b3b9a309a727e0dbd412d004a6 100644 --- a/operators/gradient/noarch/P1ToP2ElementwiseGradientAnnulusMap_0_0_apply_macro_2D.cpp +++ b/operators/gradient/noarch/P1ToP2ElementwiseGradientAnnulusMap_0_0_apply_P1ToP2ElementwiseGradientAnnulusMap_0_0_macro_2D.cpp @@ -45,6 +45,10 @@ + + + + @@ -58,7 +62,7 @@ namespace hyteg { namespace operatorgeneration { -void P1ToP2ElementwiseGradientAnnulusMap_0_0::apply_macro_2D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_src, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const +void P1ToP2ElementwiseGradientAnnulusMap_0_0::apply_P1ToP2ElementwiseGradientAnnulusMap_0_0_macro_2D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_src, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const { { const real_t tmp_coords_jac_0_GRAY = 1.0 / (micro_edges_per_macro_edge_float)*1.0; diff --git a/operators/gradient/noarch/P1ToP2ElementwiseGradientAnnulusMap_0_0_toMatrix_macro_2D.cpp b/operators/gradient/noarch/P1ToP2ElementwiseGradientAnnulusMap_0_0_toMatrix_P1ToP2ElementwiseGradientAnnulusMap_0_0_macro_2D.cpp similarity index 98% rename from operators/gradient/noarch/P1ToP2ElementwiseGradientAnnulusMap_0_0_toMatrix_macro_2D.cpp rename to operators/gradient/noarch/P1ToP2ElementwiseGradientAnnulusMap_0_0_toMatrix_P1ToP2ElementwiseGradientAnnulusMap_0_0_macro_2D.cpp index 28bd7239b22f4336e6f76917df16ef4c16f84eee..9588c7cdadd50c512a3913a57151ce15c69357aa 100644 --- a/operators/gradient/noarch/P1ToP2ElementwiseGradientAnnulusMap_0_0_toMatrix_macro_2D.cpp +++ b/operators/gradient/noarch/P1ToP2ElementwiseGradientAnnulusMap_0_0_toMatrix_P1ToP2ElementwiseGradientAnnulusMap_0_0_macro_2D.cpp @@ -45,6 +45,10 @@ + + + + @@ -58,7 +62,7 @@ namespace hyteg { namespace operatorgeneration { -void P1ToP2ElementwiseGradientAnnulusMap_0_0::toMatrix_macro_2D( idx_t * RESTRICT _data_dstEdge, idx_t * RESTRICT _data_dstVertex, idx_t * RESTRICT _data_src, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const +void P1ToP2ElementwiseGradientAnnulusMap_0_0::toMatrix_P1ToP2ElementwiseGradientAnnulusMap_0_0_macro_2D( idx_t * RESTRICT _data_dstEdge, idx_t * RESTRICT _data_dstVertex, idx_t * RESTRICT _data_src, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const { { const real_t tmp_coords_jac_0_GRAY = 1.0 / (micro_edges_per_macro_edge_float)*1.0; diff --git a/operators/gradient/noarch/P1ToP2ElementwiseGradientAnnulusMap_1_0_apply_macro_2D.cpp b/operators/gradient/noarch/P1ToP2ElementwiseGradientAnnulusMap_1_0_apply_P1ToP2ElementwiseGradientAnnulusMap_1_0_macro_2D.cpp similarity index 98% rename from operators/gradient/noarch/P1ToP2ElementwiseGradientAnnulusMap_1_0_apply_macro_2D.cpp rename to operators/gradient/noarch/P1ToP2ElementwiseGradientAnnulusMap_1_0_apply_P1ToP2ElementwiseGradientAnnulusMap_1_0_macro_2D.cpp index 60b23b461e6289bb8e7810f876283bb8a73ccea0..6288dadc4734c1096325f1c9220923f838e2e80f 100644 --- a/operators/gradient/noarch/P1ToP2ElementwiseGradientAnnulusMap_1_0_apply_macro_2D.cpp +++ b/operators/gradient/noarch/P1ToP2ElementwiseGradientAnnulusMap_1_0_apply_P1ToP2ElementwiseGradientAnnulusMap_1_0_macro_2D.cpp @@ -45,6 +45,10 @@ + + + + @@ -58,7 +62,7 @@ namespace hyteg { namespace operatorgeneration { -void P1ToP2ElementwiseGradientAnnulusMap_1_0::apply_macro_2D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_src, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const +void P1ToP2ElementwiseGradientAnnulusMap_1_0::apply_P1ToP2ElementwiseGradientAnnulusMap_1_0_macro_2D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_src, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const { { const real_t tmp_coords_jac_0_GRAY = 1.0 / (micro_edges_per_macro_edge_float)*1.0; diff --git a/operators/gradient/noarch/P1ToP2ElementwiseGradientAnnulusMap_1_0_toMatrix_macro_2D.cpp b/operators/gradient/noarch/P1ToP2ElementwiseGradientAnnulusMap_1_0_toMatrix_P1ToP2ElementwiseGradientAnnulusMap_1_0_macro_2D.cpp similarity index 98% rename from operators/gradient/noarch/P1ToP2ElementwiseGradientAnnulusMap_1_0_toMatrix_macro_2D.cpp rename to operators/gradient/noarch/P1ToP2ElementwiseGradientAnnulusMap_1_0_toMatrix_P1ToP2ElementwiseGradientAnnulusMap_1_0_macro_2D.cpp index ce869eef5c3ce14a5b6f5e5ae02ed8671cbd1dda..9d12e8247ae288ffd043140816a07b9bb2b9a89c 100644 --- a/operators/gradient/noarch/P1ToP2ElementwiseGradientAnnulusMap_1_0_toMatrix_macro_2D.cpp +++ b/operators/gradient/noarch/P1ToP2ElementwiseGradientAnnulusMap_1_0_toMatrix_P1ToP2ElementwiseGradientAnnulusMap_1_0_macro_2D.cpp @@ -45,6 +45,10 @@ + + + + @@ -58,7 +62,7 @@ namespace hyteg { namespace operatorgeneration { -void P1ToP2ElementwiseGradientAnnulusMap_1_0::toMatrix_macro_2D( idx_t * RESTRICT _data_dstEdge, idx_t * RESTRICT _data_dstVertex, idx_t * RESTRICT _data_src, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const +void P1ToP2ElementwiseGradientAnnulusMap_1_0::toMatrix_P1ToP2ElementwiseGradientAnnulusMap_1_0_macro_2D( idx_t * RESTRICT _data_dstEdge, idx_t * RESTRICT _data_dstVertex, idx_t * RESTRICT _data_src, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const { { const real_t tmp_coords_jac_0_GRAY = 1.0 / (micro_edges_per_macro_edge_float)*1.0; diff --git a/operators/gradient/noarch/P1ToP2ElementwiseGradientIcosahedralShellMap_0_0_apply_macro_3D.cpp b/operators/gradient/noarch/P1ToP2ElementwiseGradientIcosahedralShellMap_0_0_apply_P1ToP2ElementwiseGradientIcosahedralShellMap_0_0_macro_3D.cpp similarity index 99% rename from operators/gradient/noarch/P1ToP2ElementwiseGradientIcosahedralShellMap_0_0_apply_macro_3D.cpp rename to operators/gradient/noarch/P1ToP2ElementwiseGradientIcosahedralShellMap_0_0_apply_P1ToP2ElementwiseGradientIcosahedralShellMap_0_0_macro_3D.cpp index d61d2c99ac8a9ffaec12507f380fda151b42f101..2eab90080db08bd18fc19549fb7af61a7382f9b8 100644 --- a/operators/gradient/noarch/P1ToP2ElementwiseGradientIcosahedralShellMap_0_0_apply_macro_3D.cpp +++ b/operators/gradient/noarch/P1ToP2ElementwiseGradientIcosahedralShellMap_0_0_apply_P1ToP2ElementwiseGradientIcosahedralShellMap_0_0_macro_3D.cpp @@ -45,6 +45,10 @@ + + + + @@ -58,7 +62,7 @@ namespace hyteg { namespace operatorgeneration { -void P1ToP2ElementwiseGradientIcosahedralShellMap_0_0::apply_macro_3D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_src, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const +void P1ToP2ElementwiseGradientIcosahedralShellMap_0_0::apply_P1ToP2ElementwiseGradientIcosahedralShellMap_0_0_macro_3D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_src, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const { { const real_t tmp_coords_jac_0_WHITE_UP = 1.0 / (micro_edges_per_macro_edge_float)*1.0; diff --git a/operators/gradient/noarch/P1ToP2ElementwiseGradientIcosahedralShellMap_0_0_toMatrix_macro_3D.cpp b/operators/gradient/noarch/P1ToP2ElementwiseGradientIcosahedralShellMap_0_0_toMatrix_P1ToP2ElementwiseGradientIcosahedralShellMap_0_0_macro_3D.cpp similarity index 99% rename from operators/gradient/noarch/P1ToP2ElementwiseGradientIcosahedralShellMap_0_0_toMatrix_macro_3D.cpp rename to operators/gradient/noarch/P1ToP2ElementwiseGradientIcosahedralShellMap_0_0_toMatrix_P1ToP2ElementwiseGradientIcosahedralShellMap_0_0_macro_3D.cpp index 41e1786a4dc3927a12a08a31e5398068d2d1e75a..254485aa55ba148f4b3ab5e4a32cecb884d43b65 100644 --- a/operators/gradient/noarch/P1ToP2ElementwiseGradientIcosahedralShellMap_0_0_toMatrix_macro_3D.cpp +++ b/operators/gradient/noarch/P1ToP2ElementwiseGradientIcosahedralShellMap_0_0_toMatrix_P1ToP2ElementwiseGradientIcosahedralShellMap_0_0_macro_3D.cpp @@ -45,6 +45,10 @@ + + + + @@ -58,7 +62,7 @@ namespace hyteg { namespace operatorgeneration { -void P1ToP2ElementwiseGradientIcosahedralShellMap_0_0::toMatrix_macro_3D( idx_t * RESTRICT _data_dstEdge, idx_t * RESTRICT _data_dstVertex, idx_t * RESTRICT _data_src, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const +void P1ToP2ElementwiseGradientIcosahedralShellMap_0_0::toMatrix_P1ToP2ElementwiseGradientIcosahedralShellMap_0_0_macro_3D( idx_t * RESTRICT _data_dstEdge, idx_t * RESTRICT _data_dstVertex, idx_t * RESTRICT _data_src, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const { { const real_t tmp_coords_jac_0_WHITE_UP = 1.0 / (micro_edges_per_macro_edge_float)*1.0; diff --git a/operators/gradient/noarch/P1ToP2ElementwiseGradientIcosahedralShellMap_1_0_apply_macro_3D.cpp b/operators/gradient/noarch/P1ToP2ElementwiseGradientIcosahedralShellMap_1_0_apply_P1ToP2ElementwiseGradientIcosahedralShellMap_1_0_macro_3D.cpp similarity index 99% rename from operators/gradient/noarch/P1ToP2ElementwiseGradientIcosahedralShellMap_1_0_apply_macro_3D.cpp rename to operators/gradient/noarch/P1ToP2ElementwiseGradientIcosahedralShellMap_1_0_apply_P1ToP2ElementwiseGradientIcosahedralShellMap_1_0_macro_3D.cpp index 14ec97f8cffada168b060212e8c606c176264026..5b3a5aa4acb273875a1e86ea1988e80971dabf5d 100644 --- a/operators/gradient/noarch/P1ToP2ElementwiseGradientIcosahedralShellMap_1_0_apply_macro_3D.cpp +++ b/operators/gradient/noarch/P1ToP2ElementwiseGradientIcosahedralShellMap_1_0_apply_P1ToP2ElementwiseGradientIcosahedralShellMap_1_0_macro_3D.cpp @@ -45,6 +45,10 @@ + + + + @@ -58,7 +62,7 @@ namespace hyteg { namespace operatorgeneration { -void P1ToP2ElementwiseGradientIcosahedralShellMap_1_0::apply_macro_3D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_src, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const +void P1ToP2ElementwiseGradientIcosahedralShellMap_1_0::apply_P1ToP2ElementwiseGradientIcosahedralShellMap_1_0_macro_3D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_src, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const { { const real_t tmp_coords_jac_0_WHITE_UP = 1.0 / (micro_edges_per_macro_edge_float)*1.0; diff --git a/operators/gradient/noarch/P1ToP2ElementwiseGradientIcosahedralShellMap_1_0_toMatrix_macro_3D.cpp b/operators/gradient/noarch/P1ToP2ElementwiseGradientIcosahedralShellMap_1_0_toMatrix_P1ToP2ElementwiseGradientIcosahedralShellMap_1_0_macro_3D.cpp similarity index 99% rename from operators/gradient/noarch/P1ToP2ElementwiseGradientIcosahedralShellMap_1_0_toMatrix_macro_3D.cpp rename to operators/gradient/noarch/P1ToP2ElementwiseGradientIcosahedralShellMap_1_0_toMatrix_P1ToP2ElementwiseGradientIcosahedralShellMap_1_0_macro_3D.cpp index 1cc81709c02cacce844a52a60d09b6c47b35a62f..3b0b1eb07ad44beb7405582cb242678a7dac50ac 100644 --- a/operators/gradient/noarch/P1ToP2ElementwiseGradientIcosahedralShellMap_1_0_toMatrix_macro_3D.cpp +++ b/operators/gradient/noarch/P1ToP2ElementwiseGradientIcosahedralShellMap_1_0_toMatrix_P1ToP2ElementwiseGradientIcosahedralShellMap_1_0_macro_3D.cpp @@ -45,6 +45,10 @@ + + + + @@ -58,7 +62,7 @@ namespace hyteg { namespace operatorgeneration { -void P1ToP2ElementwiseGradientIcosahedralShellMap_1_0::toMatrix_macro_3D( idx_t * RESTRICT _data_dstEdge, idx_t * RESTRICT _data_dstVertex, idx_t * RESTRICT _data_src, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const +void P1ToP2ElementwiseGradientIcosahedralShellMap_1_0::toMatrix_P1ToP2ElementwiseGradientIcosahedralShellMap_1_0_macro_3D( idx_t * RESTRICT _data_dstEdge, idx_t * RESTRICT _data_dstVertex, idx_t * RESTRICT _data_src, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const { { const real_t tmp_coords_jac_0_WHITE_UP = 1.0 / (micro_edges_per_macro_edge_float)*1.0; diff --git a/operators/gradient/noarch/P1ToP2ElementwiseGradientIcosahedralShellMap_2_0_apply_macro_3D.cpp b/operators/gradient/noarch/P1ToP2ElementwiseGradientIcosahedralShellMap_2_0_apply_P1ToP2ElementwiseGradientIcosahedralShellMap_2_0_macro_3D.cpp similarity index 99% rename from operators/gradient/noarch/P1ToP2ElementwiseGradientIcosahedralShellMap_2_0_apply_macro_3D.cpp rename to operators/gradient/noarch/P1ToP2ElementwiseGradientIcosahedralShellMap_2_0_apply_P1ToP2ElementwiseGradientIcosahedralShellMap_2_0_macro_3D.cpp index d45cc6292ff3fd8309e431ac4cdc564b25bdbe24..a0a11ee91df71673a8cae4e55d353be60b2a9094 100644 --- a/operators/gradient/noarch/P1ToP2ElementwiseGradientIcosahedralShellMap_2_0_apply_macro_3D.cpp +++ b/operators/gradient/noarch/P1ToP2ElementwiseGradientIcosahedralShellMap_2_0_apply_P1ToP2ElementwiseGradientIcosahedralShellMap_2_0_macro_3D.cpp @@ -45,6 +45,10 @@ + + + + @@ -58,7 +62,7 @@ namespace hyteg { namespace operatorgeneration { -void P1ToP2ElementwiseGradientIcosahedralShellMap_2_0::apply_macro_3D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_src, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const +void P1ToP2ElementwiseGradientIcosahedralShellMap_2_0::apply_P1ToP2ElementwiseGradientIcosahedralShellMap_2_0_macro_3D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_src, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const { { const real_t tmp_coords_jac_0_WHITE_UP = 1.0 / (micro_edges_per_macro_edge_float)*1.0; diff --git a/operators/gradient/noarch/P1ToP2ElementwiseGradientIcosahedralShellMap_2_0_toMatrix_macro_3D.cpp b/operators/gradient/noarch/P1ToP2ElementwiseGradientIcosahedralShellMap_2_0_toMatrix_P1ToP2ElementwiseGradientIcosahedralShellMap_2_0_macro_3D.cpp similarity index 99% rename from operators/gradient/noarch/P1ToP2ElementwiseGradientIcosahedralShellMap_2_0_toMatrix_macro_3D.cpp rename to operators/gradient/noarch/P1ToP2ElementwiseGradientIcosahedralShellMap_2_0_toMatrix_P1ToP2ElementwiseGradientIcosahedralShellMap_2_0_macro_3D.cpp index 980a97ef9e08687b59b5def0cd4a17083de5615f..3f146ad5f5feaf6951b57d164a62043b6561b570 100644 --- a/operators/gradient/noarch/P1ToP2ElementwiseGradientIcosahedralShellMap_2_0_toMatrix_macro_3D.cpp +++ b/operators/gradient/noarch/P1ToP2ElementwiseGradientIcosahedralShellMap_2_0_toMatrix_P1ToP2ElementwiseGradientIcosahedralShellMap_2_0_macro_3D.cpp @@ -45,6 +45,10 @@ + + + + @@ -58,7 +62,7 @@ namespace hyteg { namespace operatorgeneration { -void P1ToP2ElementwiseGradientIcosahedralShellMap_2_0::toMatrix_macro_3D( idx_t * RESTRICT _data_dstEdge, idx_t * RESTRICT _data_dstVertex, idx_t * RESTRICT _data_src, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const +void P1ToP2ElementwiseGradientIcosahedralShellMap_2_0::toMatrix_P1ToP2ElementwiseGradientIcosahedralShellMap_2_0_macro_3D( idx_t * RESTRICT _data_dstEdge, idx_t * RESTRICT _data_dstVertex, idx_t * RESTRICT _data_src, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const { { const real_t tmp_coords_jac_0_WHITE_UP = 1.0 / (micro_edges_per_macro_edge_float)*1.0; diff --git a/operators/gradient/noarch/P1ToP2ElementwiseGradient_0_0_apply_macro_2D.cpp b/operators/gradient/noarch/P1ToP2ElementwiseGradient_0_0_apply_P1ToP2ElementwiseGradient_0_0_macro_2D.cpp similarity index 98% rename from operators/gradient/noarch/P1ToP2ElementwiseGradient_0_0_apply_macro_2D.cpp rename to operators/gradient/noarch/P1ToP2ElementwiseGradient_0_0_apply_P1ToP2ElementwiseGradient_0_0_macro_2D.cpp index d71bcbed48eff2601f587ce83daff379a368ed93..f2b2c2e14071a460807c0e662cc145c275324aa0 100644 --- a/operators/gradient/noarch/P1ToP2ElementwiseGradient_0_0_apply_macro_2D.cpp +++ b/operators/gradient/noarch/P1ToP2ElementwiseGradient_0_0_apply_P1ToP2ElementwiseGradient_0_0_macro_2D.cpp @@ -43,6 +43,10 @@ + + + + @@ -56,7 +60,7 @@ namespace hyteg { namespace operatorgeneration { -void P1ToP2ElementwiseGradient_0_0::apply_macro_2D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_src, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P1ToP2ElementwiseGradient_0_0::apply_P1ToP2ElementwiseGradient_0_0_macro_2D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_src, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t tmp_coords_jac_0_BLUE = 1.0 / (micro_edges_per_macro_edge_float)*1.0; diff --git a/operators/gradient/noarch/P1ToP2ElementwiseGradient_0_0_apply_macro_3D.cpp b/operators/gradient/noarch/P1ToP2ElementwiseGradient_0_0_apply_P1ToP2ElementwiseGradient_0_0_macro_3D.cpp similarity index 99% rename from operators/gradient/noarch/P1ToP2ElementwiseGradient_0_0_apply_macro_3D.cpp rename to operators/gradient/noarch/P1ToP2ElementwiseGradient_0_0_apply_P1ToP2ElementwiseGradient_0_0_macro_3D.cpp index c67eeed47d11a836d5081183b89fc875b71d940a..1c9b043933e1210a1450c220d5b56d32f8ed0d7a 100644 --- a/operators/gradient/noarch/P1ToP2ElementwiseGradient_0_0_apply_macro_3D.cpp +++ b/operators/gradient/noarch/P1ToP2ElementwiseGradient_0_0_apply_P1ToP2ElementwiseGradient_0_0_macro_3D.cpp @@ -43,6 +43,10 @@ + + + + @@ -56,7 +60,7 @@ namespace hyteg { namespace operatorgeneration { -void P1ToP2ElementwiseGradient_0_0::apply_macro_3D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_src, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P1ToP2ElementwiseGradient_0_0::apply_P1ToP2ElementwiseGradient_0_0_macro_3D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_src, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t tmp_coords_jac_0_GREEN_DOWN = 1.0 / (micro_edges_per_macro_edge_float)*1.0; diff --git a/operators/gradient/noarch/P1ToP2ElementwiseGradient_0_0_toMatrix_macro_2D.cpp b/operators/gradient/noarch/P1ToP2ElementwiseGradient_0_0_toMatrix_P1ToP2ElementwiseGradient_0_0_macro_2D.cpp similarity index 98% rename from operators/gradient/noarch/P1ToP2ElementwiseGradient_0_0_toMatrix_macro_2D.cpp rename to operators/gradient/noarch/P1ToP2ElementwiseGradient_0_0_toMatrix_P1ToP2ElementwiseGradient_0_0_macro_2D.cpp index f2d8391d6ea8d8299414eea244abc02ed096c0c4..e5020c451a60d257307b99fab25c9e1127d2bdbd 100644 --- a/operators/gradient/noarch/P1ToP2ElementwiseGradient_0_0_toMatrix_macro_2D.cpp +++ b/operators/gradient/noarch/P1ToP2ElementwiseGradient_0_0_toMatrix_P1ToP2ElementwiseGradient_0_0_macro_2D.cpp @@ -43,6 +43,10 @@ + + + + @@ -56,7 +60,7 @@ namespace hyteg { namespace operatorgeneration { -void P1ToP2ElementwiseGradient_0_0::toMatrix_macro_2D( idx_t * RESTRICT _data_dstEdge, idx_t * RESTRICT _data_dstVertex, idx_t * RESTRICT _data_src, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P1ToP2ElementwiseGradient_0_0::toMatrix_P1ToP2ElementwiseGradient_0_0_macro_2D( idx_t * RESTRICT _data_dstEdge, idx_t * RESTRICT _data_dstVertex, idx_t * RESTRICT _data_src, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t tmp_coords_jac_0_BLUE = 1.0 / (micro_edges_per_macro_edge_float)*1.0; diff --git a/operators/gradient/noarch/P1ToP2ElementwiseGradient_0_0_toMatrix_macro_3D.cpp b/operators/gradient/noarch/P1ToP2ElementwiseGradient_0_0_toMatrix_P1ToP2ElementwiseGradient_0_0_macro_3D.cpp similarity index 99% rename from operators/gradient/noarch/P1ToP2ElementwiseGradient_0_0_toMatrix_macro_3D.cpp rename to operators/gradient/noarch/P1ToP2ElementwiseGradient_0_0_toMatrix_P1ToP2ElementwiseGradient_0_0_macro_3D.cpp index 068e344b85d216541f87585e1ec7e256b2e95606..3246953899c6eaa3c23282c5f4a7962133866015 100644 --- a/operators/gradient/noarch/P1ToP2ElementwiseGradient_0_0_toMatrix_macro_3D.cpp +++ b/operators/gradient/noarch/P1ToP2ElementwiseGradient_0_0_toMatrix_P1ToP2ElementwiseGradient_0_0_macro_3D.cpp @@ -43,6 +43,10 @@ + + + + @@ -56,7 +60,7 @@ namespace hyteg { namespace operatorgeneration { -void P1ToP2ElementwiseGradient_0_0::toMatrix_macro_3D( idx_t * RESTRICT _data_dstEdge, idx_t * RESTRICT _data_dstVertex, idx_t * RESTRICT _data_src, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P1ToP2ElementwiseGradient_0_0::toMatrix_P1ToP2ElementwiseGradient_0_0_macro_3D( idx_t * RESTRICT _data_dstEdge, idx_t * RESTRICT _data_dstVertex, idx_t * RESTRICT _data_src, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t tmp_coords_jac_0_GREEN_DOWN = 1.0 / (micro_edges_per_macro_edge_float)*1.0; diff --git a/operators/gradient/noarch/P1ToP2ElementwiseGradient_1_0_apply_macro_2D.cpp b/operators/gradient/noarch/P1ToP2ElementwiseGradient_1_0_apply_P1ToP2ElementwiseGradient_1_0_macro_2D.cpp similarity index 98% rename from operators/gradient/noarch/P1ToP2ElementwiseGradient_1_0_apply_macro_2D.cpp rename to operators/gradient/noarch/P1ToP2ElementwiseGradient_1_0_apply_P1ToP2ElementwiseGradient_1_0_macro_2D.cpp index 0589a1dff17ca64a48e4d59d60d2d2c5cb3f73d3..1049fd8b6f799d45dce8a0aa33f0d21754f85f8b 100644 --- a/operators/gradient/noarch/P1ToP2ElementwiseGradient_1_0_apply_macro_2D.cpp +++ b/operators/gradient/noarch/P1ToP2ElementwiseGradient_1_0_apply_P1ToP2ElementwiseGradient_1_0_macro_2D.cpp @@ -43,6 +43,10 @@ + + + + @@ -56,7 +60,7 @@ namespace hyteg { namespace operatorgeneration { -void P1ToP2ElementwiseGradient_1_0::apply_macro_2D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_src, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P1ToP2ElementwiseGradient_1_0::apply_P1ToP2ElementwiseGradient_1_0_macro_2D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_src, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t tmp_coords_jac_0_BLUE = 1.0 / (micro_edges_per_macro_edge_float)*1.0; diff --git a/operators/gradient/noarch/P1ToP2ElementwiseGradient_1_0_apply_macro_3D.cpp b/operators/gradient/noarch/P1ToP2ElementwiseGradient_1_0_apply_P1ToP2ElementwiseGradient_1_0_macro_3D.cpp similarity index 99% rename from operators/gradient/noarch/P1ToP2ElementwiseGradient_1_0_apply_macro_3D.cpp rename to operators/gradient/noarch/P1ToP2ElementwiseGradient_1_0_apply_P1ToP2ElementwiseGradient_1_0_macro_3D.cpp index 410c95a83aed309baedce1d443e9917e03fc618f..83284af5b8e18af85db55f83038174e6a3adf56a 100644 --- a/operators/gradient/noarch/P1ToP2ElementwiseGradient_1_0_apply_macro_3D.cpp +++ b/operators/gradient/noarch/P1ToP2ElementwiseGradient_1_0_apply_P1ToP2ElementwiseGradient_1_0_macro_3D.cpp @@ -43,6 +43,10 @@ + + + + @@ -56,7 +60,7 @@ namespace hyteg { namespace operatorgeneration { -void P1ToP2ElementwiseGradient_1_0::apply_macro_3D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_src, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P1ToP2ElementwiseGradient_1_0::apply_P1ToP2ElementwiseGradient_1_0_macro_3D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_src, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t tmp_coords_jac_0_GREEN_DOWN = 1.0 / (micro_edges_per_macro_edge_float)*1.0; diff --git a/operators/gradient/noarch/P1ToP2ElementwiseGradient_1_0_toMatrix_macro_2D.cpp b/operators/gradient/noarch/P1ToP2ElementwiseGradient_1_0_toMatrix_P1ToP2ElementwiseGradient_1_0_macro_2D.cpp similarity index 98% rename from operators/gradient/noarch/P1ToP2ElementwiseGradient_1_0_toMatrix_macro_2D.cpp rename to operators/gradient/noarch/P1ToP2ElementwiseGradient_1_0_toMatrix_P1ToP2ElementwiseGradient_1_0_macro_2D.cpp index ce8b5c70ae7bb3a76e7dc29abd5eebd09a57c7ed..e934ca9372b195cbdd6a72e7583dd31ed9a95fa8 100644 --- a/operators/gradient/noarch/P1ToP2ElementwiseGradient_1_0_toMatrix_macro_2D.cpp +++ b/operators/gradient/noarch/P1ToP2ElementwiseGradient_1_0_toMatrix_P1ToP2ElementwiseGradient_1_0_macro_2D.cpp @@ -43,6 +43,10 @@ + + + + @@ -56,7 +60,7 @@ namespace hyteg { namespace operatorgeneration { -void P1ToP2ElementwiseGradient_1_0::toMatrix_macro_2D( idx_t * RESTRICT _data_dstEdge, idx_t * RESTRICT _data_dstVertex, idx_t * RESTRICT _data_src, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P1ToP2ElementwiseGradient_1_0::toMatrix_P1ToP2ElementwiseGradient_1_0_macro_2D( idx_t * RESTRICT _data_dstEdge, idx_t * RESTRICT _data_dstVertex, idx_t * RESTRICT _data_src, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t tmp_coords_jac_0_BLUE = 1.0 / (micro_edges_per_macro_edge_float)*1.0; diff --git a/operators/gradient/noarch/P1ToP2ElementwiseGradient_1_0_toMatrix_macro_3D.cpp b/operators/gradient/noarch/P1ToP2ElementwiseGradient_1_0_toMatrix_P1ToP2ElementwiseGradient_1_0_macro_3D.cpp similarity index 99% rename from operators/gradient/noarch/P1ToP2ElementwiseGradient_1_0_toMatrix_macro_3D.cpp rename to operators/gradient/noarch/P1ToP2ElementwiseGradient_1_0_toMatrix_P1ToP2ElementwiseGradient_1_0_macro_3D.cpp index 657f8d548740d030f0f7e528e5d6a10618782d23..ba0b7ad9cf5a71e317d16291e745e7be50aaa6e9 100644 --- a/operators/gradient/noarch/P1ToP2ElementwiseGradient_1_0_toMatrix_macro_3D.cpp +++ b/operators/gradient/noarch/P1ToP2ElementwiseGradient_1_0_toMatrix_P1ToP2ElementwiseGradient_1_0_macro_3D.cpp @@ -43,6 +43,10 @@ + + + + @@ -56,7 +60,7 @@ namespace hyteg { namespace operatorgeneration { -void P1ToP2ElementwiseGradient_1_0::toMatrix_macro_3D( idx_t * RESTRICT _data_dstEdge, idx_t * RESTRICT _data_dstVertex, idx_t * RESTRICT _data_src, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P1ToP2ElementwiseGradient_1_0::toMatrix_P1ToP2ElementwiseGradient_1_0_macro_3D( idx_t * RESTRICT _data_dstEdge, idx_t * RESTRICT _data_dstVertex, idx_t * RESTRICT _data_src, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t tmp_coords_jac_0_GREEN_DOWN = 1.0 / (micro_edges_per_macro_edge_float)*1.0; diff --git a/operators/gradient/noarch/P1ToP2ElementwiseGradient_2_0_apply_macro_3D.cpp b/operators/gradient/noarch/P1ToP2ElementwiseGradient_2_0_apply_P1ToP2ElementwiseGradient_2_0_macro_3D.cpp similarity index 99% rename from operators/gradient/noarch/P1ToP2ElementwiseGradient_2_0_apply_macro_3D.cpp rename to operators/gradient/noarch/P1ToP2ElementwiseGradient_2_0_apply_P1ToP2ElementwiseGradient_2_0_macro_3D.cpp index ea2712d237c0c373d1b08a26f7d254a6a7610b87..4f0c7b743ebadc20a4130435837af8131187bdb0 100644 --- a/operators/gradient/noarch/P1ToP2ElementwiseGradient_2_0_apply_macro_3D.cpp +++ b/operators/gradient/noarch/P1ToP2ElementwiseGradient_2_0_apply_P1ToP2ElementwiseGradient_2_0_macro_3D.cpp @@ -43,6 +43,10 @@ + + + + @@ -56,7 +60,7 @@ namespace hyteg { namespace operatorgeneration { -void P1ToP2ElementwiseGradient_2_0::apply_macro_3D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_src, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P1ToP2ElementwiseGradient_2_0::apply_P1ToP2ElementwiseGradient_2_0_macro_3D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_src, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t tmp_coords_jac_0_GREEN_DOWN = 1.0 / (micro_edges_per_macro_edge_float)*1.0; diff --git a/operators/gradient/noarch/P1ToP2ElementwiseGradient_2_0_toMatrix_macro_3D.cpp b/operators/gradient/noarch/P1ToP2ElementwiseGradient_2_0_toMatrix_P1ToP2ElementwiseGradient_2_0_macro_3D.cpp similarity index 99% rename from operators/gradient/noarch/P1ToP2ElementwiseGradient_2_0_toMatrix_macro_3D.cpp rename to operators/gradient/noarch/P1ToP2ElementwiseGradient_2_0_toMatrix_P1ToP2ElementwiseGradient_2_0_macro_3D.cpp index 658ef8071d186ef136933ade08ab5a232376e715..cba146470ee3855a8d4cf1771bd480f973c2347e 100644 --- a/operators/gradient/noarch/P1ToP2ElementwiseGradient_2_0_toMatrix_macro_3D.cpp +++ b/operators/gradient/noarch/P1ToP2ElementwiseGradient_2_0_toMatrix_P1ToP2ElementwiseGradient_2_0_macro_3D.cpp @@ -43,6 +43,10 @@ + + + + @@ -56,7 +60,7 @@ namespace hyteg { namespace operatorgeneration { -void P1ToP2ElementwiseGradient_2_0::toMatrix_macro_3D( idx_t * RESTRICT _data_dstEdge, idx_t * RESTRICT _data_dstVertex, idx_t * RESTRICT _data_src, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P1ToP2ElementwiseGradient_2_0::toMatrix_P1ToP2ElementwiseGradient_2_0_macro_3D( idx_t * RESTRICT _data_dstEdge, idx_t * RESTRICT _data_dstVertex, idx_t * RESTRICT _data_src, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t tmp_coords_jac_0_GREEN_DOWN = 1.0 / (micro_edges_per_macro_edge_float)*1.0; diff --git a/operators/k_mass/CMakeLists.txt b/operators/k_mass/CMakeLists.txt index 71f23f388772747429871dba6e801b9c1556fcd4..51d608a7f12707809818d6bb6a568e277341caf6 100644 --- a/operators/k_mass/CMakeLists.txt +++ b/operators/k_mass/CMakeLists.txt @@ -23,62 +23,62 @@ add_library( opgen-k_mass if(HYTEG_BUILD_WITH_AVX AND WALBERLA_DOUBLE_ACCURACY) target_sources(opgen-k_mass PRIVATE - avx/P1ElementwiseKMassAnnulusMap_apply_macro_2D.cpp - avx/P1ElementwiseKMassAnnulusMap_computeInverseDiagonalOperatorValues_macro_2D.cpp - avx/P1ElementwiseKMassIcosahedralShellMap_apply_macro_3D.cpp - avx/P1ElementwiseKMassIcosahedralShellMap_computeInverseDiagonalOperatorValues_macro_3D.cpp - avx/P1ElementwiseKMass_apply_macro_2D.cpp - avx/P1ElementwiseKMass_apply_macro_3D.cpp - avx/P1ElementwiseKMass_computeInverseDiagonalOperatorValues_macro_2D.cpp - avx/P1ElementwiseKMass_computeInverseDiagonalOperatorValues_macro_3D.cpp - avx/P2ElementwiseKMassAnnulusMap_apply_macro_2D.cpp - avx/P2ElementwiseKMassAnnulusMap_computeInverseDiagonalOperatorValues_macro_2D.cpp - avx/P2ElementwiseKMassIcosahedralShellMap_apply_macro_3D.cpp - avx/P2ElementwiseKMassIcosahedralShellMap_computeInverseDiagonalOperatorValues_macro_3D.cpp - avx/P2ElementwiseKMass_apply_macro_2D.cpp - avx/P2ElementwiseKMass_apply_macro_3D.cpp - avx/P2ElementwiseKMass_computeInverseDiagonalOperatorValues_macro_2D.cpp - avx/P2ElementwiseKMass_computeInverseDiagonalOperatorValues_macro_3D.cpp - avx/P2ToP1ElementwiseKMassAnnulusMap_apply_macro_2D.cpp - avx/P2ToP1ElementwiseKMassIcosahedralShellMap_apply_macro_3D.cpp - avx/P2ToP1ElementwiseKMass_apply_macro_2D.cpp - avx/P2ToP1ElementwiseKMass_apply_macro_3D.cpp - noarch/P1ElementwiseKMassAnnulusMap_toMatrix_macro_2D.cpp - noarch/P1ElementwiseKMassIcosahedralShellMap_toMatrix_macro_3D.cpp - noarch/P1ElementwiseKMass_toMatrix_macro_2D.cpp - noarch/P1ElementwiseKMass_toMatrix_macro_3D.cpp - noarch/P2ElementwiseKMassAnnulusMap_toMatrix_macro_2D.cpp - noarch/P2ElementwiseKMassIcosahedralShellMap_toMatrix_macro_3D.cpp - noarch/P2ElementwiseKMass_toMatrix_macro_2D.cpp - noarch/P2ElementwiseKMass_toMatrix_macro_3D.cpp - noarch/P2ToP1ElementwiseKMassAnnulusMap_toMatrix_macro_2D.cpp - noarch/P2ToP1ElementwiseKMassIcosahedralShellMap_toMatrix_macro_3D.cpp - noarch/P2ToP1ElementwiseKMass_toMatrix_macro_2D.cpp - noarch/P2ToP1ElementwiseKMass_toMatrix_macro_3D.cpp + avx/P1ElementwiseKMassAnnulusMap_apply_P1ElementwiseKMassAnnulusMap_macro_2D.cpp + avx/P1ElementwiseKMassAnnulusMap_computeInverseDiagonalOperatorValues_P1ElementwiseKMassAnnulusMap_macro_2D.cpp + avx/P1ElementwiseKMassIcosahedralShellMap_apply_P1ElementwiseKMassIcosahedralShellMap_macro_3D.cpp + avx/P1ElementwiseKMassIcosahedralShellMap_computeInverseDiagonalOperatorValues_P1ElementwiseKMassIcosahedralShellMap_macro_3D.cpp + avx/P1ElementwiseKMass_apply_P1ElementwiseKMass_macro_2D.cpp + avx/P1ElementwiseKMass_apply_P1ElementwiseKMass_macro_3D.cpp + avx/P1ElementwiseKMass_computeInverseDiagonalOperatorValues_P1ElementwiseKMass_macro_2D.cpp + avx/P1ElementwiseKMass_computeInverseDiagonalOperatorValues_P1ElementwiseKMass_macro_3D.cpp + avx/P2ElementwiseKMassAnnulusMap_apply_P2ElementwiseKMassAnnulusMap_macro_2D.cpp + avx/P2ElementwiseKMassAnnulusMap_computeInverseDiagonalOperatorValues_P2ElementwiseKMassAnnulusMap_macro_2D.cpp + avx/P2ElementwiseKMassIcosahedralShellMap_apply_P2ElementwiseKMassIcosahedralShellMap_macro_3D.cpp + avx/P2ElementwiseKMassIcosahedralShellMap_computeInverseDiagonalOperatorValues_P2ElementwiseKMassIcosahedralShellMap_macro_3D.cpp + avx/P2ElementwiseKMass_apply_P2ElementwiseKMass_macro_2D.cpp + avx/P2ElementwiseKMass_apply_P2ElementwiseKMass_macro_3D.cpp + avx/P2ElementwiseKMass_computeInverseDiagonalOperatorValues_P2ElementwiseKMass_macro_2D.cpp + avx/P2ElementwiseKMass_computeInverseDiagonalOperatorValues_P2ElementwiseKMass_macro_3D.cpp + avx/P2ToP1ElementwiseKMassAnnulusMap_apply_P2ToP1ElementwiseKMassAnnulusMap_macro_2D.cpp + avx/P2ToP1ElementwiseKMassIcosahedralShellMap_apply_P2ToP1ElementwiseKMassIcosahedralShellMap_macro_3D.cpp + avx/P2ToP1ElementwiseKMass_apply_P2ToP1ElementwiseKMass_macro_2D.cpp + avx/P2ToP1ElementwiseKMass_apply_P2ToP1ElementwiseKMass_macro_3D.cpp + noarch/P1ElementwiseKMassAnnulusMap_toMatrix_P1ElementwiseKMassAnnulusMap_macro_2D.cpp + noarch/P1ElementwiseKMassIcosahedralShellMap_toMatrix_P1ElementwiseKMassIcosahedralShellMap_macro_3D.cpp + noarch/P1ElementwiseKMass_toMatrix_P1ElementwiseKMass_macro_2D.cpp + noarch/P1ElementwiseKMass_toMatrix_P1ElementwiseKMass_macro_3D.cpp + noarch/P2ElementwiseKMassAnnulusMap_toMatrix_P2ElementwiseKMassAnnulusMap_macro_2D.cpp + noarch/P2ElementwiseKMassIcosahedralShellMap_toMatrix_P2ElementwiseKMassIcosahedralShellMap_macro_3D.cpp + noarch/P2ElementwiseKMass_toMatrix_P2ElementwiseKMass_macro_2D.cpp + noarch/P2ElementwiseKMass_toMatrix_P2ElementwiseKMass_macro_3D.cpp + noarch/P2ToP1ElementwiseKMassAnnulusMap_toMatrix_P2ToP1ElementwiseKMassAnnulusMap_macro_2D.cpp + noarch/P2ToP1ElementwiseKMassIcosahedralShellMap_toMatrix_P2ToP1ElementwiseKMassIcosahedralShellMap_macro_3D.cpp + noarch/P2ToP1ElementwiseKMass_toMatrix_P2ToP1ElementwiseKMass_macro_2D.cpp + noarch/P2ToP1ElementwiseKMass_toMatrix_P2ToP1ElementwiseKMass_macro_3D.cpp ) set_source_files_properties( - avx/P1ElementwiseKMassAnnulusMap_apply_macro_2D.cpp - avx/P1ElementwiseKMassAnnulusMap_computeInverseDiagonalOperatorValues_macro_2D.cpp - avx/P1ElementwiseKMassIcosahedralShellMap_apply_macro_3D.cpp - avx/P1ElementwiseKMassIcosahedralShellMap_computeInverseDiagonalOperatorValues_macro_3D.cpp - avx/P1ElementwiseKMass_apply_macro_2D.cpp - avx/P1ElementwiseKMass_apply_macro_3D.cpp - avx/P1ElementwiseKMass_computeInverseDiagonalOperatorValues_macro_2D.cpp - avx/P1ElementwiseKMass_computeInverseDiagonalOperatorValues_macro_3D.cpp - avx/P2ElementwiseKMassAnnulusMap_apply_macro_2D.cpp - avx/P2ElementwiseKMassAnnulusMap_computeInverseDiagonalOperatorValues_macro_2D.cpp - avx/P2ElementwiseKMassIcosahedralShellMap_apply_macro_3D.cpp - avx/P2ElementwiseKMassIcosahedralShellMap_computeInverseDiagonalOperatorValues_macro_3D.cpp - avx/P2ElementwiseKMass_apply_macro_2D.cpp - avx/P2ElementwiseKMass_apply_macro_3D.cpp - avx/P2ElementwiseKMass_computeInverseDiagonalOperatorValues_macro_2D.cpp - avx/P2ElementwiseKMass_computeInverseDiagonalOperatorValues_macro_3D.cpp - avx/P2ToP1ElementwiseKMassAnnulusMap_apply_macro_2D.cpp - avx/P2ToP1ElementwiseKMassIcosahedralShellMap_apply_macro_3D.cpp - avx/P2ToP1ElementwiseKMass_apply_macro_2D.cpp - avx/P2ToP1ElementwiseKMass_apply_macro_3D.cpp + avx/P1ElementwiseKMassAnnulusMap_apply_P1ElementwiseKMassAnnulusMap_macro_2D.cpp + avx/P1ElementwiseKMassAnnulusMap_computeInverseDiagonalOperatorValues_P1ElementwiseKMassAnnulusMap_macro_2D.cpp + avx/P1ElementwiseKMassIcosahedralShellMap_apply_P1ElementwiseKMassIcosahedralShellMap_macro_3D.cpp + avx/P1ElementwiseKMassIcosahedralShellMap_computeInverseDiagonalOperatorValues_P1ElementwiseKMassIcosahedralShellMap_macro_3D.cpp + avx/P1ElementwiseKMass_apply_P1ElementwiseKMass_macro_2D.cpp + avx/P1ElementwiseKMass_apply_P1ElementwiseKMass_macro_3D.cpp + avx/P1ElementwiseKMass_computeInverseDiagonalOperatorValues_P1ElementwiseKMass_macro_2D.cpp + avx/P1ElementwiseKMass_computeInverseDiagonalOperatorValues_P1ElementwiseKMass_macro_3D.cpp + avx/P2ElementwiseKMassAnnulusMap_apply_P2ElementwiseKMassAnnulusMap_macro_2D.cpp + avx/P2ElementwiseKMassAnnulusMap_computeInverseDiagonalOperatorValues_P2ElementwiseKMassAnnulusMap_macro_2D.cpp + avx/P2ElementwiseKMassIcosahedralShellMap_apply_P2ElementwiseKMassIcosahedralShellMap_macro_3D.cpp + avx/P2ElementwiseKMassIcosahedralShellMap_computeInverseDiagonalOperatorValues_P2ElementwiseKMassIcosahedralShellMap_macro_3D.cpp + avx/P2ElementwiseKMass_apply_P2ElementwiseKMass_macro_2D.cpp + avx/P2ElementwiseKMass_apply_P2ElementwiseKMass_macro_3D.cpp + avx/P2ElementwiseKMass_computeInverseDiagonalOperatorValues_P2ElementwiseKMass_macro_2D.cpp + avx/P2ElementwiseKMass_computeInverseDiagonalOperatorValues_P2ElementwiseKMass_macro_3D.cpp + avx/P2ToP1ElementwiseKMassAnnulusMap_apply_P2ToP1ElementwiseKMassAnnulusMap_macro_2D.cpp + avx/P2ToP1ElementwiseKMassIcosahedralShellMap_apply_P2ToP1ElementwiseKMassIcosahedralShellMap_macro_3D.cpp + avx/P2ToP1ElementwiseKMass_apply_P2ToP1ElementwiseKMass_macro_2D.cpp + avx/P2ToP1ElementwiseKMass_apply_P2ToP1ElementwiseKMass_macro_3D.cpp PROPERTIES COMPILE_OPTIONS ${HYTEG_COMPILER_NATIVE_FLAGS} ) @@ -89,38 +89,38 @@ else() target_sources(opgen-k_mass PRIVATE - noarch/P1ElementwiseKMassAnnulusMap_apply_macro_2D.cpp - noarch/P1ElementwiseKMassAnnulusMap_computeInverseDiagonalOperatorValues_macro_2D.cpp - noarch/P1ElementwiseKMassAnnulusMap_toMatrix_macro_2D.cpp - noarch/P1ElementwiseKMassIcosahedralShellMap_apply_macro_3D.cpp - noarch/P1ElementwiseKMassIcosahedralShellMap_computeInverseDiagonalOperatorValues_macro_3D.cpp - noarch/P1ElementwiseKMassIcosahedralShellMap_toMatrix_macro_3D.cpp - noarch/P1ElementwiseKMass_apply_macro_2D.cpp - noarch/P1ElementwiseKMass_apply_macro_3D.cpp - noarch/P1ElementwiseKMass_computeInverseDiagonalOperatorValues_macro_2D.cpp - noarch/P1ElementwiseKMass_computeInverseDiagonalOperatorValues_macro_3D.cpp - noarch/P1ElementwiseKMass_toMatrix_macro_2D.cpp - noarch/P1ElementwiseKMass_toMatrix_macro_3D.cpp - noarch/P2ElementwiseKMassAnnulusMap_apply_macro_2D.cpp - noarch/P2ElementwiseKMassAnnulusMap_computeInverseDiagonalOperatorValues_macro_2D.cpp - noarch/P2ElementwiseKMassAnnulusMap_toMatrix_macro_2D.cpp - noarch/P2ElementwiseKMassIcosahedralShellMap_apply_macro_3D.cpp - noarch/P2ElementwiseKMassIcosahedralShellMap_computeInverseDiagonalOperatorValues_macro_3D.cpp - noarch/P2ElementwiseKMassIcosahedralShellMap_toMatrix_macro_3D.cpp - noarch/P2ElementwiseKMass_apply_macro_2D.cpp - noarch/P2ElementwiseKMass_apply_macro_3D.cpp - noarch/P2ElementwiseKMass_computeInverseDiagonalOperatorValues_macro_2D.cpp - noarch/P2ElementwiseKMass_computeInverseDiagonalOperatorValues_macro_3D.cpp - noarch/P2ElementwiseKMass_toMatrix_macro_2D.cpp - noarch/P2ElementwiseKMass_toMatrix_macro_3D.cpp - noarch/P2ToP1ElementwiseKMassAnnulusMap_apply_macro_2D.cpp - noarch/P2ToP1ElementwiseKMassAnnulusMap_toMatrix_macro_2D.cpp - noarch/P2ToP1ElementwiseKMassIcosahedralShellMap_apply_macro_3D.cpp - noarch/P2ToP1ElementwiseKMassIcosahedralShellMap_toMatrix_macro_3D.cpp - noarch/P2ToP1ElementwiseKMass_apply_macro_2D.cpp - noarch/P2ToP1ElementwiseKMass_apply_macro_3D.cpp - noarch/P2ToP1ElementwiseKMass_toMatrix_macro_2D.cpp - noarch/P2ToP1ElementwiseKMass_toMatrix_macro_3D.cpp + noarch/P1ElementwiseKMassAnnulusMap_apply_P1ElementwiseKMassAnnulusMap_macro_2D.cpp + noarch/P1ElementwiseKMassAnnulusMap_computeInverseDiagonalOperatorValues_P1ElementwiseKMassAnnulusMap_macro_2D.cpp + noarch/P1ElementwiseKMassAnnulusMap_toMatrix_P1ElementwiseKMassAnnulusMap_macro_2D.cpp + noarch/P1ElementwiseKMassIcosahedralShellMap_apply_P1ElementwiseKMassIcosahedralShellMap_macro_3D.cpp + noarch/P1ElementwiseKMassIcosahedralShellMap_computeInverseDiagonalOperatorValues_P1ElementwiseKMassIcosahedralShellMap_macro_3D.cpp + noarch/P1ElementwiseKMassIcosahedralShellMap_toMatrix_P1ElementwiseKMassIcosahedralShellMap_macro_3D.cpp + noarch/P1ElementwiseKMass_apply_P1ElementwiseKMass_macro_2D.cpp + noarch/P1ElementwiseKMass_apply_P1ElementwiseKMass_macro_3D.cpp + noarch/P1ElementwiseKMass_computeInverseDiagonalOperatorValues_P1ElementwiseKMass_macro_2D.cpp + noarch/P1ElementwiseKMass_computeInverseDiagonalOperatorValues_P1ElementwiseKMass_macro_3D.cpp + noarch/P1ElementwiseKMass_toMatrix_P1ElementwiseKMass_macro_2D.cpp + noarch/P1ElementwiseKMass_toMatrix_P1ElementwiseKMass_macro_3D.cpp + noarch/P2ElementwiseKMassAnnulusMap_apply_P2ElementwiseKMassAnnulusMap_macro_2D.cpp + noarch/P2ElementwiseKMassAnnulusMap_computeInverseDiagonalOperatorValues_P2ElementwiseKMassAnnulusMap_macro_2D.cpp + noarch/P2ElementwiseKMassAnnulusMap_toMatrix_P2ElementwiseKMassAnnulusMap_macro_2D.cpp + noarch/P2ElementwiseKMassIcosahedralShellMap_apply_P2ElementwiseKMassIcosahedralShellMap_macro_3D.cpp + noarch/P2ElementwiseKMassIcosahedralShellMap_computeInverseDiagonalOperatorValues_P2ElementwiseKMassIcosahedralShellMap_macro_3D.cpp + noarch/P2ElementwiseKMassIcosahedralShellMap_toMatrix_P2ElementwiseKMassIcosahedralShellMap_macro_3D.cpp + noarch/P2ElementwiseKMass_apply_P2ElementwiseKMass_macro_2D.cpp + noarch/P2ElementwiseKMass_apply_P2ElementwiseKMass_macro_3D.cpp + noarch/P2ElementwiseKMass_computeInverseDiagonalOperatorValues_P2ElementwiseKMass_macro_2D.cpp + noarch/P2ElementwiseKMass_computeInverseDiagonalOperatorValues_P2ElementwiseKMass_macro_3D.cpp + noarch/P2ElementwiseKMass_toMatrix_P2ElementwiseKMass_macro_2D.cpp + noarch/P2ElementwiseKMass_toMatrix_P2ElementwiseKMass_macro_3D.cpp + noarch/P2ToP1ElementwiseKMassAnnulusMap_apply_P2ToP1ElementwiseKMassAnnulusMap_macro_2D.cpp + noarch/P2ToP1ElementwiseKMassAnnulusMap_toMatrix_P2ToP1ElementwiseKMassAnnulusMap_macro_2D.cpp + noarch/P2ToP1ElementwiseKMassIcosahedralShellMap_apply_P2ToP1ElementwiseKMassIcosahedralShellMap_macro_3D.cpp + noarch/P2ToP1ElementwiseKMassIcosahedralShellMap_toMatrix_P2ToP1ElementwiseKMassIcosahedralShellMap_macro_3D.cpp + noarch/P2ToP1ElementwiseKMass_apply_P2ToP1ElementwiseKMass_macro_2D.cpp + noarch/P2ToP1ElementwiseKMass_apply_P2ToP1ElementwiseKMass_macro_3D.cpp + noarch/P2ToP1ElementwiseKMass_toMatrix_P2ToP1ElementwiseKMass_macro_2D.cpp + noarch/P2ToP1ElementwiseKMass_toMatrix_P2ToP1ElementwiseKMass_macro_3D.cpp ) endif() diff --git a/operators/k_mass/P1ElementwiseKMass.cpp b/operators/k_mass/P1ElementwiseKMass.cpp index 03b70d41f8695086b623293ed5826caf344ad703..a77a51b288746267f2094f154ab22b0031734536 100644 --- a/operators/k_mass/P1ElementwiseKMass.cpp +++ b/operators/k_mass/P1ElementwiseKMass.cpp @@ -133,7 +133,7 @@ void P1ElementwiseKMass::apply( const P1Function< real_t >& src, this->timingTree_->start( "kernel" ); - apply_macro_3D( + apply_P1ElementwiseKMass_macro_3D( _data_dst, _data_k, @@ -152,6 +152,7 @@ void P1ElementwiseKMass::apply( const P1Function< real_t >& src, macro_vertex_coord_id_3comp2, micro_edges_per_macro_edge, micro_edges_per_macro_edge_float ); + this->timingTree_->stop( "kernel" ); } @@ -200,7 +201,7 @@ void P1ElementwiseKMass::apply( const P1Function< real_t >& src, this->timingTree_->start( "kernel" ); - apply_macro_2D( + apply_P1ElementwiseKMass_macro_2D( _data_dst, _data_k, @@ -213,6 +214,7 @@ void P1ElementwiseKMass::apply( const P1Function< real_t >& src, macro_vertex_coord_id_2comp1, micro_edges_per_macro_edge, micro_edges_per_macro_edge_float ); + this->timingTree_->stop( "kernel" ); } @@ -276,7 +278,7 @@ void P1ElementwiseKMass::toMatrix( const std::shared_ptr< SparseMatrixProxy >& m this->timingTree_->start( "kernel" ); - toMatrix_macro_3D( + toMatrix_P1ElementwiseKMass_macro_3D( _data_dst, _data_k, @@ -296,6 +298,7 @@ void P1ElementwiseKMass::toMatrix( const std::shared_ptr< SparseMatrixProxy >& m mat, micro_edges_per_macro_edge, micro_edges_per_macro_edge_float ); + this->timingTree_->stop( "kernel" ); } } @@ -325,7 +328,7 @@ void P1ElementwiseKMass::toMatrix( const std::shared_ptr< SparseMatrixProxy >& m this->timingTree_->start( "kernel" ); - toMatrix_macro_2D( + toMatrix_P1ElementwiseKMass_macro_2D( _data_dst, _data_k, @@ -339,6 +342,7 @@ void P1ElementwiseKMass::toMatrix( const std::shared_ptr< SparseMatrixProxy >& m mat, micro_edges_per_macro_edge, micro_edges_per_macro_edge_float ); + this->timingTree_->stop( "kernel" ); } } @@ -390,7 +394,7 @@ void P1ElementwiseKMass::computeInverseDiagonalOperatorValues() this->timingTree_->start( "kernel" ); - computeInverseDiagonalOperatorValues_macro_3D( + computeInverseDiagonalOperatorValues_P1ElementwiseKMass_macro_3D( _data_invDiag_, _data_k, @@ -408,6 +412,7 @@ void P1ElementwiseKMass::computeInverseDiagonalOperatorValues() macro_vertex_coord_id_3comp2, micro_edges_per_macro_edge, micro_edges_per_macro_edge_float ); + this->timingTree_->stop( "kernel" ); } @@ -447,7 +452,7 @@ void P1ElementwiseKMass::computeInverseDiagonalOperatorValues() this->timingTree_->start( "kernel" ); - computeInverseDiagonalOperatorValues_macro_2D( + computeInverseDiagonalOperatorValues_P1ElementwiseKMass_macro_2D( _data_invDiag_, _data_k, @@ -459,6 +464,7 @@ void P1ElementwiseKMass::computeInverseDiagonalOperatorValues() macro_vertex_coord_id_2comp1, micro_edges_per_macro_edge, micro_edges_per_macro_edge_float ); + this->timingTree_->stop( "kernel" ); } diff --git a/operators/k_mass/P1ElementwiseKMass.hpp b/operators/k_mass/P1ElementwiseKMass.hpp index c79a74983a8ecc01fcc247098fb0fd758ca14eaf..9252db2e19b25264b8556ec323db4842c3102d3e 100644 --- a/operators/k_mass/P1ElementwiseKMass.hpp +++ b/operators/k_mass/P1ElementwiseKMass.hpp @@ -29,6 +29,7 @@ #include "core/DataTypes.h" #include "hyteg/LikwidWrapper.hpp" +#include "hyteg/boundary/BoundaryConditions.hpp" #include "hyteg/communication/Syncing.hpp" #include "hyteg/edgedofspace/EdgeDoFMacroCell.hpp" #include "hyteg/operators/Operator.hpp" @@ -36,6 +37,7 @@ #include "hyteg/primitivestorage/PrimitiveStorage.hpp" #include "hyteg/solvers/Smoothables.hpp" #include "hyteg/sparseassembly/SparseMatrixProxy.hpp" +#include "hyteg/types/types.hpp" #define FUNC_PREFIX @@ -82,126 +84,155 @@ class P1ElementwiseKMass : public Operator< P1Function< real_t >, P1Function< re protected: private: - /// Kernel type: apply + /// Integral: P1ElementwiseKMass + /// - volume element: triangle, dim: 2, vertices: 3, spacedim: 2 + /// - kernel type: apply + /// - loop strategy: SAWTOOTH /// - quadrature rule: Dunavant 3 | points: 4, degree: 3 + /// - blending map: IdentityMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 81 73 12 0 0 0 0 1 - void apply_macro_2D( real_t* RESTRICT _data_dst, - real_t* RESTRICT _data_k, - real_t* RESTRICT _data_src, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float ) const; - /// Kernel type: apply + void apply_P1ElementwiseKMass_macro_2D( real_t* RESTRICT _data_dst, + real_t* RESTRICT _data_k, + real_t* RESTRICT _data_src, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float ) const; + + /// Integral: P1ElementwiseKMass + /// - volume element: tetrahedron, dim: 3, vertices: 4, spacedim: 3 + /// - kernel type: apply + /// - loop strategy: SAWTOOTH /// - quadrature rule: Hammer-Marlowe-Stroud 3 | points: 5, degree: 3 + /// - blending map: IdentityMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 189 163 36 0 0 0 0 1 - void apply_macro_3D( real_t* RESTRICT _data_dst, - real_t* RESTRICT _data_k, - real_t* RESTRICT _data_src, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_0comp2, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_1comp2, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - real_t macro_vertex_coord_id_2comp2, - real_t macro_vertex_coord_id_3comp0, - real_t macro_vertex_coord_id_3comp1, - real_t macro_vertex_coord_id_3comp2, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float ) const; - /// Kernel type: toMatrix + void apply_P1ElementwiseKMass_macro_3D( real_t* RESTRICT _data_dst, + real_t* RESTRICT _data_k, + real_t* RESTRICT _data_src, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_0comp2, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_1comp2, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + real_t macro_vertex_coord_id_2comp2, + real_t macro_vertex_coord_id_3comp0, + real_t macro_vertex_coord_id_3comp1, + real_t macro_vertex_coord_id_3comp2, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float ) const; + + /// Integral: P1ElementwiseKMass + /// - volume element: triangle, dim: 2, vertices: 3, spacedim: 2 + /// - kernel type: toMatrix + /// - loop strategy: SAWTOOTH /// - quadrature rule: Dunavant 3 | points: 4, degree: 3 + /// - blending map: IdentityMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 72 64 12 0 0 0 0 4 - void toMatrix_macro_2D( idx_t* RESTRICT _data_dst, - real_t* RESTRICT _data_k, - idx_t* RESTRICT _data_src, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - std::shared_ptr< SparseMatrixProxy > mat, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float ) const; - /// Kernel type: toMatrix + void toMatrix_P1ElementwiseKMass_macro_2D( idx_t* RESTRICT _data_dst, + real_t* RESTRICT _data_k, + idx_t* RESTRICT _data_src, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + std::shared_ptr< SparseMatrixProxy > mat, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float ) const; + + /// Integral: P1ElementwiseKMass + /// - volume element: tetrahedron, dim: 3, vertices: 4, spacedim: 3 + /// - kernel type: toMatrix + /// - loop strategy: SAWTOOTH /// - quadrature rule: Hammer-Marlowe-Stroud 3 | points: 5, degree: 3 + /// - blending map: IdentityMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 173 147 36 0 0 0 0 4 - void toMatrix_macro_3D( idx_t* RESTRICT _data_dst, - real_t* RESTRICT _data_k, - idx_t* RESTRICT _data_src, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_0comp2, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_1comp2, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - real_t macro_vertex_coord_id_2comp2, - real_t macro_vertex_coord_id_3comp0, - real_t macro_vertex_coord_id_3comp1, - real_t macro_vertex_coord_id_3comp2, - std::shared_ptr< SparseMatrixProxy > mat, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float ) const; - /// Kernel type: computeInverseDiagonalOperatorValues + void toMatrix_P1ElementwiseKMass_macro_3D( idx_t* RESTRICT _data_dst, + real_t* RESTRICT _data_k, + idx_t* RESTRICT _data_src, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_0comp2, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_1comp2, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + real_t macro_vertex_coord_id_2comp2, + real_t macro_vertex_coord_id_3comp0, + real_t macro_vertex_coord_id_3comp1, + real_t macro_vertex_coord_id_3comp2, + std::shared_ptr< SparseMatrixProxy > mat, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float ) const; + + /// Integral: P1ElementwiseKMass + /// - volume element: triangle, dim: 2, vertices: 3, spacedim: 2 + /// - kernel type: computeInverseDiagonalOperatorValues + /// - loop strategy: SAWTOOTH /// - quadrature rule: Dunavant 3 | points: 4, degree: 3 + /// - blending map: IdentityMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 63 52 12 0 0 0 0 1 - void computeInverseDiagonalOperatorValues_macro_2D( real_t* RESTRICT _data_invDiag_, - real_t* RESTRICT _data_k, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float ) const; - /// Kernel type: computeInverseDiagonalOperatorValues + void computeInverseDiagonalOperatorValues_P1ElementwiseKMass_macro_2D( real_t* RESTRICT _data_invDiag_, + real_t* RESTRICT _data_k, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float ) const; + + /// Integral: P1ElementwiseKMass + /// - volume element: tetrahedron, dim: 3, vertices: 4, spacedim: 3 + /// - kernel type: computeInverseDiagonalOperatorValues + /// - loop strategy: SAWTOOTH /// - quadrature rule: Hammer-Marlowe-Stroud 3 | points: 5, degree: 3 + /// - blending map: IdentityMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 147 117 36 0 0 0 0 1 - void computeInverseDiagonalOperatorValues_macro_3D( real_t* RESTRICT _data_invDiag_, - real_t* RESTRICT _data_k, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_0comp2, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_1comp2, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - real_t macro_vertex_coord_id_2comp2, - real_t macro_vertex_coord_id_3comp0, - real_t macro_vertex_coord_id_3comp1, - real_t macro_vertex_coord_id_3comp2, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float ) const; + void computeInverseDiagonalOperatorValues_P1ElementwiseKMass_macro_3D( real_t* RESTRICT _data_invDiag_, + real_t* RESTRICT _data_k, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_0comp2, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_1comp2, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + real_t macro_vertex_coord_id_2comp2, + real_t macro_vertex_coord_id_3comp0, + real_t macro_vertex_coord_id_3comp1, + real_t macro_vertex_coord_id_3comp2, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float ) const; std::shared_ptr< P1Function< real_t > > invDiag_; P1Function< real_t > k; diff --git a/operators/k_mass/P1ElementwiseKMassAnnulusMap.cpp b/operators/k_mass/P1ElementwiseKMassAnnulusMap.cpp index 68c62d7d5ba75c569a2c72d18668c8f21436fe03..28e01722e768e7a942a4e7f2d12a9c2540e18a7a 100644 --- a/operators/k_mass/P1ElementwiseKMassAnnulusMap.cpp +++ b/operators/k_mass/P1ElementwiseKMassAnnulusMap.cpp @@ -135,7 +135,7 @@ void P1ElementwiseKMassAnnulusMap::apply( const P1Function< real_t >& src, this->timingTree_->start( "kernel" ); - apply_macro_2D( + apply_P1ElementwiseKMassAnnulusMap_macro_2D( _data_dst, _data_k, @@ -156,6 +156,7 @@ void P1ElementwiseKMassAnnulusMap::apply( const P1Function< real_t >& src, refVertex_1, thrVertex_0, thrVertex_1 ); + this->timingTree_->stop( "kernel" ); } @@ -232,7 +233,7 @@ void P1ElementwiseKMassAnnulusMap::toMatrix( const std::shared_ptr< SparseMatrix this->timingTree_->start( "kernel" ); - toMatrix_macro_2D( + toMatrix_P1ElementwiseKMassAnnulusMap_macro_2D( _data_dst, _data_k, @@ -254,6 +255,7 @@ void P1ElementwiseKMassAnnulusMap::toMatrix( const std::shared_ptr< SparseMatrix refVertex_1, thrVertex_0, thrVertex_1 ); + this->timingTree_->stop( "kernel" ); } } @@ -319,7 +321,7 @@ void P1ElementwiseKMassAnnulusMap::computeInverseDiagonalOperatorValues() this->timingTree_->start( "kernel" ); - computeInverseDiagonalOperatorValues_macro_2D( + computeInverseDiagonalOperatorValues_P1ElementwiseKMassAnnulusMap_macro_2D( _data_invDiag_, _data_k, @@ -339,6 +341,7 @@ void P1ElementwiseKMassAnnulusMap::computeInverseDiagonalOperatorValues() refVertex_1, thrVertex_0, thrVertex_1 ); + this->timingTree_->stop( "kernel" ); } diff --git a/operators/k_mass/P1ElementwiseKMassAnnulusMap.hpp b/operators/k_mass/P1ElementwiseKMassAnnulusMap.hpp index e841291809649d0bb1b265799924d92a8973e9c1..bf249e6088d800c44d6a48c02d42927f88206a63 100644 --- a/operators/k_mass/P1ElementwiseKMassAnnulusMap.hpp +++ b/operators/k_mass/P1ElementwiseKMassAnnulusMap.hpp @@ -29,6 +29,7 @@ #include "core/DataTypes.h" #include "hyteg/LikwidWrapper.hpp" +#include "hyteg/boundary/BoundaryConditions.hpp" #include "hyteg/communication/Syncing.hpp" #include "hyteg/edgedofspace/EdgeDoFMacroCell.hpp" #include "hyteg/geometry/AnnulusMap.hpp" @@ -37,6 +38,7 @@ #include "hyteg/primitivestorage/PrimitiveStorage.hpp" #include "hyteg/solvers/Smoothables.hpp" #include "hyteg/sparseassembly/SparseMatrixProxy.hpp" +#include "hyteg/types/types.hpp" #define FUNC_PREFIX @@ -83,81 +85,95 @@ class P1ElementwiseKMassAnnulusMap : public Operator< P1Function< real_t >, P1Fu protected: private: - /// Kernel type: apply + /// Integral: P1ElementwiseKMassAnnulusMap + /// - volume element: triangle, dim: 2, vertices: 3, spacedim: 2 + /// - kernel type: apply + /// - loop strategy: SAWTOOTH /// - quadrature rule: Dunavant 3 | points: 4, degree: 3 + /// - blending map: AnnulusMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 333 513 24 20 4 0 0 1 - void apply_macro_2D( real_t* RESTRICT _data_dst, - real_t* RESTRICT _data_k, - real_t* RESTRICT _data_src, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float, - real_t radRayVertex, - real_t radRefVertex, - real_t rayVertex_0, - real_t rayVertex_1, - real_t refVertex_0, - real_t refVertex_1, - real_t thrVertex_0, - real_t thrVertex_1 ) const; - /// Kernel type: toMatrix + void apply_P1ElementwiseKMassAnnulusMap_macro_2D( real_t* RESTRICT _data_dst, + real_t* RESTRICT _data_k, + real_t* RESTRICT _data_src, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float, + real_t radRayVertex, + real_t radRefVertex, + real_t rayVertex_0, + real_t rayVertex_1, + real_t refVertex_0, + real_t refVertex_1, + real_t thrVertex_0, + real_t thrVertex_1 ) const; + + /// Integral: P1ElementwiseKMassAnnulusMap + /// - volume element: triangle, dim: 2, vertices: 3, spacedim: 2 + /// - kernel type: toMatrix + /// - loop strategy: SAWTOOTH /// - quadrature rule: Dunavant 3 | points: 4, degree: 3 + /// - blending map: AnnulusMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 324 504 24 20 4 0 0 4 - void toMatrix_macro_2D( idx_t* RESTRICT _data_dst, - real_t* RESTRICT _data_k, - idx_t* RESTRICT _data_src, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - std::shared_ptr< SparseMatrixProxy > mat, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float, - real_t radRayVertex, - real_t radRefVertex, - real_t rayVertex_0, - real_t rayVertex_1, - real_t refVertex_0, - real_t refVertex_1, - real_t thrVertex_0, - real_t thrVertex_1 ) const; - /// Kernel type: computeInverseDiagonalOperatorValues + void toMatrix_P1ElementwiseKMassAnnulusMap_macro_2D( idx_t* RESTRICT _data_dst, + real_t* RESTRICT _data_k, + idx_t* RESTRICT _data_src, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + std::shared_ptr< SparseMatrixProxy > mat, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float, + real_t radRayVertex, + real_t radRefVertex, + real_t rayVertex_0, + real_t rayVertex_1, + real_t refVertex_0, + real_t refVertex_1, + real_t thrVertex_0, + real_t thrVertex_1 ) const; + + /// Integral: P1ElementwiseKMassAnnulusMap + /// - volume element: triangle, dim: 2, vertices: 3, spacedim: 2 + /// - kernel type: computeInverseDiagonalOperatorValues + /// - loop strategy: SAWTOOTH /// - quadrature rule: Dunavant 3 | points: 4, degree: 3 + /// - blending map: AnnulusMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 315 492 24 20 4 0 0 1 - void computeInverseDiagonalOperatorValues_macro_2D( real_t* RESTRICT _data_invDiag_, - real_t* RESTRICT _data_k, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float, - real_t radRayVertex, - real_t radRefVertex, - real_t rayVertex_0, - real_t rayVertex_1, - real_t refVertex_0, - real_t refVertex_1, - real_t thrVertex_0, - real_t thrVertex_1 ) const; + void computeInverseDiagonalOperatorValues_P1ElementwiseKMassAnnulusMap_macro_2D( real_t* RESTRICT _data_invDiag_, + real_t* RESTRICT _data_k, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float, + real_t radRayVertex, + real_t radRefVertex, + real_t rayVertex_0, + real_t rayVertex_1, + real_t refVertex_0, + real_t refVertex_1, + real_t thrVertex_0, + real_t thrVertex_1 ) const; std::shared_ptr< P1Function< real_t > > invDiag_; P1Function< real_t > k; diff --git a/operators/k_mass/P1ElementwiseKMassIcosahedralShellMap.cpp b/operators/k_mass/P1ElementwiseKMassIcosahedralShellMap.cpp index d9b48d04b41aa5cab29cc64c240b887ed4f5938c..3eeda4e4367a4378671a3a45adb8521622f499e9 100644 --- a/operators/k_mass/P1ElementwiseKMassIcosahedralShellMap.cpp +++ b/operators/k_mass/P1ElementwiseKMassIcosahedralShellMap.cpp @@ -149,7 +149,7 @@ void P1ElementwiseKMassIcosahedralShellMap::apply( const P1Function< real_t >& s this->timingTree_->start( "kernel" ); - apply_macro_3D( + apply_P1ElementwiseKMassIcosahedralShellMap_macro_3D( _data_dst, _data_k, @@ -182,6 +182,7 @@ void P1ElementwiseKMassIcosahedralShellMap::apply( const P1Function< real_t >& s thrVertex_0, thrVertex_1, thrVertex_2 ); + this->timingTree_->stop( "kernel" ); } @@ -267,7 +268,7 @@ void P1ElementwiseKMassIcosahedralShellMap::toMatrix( const std::shared_ptr< Spa this->timingTree_->start( "kernel" ); - toMatrix_macro_3D( + toMatrix_P1ElementwiseKMassIcosahedralShellMap_macro_3D( _data_dst, _data_k, @@ -301,6 +302,7 @@ void P1ElementwiseKMassIcosahedralShellMap::toMatrix( const std::shared_ptr< Spa thrVertex_0, thrVertex_1, thrVertex_2 ); + this->timingTree_->stop( "kernel" ); } } @@ -377,7 +379,7 @@ void P1ElementwiseKMassIcosahedralShellMap::computeInverseDiagonalOperatorValues this->timingTree_->start( "kernel" ); - computeInverseDiagonalOperatorValues_macro_3D( + computeInverseDiagonalOperatorValues_P1ElementwiseKMassIcosahedralShellMap_macro_3D( _data_invDiag_, _data_k, @@ -409,6 +411,7 @@ void P1ElementwiseKMassIcosahedralShellMap::computeInverseDiagonalOperatorValues thrVertex_0, thrVertex_1, thrVertex_2 ); + this->timingTree_->stop( "kernel" ); } diff --git a/operators/k_mass/P1ElementwiseKMassIcosahedralShellMap.hpp b/operators/k_mass/P1ElementwiseKMassIcosahedralShellMap.hpp index c501ad5912e00dd2784c73cf716542c116de6457..dead3823c61a53aa7010a3d4775798a046704c98 100644 --- a/operators/k_mass/P1ElementwiseKMassIcosahedralShellMap.hpp +++ b/operators/k_mass/P1ElementwiseKMassIcosahedralShellMap.hpp @@ -29,6 +29,7 @@ #include "core/DataTypes.h" #include "hyteg/LikwidWrapper.hpp" +#include "hyteg/boundary/BoundaryConditions.hpp" #include "hyteg/communication/Syncing.hpp" #include "hyteg/edgedofspace/EdgeDoFMacroCell.hpp" #include "hyteg/geometry/IcosahedralShellMap.hpp" @@ -37,6 +38,7 @@ #include "hyteg/primitivestorage/PrimitiveStorage.hpp" #include "hyteg/solvers/Smoothables.hpp" #include "hyteg/sparseassembly/SparseMatrixProxy.hpp" +#include "hyteg/types/types.hpp" #define FUNC_PREFIX @@ -83,117 +85,132 @@ class P1ElementwiseKMassIcosahedralShellMap : public Operator< P1Function< real_ protected: private: - /// Kernel type: apply + /// Integral: P1ElementwiseKMassIcosahedralShellMap + /// - volume element: tetrahedron, dim: 3, vertices: 4, spacedim: 3 + /// - kernel type: apply + /// - loop strategy: SAWTOOTH /// - quadrature rule: Hammer-Marlowe-Stroud 3 | points: 5, degree: 3 + /// - blending map: IcosahedralShellMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 904 1543 51 15 5 0 0 1 - void apply_macro_3D( real_t* RESTRICT _data_dst, - real_t* RESTRICT _data_k, - real_t* RESTRICT _data_src, - real_t forVertex_0, - real_t forVertex_1, - real_t forVertex_2, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_0comp2, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_1comp2, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - real_t macro_vertex_coord_id_2comp2, - real_t macro_vertex_coord_id_3comp0, - real_t macro_vertex_coord_id_3comp1, - real_t macro_vertex_coord_id_3comp2, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float, - real_t radRayVertex, - real_t radRefVertex, - real_t rayVertex_0, - real_t rayVertex_1, - real_t rayVertex_2, - real_t refVertex_0, - real_t refVertex_1, - real_t refVertex_2, - real_t thrVertex_0, - real_t thrVertex_1, - real_t thrVertex_2 ) const; - /// Kernel type: toMatrix + void apply_P1ElementwiseKMassIcosahedralShellMap_macro_3D( real_t* RESTRICT _data_dst, + real_t* RESTRICT _data_k, + real_t* RESTRICT _data_src, + real_t forVertex_0, + real_t forVertex_1, + real_t forVertex_2, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_0comp2, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_1comp2, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + real_t macro_vertex_coord_id_2comp2, + real_t macro_vertex_coord_id_3comp0, + real_t macro_vertex_coord_id_3comp1, + real_t macro_vertex_coord_id_3comp2, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float, + real_t radRayVertex, + real_t radRefVertex, + real_t rayVertex_0, + real_t rayVertex_1, + real_t rayVertex_2, + real_t refVertex_0, + real_t refVertex_1, + real_t refVertex_2, + real_t thrVertex_0, + real_t thrVertex_1, + real_t thrVertex_2 ) const; + + /// Integral: P1ElementwiseKMassIcosahedralShellMap + /// - volume element: tetrahedron, dim: 3, vertices: 4, spacedim: 3 + /// - kernel type: toMatrix + /// - loop strategy: SAWTOOTH /// - quadrature rule: Hammer-Marlowe-Stroud 3 | points: 5, degree: 3 + /// - blending map: IcosahedralShellMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 888 1527 51 15 5 0 0 4 - void toMatrix_macro_3D( idx_t* RESTRICT _data_dst, - real_t* RESTRICT _data_k, - idx_t* RESTRICT _data_src, - real_t forVertex_0, - real_t forVertex_1, - real_t forVertex_2, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_0comp2, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_1comp2, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - real_t macro_vertex_coord_id_2comp2, - real_t macro_vertex_coord_id_3comp0, - real_t macro_vertex_coord_id_3comp1, - real_t macro_vertex_coord_id_3comp2, - std::shared_ptr< SparseMatrixProxy > mat, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float, - real_t radRayVertex, - real_t radRefVertex, - real_t rayVertex_0, - real_t rayVertex_1, - real_t rayVertex_2, - real_t refVertex_0, - real_t refVertex_1, - real_t refVertex_2, - real_t thrVertex_0, - real_t thrVertex_1, - real_t thrVertex_2 ) const; - /// Kernel type: computeInverseDiagonalOperatorValues + void toMatrix_P1ElementwiseKMassIcosahedralShellMap_macro_3D( idx_t* RESTRICT _data_dst, + real_t* RESTRICT _data_k, + idx_t* RESTRICT _data_src, + real_t forVertex_0, + real_t forVertex_1, + real_t forVertex_2, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_0comp2, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_1comp2, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + real_t macro_vertex_coord_id_2comp2, + real_t macro_vertex_coord_id_3comp0, + real_t macro_vertex_coord_id_3comp1, + real_t macro_vertex_coord_id_3comp2, + std::shared_ptr< SparseMatrixProxy > mat, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float, + real_t radRayVertex, + real_t radRefVertex, + real_t rayVertex_0, + real_t rayVertex_1, + real_t rayVertex_2, + real_t refVertex_0, + real_t refVertex_1, + real_t refVertex_2, + real_t thrVertex_0, + real_t thrVertex_1, + real_t thrVertex_2 ) const; + + /// Integral: P1ElementwiseKMassIcosahedralShellMap + /// - volume element: tetrahedron, dim: 3, vertices: 4, spacedim: 3 + /// - kernel type: computeInverseDiagonalOperatorValues + /// - loop strategy: SAWTOOTH /// - quadrature rule: Hammer-Marlowe-Stroud 3 | points: 5, degree: 3 + /// - blending map: IcosahedralShellMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 862 1497 51 15 5 0 0 1 - void computeInverseDiagonalOperatorValues_macro_3D( real_t* RESTRICT _data_invDiag_, - real_t* RESTRICT _data_k, - real_t forVertex_0, - real_t forVertex_1, - real_t forVertex_2, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_0comp2, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_1comp2, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - real_t macro_vertex_coord_id_2comp2, - real_t macro_vertex_coord_id_3comp0, - real_t macro_vertex_coord_id_3comp1, - real_t macro_vertex_coord_id_3comp2, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float, - real_t radRayVertex, - real_t radRefVertex, - real_t rayVertex_0, - real_t rayVertex_1, - real_t rayVertex_2, - real_t refVertex_0, - real_t refVertex_1, - real_t refVertex_2, - real_t thrVertex_0, - real_t thrVertex_1, - real_t thrVertex_2 ) const; + void computeInverseDiagonalOperatorValues_P1ElementwiseKMassIcosahedralShellMap_macro_3D( + real_t* RESTRICT _data_invDiag_, + real_t* RESTRICT _data_k, + real_t forVertex_0, + real_t forVertex_1, + real_t forVertex_2, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_0comp2, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_1comp2, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + real_t macro_vertex_coord_id_2comp2, + real_t macro_vertex_coord_id_3comp0, + real_t macro_vertex_coord_id_3comp1, + real_t macro_vertex_coord_id_3comp2, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float, + real_t radRayVertex, + real_t radRefVertex, + real_t rayVertex_0, + real_t rayVertex_1, + real_t rayVertex_2, + real_t refVertex_0, + real_t refVertex_1, + real_t refVertex_2, + real_t thrVertex_0, + real_t thrVertex_1, + real_t thrVertex_2 ) const; std::shared_ptr< P1Function< real_t > > invDiag_; P1Function< real_t > k; diff --git a/operators/k_mass/P2ElementwiseKMass.cpp b/operators/k_mass/P2ElementwiseKMass.cpp index dd4df2bcb7a88f1076405822de061d9ab0cf6c34..46aa8ff2bb8e3d32d1407062e829bcfd61571e16 100644 --- a/operators/k_mass/P2ElementwiseKMass.cpp +++ b/operators/k_mass/P2ElementwiseKMass.cpp @@ -137,7 +137,7 @@ void P2ElementwiseKMass::apply( const P2Function< real_t >& src, this->timingTree_->start( "kernel" ); - apply_macro_3D( + apply_P2ElementwiseKMass_macro_3D( _data_dstEdge, _data_dstVertex, @@ -159,6 +159,7 @@ void P2ElementwiseKMass::apply( const P2Function< real_t >& src, macro_vertex_coord_id_3comp2, micro_edges_per_macro_edge, micro_edges_per_macro_edge_float ); + this->timingTree_->stop( "kernel" ); } @@ -228,7 +229,7 @@ void P2ElementwiseKMass::apply( const P2Function< real_t >& src, this->timingTree_->start( "kernel" ); - apply_macro_2D( + apply_P2ElementwiseKMass_macro_2D( _data_dstEdge, _data_dstVertex, @@ -244,6 +245,7 @@ void P2ElementwiseKMass::apply( const P2Function< real_t >& src, macro_vertex_coord_id_2comp1, micro_edges_per_macro_edge, micro_edges_per_macro_edge_float ); + this->timingTree_->stop( "kernel" ); } @@ -314,7 +316,7 @@ void P2ElementwiseKMass::toMatrix( const std::shared_ptr< SparseMatrixProxy >& m this->timingTree_->start( "kernel" ); - toMatrix_macro_3D( + toMatrix_P2ElementwiseKMass_macro_3D( _data_dstEdge, _data_dstVertex, @@ -337,6 +339,7 @@ void P2ElementwiseKMass::toMatrix( const std::shared_ptr< SparseMatrixProxy >& m mat, micro_edges_per_macro_edge, micro_edges_per_macro_edge_float ); + this->timingTree_->stop( "kernel" ); } } @@ -369,7 +372,7 @@ void P2ElementwiseKMass::toMatrix( const std::shared_ptr< SparseMatrixProxy >& m this->timingTree_->start( "kernel" ); - toMatrix_macro_2D( + toMatrix_P2ElementwiseKMass_macro_2D( _data_dstEdge, _data_dstVertex, @@ -386,6 +389,7 @@ void P2ElementwiseKMass::toMatrix( const std::shared_ptr< SparseMatrixProxy >& m mat, micro_edges_per_macro_edge, micro_edges_per_macro_edge_float ); + this->timingTree_->stop( "kernel" ); } } @@ -440,7 +444,7 @@ void P2ElementwiseKMass::computeInverseDiagonalOperatorValues() this->timingTree_->start( "kernel" ); - computeInverseDiagonalOperatorValues_macro_3D( + computeInverseDiagonalOperatorValues_P2ElementwiseKMass_macro_3D( _data_invDiag_Edge, _data_invDiag_Vertex, @@ -460,6 +464,7 @@ void P2ElementwiseKMass::computeInverseDiagonalOperatorValues() macro_vertex_coord_id_3comp2, micro_edges_per_macro_edge, micro_edges_per_macro_edge_float ); + this->timingTree_->stop( "kernel" ); } @@ -504,7 +509,7 @@ void P2ElementwiseKMass::computeInverseDiagonalOperatorValues() this->timingTree_->start( "kernel" ); - computeInverseDiagonalOperatorValues_macro_2D( + computeInverseDiagonalOperatorValues_P2ElementwiseKMass_macro_2D( _data_invDiag_Edge, _data_invDiag_Vertex, @@ -518,6 +523,7 @@ void P2ElementwiseKMass::computeInverseDiagonalOperatorValues() macro_vertex_coord_id_2comp1, micro_edges_per_macro_edge, micro_edges_per_macro_edge_float ); + this->timingTree_->stop( "kernel" ); } diff --git a/operators/k_mass/P2ElementwiseKMass.hpp b/operators/k_mass/P2ElementwiseKMass.hpp index 9de0449123a391e0336ad19333033a3668056b87..8946987086b3f2f23746ee559f40180db2b67cc6 100644 --- a/operators/k_mass/P2ElementwiseKMass.hpp +++ b/operators/k_mass/P2ElementwiseKMass.hpp @@ -29,6 +29,7 @@ #include "core/DataTypes.h" #include "hyteg/LikwidWrapper.hpp" +#include "hyteg/boundary/BoundaryConditions.hpp" #include "hyteg/communication/Syncing.hpp" #include "hyteg/edgedofspace/EdgeDoFMacroCell.hpp" #include "hyteg/operators/Operator.hpp" @@ -36,6 +37,7 @@ #include "hyteg/primitivestorage/PrimitiveStorage.hpp" #include "hyteg/solvers/Smoothables.hpp" #include "hyteg/sparseassembly/SparseMatrixProxy.hpp" +#include "hyteg/types/types.hpp" #define FUNC_PREFIX @@ -82,142 +84,171 @@ class P2ElementwiseKMass : public Operator< P2Function< real_t >, P2Function< re protected: private: - /// Kernel type: apply + /// Integral: P2ElementwiseKMass + /// - volume element: triangle, dim: 2, vertices: 3, spacedim: 2 + /// - kernel type: apply + /// - loop strategy: SAWTOOTH /// - quadrature rule: Dunavant 4 | points: 6, degree: 4 + /// - blending map: IdentityMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 290 300 12 0 0 0 0 1 - void apply_macro_2D( real_t* RESTRICT _data_dstEdge, - real_t* RESTRICT _data_dstVertex, - real_t* RESTRICT _data_kEdge, - real_t* RESTRICT _data_kVertex, - real_t* RESTRICT _data_srcEdge, - real_t* RESTRICT _data_srcVertex, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float ) const; - /// Kernel type: apply + void apply_P2ElementwiseKMass_macro_2D( real_t* RESTRICT _data_dstEdge, + real_t* RESTRICT _data_dstVertex, + real_t* RESTRICT _data_kEdge, + real_t* RESTRICT _data_kVertex, + real_t* RESTRICT _data_srcEdge, + real_t* RESTRICT _data_srcVertex, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float ) const; + + /// Integral: P2ElementwiseKMass + /// - volume element: tetrahedron, dim: 3, vertices: 4, spacedim: 3 + /// - kernel type: apply + /// - loop strategy: SAWTOOTH /// - quadrature rule: Jaśkowiec-Sukumar 04 | points: 11, degree: 4 + /// - blending map: IdentityMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 1117 1118 36 0 0 0 0 1 - void apply_macro_3D( real_t* RESTRICT _data_dstEdge, - real_t* RESTRICT _data_dstVertex, - real_t* RESTRICT _data_kEdge, - real_t* RESTRICT _data_kVertex, - real_t* RESTRICT _data_srcEdge, - real_t* RESTRICT _data_srcVertex, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_0comp2, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_1comp2, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - real_t macro_vertex_coord_id_2comp2, - real_t macro_vertex_coord_id_3comp0, - real_t macro_vertex_coord_id_3comp1, - real_t macro_vertex_coord_id_3comp2, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float ) const; - /// Kernel type: toMatrix + void apply_P2ElementwiseKMass_macro_3D( real_t* RESTRICT _data_dstEdge, + real_t* RESTRICT _data_dstVertex, + real_t* RESTRICT _data_kEdge, + real_t* RESTRICT _data_kVertex, + real_t* RESTRICT _data_srcEdge, + real_t* RESTRICT _data_srcVertex, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_0comp2, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_1comp2, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + real_t macro_vertex_coord_id_2comp2, + real_t macro_vertex_coord_id_3comp0, + real_t macro_vertex_coord_id_3comp1, + real_t macro_vertex_coord_id_3comp2, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float ) const; + + /// Integral: P2ElementwiseKMass + /// - volume element: triangle, dim: 2, vertices: 3, spacedim: 2 + /// - kernel type: toMatrix + /// - loop strategy: SAWTOOTH /// - quadrature rule: Dunavant 4 | points: 6, degree: 4 + /// - blending map: IdentityMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 254 264 12 0 0 0 0 4 - void toMatrix_macro_2D( idx_t* RESTRICT _data_dstEdge, - idx_t* RESTRICT _data_dstVertex, - real_t* RESTRICT _data_kEdge, - real_t* RESTRICT _data_kVertex, - idx_t* RESTRICT _data_srcEdge, - idx_t* RESTRICT _data_srcVertex, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - std::shared_ptr< SparseMatrixProxy > mat, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float ) const; - /// Kernel type: toMatrix + void toMatrix_P2ElementwiseKMass_macro_2D( idx_t* RESTRICT _data_dstEdge, + idx_t* RESTRICT _data_dstVertex, + real_t* RESTRICT _data_kEdge, + real_t* RESTRICT _data_kVertex, + idx_t* RESTRICT _data_srcEdge, + idx_t* RESTRICT _data_srcVertex, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + std::shared_ptr< SparseMatrixProxy > mat, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float ) const; + + /// Integral: P2ElementwiseKMass + /// - volume element: tetrahedron, dim: 3, vertices: 4, spacedim: 3 + /// - kernel type: toMatrix + /// - loop strategy: SAWTOOTH /// - quadrature rule: Jaśkowiec-Sukumar 04 | points: 11, degree: 4 + /// - blending map: IdentityMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 1017 1018 36 0 0 0 0 4 - void toMatrix_macro_3D( idx_t* RESTRICT _data_dstEdge, - idx_t* RESTRICT _data_dstVertex, - real_t* RESTRICT _data_kEdge, - real_t* RESTRICT _data_kVertex, - idx_t* RESTRICT _data_srcEdge, - idx_t* RESTRICT _data_srcVertex, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_0comp2, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_1comp2, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - real_t macro_vertex_coord_id_2comp2, - real_t macro_vertex_coord_id_3comp0, - real_t macro_vertex_coord_id_3comp1, - real_t macro_vertex_coord_id_3comp2, - std::shared_ptr< SparseMatrixProxy > mat, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float ) const; - /// Kernel type: computeInverseDiagonalOperatorValues + void toMatrix_P2ElementwiseKMass_macro_3D( idx_t* RESTRICT _data_dstEdge, + idx_t* RESTRICT _data_dstVertex, + real_t* RESTRICT _data_kEdge, + real_t* RESTRICT _data_kVertex, + idx_t* RESTRICT _data_srcEdge, + idx_t* RESTRICT _data_srcVertex, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_0comp2, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_1comp2, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + real_t macro_vertex_coord_id_2comp2, + real_t macro_vertex_coord_id_3comp0, + real_t macro_vertex_coord_id_3comp1, + real_t macro_vertex_coord_id_3comp2, + std::shared_ptr< SparseMatrixProxy > mat, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float ) const; + + /// Integral: P2ElementwiseKMass + /// - volume element: triangle, dim: 2, vertices: 3, spacedim: 2 + /// - kernel type: computeInverseDiagonalOperatorValues + /// - loop strategy: SAWTOOTH /// - quadrature rule: Dunavant 4 | points: 6, degree: 4 + /// - blending map: IdentityMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 170 174 12 0 0 0 0 1 - void computeInverseDiagonalOperatorValues_macro_2D( real_t* RESTRICT _data_invDiag_Edge, - real_t* RESTRICT _data_invDiag_Vertex, - real_t* RESTRICT _data_kEdge, - real_t* RESTRICT _data_kVertex, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float ) const; - /// Kernel type: computeInverseDiagonalOperatorValues + void computeInverseDiagonalOperatorValues_P2ElementwiseKMass_macro_2D( real_t* RESTRICT _data_invDiag_Edge, + real_t* RESTRICT _data_invDiag_Vertex, + real_t* RESTRICT _data_kEdge, + real_t* RESTRICT _data_kVertex, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float ) const; + + /// Integral: P2ElementwiseKMass + /// - volume element: tetrahedron, dim: 3, vertices: 4, spacedim: 3 + /// - kernel type: computeInverseDiagonalOperatorValues + /// - loop strategy: SAWTOOTH /// - quadrature rule: Jaśkowiec-Sukumar 04 | points: 11, degree: 4 + /// - blending map: IdentityMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 532 523 36 0 0 0 0 1 - void computeInverseDiagonalOperatorValues_macro_3D( real_t* RESTRICT _data_invDiag_Edge, - real_t* RESTRICT _data_invDiag_Vertex, - real_t* RESTRICT _data_kEdge, - real_t* RESTRICT _data_kVertex, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_0comp2, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_1comp2, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - real_t macro_vertex_coord_id_2comp2, - real_t macro_vertex_coord_id_3comp0, - real_t macro_vertex_coord_id_3comp1, - real_t macro_vertex_coord_id_3comp2, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float ) const; + void computeInverseDiagonalOperatorValues_P2ElementwiseKMass_macro_3D( real_t* RESTRICT _data_invDiag_Edge, + real_t* RESTRICT _data_invDiag_Vertex, + real_t* RESTRICT _data_kEdge, + real_t* RESTRICT _data_kVertex, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_0comp2, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_1comp2, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + real_t macro_vertex_coord_id_2comp2, + real_t macro_vertex_coord_id_3comp0, + real_t macro_vertex_coord_id_3comp1, + real_t macro_vertex_coord_id_3comp2, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float ) const; std::shared_ptr< P2Function< real_t > > invDiag_; P2Function< real_t > k; diff --git a/operators/k_mass/P2ElementwiseKMassAnnulusMap.cpp b/operators/k_mass/P2ElementwiseKMassAnnulusMap.cpp index dac8d5ffcbc5b9f7c5c1648ccc857a966094085c..d5318c897b5bf5a6f7a7f8757dcb33c34917dab7 100644 --- a/operators/k_mass/P2ElementwiseKMassAnnulusMap.cpp +++ b/operators/k_mass/P2ElementwiseKMassAnnulusMap.cpp @@ -149,7 +149,7 @@ void P2ElementwiseKMassAnnulusMap::apply( const P2Function< real_t >& src, this->timingTree_->start( "kernel" ); - apply_macro_2D( + apply_P2ElementwiseKMassAnnulusMap_macro_2D( _data_dstEdge, _data_dstVertex, @@ -173,6 +173,7 @@ void P2ElementwiseKMassAnnulusMap::apply( const P2Function< real_t >& src, refVertex_1, thrVertex_0, thrVertex_1 ); + this->timingTree_->stop( "kernel" ); } @@ -256,7 +257,7 @@ void P2ElementwiseKMassAnnulusMap::toMatrix( const std::shared_ptr< SparseMatrix this->timingTree_->start( "kernel" ); - toMatrix_macro_2D( + toMatrix_P2ElementwiseKMassAnnulusMap_macro_2D( _data_dstEdge, _data_dstVertex, @@ -281,6 +282,7 @@ void P2ElementwiseKMassAnnulusMap::toMatrix( const std::shared_ptr< SparseMatrix refVertex_1, thrVertex_0, thrVertex_1 ); + this->timingTree_->stop( "kernel" ); } } @@ -349,7 +351,7 @@ void P2ElementwiseKMassAnnulusMap::computeInverseDiagonalOperatorValues() this->timingTree_->start( "kernel" ); - computeInverseDiagonalOperatorValues_macro_2D( + computeInverseDiagonalOperatorValues_P2ElementwiseKMassAnnulusMap_macro_2D( _data_invDiag_Edge, _data_invDiag_Vertex, @@ -371,6 +373,7 @@ void P2ElementwiseKMassAnnulusMap::computeInverseDiagonalOperatorValues() refVertex_1, thrVertex_0, thrVertex_1 ); + this->timingTree_->stop( "kernel" ); } diff --git a/operators/k_mass/P2ElementwiseKMassAnnulusMap.hpp b/operators/k_mass/P2ElementwiseKMassAnnulusMap.hpp index 49ab33563e776e42ccdc89bd6433115927fd7cad..1964dced4f350a1aac8db18a05ea56e78dcf4bab 100644 --- a/operators/k_mass/P2ElementwiseKMassAnnulusMap.hpp +++ b/operators/k_mass/P2ElementwiseKMassAnnulusMap.hpp @@ -29,6 +29,7 @@ #include "core/DataTypes.h" #include "hyteg/LikwidWrapper.hpp" +#include "hyteg/boundary/BoundaryConditions.hpp" #include "hyteg/communication/Syncing.hpp" #include "hyteg/edgedofspace/EdgeDoFMacroCell.hpp" #include "hyteg/geometry/AnnulusMap.hpp" @@ -37,6 +38,7 @@ #include "hyteg/primitivestorage/PrimitiveStorage.hpp" #include "hyteg/solvers/Smoothables.hpp" #include "hyteg/sparseassembly/SparseMatrixProxy.hpp" +#include "hyteg/types/types.hpp" #define FUNC_PREFIX @@ -83,89 +85,103 @@ class P2ElementwiseKMassAnnulusMap : public Operator< P2Function< real_t >, P2Fu protected: private: - /// Kernel type: apply + /// Integral: P2ElementwiseKMassAnnulusMap + /// - volume element: triangle, dim: 2, vertices: 3, spacedim: 2 + /// - kernel type: apply + /// - loop strategy: SAWTOOTH /// - quadrature rule: Dunavant 4 | points: 6, degree: 4 + /// - blending map: AnnulusMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 668 960 30 30 6 0 0 1 - void apply_macro_2D( real_t* RESTRICT _data_dstEdge, - real_t* RESTRICT _data_dstVertex, - real_t* RESTRICT _data_kEdge, - real_t* RESTRICT _data_kVertex, - real_t* RESTRICT _data_srcEdge, - real_t* RESTRICT _data_srcVertex, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float, - real_t radRayVertex, - real_t radRefVertex, - real_t rayVertex_0, - real_t rayVertex_1, - real_t refVertex_0, - real_t refVertex_1, - real_t thrVertex_0, - real_t thrVertex_1 ) const; - /// Kernel type: toMatrix + void apply_P2ElementwiseKMassAnnulusMap_macro_2D( real_t* RESTRICT _data_dstEdge, + real_t* RESTRICT _data_dstVertex, + real_t* RESTRICT _data_kEdge, + real_t* RESTRICT _data_kVertex, + real_t* RESTRICT _data_srcEdge, + real_t* RESTRICT _data_srcVertex, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float, + real_t radRayVertex, + real_t radRefVertex, + real_t rayVertex_0, + real_t rayVertex_1, + real_t refVertex_0, + real_t refVertex_1, + real_t thrVertex_0, + real_t thrVertex_1 ) const; + + /// Integral: P2ElementwiseKMassAnnulusMap + /// - volume element: triangle, dim: 2, vertices: 3, spacedim: 2 + /// - kernel type: toMatrix + /// - loop strategy: SAWTOOTH /// - quadrature rule: Dunavant 4 | points: 6, degree: 4 + /// - blending map: AnnulusMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 632 924 30 30 6 0 0 4 - void toMatrix_macro_2D( idx_t* RESTRICT _data_dstEdge, - idx_t* RESTRICT _data_dstVertex, - real_t* RESTRICT _data_kEdge, - real_t* RESTRICT _data_kVertex, - idx_t* RESTRICT _data_srcEdge, - idx_t* RESTRICT _data_srcVertex, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - std::shared_ptr< SparseMatrixProxy > mat, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float, - real_t radRayVertex, - real_t radRefVertex, - real_t rayVertex_0, - real_t rayVertex_1, - real_t refVertex_0, - real_t refVertex_1, - real_t thrVertex_0, - real_t thrVertex_1 ) const; - /// Kernel type: computeInverseDiagonalOperatorValues + void toMatrix_P2ElementwiseKMassAnnulusMap_macro_2D( idx_t* RESTRICT _data_dstEdge, + idx_t* RESTRICT _data_dstVertex, + real_t* RESTRICT _data_kEdge, + real_t* RESTRICT _data_kVertex, + idx_t* RESTRICT _data_srcEdge, + idx_t* RESTRICT _data_srcVertex, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + std::shared_ptr< SparseMatrixProxy > mat, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float, + real_t radRayVertex, + real_t radRefVertex, + real_t rayVertex_0, + real_t rayVertex_1, + real_t refVertex_0, + real_t refVertex_1, + real_t thrVertex_0, + real_t thrVertex_1 ) const; + + /// Integral: P2ElementwiseKMassAnnulusMap + /// - volume element: triangle, dim: 2, vertices: 3, spacedim: 2 + /// - kernel type: computeInverseDiagonalOperatorValues + /// - loop strategy: SAWTOOTH /// - quadrature rule: Dunavant 4 | points: 6, degree: 4 + /// - blending map: AnnulusMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 548 834 30 30 6 0 0 1 - void computeInverseDiagonalOperatorValues_macro_2D( real_t* RESTRICT _data_invDiag_Edge, - real_t* RESTRICT _data_invDiag_Vertex, - real_t* RESTRICT _data_kEdge, - real_t* RESTRICT _data_kVertex, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float, - real_t radRayVertex, - real_t radRefVertex, - real_t rayVertex_0, - real_t rayVertex_1, - real_t refVertex_0, - real_t refVertex_1, - real_t thrVertex_0, - real_t thrVertex_1 ) const; + void computeInverseDiagonalOperatorValues_P2ElementwiseKMassAnnulusMap_macro_2D( real_t* RESTRICT _data_invDiag_Edge, + real_t* RESTRICT _data_invDiag_Vertex, + real_t* RESTRICT _data_kEdge, + real_t* RESTRICT _data_kVertex, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float, + real_t radRayVertex, + real_t radRefVertex, + real_t rayVertex_0, + real_t rayVertex_1, + real_t refVertex_0, + real_t refVertex_1, + real_t thrVertex_0, + real_t thrVertex_1 ) const; std::shared_ptr< P2Function< real_t > > invDiag_; P2Function< real_t > k; diff --git a/operators/k_mass/P2ElementwiseKMassIcosahedralShellMap.cpp b/operators/k_mass/P2ElementwiseKMassIcosahedralShellMap.cpp index 929c124e0e164ec756d6fcef1fd0a9bdcb49e994..6dfcc2e21de5d655d29467fa9ec0a5c7dec6f7c7 100644 --- a/operators/k_mass/P2ElementwiseKMassIcosahedralShellMap.cpp +++ b/operators/k_mass/P2ElementwiseKMassIcosahedralShellMap.cpp @@ -153,7 +153,7 @@ void P2ElementwiseKMassIcosahedralShellMap::apply( const P2Function< real_t >& s this->timingTree_->start( "kernel" ); - apply_macro_3D( + apply_P2ElementwiseKMassIcosahedralShellMap_macro_3D( _data_dstEdge, _data_dstVertex, @@ -189,6 +189,7 @@ void P2ElementwiseKMassIcosahedralShellMap::apply( const P2Function< real_t >& s thrVertex_0, thrVertex_1, thrVertex_2 ); + this->timingTree_->stop( "kernel" ); } @@ -284,7 +285,7 @@ void P2ElementwiseKMassIcosahedralShellMap::toMatrix( const std::shared_ptr< Spa this->timingTree_->start( "kernel" ); - toMatrix_macro_3D( + toMatrix_P2ElementwiseKMassIcosahedralShellMap_macro_3D( _data_dstEdge, _data_dstVertex, @@ -321,6 +322,7 @@ void P2ElementwiseKMassIcosahedralShellMap::toMatrix( const std::shared_ptr< Spa thrVertex_0, thrVertex_1, thrVertex_2 ); + this->timingTree_->stop( "kernel" ); } } @@ -400,7 +402,7 @@ void P2ElementwiseKMassIcosahedralShellMap::computeInverseDiagonalOperatorValues this->timingTree_->start( "kernel" ); - computeInverseDiagonalOperatorValues_macro_3D( + computeInverseDiagonalOperatorValues_P2ElementwiseKMassIcosahedralShellMap_macro_3D( _data_invDiag_Edge, _data_invDiag_Vertex, @@ -434,6 +436,7 @@ void P2ElementwiseKMassIcosahedralShellMap::computeInverseDiagonalOperatorValues thrVertex_0, thrVertex_1, thrVertex_2 ); + this->timingTree_->stop( "kernel" ); } diff --git a/operators/k_mass/P2ElementwiseKMassIcosahedralShellMap.hpp b/operators/k_mass/P2ElementwiseKMassIcosahedralShellMap.hpp index 00dddcfef5084c19c14b22f8ff56521c3bcad547..8dfcbe8a025fee39e24c10445e65692af4808041 100644 --- a/operators/k_mass/P2ElementwiseKMassIcosahedralShellMap.hpp +++ b/operators/k_mass/P2ElementwiseKMassIcosahedralShellMap.hpp @@ -29,6 +29,7 @@ #include "core/DataTypes.h" #include "hyteg/LikwidWrapper.hpp" +#include "hyteg/boundary/BoundaryConditions.hpp" #include "hyteg/communication/Syncing.hpp" #include "hyteg/edgedofspace/EdgeDoFMacroCell.hpp" #include "hyteg/geometry/IcosahedralShellMap.hpp" @@ -37,6 +38,7 @@ #include "hyteg/primitivestorage/PrimitiveStorage.hpp" #include "hyteg/solvers/Smoothables.hpp" #include "hyteg/sparseassembly/SparseMatrixProxy.hpp" +#include "hyteg/types/types.hpp" #define FUNC_PREFIX @@ -83,125 +85,140 @@ class P2ElementwiseKMassIcosahedralShellMap : public Operator< P2Function< real_ protected: private: - /// Kernel type: apply + /// Integral: P2ElementwiseKMassIcosahedralShellMap + /// - volume element: tetrahedron, dim: 3, vertices: 4, spacedim: 3 + /// - kernel type: apply + /// - loop strategy: SAWTOOTH /// - quadrature rule: Jaśkowiec-Sukumar 04 | points: 11, degree: 4 + /// - blending map: IcosahedralShellMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 2690 4154 69 33 11 0 0 1 - void apply_macro_3D( real_t* RESTRICT _data_dstEdge, - real_t* RESTRICT _data_dstVertex, - real_t* RESTRICT _data_kEdge, - real_t* RESTRICT _data_kVertex, - real_t* RESTRICT _data_srcEdge, - real_t* RESTRICT _data_srcVertex, - real_t forVertex_0, - real_t forVertex_1, - real_t forVertex_2, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_0comp2, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_1comp2, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - real_t macro_vertex_coord_id_2comp2, - real_t macro_vertex_coord_id_3comp0, - real_t macro_vertex_coord_id_3comp1, - real_t macro_vertex_coord_id_3comp2, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float, - real_t radRayVertex, - real_t radRefVertex, - real_t rayVertex_0, - real_t rayVertex_1, - real_t rayVertex_2, - real_t refVertex_0, - real_t refVertex_1, - real_t refVertex_2, - real_t thrVertex_0, - real_t thrVertex_1, - real_t thrVertex_2 ) const; - /// Kernel type: toMatrix + void apply_P2ElementwiseKMassIcosahedralShellMap_macro_3D( real_t* RESTRICT _data_dstEdge, + real_t* RESTRICT _data_dstVertex, + real_t* RESTRICT _data_kEdge, + real_t* RESTRICT _data_kVertex, + real_t* RESTRICT _data_srcEdge, + real_t* RESTRICT _data_srcVertex, + real_t forVertex_0, + real_t forVertex_1, + real_t forVertex_2, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_0comp2, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_1comp2, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + real_t macro_vertex_coord_id_2comp2, + real_t macro_vertex_coord_id_3comp0, + real_t macro_vertex_coord_id_3comp1, + real_t macro_vertex_coord_id_3comp2, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float, + real_t radRayVertex, + real_t radRefVertex, + real_t rayVertex_0, + real_t rayVertex_1, + real_t rayVertex_2, + real_t refVertex_0, + real_t refVertex_1, + real_t refVertex_2, + real_t thrVertex_0, + real_t thrVertex_1, + real_t thrVertex_2 ) const; + + /// Integral: P2ElementwiseKMassIcosahedralShellMap + /// - volume element: tetrahedron, dim: 3, vertices: 4, spacedim: 3 + /// - kernel type: toMatrix + /// - loop strategy: SAWTOOTH /// - quadrature rule: Jaśkowiec-Sukumar 04 | points: 11, degree: 4 + /// - blending map: IcosahedralShellMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 2590 4054 69 33 11 0 0 4 - void toMatrix_macro_3D( idx_t* RESTRICT _data_dstEdge, - idx_t* RESTRICT _data_dstVertex, - real_t* RESTRICT _data_kEdge, - real_t* RESTRICT _data_kVertex, - idx_t* RESTRICT _data_srcEdge, - idx_t* RESTRICT _data_srcVertex, - real_t forVertex_0, - real_t forVertex_1, - real_t forVertex_2, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_0comp2, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_1comp2, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - real_t macro_vertex_coord_id_2comp2, - real_t macro_vertex_coord_id_3comp0, - real_t macro_vertex_coord_id_3comp1, - real_t macro_vertex_coord_id_3comp2, - std::shared_ptr< SparseMatrixProxy > mat, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float, - real_t radRayVertex, - real_t radRefVertex, - real_t rayVertex_0, - real_t rayVertex_1, - real_t rayVertex_2, - real_t refVertex_0, - real_t refVertex_1, - real_t refVertex_2, - real_t thrVertex_0, - real_t thrVertex_1, - real_t thrVertex_2 ) const; - /// Kernel type: computeInverseDiagonalOperatorValues + void toMatrix_P2ElementwiseKMassIcosahedralShellMap_macro_3D( idx_t* RESTRICT _data_dstEdge, + idx_t* RESTRICT _data_dstVertex, + real_t* RESTRICT _data_kEdge, + real_t* RESTRICT _data_kVertex, + idx_t* RESTRICT _data_srcEdge, + idx_t* RESTRICT _data_srcVertex, + real_t forVertex_0, + real_t forVertex_1, + real_t forVertex_2, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_0comp2, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_1comp2, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + real_t macro_vertex_coord_id_2comp2, + real_t macro_vertex_coord_id_3comp0, + real_t macro_vertex_coord_id_3comp1, + real_t macro_vertex_coord_id_3comp2, + std::shared_ptr< SparseMatrixProxy > mat, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float, + real_t radRayVertex, + real_t radRefVertex, + real_t rayVertex_0, + real_t rayVertex_1, + real_t rayVertex_2, + real_t refVertex_0, + real_t refVertex_1, + real_t refVertex_2, + real_t thrVertex_0, + real_t thrVertex_1, + real_t thrVertex_2 ) const; + + /// Integral: P2ElementwiseKMassIcosahedralShellMap + /// - volume element: tetrahedron, dim: 3, vertices: 4, spacedim: 3 + /// - kernel type: computeInverseDiagonalOperatorValues + /// - loop strategy: SAWTOOTH /// - quadrature rule: Jaśkowiec-Sukumar 04 | points: 11, degree: 4 + /// - blending map: IcosahedralShellMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 2105 3559 69 33 11 0 0 1 - void computeInverseDiagonalOperatorValues_macro_3D( real_t* RESTRICT _data_invDiag_Edge, - real_t* RESTRICT _data_invDiag_Vertex, - real_t* RESTRICT _data_kEdge, - real_t* RESTRICT _data_kVertex, - real_t forVertex_0, - real_t forVertex_1, - real_t forVertex_2, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_0comp2, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_1comp2, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - real_t macro_vertex_coord_id_2comp2, - real_t macro_vertex_coord_id_3comp0, - real_t macro_vertex_coord_id_3comp1, - real_t macro_vertex_coord_id_3comp2, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float, - real_t radRayVertex, - real_t radRefVertex, - real_t rayVertex_0, - real_t rayVertex_1, - real_t rayVertex_2, - real_t refVertex_0, - real_t refVertex_1, - real_t refVertex_2, - real_t thrVertex_0, - real_t thrVertex_1, - real_t thrVertex_2 ) const; + void computeInverseDiagonalOperatorValues_P2ElementwiseKMassIcosahedralShellMap_macro_3D( + real_t* RESTRICT _data_invDiag_Edge, + real_t* RESTRICT _data_invDiag_Vertex, + real_t* RESTRICT _data_kEdge, + real_t* RESTRICT _data_kVertex, + real_t forVertex_0, + real_t forVertex_1, + real_t forVertex_2, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_0comp2, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_1comp2, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + real_t macro_vertex_coord_id_2comp2, + real_t macro_vertex_coord_id_3comp0, + real_t macro_vertex_coord_id_3comp1, + real_t macro_vertex_coord_id_3comp2, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float, + real_t radRayVertex, + real_t radRefVertex, + real_t rayVertex_0, + real_t rayVertex_1, + real_t rayVertex_2, + real_t refVertex_0, + real_t refVertex_1, + real_t refVertex_2, + real_t thrVertex_0, + real_t thrVertex_1, + real_t thrVertex_2 ) const; std::shared_ptr< P2Function< real_t > > invDiag_; P2Function< real_t > k; diff --git a/operators/k_mass/P2ToP1ElementwiseKMass.cpp b/operators/k_mass/P2ToP1ElementwiseKMass.cpp index f5cfd0f3102e93cdd52c67de4de970b547f68854..f445349c8860bb332ba28f99e3d2f5ccd30d7da9 100644 --- a/operators/k_mass/P2ToP1ElementwiseKMass.cpp +++ b/operators/k_mass/P2ToP1ElementwiseKMass.cpp @@ -135,7 +135,7 @@ void P2ToP1ElementwiseKMass::apply( const P2Function< real_t >& src, this->timingTree_->start( "kernel" ); - apply_macro_3D( + apply_P2ToP1ElementwiseKMass_macro_3D( _data_dst, _data_kEdge, @@ -156,6 +156,7 @@ void P2ToP1ElementwiseKMass::apply( const P2Function< real_t >& src, macro_vertex_coord_id_3comp2, micro_edges_per_macro_edge, micro_edges_per_macro_edge_float ); + this->timingTree_->stop( "kernel" ); } @@ -206,7 +207,7 @@ void P2ToP1ElementwiseKMass::apply( const P2Function< real_t >& src, this->timingTree_->start( "kernel" ); - apply_macro_2D( + apply_P2ToP1ElementwiseKMass_macro_2D( _data_dst, _data_kEdge, @@ -221,6 +222,7 @@ void P2ToP1ElementwiseKMass::apply( const P2Function< real_t >& src, macro_vertex_coord_id_2comp1, micro_edges_per_macro_edge, micro_edges_per_macro_edge_float ); + this->timingTree_->stop( "kernel" ); } @@ -286,7 +288,7 @@ void P2ToP1ElementwiseKMass::toMatrix( const std::shared_ptr< SparseMatrixProxy this->timingTree_->start( "kernel" ); - toMatrix_macro_3D( + toMatrix_P2ToP1ElementwiseKMass_macro_3D( _data_dst, _data_kEdge, @@ -308,6 +310,7 @@ void P2ToP1ElementwiseKMass::toMatrix( const std::shared_ptr< SparseMatrixProxy mat, micro_edges_per_macro_edge, micro_edges_per_macro_edge_float ); + this->timingTree_->stop( "kernel" ); } } @@ -339,7 +342,7 @@ void P2ToP1ElementwiseKMass::toMatrix( const std::shared_ptr< SparseMatrixProxy this->timingTree_->start( "kernel" ); - toMatrix_macro_2D( + toMatrix_P2ToP1ElementwiseKMass_macro_2D( _data_dst, _data_kEdge, @@ -355,6 +358,7 @@ void P2ToP1ElementwiseKMass::toMatrix( const std::shared_ptr< SparseMatrixProxy mat, micro_edges_per_macro_edge, micro_edges_per_macro_edge_float ); + this->timingTree_->stop( "kernel" ); } } diff --git a/operators/k_mass/P2ToP1ElementwiseKMass.hpp b/operators/k_mass/P2ToP1ElementwiseKMass.hpp index b30f8512b35f5755d06f39862754b94ec3049173..aa6a35f2ba3385cebb8edc481ddc1b262c9f45f1 100644 --- a/operators/k_mass/P2ToP1ElementwiseKMass.hpp +++ b/operators/k_mass/P2ToP1ElementwiseKMass.hpp @@ -29,6 +29,7 @@ #include "core/DataTypes.h" #include "hyteg/LikwidWrapper.hpp" +#include "hyteg/boundary/BoundaryConditions.hpp" #include "hyteg/communication/Syncing.hpp" #include "hyteg/edgedofspace/EdgeDoFMacroCell.hpp" #include "hyteg/operators/Operator.hpp" @@ -36,6 +37,7 @@ #include "hyteg/p2functionspace/P2Function.hpp" #include "hyteg/primitivestorage/PrimitiveStorage.hpp" #include "hyteg/sparseassembly/SparseMatrixProxy.hpp" +#include "hyteg/types/types.hpp" #define FUNC_PREFIX @@ -77,96 +79,115 @@ class P2ToP1ElementwiseKMass : public Operator< P2Function< real_t >, P1Function protected: private: - /// Kernel type: apply + /// Integral: P2ToP1ElementwiseKMass + /// - volume element: triangle, dim: 2, vertices: 3, spacedim: 2 + /// - kernel type: apply + /// - loop strategy: SAWTOOTH /// - quadrature rule: Dunavant 4 | points: 6, degree: 4 + /// - blending map: IdentityMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 254 264 12 0 0 0 0 1 - void apply_macro_2D( real_t* RESTRICT _data_dst, - real_t* RESTRICT _data_kEdge, - real_t* RESTRICT _data_kVertex, - real_t* RESTRICT _data_srcEdge, - real_t* RESTRICT _data_srcVertex, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float ) const; - /// Kernel type: apply + void apply_P2ToP1ElementwiseKMass_macro_2D( real_t* RESTRICT _data_dst, + real_t* RESTRICT _data_kEdge, + real_t* RESTRICT _data_kVertex, + real_t* RESTRICT _data_srcEdge, + real_t* RESTRICT _data_srcVertex, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float ) const; + + /// Integral: P2ToP1ElementwiseKMass + /// - volume element: tetrahedron, dim: 3, vertices: 4, spacedim: 3 + /// - kernel type: apply + /// - loop strategy: SAWTOOTH /// - quadrature rule: Jaśkowiec-Sukumar 04 | points: 11, degree: 4 + /// - blending map: IdentityMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 892 871 36 0 0 0 0 1 - void apply_macro_3D( real_t* RESTRICT _data_dst, - real_t* RESTRICT _data_kEdge, - real_t* RESTRICT _data_kVertex, - real_t* RESTRICT _data_srcEdge, - real_t* RESTRICT _data_srcVertex, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_0comp2, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_1comp2, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - real_t macro_vertex_coord_id_2comp2, - real_t macro_vertex_coord_id_3comp0, - real_t macro_vertex_coord_id_3comp1, - real_t macro_vertex_coord_id_3comp2, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float ) const; - /// Kernel type: toMatrix + void apply_P2ToP1ElementwiseKMass_macro_3D( real_t* RESTRICT _data_dst, + real_t* RESTRICT _data_kEdge, + real_t* RESTRICT _data_kVertex, + real_t* RESTRICT _data_srcEdge, + real_t* RESTRICT _data_srcVertex, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_0comp2, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_1comp2, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + real_t macro_vertex_coord_id_2comp2, + real_t macro_vertex_coord_id_3comp0, + real_t macro_vertex_coord_id_3comp1, + real_t macro_vertex_coord_id_3comp2, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float ) const; + + /// Integral: P2ToP1ElementwiseKMass + /// - volume element: triangle, dim: 2, vertices: 3, spacedim: 2 + /// - kernel type: toMatrix + /// - loop strategy: SAWTOOTH /// - quadrature rule: Dunavant 4 | points: 6, degree: 4 + /// - blending map: IdentityMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 236 246 12 0 0 0 0 4 - void toMatrix_macro_2D( idx_t* RESTRICT _data_dst, - real_t* RESTRICT _data_kEdge, - real_t* RESTRICT _data_kVertex, - idx_t* RESTRICT _data_srcEdge, - idx_t* RESTRICT _data_srcVertex, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - std::shared_ptr< SparseMatrixProxy > mat, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float ) const; - /// Kernel type: toMatrix + void toMatrix_P2ToP1ElementwiseKMass_macro_2D( idx_t* RESTRICT _data_dst, + real_t* RESTRICT _data_kEdge, + real_t* RESTRICT _data_kVertex, + idx_t* RESTRICT _data_srcEdge, + idx_t* RESTRICT _data_srcVertex, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + std::shared_ptr< SparseMatrixProxy > mat, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float ) const; + + /// Integral: P2ToP1ElementwiseKMass + /// - volume element: tetrahedron, dim: 3, vertices: 4, spacedim: 3 + /// - kernel type: toMatrix + /// - loop strategy: SAWTOOTH /// - quadrature rule: Jaśkowiec-Sukumar 04 | points: 11, degree: 4 + /// - blending map: IdentityMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 852 831 36 0 0 0 0 4 - void toMatrix_macro_3D( idx_t* RESTRICT _data_dst, - real_t* RESTRICT _data_kEdge, - real_t* RESTRICT _data_kVertex, - idx_t* RESTRICT _data_srcEdge, - idx_t* RESTRICT _data_srcVertex, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_0comp2, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_1comp2, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - real_t macro_vertex_coord_id_2comp2, - real_t macro_vertex_coord_id_3comp0, - real_t macro_vertex_coord_id_3comp1, - real_t macro_vertex_coord_id_3comp2, - std::shared_ptr< SparseMatrixProxy > mat, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float ) const; + void toMatrix_P2ToP1ElementwiseKMass_macro_3D( idx_t* RESTRICT _data_dst, + real_t* RESTRICT _data_kEdge, + real_t* RESTRICT _data_kVertex, + idx_t* RESTRICT _data_srcEdge, + idx_t* RESTRICT _data_srcVertex, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_0comp2, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_1comp2, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + real_t macro_vertex_coord_id_2comp2, + real_t macro_vertex_coord_id_3comp0, + real_t macro_vertex_coord_id_3comp1, + real_t macro_vertex_coord_id_3comp2, + std::shared_ptr< SparseMatrixProxy > mat, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float ) const; P2Function< real_t > k; }; diff --git a/operators/k_mass/P2ToP1ElementwiseKMassAnnulusMap.cpp b/operators/k_mass/P2ToP1ElementwiseKMassAnnulusMap.cpp index d91700351531f6cdd8603bb2d9aa24ae6cc6c0b6..a250d3cb3ea059a70fb9027c034a4462dad963d0 100644 --- a/operators/k_mass/P2ToP1ElementwiseKMassAnnulusMap.cpp +++ b/operators/k_mass/P2ToP1ElementwiseKMassAnnulusMap.cpp @@ -137,7 +137,7 @@ void P2ToP1ElementwiseKMassAnnulusMap::apply( const P2Function< real_t >& src, this->timingTree_->start( "kernel" ); - apply_macro_2D( + apply_P2ToP1ElementwiseKMassAnnulusMap_macro_2D( _data_dst, _data_kEdge, @@ -160,6 +160,7 @@ void P2ToP1ElementwiseKMassAnnulusMap::apply( const P2Function< real_t >& src, refVertex_1, thrVertex_0, thrVertex_1 ); + this->timingTree_->stop( "kernel" ); } @@ -238,7 +239,7 @@ void P2ToP1ElementwiseKMassAnnulusMap::toMatrix( const std::shared_ptr< SparseMa this->timingTree_->start( "kernel" ); - toMatrix_macro_2D( + toMatrix_P2ToP1ElementwiseKMassAnnulusMap_macro_2D( _data_dst, _data_kEdge, @@ -262,6 +263,7 @@ void P2ToP1ElementwiseKMassAnnulusMap::toMatrix( const std::shared_ptr< SparseMa refVertex_1, thrVertex_0, thrVertex_1 ); + this->timingTree_->stop( "kernel" ); } } diff --git a/operators/k_mass/P2ToP1ElementwiseKMassAnnulusMap.hpp b/operators/k_mass/P2ToP1ElementwiseKMassAnnulusMap.hpp index 8d34dcff35ee82cdb57c8b011bf938ad9d9674bf..3222cf1b1bd41e2be9c9fcde55db858d5f4ce8fd 100644 --- a/operators/k_mass/P2ToP1ElementwiseKMassAnnulusMap.hpp +++ b/operators/k_mass/P2ToP1ElementwiseKMassAnnulusMap.hpp @@ -29,6 +29,7 @@ #include "core/DataTypes.h" #include "hyteg/LikwidWrapper.hpp" +#include "hyteg/boundary/BoundaryConditions.hpp" #include "hyteg/communication/Syncing.hpp" #include "hyteg/edgedofspace/EdgeDoFMacroCell.hpp" #include "hyteg/geometry/AnnulusMap.hpp" @@ -37,6 +38,7 @@ #include "hyteg/p2functionspace/P2Function.hpp" #include "hyteg/primitivestorage/PrimitiveStorage.hpp" #include "hyteg/sparseassembly/SparseMatrixProxy.hpp" +#include "hyteg/types/types.hpp" #define FUNC_PREFIX @@ -78,61 +80,70 @@ class P2ToP1ElementwiseKMassAnnulusMap : public Operator< P2Function< real_t >, protected: private: - /// Kernel type: apply + /// Integral: P2ToP1ElementwiseKMassAnnulusMap + /// - volume element: triangle, dim: 2, vertices: 3, spacedim: 2 + /// - kernel type: apply + /// - loop strategy: SAWTOOTH /// - quadrature rule: Dunavant 4 | points: 6, degree: 4 + /// - blending map: AnnulusMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 632 924 30 30 6 0 0 1 - void apply_macro_2D( real_t* RESTRICT _data_dst, - real_t* RESTRICT _data_kEdge, - real_t* RESTRICT _data_kVertex, - real_t* RESTRICT _data_srcEdge, - real_t* RESTRICT _data_srcVertex, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float, - real_t radRayVertex, - real_t radRefVertex, - real_t rayVertex_0, - real_t rayVertex_1, - real_t refVertex_0, - real_t refVertex_1, - real_t thrVertex_0, - real_t thrVertex_1 ) const; - /// Kernel type: toMatrix + void apply_P2ToP1ElementwiseKMassAnnulusMap_macro_2D( real_t* RESTRICT _data_dst, + real_t* RESTRICT _data_kEdge, + real_t* RESTRICT _data_kVertex, + real_t* RESTRICT _data_srcEdge, + real_t* RESTRICT _data_srcVertex, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float, + real_t radRayVertex, + real_t radRefVertex, + real_t rayVertex_0, + real_t rayVertex_1, + real_t refVertex_0, + real_t refVertex_1, + real_t thrVertex_0, + real_t thrVertex_1 ) const; + + /// Integral: P2ToP1ElementwiseKMassAnnulusMap + /// - volume element: triangle, dim: 2, vertices: 3, spacedim: 2 + /// - kernel type: toMatrix + /// - loop strategy: SAWTOOTH /// - quadrature rule: Dunavant 4 | points: 6, degree: 4 + /// - blending map: AnnulusMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 614 906 30 30 6 0 0 4 - void toMatrix_macro_2D( idx_t* RESTRICT _data_dst, - real_t* RESTRICT _data_kEdge, - real_t* RESTRICT _data_kVertex, - idx_t* RESTRICT _data_srcEdge, - idx_t* RESTRICT _data_srcVertex, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - std::shared_ptr< SparseMatrixProxy > mat, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float, - real_t radRayVertex, - real_t radRefVertex, - real_t rayVertex_0, - real_t rayVertex_1, - real_t refVertex_0, - real_t refVertex_1, - real_t thrVertex_0, - real_t thrVertex_1 ) const; + void toMatrix_P2ToP1ElementwiseKMassAnnulusMap_macro_2D( idx_t* RESTRICT _data_dst, + real_t* RESTRICT _data_kEdge, + real_t* RESTRICT _data_kVertex, + idx_t* RESTRICT _data_srcEdge, + idx_t* RESTRICT _data_srcVertex, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + std::shared_ptr< SparseMatrixProxy > mat, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float, + real_t radRayVertex, + real_t radRefVertex, + real_t rayVertex_0, + real_t rayVertex_1, + real_t refVertex_0, + real_t refVertex_1, + real_t thrVertex_0, + real_t thrVertex_1 ) const; P2Function< real_t > k; }; diff --git a/operators/k_mass/P2ToP1ElementwiseKMassIcosahedralShellMap.cpp b/operators/k_mass/P2ToP1ElementwiseKMassIcosahedralShellMap.cpp index 027f78ad2c8326f2a1ec369cb2304a0393307c8f..ecb5c90d1056f712a6da4ee537054b181ac3e0f4 100644 --- a/operators/k_mass/P2ToP1ElementwiseKMassIcosahedralShellMap.cpp +++ b/operators/k_mass/P2ToP1ElementwiseKMassIcosahedralShellMap.cpp @@ -152,7 +152,7 @@ void P2ToP1ElementwiseKMassIcosahedralShellMap::apply( const P2Function< real_t this->timingTree_->start( "kernel" ); - apply_macro_3D( + apply_P2ToP1ElementwiseKMassIcosahedralShellMap_macro_3D( _data_dst, _data_kEdge, @@ -187,6 +187,7 @@ void P2ToP1ElementwiseKMassIcosahedralShellMap::apply( const P2Function< real_t thrVertex_0, thrVertex_1, thrVertex_2 ); + this->timingTree_->stop( "kernel" ); } @@ -274,7 +275,7 @@ void P2ToP1ElementwiseKMassIcosahedralShellMap::toMatrix( const std::shared_ptr< this->timingTree_->start( "kernel" ); - toMatrix_macro_3D( + toMatrix_P2ToP1ElementwiseKMassIcosahedralShellMap_macro_3D( _data_dst, _data_kEdge, @@ -310,6 +311,7 @@ void P2ToP1ElementwiseKMassIcosahedralShellMap::toMatrix( const std::shared_ptr< thrVertex_0, thrVertex_1, thrVertex_2 ); + this->timingTree_->stop( "kernel" ); } } diff --git a/operators/k_mass/P2ToP1ElementwiseKMassIcosahedralShellMap.hpp b/operators/k_mass/P2ToP1ElementwiseKMassIcosahedralShellMap.hpp index 7b5a9afd4a5d6cc8bbb41d280f271cb18923920b..3bac6117eb034b8b8484df5b173d43c37dbab20e 100644 --- a/operators/k_mass/P2ToP1ElementwiseKMassIcosahedralShellMap.hpp +++ b/operators/k_mass/P2ToP1ElementwiseKMassIcosahedralShellMap.hpp @@ -29,6 +29,7 @@ #include "core/DataTypes.h" #include "hyteg/LikwidWrapper.hpp" +#include "hyteg/boundary/BoundaryConditions.hpp" #include "hyteg/communication/Syncing.hpp" #include "hyteg/edgedofspace/EdgeDoFMacroCell.hpp" #include "hyteg/geometry/IcosahedralShellMap.hpp" @@ -37,6 +38,7 @@ #include "hyteg/p2functionspace/P2Function.hpp" #include "hyteg/primitivestorage/PrimitiveStorage.hpp" #include "hyteg/sparseassembly/SparseMatrixProxy.hpp" +#include "hyteg/types/types.hpp" #define FUNC_PREFIX @@ -78,85 +80,94 @@ class P2ToP1ElementwiseKMassIcosahedralShellMap : public Operator< P2Function< r protected: private: - /// Kernel type: apply + /// Integral: P2ToP1ElementwiseKMassIcosahedralShellMap + /// - volume element: tetrahedron, dim: 3, vertices: 4, spacedim: 3 + /// - kernel type: apply + /// - loop strategy: SAWTOOTH /// - quadrature rule: Jaśkowiec-Sukumar 04 | points: 11, degree: 4 + /// - blending map: IcosahedralShellMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 2465 3907 69 33 11 0 0 1 - void apply_macro_3D( real_t* RESTRICT _data_dst, - real_t* RESTRICT _data_kEdge, - real_t* RESTRICT _data_kVertex, - real_t* RESTRICT _data_srcEdge, - real_t* RESTRICT _data_srcVertex, - real_t forVertex_0, - real_t forVertex_1, - real_t forVertex_2, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_0comp2, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_1comp2, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - real_t macro_vertex_coord_id_2comp2, - real_t macro_vertex_coord_id_3comp0, - real_t macro_vertex_coord_id_3comp1, - real_t macro_vertex_coord_id_3comp2, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float, - real_t radRayVertex, - real_t radRefVertex, - real_t rayVertex_0, - real_t rayVertex_1, - real_t rayVertex_2, - real_t refVertex_0, - real_t refVertex_1, - real_t refVertex_2, - real_t thrVertex_0, - real_t thrVertex_1, - real_t thrVertex_2 ) const; - /// Kernel type: toMatrix + void apply_P2ToP1ElementwiseKMassIcosahedralShellMap_macro_3D( real_t* RESTRICT _data_dst, + real_t* RESTRICT _data_kEdge, + real_t* RESTRICT _data_kVertex, + real_t* RESTRICT _data_srcEdge, + real_t* RESTRICT _data_srcVertex, + real_t forVertex_0, + real_t forVertex_1, + real_t forVertex_2, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_0comp2, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_1comp2, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + real_t macro_vertex_coord_id_2comp2, + real_t macro_vertex_coord_id_3comp0, + real_t macro_vertex_coord_id_3comp1, + real_t macro_vertex_coord_id_3comp2, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float, + real_t radRayVertex, + real_t radRefVertex, + real_t rayVertex_0, + real_t rayVertex_1, + real_t rayVertex_2, + real_t refVertex_0, + real_t refVertex_1, + real_t refVertex_2, + real_t thrVertex_0, + real_t thrVertex_1, + real_t thrVertex_2 ) const; + + /// Integral: P2ToP1ElementwiseKMassIcosahedralShellMap + /// - volume element: tetrahedron, dim: 3, vertices: 4, spacedim: 3 + /// - kernel type: toMatrix + /// - loop strategy: SAWTOOTH /// - quadrature rule: Jaśkowiec-Sukumar 04 | points: 11, degree: 4 + /// - blending map: IcosahedralShellMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 2425 3867 69 33 11 0 0 4 - void toMatrix_macro_3D( idx_t* RESTRICT _data_dst, - real_t* RESTRICT _data_kEdge, - real_t* RESTRICT _data_kVertex, - idx_t* RESTRICT _data_srcEdge, - idx_t* RESTRICT _data_srcVertex, - real_t forVertex_0, - real_t forVertex_1, - real_t forVertex_2, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_0comp2, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_1comp2, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - real_t macro_vertex_coord_id_2comp2, - real_t macro_vertex_coord_id_3comp0, - real_t macro_vertex_coord_id_3comp1, - real_t macro_vertex_coord_id_3comp2, - std::shared_ptr< SparseMatrixProxy > mat, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float, - real_t radRayVertex, - real_t radRefVertex, - real_t rayVertex_0, - real_t rayVertex_1, - real_t rayVertex_2, - real_t refVertex_0, - real_t refVertex_1, - real_t refVertex_2, - real_t thrVertex_0, - real_t thrVertex_1, - real_t thrVertex_2 ) const; + void toMatrix_P2ToP1ElementwiseKMassIcosahedralShellMap_macro_3D( idx_t* RESTRICT _data_dst, + real_t* RESTRICT _data_kEdge, + real_t* RESTRICT _data_kVertex, + idx_t* RESTRICT _data_srcEdge, + idx_t* RESTRICT _data_srcVertex, + real_t forVertex_0, + real_t forVertex_1, + real_t forVertex_2, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_0comp2, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_1comp2, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + real_t macro_vertex_coord_id_2comp2, + real_t macro_vertex_coord_id_3comp0, + real_t macro_vertex_coord_id_3comp1, + real_t macro_vertex_coord_id_3comp2, + std::shared_ptr< SparseMatrixProxy > mat, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float, + real_t radRayVertex, + real_t radRefVertex, + real_t rayVertex_0, + real_t rayVertex_1, + real_t rayVertex_2, + real_t refVertex_0, + real_t refVertex_1, + real_t refVertex_2, + real_t thrVertex_0, + real_t thrVertex_1, + real_t thrVertex_2 ) const; P2Function< real_t > k; }; diff --git a/operators/k_mass/avx/P1ElementwiseKMassAnnulusMap_apply_macro_2D.cpp b/operators/k_mass/avx/P1ElementwiseKMassAnnulusMap_apply_P1ElementwiseKMassAnnulusMap_macro_2D.cpp similarity index 99% rename from operators/k_mass/avx/P1ElementwiseKMassAnnulusMap_apply_macro_2D.cpp rename to operators/k_mass/avx/P1ElementwiseKMassAnnulusMap_apply_P1ElementwiseKMassAnnulusMap_macro_2D.cpp index c776dd6f2c170c7ac41b1ea7c39fe439296c76aa..81ee593a73eec17ef1fc2edcb9aa1a28b8d69ad6 100644 --- a/operators/k_mass/avx/P1ElementwiseKMassAnnulusMap_apply_macro_2D.cpp +++ b/operators/k_mass/avx/P1ElementwiseKMassAnnulusMap_apply_P1ElementwiseKMassAnnulusMap_macro_2D.cpp @@ -45,6 +45,10 @@ + + + + @@ -58,7 +62,7 @@ namespace hyteg { namespace operatorgeneration { -void P1ElementwiseKMassAnnulusMap::apply_macro_2D( real_t * RESTRICT _data_dst, real_t * RESTRICT _data_k, real_t * RESTRICT _data_src, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const +void P1ElementwiseKMassAnnulusMap::apply_P1ElementwiseKMassAnnulusMap_macro_2D( real_t * RESTRICT _data_dst, real_t * RESTRICT _data_k, real_t * RESTRICT _data_src, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const { { const real_t _data_q_w [] = {-0.28125, 0.26041666666666669, 0.26041666666666669, 0.26041666666666669}; diff --git a/operators/k_mass/avx/P1ElementwiseKMassAnnulusMap_computeInverseDiagonalOperatorValues_macro_2D.cpp b/operators/k_mass/avx/P1ElementwiseKMassAnnulusMap_computeInverseDiagonalOperatorValues_P1ElementwiseKMassAnnulusMap_macro_2D.cpp similarity index 99% rename from operators/k_mass/avx/P1ElementwiseKMassAnnulusMap_computeInverseDiagonalOperatorValues_macro_2D.cpp rename to operators/k_mass/avx/P1ElementwiseKMassAnnulusMap_computeInverseDiagonalOperatorValues_P1ElementwiseKMassAnnulusMap_macro_2D.cpp index cf1f1cf695df5568b0bf29eb3bbbe14a75b6d73b..e5b33a9b33a2f44e438e1e41f527867db95d56fa 100644 --- a/operators/k_mass/avx/P1ElementwiseKMassAnnulusMap_computeInverseDiagonalOperatorValues_macro_2D.cpp +++ b/operators/k_mass/avx/P1ElementwiseKMassAnnulusMap_computeInverseDiagonalOperatorValues_P1ElementwiseKMassAnnulusMap_macro_2D.cpp @@ -45,6 +45,10 @@ + + + + @@ -58,7 +62,7 @@ namespace hyteg { namespace operatorgeneration { -void P1ElementwiseKMassAnnulusMap::computeInverseDiagonalOperatorValues_macro_2D( real_t * RESTRICT _data_invDiag_, real_t * RESTRICT _data_k, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const +void P1ElementwiseKMassAnnulusMap::computeInverseDiagonalOperatorValues_P1ElementwiseKMassAnnulusMap_macro_2D( real_t * RESTRICT _data_invDiag_, real_t * RESTRICT _data_k, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const { { const real_t _data_q_w [] = {-0.28125, 0.26041666666666669, 0.26041666666666669, 0.26041666666666669}; diff --git a/operators/k_mass/avx/P1ElementwiseKMassIcosahedralShellMap_apply_macro_3D.cpp b/operators/k_mass/avx/P1ElementwiseKMassIcosahedralShellMap_apply_P1ElementwiseKMassIcosahedralShellMap_macro_3D.cpp similarity index 99% rename from operators/k_mass/avx/P1ElementwiseKMassIcosahedralShellMap_apply_macro_3D.cpp rename to operators/k_mass/avx/P1ElementwiseKMassIcosahedralShellMap_apply_P1ElementwiseKMassIcosahedralShellMap_macro_3D.cpp index 7b8617a3585592c28a0eb5592de9ce4eca6731ce..af403e77d83afb71138585195878707d718a1dc0 100644 --- a/operators/k_mass/avx/P1ElementwiseKMassIcosahedralShellMap_apply_macro_3D.cpp +++ b/operators/k_mass/avx/P1ElementwiseKMassIcosahedralShellMap_apply_P1ElementwiseKMassIcosahedralShellMap_macro_3D.cpp @@ -45,6 +45,10 @@ + + + + @@ -58,7 +62,7 @@ namespace hyteg { namespace operatorgeneration { -void P1ElementwiseKMassIcosahedralShellMap::apply_macro_3D( real_t * RESTRICT _data_dst, real_t * RESTRICT _data_k, real_t * RESTRICT _data_src, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const +void P1ElementwiseKMassIcosahedralShellMap::apply_P1ElementwiseKMassIcosahedralShellMap_macro_3D( real_t * RESTRICT _data_dst, real_t * RESTRICT _data_k, real_t * RESTRICT _data_src, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const { { const real_t _data_q_w [] = {-0.1333333333333333, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983}; diff --git a/operators/k_mass/avx/P1ElementwiseKMassIcosahedralShellMap_computeInverseDiagonalOperatorValues_macro_3D.cpp b/operators/k_mass/avx/P1ElementwiseKMassIcosahedralShellMap_computeInverseDiagonalOperatorValues_P1ElementwiseKMassIcosahedralShellMap_macro_3D.cpp similarity index 99% rename from operators/k_mass/avx/P1ElementwiseKMassIcosahedralShellMap_computeInverseDiagonalOperatorValues_macro_3D.cpp rename to operators/k_mass/avx/P1ElementwiseKMassIcosahedralShellMap_computeInverseDiagonalOperatorValues_P1ElementwiseKMassIcosahedralShellMap_macro_3D.cpp index 6089632cfaab54b20ff622e4e91ac33c5c87d88e..d4047efbf04325726f23258e99a8c3af7e8dd696 100644 --- a/operators/k_mass/avx/P1ElementwiseKMassIcosahedralShellMap_computeInverseDiagonalOperatorValues_macro_3D.cpp +++ b/operators/k_mass/avx/P1ElementwiseKMassIcosahedralShellMap_computeInverseDiagonalOperatorValues_P1ElementwiseKMassIcosahedralShellMap_macro_3D.cpp @@ -45,6 +45,10 @@ + + + + @@ -58,7 +62,7 @@ namespace hyteg { namespace operatorgeneration { -void P1ElementwiseKMassIcosahedralShellMap::computeInverseDiagonalOperatorValues_macro_3D( real_t * RESTRICT _data_invDiag_, real_t * RESTRICT _data_k, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const +void P1ElementwiseKMassIcosahedralShellMap::computeInverseDiagonalOperatorValues_P1ElementwiseKMassIcosahedralShellMap_macro_3D( real_t * RESTRICT _data_invDiag_, real_t * RESTRICT _data_k, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const { { const real_t _data_q_w [] = {-0.1333333333333333, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983}; diff --git a/operators/k_mass/avx/P1ElementwiseKMass_apply_macro_2D.cpp b/operators/k_mass/avx/P1ElementwiseKMass_apply_P1ElementwiseKMass_macro_2D.cpp similarity index 99% rename from operators/k_mass/avx/P1ElementwiseKMass_apply_macro_2D.cpp rename to operators/k_mass/avx/P1ElementwiseKMass_apply_P1ElementwiseKMass_macro_2D.cpp index 0a8b075418007e3490dd8f132f3820f915df7a84..3b788b472f4380f0644d8a1903a55defd632b5dc 100644 --- a/operators/k_mass/avx/P1ElementwiseKMass_apply_macro_2D.cpp +++ b/operators/k_mass/avx/P1ElementwiseKMass_apply_P1ElementwiseKMass_macro_2D.cpp @@ -43,6 +43,10 @@ + + + + @@ -56,7 +60,7 @@ namespace hyteg { namespace operatorgeneration { -void P1ElementwiseKMass::apply_macro_2D( real_t * RESTRICT _data_dst, real_t * RESTRICT _data_k, real_t * RESTRICT _data_src, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P1ElementwiseKMass::apply_P1ElementwiseKMass_macro_2D( real_t * RESTRICT _data_dst, real_t * RESTRICT _data_k, real_t * RESTRICT _data_src, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t _data_q_w [] = {-0.28125, 0.26041666666666669, 0.26041666666666669, 0.26041666666666669}; diff --git a/operators/k_mass/avx/P1ElementwiseKMass_apply_macro_3D.cpp b/operators/k_mass/avx/P1ElementwiseKMass_apply_P1ElementwiseKMass_macro_3D.cpp similarity index 99% rename from operators/k_mass/avx/P1ElementwiseKMass_apply_macro_3D.cpp rename to operators/k_mass/avx/P1ElementwiseKMass_apply_P1ElementwiseKMass_macro_3D.cpp index 5ccb6846844b581e326ab2da24dce87df6ca889c..8960390a3cb14cd2166b2b0510140f1ce4a48a12 100644 --- a/operators/k_mass/avx/P1ElementwiseKMass_apply_macro_3D.cpp +++ b/operators/k_mass/avx/P1ElementwiseKMass_apply_P1ElementwiseKMass_macro_3D.cpp @@ -43,6 +43,10 @@ + + + + @@ -56,7 +60,7 @@ namespace hyteg { namespace operatorgeneration { -void P1ElementwiseKMass::apply_macro_3D( real_t * RESTRICT _data_dst, real_t * RESTRICT _data_k, real_t * RESTRICT _data_src, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P1ElementwiseKMass::apply_P1ElementwiseKMass_macro_3D( real_t * RESTRICT _data_dst, real_t * RESTRICT _data_k, real_t * RESTRICT _data_src, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t _data_q_w [] = {-0.1333333333333333, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983}; diff --git a/operators/k_mass/avx/P1ElementwiseKMass_computeInverseDiagonalOperatorValues_macro_2D.cpp b/operators/k_mass/avx/P1ElementwiseKMass_computeInverseDiagonalOperatorValues_P1ElementwiseKMass_macro_2D.cpp similarity index 99% rename from operators/k_mass/avx/P1ElementwiseKMass_computeInverseDiagonalOperatorValues_macro_2D.cpp rename to operators/k_mass/avx/P1ElementwiseKMass_computeInverseDiagonalOperatorValues_P1ElementwiseKMass_macro_2D.cpp index 745e3a5096ede3229bff33361630f0c5d259b89e..41ce9ee47692abdda87fe66c3c6f6b6c77b689e4 100644 --- a/operators/k_mass/avx/P1ElementwiseKMass_computeInverseDiagonalOperatorValues_macro_2D.cpp +++ b/operators/k_mass/avx/P1ElementwiseKMass_computeInverseDiagonalOperatorValues_P1ElementwiseKMass_macro_2D.cpp @@ -43,6 +43,10 @@ + + + + @@ -56,7 +60,7 @@ namespace hyteg { namespace operatorgeneration { -void P1ElementwiseKMass::computeInverseDiagonalOperatorValues_macro_2D( real_t * RESTRICT _data_invDiag_, real_t * RESTRICT _data_k, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P1ElementwiseKMass::computeInverseDiagonalOperatorValues_P1ElementwiseKMass_macro_2D( real_t * RESTRICT _data_invDiag_, real_t * RESTRICT _data_k, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t _data_q_w [] = {-0.28125, 0.26041666666666669, 0.26041666666666669, 0.26041666666666669}; diff --git a/operators/k_mass/avx/P1ElementwiseKMass_computeInverseDiagonalOperatorValues_macro_3D.cpp b/operators/k_mass/avx/P1ElementwiseKMass_computeInverseDiagonalOperatorValues_P1ElementwiseKMass_macro_3D.cpp similarity index 99% rename from operators/k_mass/avx/P1ElementwiseKMass_computeInverseDiagonalOperatorValues_macro_3D.cpp rename to operators/k_mass/avx/P1ElementwiseKMass_computeInverseDiagonalOperatorValues_P1ElementwiseKMass_macro_3D.cpp index 91886fb4103cd116779c56bd7d49fd80ac65ce3d..4a8be4ffb12e9dd937d7637db7e63ca6afa08937 100644 --- a/operators/k_mass/avx/P1ElementwiseKMass_computeInverseDiagonalOperatorValues_macro_3D.cpp +++ b/operators/k_mass/avx/P1ElementwiseKMass_computeInverseDiagonalOperatorValues_P1ElementwiseKMass_macro_3D.cpp @@ -43,6 +43,10 @@ + + + + @@ -56,7 +60,7 @@ namespace hyteg { namespace operatorgeneration { -void P1ElementwiseKMass::computeInverseDiagonalOperatorValues_macro_3D( real_t * RESTRICT _data_invDiag_, real_t * RESTRICT _data_k, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P1ElementwiseKMass::computeInverseDiagonalOperatorValues_P1ElementwiseKMass_macro_3D( real_t * RESTRICT _data_invDiag_, real_t * RESTRICT _data_k, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t _data_q_w [] = {-0.1333333333333333, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983}; diff --git a/operators/k_mass/avx/P2ElementwiseKMassAnnulusMap_apply_macro_2D.cpp b/operators/k_mass/avx/P2ElementwiseKMassAnnulusMap_apply_P2ElementwiseKMassAnnulusMap_macro_2D.cpp similarity index 99% rename from operators/k_mass/avx/P2ElementwiseKMassAnnulusMap_apply_macro_2D.cpp rename to operators/k_mass/avx/P2ElementwiseKMassAnnulusMap_apply_P2ElementwiseKMassAnnulusMap_macro_2D.cpp index c8eeaf5a1baa8e991874d6e0be7ce609050922ba..e2a9f03d0091c11e9e1e69592e2dc106acf6196b 100644 --- a/operators/k_mass/avx/P2ElementwiseKMassAnnulusMap_apply_macro_2D.cpp +++ b/operators/k_mass/avx/P2ElementwiseKMassAnnulusMap_apply_P2ElementwiseKMassAnnulusMap_macro_2D.cpp @@ -45,6 +45,10 @@ + + + + @@ -58,7 +62,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseKMassAnnulusMap::apply_macro_2D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_kEdge, real_t * RESTRICT _data_kVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const +void P2ElementwiseKMassAnnulusMap::apply_P2ElementwiseKMassAnnulusMap_macro_2D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_kEdge, real_t * RESTRICT _data_kVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const { { const real_t _data_q_w [] = {0.11169079483900581, 0.054975871827660949, 0.11169079483900581, 0.054975871827660949, 0.11169079483900581, 0.054975871827660949}; diff --git a/operators/k_mass/avx/P2ElementwiseKMassAnnulusMap_computeInverseDiagonalOperatorValues_macro_2D.cpp b/operators/k_mass/avx/P2ElementwiseKMassAnnulusMap_computeInverseDiagonalOperatorValues_P2ElementwiseKMassAnnulusMap_macro_2D.cpp similarity index 99% rename from operators/k_mass/avx/P2ElementwiseKMassAnnulusMap_computeInverseDiagonalOperatorValues_macro_2D.cpp rename to operators/k_mass/avx/P2ElementwiseKMassAnnulusMap_computeInverseDiagonalOperatorValues_P2ElementwiseKMassAnnulusMap_macro_2D.cpp index 6e61a798169802b9e80190dcf362f0a8cd309019..433b394990b93cd112b4ea216c4f560bd8a35085 100644 --- a/operators/k_mass/avx/P2ElementwiseKMassAnnulusMap_computeInverseDiagonalOperatorValues_macro_2D.cpp +++ b/operators/k_mass/avx/P2ElementwiseKMassAnnulusMap_computeInverseDiagonalOperatorValues_P2ElementwiseKMassAnnulusMap_macro_2D.cpp @@ -45,6 +45,10 @@ + + + + @@ -58,7 +62,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseKMassAnnulusMap::computeInverseDiagonalOperatorValues_macro_2D( real_t * RESTRICT _data_invDiag_Edge, real_t * RESTRICT _data_invDiag_Vertex, real_t * RESTRICT _data_kEdge, real_t * RESTRICT _data_kVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const +void P2ElementwiseKMassAnnulusMap::computeInverseDiagonalOperatorValues_P2ElementwiseKMassAnnulusMap_macro_2D( real_t * RESTRICT _data_invDiag_Edge, real_t * RESTRICT _data_invDiag_Vertex, real_t * RESTRICT _data_kEdge, real_t * RESTRICT _data_kVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const { { const real_t _data_q_w [] = {0.11169079483900581, 0.054975871827660949, 0.11169079483900581, 0.054975871827660949, 0.11169079483900581, 0.054975871827660949}; diff --git a/operators/k_mass/avx/P2ElementwiseKMassIcosahedralShellMap_apply_macro_3D.cpp b/operators/k_mass/avx/P2ElementwiseKMassIcosahedralShellMap_apply_P2ElementwiseKMassIcosahedralShellMap_macro_3D.cpp similarity index 99% rename from operators/k_mass/avx/P2ElementwiseKMassIcosahedralShellMap_apply_macro_3D.cpp rename to operators/k_mass/avx/P2ElementwiseKMassIcosahedralShellMap_apply_P2ElementwiseKMassIcosahedralShellMap_macro_3D.cpp index 0603eaa396366cfe9481d10a30f46db2d31338bb..0e20b6d51c2f583c244d29230ec8e66f0f034f2b 100644 --- a/operators/k_mass/avx/P2ElementwiseKMassIcosahedralShellMap_apply_macro_3D.cpp +++ b/operators/k_mass/avx/P2ElementwiseKMassIcosahedralShellMap_apply_P2ElementwiseKMassIcosahedralShellMap_macro_3D.cpp @@ -45,6 +45,10 @@ + + + + @@ -58,7 +62,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseKMassIcosahedralShellMap::apply_macro_3D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_kEdge, real_t * RESTRICT _data_kVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const +void P2ElementwiseKMassIcosahedralShellMap::apply_P2ElementwiseKMassIcosahedralShellMap_macro_3D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_kEdge, real_t * RESTRICT _data_kVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const { { const real_t _data_q_w [] = {0.02364442875776411, 0.0041220137385117907, 0.025486503738822306, 0.0091363578885896128, 0.0052827872295400091, 0.02526054964096567, 0.01949470769240411, 0.0094909452962763929, 0.019522077110586403, 0.018549261128954084, 0.0066770344442521412}; diff --git a/operators/k_mass/avx/P2ElementwiseKMassIcosahedralShellMap_computeInverseDiagonalOperatorValues_macro_3D.cpp b/operators/k_mass/avx/P2ElementwiseKMassIcosahedralShellMap_computeInverseDiagonalOperatorValues_P2ElementwiseKMassIcosahedralShellMap_macro_3D.cpp similarity index 99% rename from operators/k_mass/avx/P2ElementwiseKMassIcosahedralShellMap_computeInverseDiagonalOperatorValues_macro_3D.cpp rename to operators/k_mass/avx/P2ElementwiseKMassIcosahedralShellMap_computeInverseDiagonalOperatorValues_P2ElementwiseKMassIcosahedralShellMap_macro_3D.cpp index 0d225d7018462acffcc3730c0c945124ad748e97..49cfdad766181ffc0741d8a59c086dc7fbbe7231 100644 --- a/operators/k_mass/avx/P2ElementwiseKMassIcosahedralShellMap_computeInverseDiagonalOperatorValues_macro_3D.cpp +++ b/operators/k_mass/avx/P2ElementwiseKMassIcosahedralShellMap_computeInverseDiagonalOperatorValues_P2ElementwiseKMassIcosahedralShellMap_macro_3D.cpp @@ -45,6 +45,10 @@ + + + + @@ -58,7 +62,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseKMassIcosahedralShellMap::computeInverseDiagonalOperatorValues_macro_3D( real_t * RESTRICT _data_invDiag_Edge, real_t * RESTRICT _data_invDiag_Vertex, real_t * RESTRICT _data_kEdge, real_t * RESTRICT _data_kVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const +void P2ElementwiseKMassIcosahedralShellMap::computeInverseDiagonalOperatorValues_P2ElementwiseKMassIcosahedralShellMap_macro_3D( real_t * RESTRICT _data_invDiag_Edge, real_t * RESTRICT _data_invDiag_Vertex, real_t * RESTRICT _data_kEdge, real_t * RESTRICT _data_kVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const { { const real_t _data_q_w [] = {0.02364442875776411, 0.0041220137385117907, 0.025486503738822306, 0.0091363578885896128, 0.0052827872295400091, 0.02526054964096567, 0.01949470769240411, 0.0094909452962763929, 0.019522077110586403, 0.018549261128954084, 0.0066770344442521412}; diff --git a/operators/k_mass/avx/P2ElementwiseKMass_apply_macro_2D.cpp b/operators/k_mass/avx/P2ElementwiseKMass_apply_P2ElementwiseKMass_macro_2D.cpp similarity index 99% rename from operators/k_mass/avx/P2ElementwiseKMass_apply_macro_2D.cpp rename to operators/k_mass/avx/P2ElementwiseKMass_apply_P2ElementwiseKMass_macro_2D.cpp index 0895583a3ce215f157e9f264d58a6de18929ef3e..798e20857b8bab5384ce5defdd755edb4c74eb3d 100644 --- a/operators/k_mass/avx/P2ElementwiseKMass_apply_macro_2D.cpp +++ b/operators/k_mass/avx/P2ElementwiseKMass_apply_P2ElementwiseKMass_macro_2D.cpp @@ -43,6 +43,10 @@ + + + + @@ -56,7 +60,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseKMass::apply_macro_2D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_kEdge, real_t * RESTRICT _data_kVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P2ElementwiseKMass::apply_P2ElementwiseKMass_macro_2D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_kEdge, real_t * RESTRICT _data_kVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t _data_q_w [] = {0.11169079483900581, 0.054975871827660949, 0.11169079483900581, 0.054975871827660949, 0.11169079483900581, 0.054975871827660949}; diff --git a/operators/k_mass/avx/P2ElementwiseKMass_apply_macro_3D.cpp b/operators/k_mass/avx/P2ElementwiseKMass_apply_P2ElementwiseKMass_macro_3D.cpp similarity index 99% rename from operators/k_mass/avx/P2ElementwiseKMass_apply_macro_3D.cpp rename to operators/k_mass/avx/P2ElementwiseKMass_apply_P2ElementwiseKMass_macro_3D.cpp index 77716215dbf4f71fafd6176565ae9724edb00539..a8bd6e36dfbbbcf0181bf5261481362be6a81c22 100644 --- a/operators/k_mass/avx/P2ElementwiseKMass_apply_macro_3D.cpp +++ b/operators/k_mass/avx/P2ElementwiseKMass_apply_P2ElementwiseKMass_macro_3D.cpp @@ -43,6 +43,10 @@ + + + + @@ -56,7 +60,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseKMass::apply_macro_3D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_kEdge, real_t * RESTRICT _data_kVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P2ElementwiseKMass::apply_P2ElementwiseKMass_macro_3D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_kEdge, real_t * RESTRICT _data_kVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t _data_q_w [] = {0.02364442875776411, 0.0041220137385117907, 0.025486503738822306, 0.0091363578885896128, 0.0052827872295400091, 0.02526054964096567, 0.01949470769240411, 0.0094909452962763929, 0.019522077110586403, 0.018549261128954084, 0.0066770344442521412}; diff --git a/operators/k_mass/avx/P2ElementwiseKMass_computeInverseDiagonalOperatorValues_macro_2D.cpp b/operators/k_mass/avx/P2ElementwiseKMass_computeInverseDiagonalOperatorValues_P2ElementwiseKMass_macro_2D.cpp similarity index 99% rename from operators/k_mass/avx/P2ElementwiseKMass_computeInverseDiagonalOperatorValues_macro_2D.cpp rename to operators/k_mass/avx/P2ElementwiseKMass_computeInverseDiagonalOperatorValues_P2ElementwiseKMass_macro_2D.cpp index f6367d9791be0d0d4573b2c0470e9edfc8fbf874..3a559920477ea5d0776f1ee59b943b23f319bb65 100644 --- a/operators/k_mass/avx/P2ElementwiseKMass_computeInverseDiagonalOperatorValues_macro_2D.cpp +++ b/operators/k_mass/avx/P2ElementwiseKMass_computeInverseDiagonalOperatorValues_P2ElementwiseKMass_macro_2D.cpp @@ -43,6 +43,10 @@ + + + + @@ -56,7 +60,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseKMass::computeInverseDiagonalOperatorValues_macro_2D( real_t * RESTRICT _data_invDiag_Edge, real_t * RESTRICT _data_invDiag_Vertex, real_t * RESTRICT _data_kEdge, real_t * RESTRICT _data_kVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P2ElementwiseKMass::computeInverseDiagonalOperatorValues_P2ElementwiseKMass_macro_2D( real_t * RESTRICT _data_invDiag_Edge, real_t * RESTRICT _data_invDiag_Vertex, real_t * RESTRICT _data_kEdge, real_t * RESTRICT _data_kVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t _data_q_w [] = {0.11169079483900581, 0.054975871827660949, 0.11169079483900581, 0.054975871827660949, 0.11169079483900581, 0.054975871827660949}; diff --git a/operators/k_mass/avx/P2ElementwiseKMass_computeInverseDiagonalOperatorValues_macro_3D.cpp b/operators/k_mass/avx/P2ElementwiseKMass_computeInverseDiagonalOperatorValues_P2ElementwiseKMass_macro_3D.cpp similarity index 99% rename from operators/k_mass/avx/P2ElementwiseKMass_computeInverseDiagonalOperatorValues_macro_3D.cpp rename to operators/k_mass/avx/P2ElementwiseKMass_computeInverseDiagonalOperatorValues_P2ElementwiseKMass_macro_3D.cpp index b32f4fb244c95d71d9b1b38d8b508154f2b3b353..a64442b1fa5ab0743a72511fa63d882081402509 100644 --- a/operators/k_mass/avx/P2ElementwiseKMass_computeInverseDiagonalOperatorValues_macro_3D.cpp +++ b/operators/k_mass/avx/P2ElementwiseKMass_computeInverseDiagonalOperatorValues_P2ElementwiseKMass_macro_3D.cpp @@ -43,6 +43,10 @@ + + + + @@ -56,7 +60,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseKMass::computeInverseDiagonalOperatorValues_macro_3D( real_t * RESTRICT _data_invDiag_Edge, real_t * RESTRICT _data_invDiag_Vertex, real_t * RESTRICT _data_kEdge, real_t * RESTRICT _data_kVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P2ElementwiseKMass::computeInverseDiagonalOperatorValues_P2ElementwiseKMass_macro_3D( real_t * RESTRICT _data_invDiag_Edge, real_t * RESTRICT _data_invDiag_Vertex, real_t * RESTRICT _data_kEdge, real_t * RESTRICT _data_kVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t _data_q_w [] = {0.02364442875776411, 0.0041220137385117907, 0.025486503738822306, 0.0091363578885896128, 0.0052827872295400091, 0.02526054964096567, 0.01949470769240411, 0.0094909452962763929, 0.019522077110586403, 0.018549261128954084, 0.0066770344442521412}; diff --git a/operators/k_mass/avx/P2ToP1ElementwiseKMassAnnulusMap_apply_macro_2D.cpp b/operators/k_mass/avx/P2ToP1ElementwiseKMassAnnulusMap_apply_P2ToP1ElementwiseKMassAnnulusMap_macro_2D.cpp similarity index 99% rename from operators/k_mass/avx/P2ToP1ElementwiseKMassAnnulusMap_apply_macro_2D.cpp rename to operators/k_mass/avx/P2ToP1ElementwiseKMassAnnulusMap_apply_P2ToP1ElementwiseKMassAnnulusMap_macro_2D.cpp index 5d10e3c8cc0fa8d370b5d6e1d527574cfd64578b..04ffa667b8e236edb3c4709f7ef37665f6d55ce8 100644 --- a/operators/k_mass/avx/P2ToP1ElementwiseKMassAnnulusMap_apply_macro_2D.cpp +++ b/operators/k_mass/avx/P2ToP1ElementwiseKMassAnnulusMap_apply_P2ToP1ElementwiseKMassAnnulusMap_macro_2D.cpp @@ -45,6 +45,10 @@ + + + + @@ -58,7 +62,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ToP1ElementwiseKMassAnnulusMap::apply_macro_2D( real_t * RESTRICT _data_dst, real_t * RESTRICT _data_kEdge, real_t * RESTRICT _data_kVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const +void P2ToP1ElementwiseKMassAnnulusMap::apply_P2ToP1ElementwiseKMassAnnulusMap_macro_2D( real_t * RESTRICT _data_dst, real_t * RESTRICT _data_kEdge, real_t * RESTRICT _data_kVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const { { const real_t _data_q_w [] = {0.11169079483900581, 0.054975871827660949, 0.11169079483900581, 0.054975871827660949, 0.11169079483900581, 0.054975871827660949}; diff --git a/operators/k_mass/avx/P2ToP1ElementwiseKMassIcosahedralShellMap_apply_macro_3D.cpp b/operators/k_mass/avx/P2ToP1ElementwiseKMassIcosahedralShellMap_apply_P2ToP1ElementwiseKMassIcosahedralShellMap_macro_3D.cpp similarity index 99% rename from operators/k_mass/avx/P2ToP1ElementwiseKMassIcosahedralShellMap_apply_macro_3D.cpp rename to operators/k_mass/avx/P2ToP1ElementwiseKMassIcosahedralShellMap_apply_P2ToP1ElementwiseKMassIcosahedralShellMap_macro_3D.cpp index 207aa64138e4b3721472620e29961952b469a290..3058fce8a3c3d023c2a61484fa386cee05346a4a 100644 --- a/operators/k_mass/avx/P2ToP1ElementwiseKMassIcosahedralShellMap_apply_macro_3D.cpp +++ b/operators/k_mass/avx/P2ToP1ElementwiseKMassIcosahedralShellMap_apply_P2ToP1ElementwiseKMassIcosahedralShellMap_macro_3D.cpp @@ -45,6 +45,10 @@ + + + + @@ -58,7 +62,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ToP1ElementwiseKMassIcosahedralShellMap::apply_macro_3D( real_t * RESTRICT _data_dst, real_t * RESTRICT _data_kEdge, real_t * RESTRICT _data_kVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const +void P2ToP1ElementwiseKMassIcosahedralShellMap::apply_P2ToP1ElementwiseKMassIcosahedralShellMap_macro_3D( real_t * RESTRICT _data_dst, real_t * RESTRICT _data_kEdge, real_t * RESTRICT _data_kVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const { { const real_t _data_q_w [] = {0.02364442875776411, 0.0041220137385117907, 0.025486503738822306, 0.0091363578885896128, 0.0052827872295400091, 0.02526054964096567, 0.01949470769240411, 0.0094909452962763929, 0.019522077110586403, 0.018549261128954084, 0.0066770344442521412}; diff --git a/operators/k_mass/avx/P2ToP1ElementwiseKMass_apply_macro_2D.cpp b/operators/k_mass/avx/P2ToP1ElementwiseKMass_apply_P2ToP1ElementwiseKMass_macro_2D.cpp similarity index 99% rename from operators/k_mass/avx/P2ToP1ElementwiseKMass_apply_macro_2D.cpp rename to operators/k_mass/avx/P2ToP1ElementwiseKMass_apply_P2ToP1ElementwiseKMass_macro_2D.cpp index 6e3c69b5278601aa1955b098102cb93fef32104f..9a4caa039023168a41c984bfe38f094318ac93b2 100644 --- a/operators/k_mass/avx/P2ToP1ElementwiseKMass_apply_macro_2D.cpp +++ b/operators/k_mass/avx/P2ToP1ElementwiseKMass_apply_P2ToP1ElementwiseKMass_macro_2D.cpp @@ -43,6 +43,10 @@ + + + + @@ -56,7 +60,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ToP1ElementwiseKMass::apply_macro_2D( real_t * RESTRICT _data_dst, real_t * RESTRICT _data_kEdge, real_t * RESTRICT _data_kVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P2ToP1ElementwiseKMass::apply_P2ToP1ElementwiseKMass_macro_2D( real_t * RESTRICT _data_dst, real_t * RESTRICT _data_kEdge, real_t * RESTRICT _data_kVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t _data_q_w [] = {0.11169079483900581, 0.054975871827660949, 0.11169079483900581, 0.054975871827660949, 0.11169079483900581, 0.054975871827660949}; diff --git a/operators/k_mass/avx/P2ToP1ElementwiseKMass_apply_macro_3D.cpp b/operators/k_mass/avx/P2ToP1ElementwiseKMass_apply_P2ToP1ElementwiseKMass_macro_3D.cpp similarity index 99% rename from operators/k_mass/avx/P2ToP1ElementwiseKMass_apply_macro_3D.cpp rename to operators/k_mass/avx/P2ToP1ElementwiseKMass_apply_P2ToP1ElementwiseKMass_macro_3D.cpp index 05f61d829363e377a52a82b24b57238821f262cb..7f4b502402cf29f7fff4aae897b495324330206d 100644 --- a/operators/k_mass/avx/P2ToP1ElementwiseKMass_apply_macro_3D.cpp +++ b/operators/k_mass/avx/P2ToP1ElementwiseKMass_apply_P2ToP1ElementwiseKMass_macro_3D.cpp @@ -43,6 +43,10 @@ + + + + @@ -56,7 +60,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ToP1ElementwiseKMass::apply_macro_3D( real_t * RESTRICT _data_dst, real_t * RESTRICT _data_kEdge, real_t * RESTRICT _data_kVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P2ToP1ElementwiseKMass::apply_P2ToP1ElementwiseKMass_macro_3D( real_t * RESTRICT _data_dst, real_t * RESTRICT _data_kEdge, real_t * RESTRICT _data_kVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t _data_q_w [] = {0.02364442875776411, 0.0041220137385117907, 0.025486503738822306, 0.0091363578885896128, 0.0052827872295400091, 0.02526054964096567, 0.01949470769240411, 0.0094909452962763929, 0.019522077110586403, 0.018549261128954084, 0.0066770344442521412}; diff --git a/operators/k_mass/noarch/P1ElementwiseKMassAnnulusMap_apply_macro_2D.cpp b/operators/k_mass/noarch/P1ElementwiseKMassAnnulusMap_apply_P1ElementwiseKMassAnnulusMap_macro_2D.cpp similarity index 98% rename from operators/k_mass/noarch/P1ElementwiseKMassAnnulusMap_apply_macro_2D.cpp rename to operators/k_mass/noarch/P1ElementwiseKMassAnnulusMap_apply_P1ElementwiseKMassAnnulusMap_macro_2D.cpp index 1dfd53fcb0e7d6ae88efe27814ec999be2c00261..71e9d2d22433e7232427f4583ea3bc23e9ad96c5 100644 --- a/operators/k_mass/noarch/P1ElementwiseKMassAnnulusMap_apply_macro_2D.cpp +++ b/operators/k_mass/noarch/P1ElementwiseKMassAnnulusMap_apply_P1ElementwiseKMassAnnulusMap_macro_2D.cpp @@ -45,6 +45,10 @@ + + + + @@ -58,7 +62,7 @@ namespace hyteg { namespace operatorgeneration { -void P1ElementwiseKMassAnnulusMap::apply_macro_2D( real_t * RESTRICT _data_dst, real_t * RESTRICT _data_k, real_t * RESTRICT _data_src, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const +void P1ElementwiseKMassAnnulusMap::apply_P1ElementwiseKMassAnnulusMap_macro_2D( real_t * RESTRICT _data_dst, real_t * RESTRICT _data_k, real_t * RESTRICT _data_src, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const { { const real_t _data_q_w [] = {-0.28125, 0.26041666666666669, 0.26041666666666669, 0.26041666666666669}; diff --git a/operators/k_mass/noarch/P1ElementwiseKMassAnnulusMap_computeInverseDiagonalOperatorValues_macro_2D.cpp b/operators/k_mass/noarch/P1ElementwiseKMassAnnulusMap_computeInverseDiagonalOperatorValues_P1ElementwiseKMassAnnulusMap_macro_2D.cpp similarity index 98% rename from operators/k_mass/noarch/P1ElementwiseKMassAnnulusMap_computeInverseDiagonalOperatorValues_macro_2D.cpp rename to operators/k_mass/noarch/P1ElementwiseKMassAnnulusMap_computeInverseDiagonalOperatorValues_P1ElementwiseKMassAnnulusMap_macro_2D.cpp index 807eedc81e8ba7cb8e005f1059805423ac3f26ac..d4cc3e55244c8535c3e350d9379193340cff71e4 100644 --- a/operators/k_mass/noarch/P1ElementwiseKMassAnnulusMap_computeInverseDiagonalOperatorValues_macro_2D.cpp +++ b/operators/k_mass/noarch/P1ElementwiseKMassAnnulusMap_computeInverseDiagonalOperatorValues_P1ElementwiseKMassAnnulusMap_macro_2D.cpp @@ -45,6 +45,10 @@ + + + + @@ -58,7 +62,7 @@ namespace hyteg { namespace operatorgeneration { -void P1ElementwiseKMassAnnulusMap::computeInverseDiagonalOperatorValues_macro_2D( real_t * RESTRICT _data_invDiag_, real_t * RESTRICT _data_k, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const +void P1ElementwiseKMassAnnulusMap::computeInverseDiagonalOperatorValues_P1ElementwiseKMassAnnulusMap_macro_2D( real_t * RESTRICT _data_invDiag_, real_t * RESTRICT _data_k, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const { { const real_t _data_q_w [] = {-0.28125, 0.26041666666666669, 0.26041666666666669, 0.26041666666666669}; diff --git a/operators/k_mass/noarch/P1ElementwiseKMassAnnulusMap_toMatrix_macro_2D.cpp b/operators/k_mass/noarch/P1ElementwiseKMassAnnulusMap_toMatrix_P1ElementwiseKMassAnnulusMap_macro_2D.cpp similarity index 98% rename from operators/k_mass/noarch/P1ElementwiseKMassAnnulusMap_toMatrix_macro_2D.cpp rename to operators/k_mass/noarch/P1ElementwiseKMassAnnulusMap_toMatrix_P1ElementwiseKMassAnnulusMap_macro_2D.cpp index e016a2bb82a23a1e207e9e5eb69b402f5f3959d9..ae54680409f9bfa47263d4a3bdd8f5940f6df889 100644 --- a/operators/k_mass/noarch/P1ElementwiseKMassAnnulusMap_toMatrix_macro_2D.cpp +++ b/operators/k_mass/noarch/P1ElementwiseKMassAnnulusMap_toMatrix_P1ElementwiseKMassAnnulusMap_macro_2D.cpp @@ -45,6 +45,10 @@ + + + + @@ -58,7 +62,7 @@ namespace hyteg { namespace operatorgeneration { -void P1ElementwiseKMassAnnulusMap::toMatrix_macro_2D( idx_t * RESTRICT _data_dst, real_t * RESTRICT _data_k, idx_t * RESTRICT _data_src, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const +void P1ElementwiseKMassAnnulusMap::toMatrix_P1ElementwiseKMassAnnulusMap_macro_2D( idx_t * RESTRICT _data_dst, real_t * RESTRICT _data_k, idx_t * RESTRICT _data_src, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const { { const real_t _data_q_w [] = {-0.28125, 0.26041666666666669, 0.26041666666666669, 0.26041666666666669}; diff --git a/operators/k_mass/noarch/P1ElementwiseKMassIcosahedralShellMap_apply_macro_3D.cpp b/operators/k_mass/noarch/P1ElementwiseKMassIcosahedralShellMap_apply_P1ElementwiseKMassIcosahedralShellMap_macro_3D.cpp similarity index 99% rename from operators/k_mass/noarch/P1ElementwiseKMassIcosahedralShellMap_apply_macro_3D.cpp rename to operators/k_mass/noarch/P1ElementwiseKMassIcosahedralShellMap_apply_P1ElementwiseKMassIcosahedralShellMap_macro_3D.cpp index ad55c60eb5a1004ed7d2a517857416346c4e5ed8..47749c53267897b70442af3dc81b1cf29f26f53e 100644 --- a/operators/k_mass/noarch/P1ElementwiseKMassIcosahedralShellMap_apply_macro_3D.cpp +++ b/operators/k_mass/noarch/P1ElementwiseKMassIcosahedralShellMap_apply_P1ElementwiseKMassIcosahedralShellMap_macro_3D.cpp @@ -45,6 +45,10 @@ + + + + @@ -58,7 +62,7 @@ namespace hyteg { namespace operatorgeneration { -void P1ElementwiseKMassIcosahedralShellMap::apply_macro_3D( real_t * RESTRICT _data_dst, real_t * RESTRICT _data_k, real_t * RESTRICT _data_src, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const +void P1ElementwiseKMassIcosahedralShellMap::apply_P1ElementwiseKMassIcosahedralShellMap_macro_3D( real_t * RESTRICT _data_dst, real_t * RESTRICT _data_k, real_t * RESTRICT _data_src, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const { { const real_t _data_q_w [] = {-0.1333333333333333, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983}; diff --git a/operators/k_mass/noarch/P1ElementwiseKMassIcosahedralShellMap_computeInverseDiagonalOperatorValues_macro_3D.cpp b/operators/k_mass/noarch/P1ElementwiseKMassIcosahedralShellMap_computeInverseDiagonalOperatorValues_P1ElementwiseKMassIcosahedralShellMap_macro_3D.cpp similarity index 99% rename from operators/k_mass/noarch/P1ElementwiseKMassIcosahedralShellMap_computeInverseDiagonalOperatorValues_macro_3D.cpp rename to operators/k_mass/noarch/P1ElementwiseKMassIcosahedralShellMap_computeInverseDiagonalOperatorValues_P1ElementwiseKMassIcosahedralShellMap_macro_3D.cpp index e0db2013e5a09298b0ba825d8281aab4bb03de6d..823f67edbcbdfd9dc88270b2c5719e3b2ab4b2c3 100644 --- a/operators/k_mass/noarch/P1ElementwiseKMassIcosahedralShellMap_computeInverseDiagonalOperatorValues_macro_3D.cpp +++ b/operators/k_mass/noarch/P1ElementwiseKMassIcosahedralShellMap_computeInverseDiagonalOperatorValues_P1ElementwiseKMassIcosahedralShellMap_macro_3D.cpp @@ -45,6 +45,10 @@ + + + + @@ -58,7 +62,7 @@ namespace hyteg { namespace operatorgeneration { -void P1ElementwiseKMassIcosahedralShellMap::computeInverseDiagonalOperatorValues_macro_3D( real_t * RESTRICT _data_invDiag_, real_t * RESTRICT _data_k, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const +void P1ElementwiseKMassIcosahedralShellMap::computeInverseDiagonalOperatorValues_P1ElementwiseKMassIcosahedralShellMap_macro_3D( real_t * RESTRICT _data_invDiag_, real_t * RESTRICT _data_k, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const { { const real_t _data_q_w [] = {-0.1333333333333333, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983}; diff --git a/operators/k_mass/noarch/P1ElementwiseKMassIcosahedralShellMap_toMatrix_macro_3D.cpp b/operators/k_mass/noarch/P1ElementwiseKMassIcosahedralShellMap_toMatrix_P1ElementwiseKMassIcosahedralShellMap_macro_3D.cpp similarity index 99% rename from operators/k_mass/noarch/P1ElementwiseKMassIcosahedralShellMap_toMatrix_macro_3D.cpp rename to operators/k_mass/noarch/P1ElementwiseKMassIcosahedralShellMap_toMatrix_P1ElementwiseKMassIcosahedralShellMap_macro_3D.cpp index be0908ded0dafb2f70fb84b886eb0d3fde5952c4..9120fc348d71da4d2f4c162d061c6de968be1f03 100644 --- a/operators/k_mass/noarch/P1ElementwiseKMassIcosahedralShellMap_toMatrix_macro_3D.cpp +++ b/operators/k_mass/noarch/P1ElementwiseKMassIcosahedralShellMap_toMatrix_P1ElementwiseKMassIcosahedralShellMap_macro_3D.cpp @@ -45,6 +45,10 @@ + + + + @@ -58,7 +62,7 @@ namespace hyteg { namespace operatorgeneration { -void P1ElementwiseKMassIcosahedralShellMap::toMatrix_macro_3D( idx_t * RESTRICT _data_dst, real_t * RESTRICT _data_k, idx_t * RESTRICT _data_src, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const +void P1ElementwiseKMassIcosahedralShellMap::toMatrix_P1ElementwiseKMassIcosahedralShellMap_macro_3D( idx_t * RESTRICT _data_dst, real_t * RESTRICT _data_k, idx_t * RESTRICT _data_src, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const { { const real_t _data_q_w [] = {-0.1333333333333333, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983}; diff --git a/operators/k_mass/noarch/P1ElementwiseKMass_apply_macro_2D.cpp b/operators/k_mass/noarch/P1ElementwiseKMass_apply_P1ElementwiseKMass_macro_2D.cpp similarity index 97% rename from operators/k_mass/noarch/P1ElementwiseKMass_apply_macro_2D.cpp rename to operators/k_mass/noarch/P1ElementwiseKMass_apply_P1ElementwiseKMass_macro_2D.cpp index f07d06ea55ad87269c0e528e7fbcdd14d2f82007..1686847a7afa19540a29a78e49e5e1608b38c299 100644 --- a/operators/k_mass/noarch/P1ElementwiseKMass_apply_macro_2D.cpp +++ b/operators/k_mass/noarch/P1ElementwiseKMass_apply_P1ElementwiseKMass_macro_2D.cpp @@ -43,6 +43,10 @@ + + + + @@ -56,7 +60,7 @@ namespace hyteg { namespace operatorgeneration { -void P1ElementwiseKMass::apply_macro_2D( real_t * RESTRICT _data_dst, real_t * RESTRICT _data_k, real_t * RESTRICT _data_src, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P1ElementwiseKMass::apply_P1ElementwiseKMass_macro_2D( real_t * RESTRICT _data_dst, real_t * RESTRICT _data_k, real_t * RESTRICT _data_src, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t _data_q_w [] = {-0.28125, 0.26041666666666669, 0.26041666666666669, 0.26041666666666669}; diff --git a/operators/k_mass/noarch/P1ElementwiseKMass_apply_macro_3D.cpp b/operators/k_mass/noarch/P1ElementwiseKMass_apply_P1ElementwiseKMass_macro_3D.cpp similarity index 99% rename from operators/k_mass/noarch/P1ElementwiseKMass_apply_macro_3D.cpp rename to operators/k_mass/noarch/P1ElementwiseKMass_apply_P1ElementwiseKMass_macro_3D.cpp index 0f03bea42e3a31a60b0dd14ca3cf00bc0e6bc410..f5ad2b07824e43e3c211f1b7f9763553bf9db120 100644 --- a/operators/k_mass/noarch/P1ElementwiseKMass_apply_macro_3D.cpp +++ b/operators/k_mass/noarch/P1ElementwiseKMass_apply_P1ElementwiseKMass_macro_3D.cpp @@ -43,6 +43,10 @@ + + + + @@ -56,7 +60,7 @@ namespace hyteg { namespace operatorgeneration { -void P1ElementwiseKMass::apply_macro_3D( real_t * RESTRICT _data_dst, real_t * RESTRICT _data_k, real_t * RESTRICT _data_src, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P1ElementwiseKMass::apply_P1ElementwiseKMass_macro_3D( real_t * RESTRICT _data_dst, real_t * RESTRICT _data_k, real_t * RESTRICT _data_src, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t _data_q_w [] = {-0.1333333333333333, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983}; diff --git a/operators/k_mass/noarch/P1ElementwiseKMass_computeInverseDiagonalOperatorValues_macro_2D.cpp b/operators/k_mass/noarch/P1ElementwiseKMass_computeInverseDiagonalOperatorValues_P1ElementwiseKMass_macro_2D.cpp similarity index 97% rename from operators/k_mass/noarch/P1ElementwiseKMass_computeInverseDiagonalOperatorValues_macro_2D.cpp rename to operators/k_mass/noarch/P1ElementwiseKMass_computeInverseDiagonalOperatorValues_P1ElementwiseKMass_macro_2D.cpp index afe18e1f177b40829141b4dccdd022ee12288dc7..1d9ff5ca777461d2a6eca6319d534d06dbc3283b 100644 --- a/operators/k_mass/noarch/P1ElementwiseKMass_computeInverseDiagonalOperatorValues_macro_2D.cpp +++ b/operators/k_mass/noarch/P1ElementwiseKMass_computeInverseDiagonalOperatorValues_P1ElementwiseKMass_macro_2D.cpp @@ -43,6 +43,10 @@ + + + + @@ -56,7 +60,7 @@ namespace hyteg { namespace operatorgeneration { -void P1ElementwiseKMass::computeInverseDiagonalOperatorValues_macro_2D( real_t * RESTRICT _data_invDiag_, real_t * RESTRICT _data_k, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P1ElementwiseKMass::computeInverseDiagonalOperatorValues_P1ElementwiseKMass_macro_2D( real_t * RESTRICT _data_invDiag_, real_t * RESTRICT _data_k, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t _data_q_w [] = {-0.28125, 0.26041666666666669, 0.26041666666666669, 0.26041666666666669}; diff --git a/operators/k_mass/noarch/P1ElementwiseKMass_computeInverseDiagonalOperatorValues_macro_3D.cpp b/operators/k_mass/noarch/P1ElementwiseKMass_computeInverseDiagonalOperatorValues_P1ElementwiseKMass_macro_3D.cpp similarity index 99% rename from operators/k_mass/noarch/P1ElementwiseKMass_computeInverseDiagonalOperatorValues_macro_3D.cpp rename to operators/k_mass/noarch/P1ElementwiseKMass_computeInverseDiagonalOperatorValues_P1ElementwiseKMass_macro_3D.cpp index 8ca88be96fcd939356c39b9cdc7d55a09d4abef5..8cabb2ec95c90c5badf3ecd0affeb876d80e800d 100644 --- a/operators/k_mass/noarch/P1ElementwiseKMass_computeInverseDiagonalOperatorValues_macro_3D.cpp +++ b/operators/k_mass/noarch/P1ElementwiseKMass_computeInverseDiagonalOperatorValues_P1ElementwiseKMass_macro_3D.cpp @@ -43,6 +43,10 @@ + + + + @@ -56,7 +60,7 @@ namespace hyteg { namespace operatorgeneration { -void P1ElementwiseKMass::computeInverseDiagonalOperatorValues_macro_3D( real_t * RESTRICT _data_invDiag_, real_t * RESTRICT _data_k, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P1ElementwiseKMass::computeInverseDiagonalOperatorValues_P1ElementwiseKMass_macro_3D( real_t * RESTRICT _data_invDiag_, real_t * RESTRICT _data_k, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t _data_q_w [] = {-0.1333333333333333, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983}; diff --git a/operators/k_mass/noarch/P1ElementwiseKMass_toMatrix_macro_2D.cpp b/operators/k_mass/noarch/P1ElementwiseKMass_toMatrix_P1ElementwiseKMass_macro_2D.cpp similarity index 97% rename from operators/k_mass/noarch/P1ElementwiseKMass_toMatrix_macro_2D.cpp rename to operators/k_mass/noarch/P1ElementwiseKMass_toMatrix_P1ElementwiseKMass_macro_2D.cpp index 8085859a41befefdc20e8d0a3d94f530f4f59feb..6f7dab01ac20c05162068636d5233308f2bf6c66 100644 --- a/operators/k_mass/noarch/P1ElementwiseKMass_toMatrix_macro_2D.cpp +++ b/operators/k_mass/noarch/P1ElementwiseKMass_toMatrix_P1ElementwiseKMass_macro_2D.cpp @@ -43,6 +43,10 @@ + + + + @@ -56,7 +60,7 @@ namespace hyteg { namespace operatorgeneration { -void P1ElementwiseKMass::toMatrix_macro_2D( idx_t * RESTRICT _data_dst, real_t * RESTRICT _data_k, idx_t * RESTRICT _data_src, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P1ElementwiseKMass::toMatrix_P1ElementwiseKMass_macro_2D( idx_t * RESTRICT _data_dst, real_t * RESTRICT _data_k, idx_t * RESTRICT _data_src, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t _data_q_w [] = {-0.28125, 0.26041666666666669, 0.26041666666666669, 0.26041666666666669}; diff --git a/operators/k_mass/noarch/P1ElementwiseKMass_toMatrix_macro_3D.cpp b/operators/k_mass/noarch/P1ElementwiseKMass_toMatrix_P1ElementwiseKMass_macro_3D.cpp similarity index 99% rename from operators/k_mass/noarch/P1ElementwiseKMass_toMatrix_macro_3D.cpp rename to operators/k_mass/noarch/P1ElementwiseKMass_toMatrix_P1ElementwiseKMass_macro_3D.cpp index 8d2fc53da76de648fde43a0214cd067be8dfbffe..990646a8378f2d2e61a5e46e0b08321ab2bd1144 100644 --- a/operators/k_mass/noarch/P1ElementwiseKMass_toMatrix_macro_3D.cpp +++ b/operators/k_mass/noarch/P1ElementwiseKMass_toMatrix_P1ElementwiseKMass_macro_3D.cpp @@ -43,6 +43,10 @@ + + + + @@ -56,7 +60,7 @@ namespace hyteg { namespace operatorgeneration { -void P1ElementwiseKMass::toMatrix_macro_3D( idx_t * RESTRICT _data_dst, real_t * RESTRICT _data_k, idx_t * RESTRICT _data_src, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P1ElementwiseKMass::toMatrix_P1ElementwiseKMass_macro_3D( idx_t * RESTRICT _data_dst, real_t * RESTRICT _data_k, idx_t * RESTRICT _data_src, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t _data_q_w [] = {-0.1333333333333333, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983}; diff --git a/operators/k_mass/noarch/P2ElementwiseKMassAnnulusMap_apply_macro_2D.cpp b/operators/k_mass/noarch/P2ElementwiseKMassAnnulusMap_apply_P2ElementwiseKMassAnnulusMap_macro_2D.cpp similarity index 98% rename from operators/k_mass/noarch/P2ElementwiseKMassAnnulusMap_apply_macro_2D.cpp rename to operators/k_mass/noarch/P2ElementwiseKMassAnnulusMap_apply_P2ElementwiseKMassAnnulusMap_macro_2D.cpp index 83306a2150a45b7bc75e292de38bd28c24b46fd4..b33f4e319f8c2f5a0ed6d00f33d58cafcbce5977 100644 --- a/operators/k_mass/noarch/P2ElementwiseKMassAnnulusMap_apply_macro_2D.cpp +++ b/operators/k_mass/noarch/P2ElementwiseKMassAnnulusMap_apply_P2ElementwiseKMassAnnulusMap_macro_2D.cpp @@ -45,6 +45,10 @@ + + + + @@ -58,7 +62,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseKMassAnnulusMap::apply_macro_2D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_kEdge, real_t * RESTRICT _data_kVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const +void P2ElementwiseKMassAnnulusMap::apply_P2ElementwiseKMassAnnulusMap_macro_2D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_kEdge, real_t * RESTRICT _data_kVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const { { const real_t _data_q_w [] = {0.11169079483900581, 0.054975871827660949, 0.11169079483900581, 0.054975871827660949, 0.11169079483900581, 0.054975871827660949}; diff --git a/operators/k_mass/noarch/P2ElementwiseKMassAnnulusMap_computeInverseDiagonalOperatorValues_macro_2D.cpp b/operators/k_mass/noarch/P2ElementwiseKMassAnnulusMap_computeInverseDiagonalOperatorValues_P2ElementwiseKMassAnnulusMap_macro_2D.cpp similarity index 98% rename from operators/k_mass/noarch/P2ElementwiseKMassAnnulusMap_computeInverseDiagonalOperatorValues_macro_2D.cpp rename to operators/k_mass/noarch/P2ElementwiseKMassAnnulusMap_computeInverseDiagonalOperatorValues_P2ElementwiseKMassAnnulusMap_macro_2D.cpp index c1c5c8a03d7c6791cd9f0494822db2870230b806..5aab5da2cc1e7295a711c0390814464ed4be9d41 100644 --- a/operators/k_mass/noarch/P2ElementwiseKMassAnnulusMap_computeInverseDiagonalOperatorValues_macro_2D.cpp +++ b/operators/k_mass/noarch/P2ElementwiseKMassAnnulusMap_computeInverseDiagonalOperatorValues_P2ElementwiseKMassAnnulusMap_macro_2D.cpp @@ -45,6 +45,10 @@ + + + + @@ -58,7 +62,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseKMassAnnulusMap::computeInverseDiagonalOperatorValues_macro_2D( real_t * RESTRICT _data_invDiag_Edge, real_t * RESTRICT _data_invDiag_Vertex, real_t * RESTRICT _data_kEdge, real_t * RESTRICT _data_kVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const +void P2ElementwiseKMassAnnulusMap::computeInverseDiagonalOperatorValues_P2ElementwiseKMassAnnulusMap_macro_2D( real_t * RESTRICT _data_invDiag_Edge, real_t * RESTRICT _data_invDiag_Vertex, real_t * RESTRICT _data_kEdge, real_t * RESTRICT _data_kVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const { { const real_t _data_q_w [] = {0.11169079483900581, 0.054975871827660949, 0.11169079483900581, 0.054975871827660949, 0.11169079483900581, 0.054975871827660949}; diff --git a/operators/k_mass/noarch/P2ElementwiseKMassAnnulusMap_toMatrix_macro_2D.cpp b/operators/k_mass/noarch/P2ElementwiseKMassAnnulusMap_toMatrix_P2ElementwiseKMassAnnulusMap_macro_2D.cpp similarity index 98% rename from operators/k_mass/noarch/P2ElementwiseKMassAnnulusMap_toMatrix_macro_2D.cpp rename to operators/k_mass/noarch/P2ElementwiseKMassAnnulusMap_toMatrix_P2ElementwiseKMassAnnulusMap_macro_2D.cpp index 88fd0df594be2b5d43d6e260e42ea11c4a9007fa..bf38bd105dff96b1c6434439bc920a386d382108 100644 --- a/operators/k_mass/noarch/P2ElementwiseKMassAnnulusMap_toMatrix_macro_2D.cpp +++ b/operators/k_mass/noarch/P2ElementwiseKMassAnnulusMap_toMatrix_P2ElementwiseKMassAnnulusMap_macro_2D.cpp @@ -45,6 +45,10 @@ + + + + @@ -58,7 +62,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseKMassAnnulusMap::toMatrix_macro_2D( idx_t * RESTRICT _data_dstEdge, idx_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_kEdge, real_t * RESTRICT _data_kVertex, idx_t * RESTRICT _data_srcEdge, idx_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const +void P2ElementwiseKMassAnnulusMap::toMatrix_P2ElementwiseKMassAnnulusMap_macro_2D( idx_t * RESTRICT _data_dstEdge, idx_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_kEdge, real_t * RESTRICT _data_kVertex, idx_t * RESTRICT _data_srcEdge, idx_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const { { const real_t _data_q_w [] = {0.11169079483900581, 0.054975871827660949, 0.11169079483900581, 0.054975871827660949, 0.11169079483900581, 0.054975871827660949}; diff --git a/operators/k_mass/noarch/P2ElementwiseKMassIcosahedralShellMap_apply_macro_3D.cpp b/operators/k_mass/noarch/P2ElementwiseKMassIcosahedralShellMap_apply_P2ElementwiseKMassIcosahedralShellMap_macro_3D.cpp similarity index 99% rename from operators/k_mass/noarch/P2ElementwiseKMassIcosahedralShellMap_apply_macro_3D.cpp rename to operators/k_mass/noarch/P2ElementwiseKMassIcosahedralShellMap_apply_P2ElementwiseKMassIcosahedralShellMap_macro_3D.cpp index 30a275c01205002ab553dc1c10d3df44318c24f0..71beb348fd1e52d2d9e40190213f49f059a9c365 100644 --- a/operators/k_mass/noarch/P2ElementwiseKMassIcosahedralShellMap_apply_macro_3D.cpp +++ b/operators/k_mass/noarch/P2ElementwiseKMassIcosahedralShellMap_apply_P2ElementwiseKMassIcosahedralShellMap_macro_3D.cpp @@ -45,6 +45,10 @@ + + + + @@ -58,7 +62,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseKMassIcosahedralShellMap::apply_macro_3D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_kEdge, real_t * RESTRICT _data_kVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const +void P2ElementwiseKMassIcosahedralShellMap::apply_P2ElementwiseKMassIcosahedralShellMap_macro_3D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_kEdge, real_t * RESTRICT _data_kVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const { { const real_t _data_q_w [] = {0.02364442875776411, 0.0041220137385117907, 0.025486503738822306, 0.0091363578885896128, 0.0052827872295400091, 0.02526054964096567, 0.01949470769240411, 0.0094909452962763929, 0.019522077110586403, 0.018549261128954084, 0.0066770344442521412}; diff --git a/operators/k_mass/noarch/P2ElementwiseKMassIcosahedralShellMap_computeInverseDiagonalOperatorValues_macro_3D.cpp b/operators/k_mass/noarch/P2ElementwiseKMassIcosahedralShellMap_computeInverseDiagonalOperatorValues_P2ElementwiseKMassIcosahedralShellMap_macro_3D.cpp similarity index 99% rename from operators/k_mass/noarch/P2ElementwiseKMassIcosahedralShellMap_computeInverseDiagonalOperatorValues_macro_3D.cpp rename to operators/k_mass/noarch/P2ElementwiseKMassIcosahedralShellMap_computeInverseDiagonalOperatorValues_P2ElementwiseKMassIcosahedralShellMap_macro_3D.cpp index 37bea1c6c97e969c7cd851e28ba319bfe090eb27..cc866e9f4980c71f0a1a189be10828ff868bb812 100644 --- a/operators/k_mass/noarch/P2ElementwiseKMassIcosahedralShellMap_computeInverseDiagonalOperatorValues_macro_3D.cpp +++ b/operators/k_mass/noarch/P2ElementwiseKMassIcosahedralShellMap_computeInverseDiagonalOperatorValues_P2ElementwiseKMassIcosahedralShellMap_macro_3D.cpp @@ -45,6 +45,10 @@ + + + + @@ -58,7 +62,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseKMassIcosahedralShellMap::computeInverseDiagonalOperatorValues_macro_3D( real_t * RESTRICT _data_invDiag_Edge, real_t * RESTRICT _data_invDiag_Vertex, real_t * RESTRICT _data_kEdge, real_t * RESTRICT _data_kVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const +void P2ElementwiseKMassIcosahedralShellMap::computeInverseDiagonalOperatorValues_P2ElementwiseKMassIcosahedralShellMap_macro_3D( real_t * RESTRICT _data_invDiag_Edge, real_t * RESTRICT _data_invDiag_Vertex, real_t * RESTRICT _data_kEdge, real_t * RESTRICT _data_kVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const { { const real_t _data_q_w [] = {0.02364442875776411, 0.0041220137385117907, 0.025486503738822306, 0.0091363578885896128, 0.0052827872295400091, 0.02526054964096567, 0.01949470769240411, 0.0094909452962763929, 0.019522077110586403, 0.018549261128954084, 0.0066770344442521412}; diff --git a/operators/k_mass/noarch/P2ElementwiseKMassIcosahedralShellMap_toMatrix_macro_3D.cpp b/operators/k_mass/noarch/P2ElementwiseKMassIcosahedralShellMap_toMatrix_P2ElementwiseKMassIcosahedralShellMap_macro_3D.cpp similarity index 99% rename from operators/k_mass/noarch/P2ElementwiseKMassIcosahedralShellMap_toMatrix_macro_3D.cpp rename to operators/k_mass/noarch/P2ElementwiseKMassIcosahedralShellMap_toMatrix_P2ElementwiseKMassIcosahedralShellMap_macro_3D.cpp index 1e9383dfd40362319666de3d17f0821b2aaa6965..fe178fb22ef1f77c555759268fdafc50372dba49 100644 --- a/operators/k_mass/noarch/P2ElementwiseKMassIcosahedralShellMap_toMatrix_macro_3D.cpp +++ b/operators/k_mass/noarch/P2ElementwiseKMassIcosahedralShellMap_toMatrix_P2ElementwiseKMassIcosahedralShellMap_macro_3D.cpp @@ -45,6 +45,10 @@ + + + + @@ -58,7 +62,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseKMassIcosahedralShellMap::toMatrix_macro_3D( idx_t * RESTRICT _data_dstEdge, idx_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_kEdge, real_t * RESTRICT _data_kVertex, idx_t * RESTRICT _data_srcEdge, idx_t * RESTRICT _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const +void P2ElementwiseKMassIcosahedralShellMap::toMatrix_P2ElementwiseKMassIcosahedralShellMap_macro_3D( idx_t * RESTRICT _data_dstEdge, idx_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_kEdge, real_t * RESTRICT _data_kVertex, idx_t * RESTRICT _data_srcEdge, idx_t * RESTRICT _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const { { const real_t _data_q_w [] = {0.02364442875776411, 0.0041220137385117907, 0.025486503738822306, 0.0091363578885896128, 0.0052827872295400091, 0.02526054964096567, 0.01949470769240411, 0.0094909452962763929, 0.019522077110586403, 0.018549261128954084, 0.0066770344442521412}; diff --git a/operators/k_mass/noarch/P2ElementwiseKMass_apply_macro_2D.cpp b/operators/k_mass/noarch/P2ElementwiseKMass_apply_P2ElementwiseKMass_macro_2D.cpp similarity index 98% rename from operators/k_mass/noarch/P2ElementwiseKMass_apply_macro_2D.cpp rename to operators/k_mass/noarch/P2ElementwiseKMass_apply_P2ElementwiseKMass_macro_2D.cpp index 09140703007b05c418276d77221ec55450909926..65bfd3c72b48f30b9ea35b228b9be79704ecdef3 100644 --- a/operators/k_mass/noarch/P2ElementwiseKMass_apply_macro_2D.cpp +++ b/operators/k_mass/noarch/P2ElementwiseKMass_apply_P2ElementwiseKMass_macro_2D.cpp @@ -43,6 +43,10 @@ + + + + @@ -56,7 +60,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseKMass::apply_macro_2D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_kEdge, real_t * RESTRICT _data_kVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P2ElementwiseKMass::apply_P2ElementwiseKMass_macro_2D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_kEdge, real_t * RESTRICT _data_kVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t _data_q_w [] = {0.11169079483900581, 0.054975871827660949, 0.11169079483900581, 0.054975871827660949, 0.11169079483900581, 0.054975871827660949}; diff --git a/operators/k_mass/noarch/P2ElementwiseKMass_apply_macro_3D.cpp b/operators/k_mass/noarch/P2ElementwiseKMass_apply_P2ElementwiseKMass_macro_3D.cpp similarity index 99% rename from operators/k_mass/noarch/P2ElementwiseKMass_apply_macro_3D.cpp rename to operators/k_mass/noarch/P2ElementwiseKMass_apply_P2ElementwiseKMass_macro_3D.cpp index 6304e35f0850f248fba29a736333241ba3742f5a..0192f7b9ae4e44bfc9fbcbf7a77cb29fa4a8ef7c 100644 --- a/operators/k_mass/noarch/P2ElementwiseKMass_apply_macro_3D.cpp +++ b/operators/k_mass/noarch/P2ElementwiseKMass_apply_P2ElementwiseKMass_macro_3D.cpp @@ -43,6 +43,10 @@ + + + + @@ -56,7 +60,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseKMass::apply_macro_3D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_kEdge, real_t * RESTRICT _data_kVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P2ElementwiseKMass::apply_P2ElementwiseKMass_macro_3D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_kEdge, real_t * RESTRICT _data_kVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t _data_q_w [] = {0.02364442875776411, 0.0041220137385117907, 0.025486503738822306, 0.0091363578885896128, 0.0052827872295400091, 0.02526054964096567, 0.01949470769240411, 0.0094909452962763929, 0.019522077110586403, 0.018549261128954084, 0.0066770344442521412}; diff --git a/operators/k_mass/noarch/P2ElementwiseKMass_computeInverseDiagonalOperatorValues_macro_2D.cpp b/operators/k_mass/noarch/P2ElementwiseKMass_computeInverseDiagonalOperatorValues_P2ElementwiseKMass_macro_2D.cpp similarity index 98% rename from operators/k_mass/noarch/P2ElementwiseKMass_computeInverseDiagonalOperatorValues_macro_2D.cpp rename to operators/k_mass/noarch/P2ElementwiseKMass_computeInverseDiagonalOperatorValues_P2ElementwiseKMass_macro_2D.cpp index 1d2a08e7d5f003c4c7ccf99b9c124ec33299e3da..efc0a36f66f52ca19100c35cbe6ed7b8e920e1c0 100644 --- a/operators/k_mass/noarch/P2ElementwiseKMass_computeInverseDiagonalOperatorValues_macro_2D.cpp +++ b/operators/k_mass/noarch/P2ElementwiseKMass_computeInverseDiagonalOperatorValues_P2ElementwiseKMass_macro_2D.cpp @@ -43,6 +43,10 @@ + + + + @@ -56,7 +60,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseKMass::computeInverseDiagonalOperatorValues_macro_2D( real_t * RESTRICT _data_invDiag_Edge, real_t * RESTRICT _data_invDiag_Vertex, real_t * RESTRICT _data_kEdge, real_t * RESTRICT _data_kVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P2ElementwiseKMass::computeInverseDiagonalOperatorValues_P2ElementwiseKMass_macro_2D( real_t * RESTRICT _data_invDiag_Edge, real_t * RESTRICT _data_invDiag_Vertex, real_t * RESTRICT _data_kEdge, real_t * RESTRICT _data_kVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t _data_q_w [] = {0.11169079483900581, 0.054975871827660949, 0.11169079483900581, 0.054975871827660949, 0.11169079483900581, 0.054975871827660949}; diff --git a/operators/k_mass/noarch/P2ElementwiseKMass_computeInverseDiagonalOperatorValues_macro_3D.cpp b/operators/k_mass/noarch/P2ElementwiseKMass_computeInverseDiagonalOperatorValues_P2ElementwiseKMass_macro_3D.cpp similarity index 99% rename from operators/k_mass/noarch/P2ElementwiseKMass_computeInverseDiagonalOperatorValues_macro_3D.cpp rename to operators/k_mass/noarch/P2ElementwiseKMass_computeInverseDiagonalOperatorValues_P2ElementwiseKMass_macro_3D.cpp index 61a30778a1ff2d4052aaa237efe572ae0a188b03..cfc48f0cda53d8b0366fefa91f1f4746e9a78945 100644 --- a/operators/k_mass/noarch/P2ElementwiseKMass_computeInverseDiagonalOperatorValues_macro_3D.cpp +++ b/operators/k_mass/noarch/P2ElementwiseKMass_computeInverseDiagonalOperatorValues_P2ElementwiseKMass_macro_3D.cpp @@ -43,6 +43,10 @@ + + + + @@ -56,7 +60,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseKMass::computeInverseDiagonalOperatorValues_macro_3D( real_t * RESTRICT _data_invDiag_Edge, real_t * RESTRICT _data_invDiag_Vertex, real_t * RESTRICT _data_kEdge, real_t * RESTRICT _data_kVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P2ElementwiseKMass::computeInverseDiagonalOperatorValues_P2ElementwiseKMass_macro_3D( real_t * RESTRICT _data_invDiag_Edge, real_t * RESTRICT _data_invDiag_Vertex, real_t * RESTRICT _data_kEdge, real_t * RESTRICT _data_kVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t _data_q_w [] = {0.02364442875776411, 0.0041220137385117907, 0.025486503738822306, 0.0091363578885896128, 0.0052827872295400091, 0.02526054964096567, 0.01949470769240411, 0.0094909452962763929, 0.019522077110586403, 0.018549261128954084, 0.0066770344442521412}; diff --git a/operators/k_mass/noarch/P2ElementwiseKMass_toMatrix_macro_2D.cpp b/operators/k_mass/noarch/P2ElementwiseKMass_toMatrix_P2ElementwiseKMass_macro_2D.cpp similarity index 98% rename from operators/k_mass/noarch/P2ElementwiseKMass_toMatrix_macro_2D.cpp rename to operators/k_mass/noarch/P2ElementwiseKMass_toMatrix_P2ElementwiseKMass_macro_2D.cpp index 6036f21b1ff745481224d411b24489036dcd8120..2fcab8262f3dd583bfe8ef58ea95d76e96fc3768 100644 --- a/operators/k_mass/noarch/P2ElementwiseKMass_toMatrix_macro_2D.cpp +++ b/operators/k_mass/noarch/P2ElementwiseKMass_toMatrix_P2ElementwiseKMass_macro_2D.cpp @@ -43,6 +43,10 @@ + + + + @@ -56,7 +60,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseKMass::toMatrix_macro_2D( idx_t * RESTRICT _data_dstEdge, idx_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_kEdge, real_t * RESTRICT _data_kVertex, idx_t * RESTRICT _data_srcEdge, idx_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P2ElementwiseKMass::toMatrix_P2ElementwiseKMass_macro_2D( idx_t * RESTRICT _data_dstEdge, idx_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_kEdge, real_t * RESTRICT _data_kVertex, idx_t * RESTRICT _data_srcEdge, idx_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t _data_q_w [] = {0.11169079483900581, 0.054975871827660949, 0.11169079483900581, 0.054975871827660949, 0.11169079483900581, 0.054975871827660949}; diff --git a/operators/k_mass/noarch/P2ElementwiseKMass_toMatrix_macro_3D.cpp b/operators/k_mass/noarch/P2ElementwiseKMass_toMatrix_P2ElementwiseKMass_macro_3D.cpp similarity index 99% rename from operators/k_mass/noarch/P2ElementwiseKMass_toMatrix_macro_3D.cpp rename to operators/k_mass/noarch/P2ElementwiseKMass_toMatrix_P2ElementwiseKMass_macro_3D.cpp index 6278372f3467ca4ea509b1277b91180e9316669c..8e73824517b2fdca034a5700a368a273e47462cf 100644 --- a/operators/k_mass/noarch/P2ElementwiseKMass_toMatrix_macro_3D.cpp +++ b/operators/k_mass/noarch/P2ElementwiseKMass_toMatrix_P2ElementwiseKMass_macro_3D.cpp @@ -43,6 +43,10 @@ + + + + @@ -56,7 +60,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseKMass::toMatrix_macro_3D( idx_t * RESTRICT _data_dstEdge, idx_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_kEdge, real_t * RESTRICT _data_kVertex, idx_t * RESTRICT _data_srcEdge, idx_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P2ElementwiseKMass::toMatrix_P2ElementwiseKMass_macro_3D( idx_t * RESTRICT _data_dstEdge, idx_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_kEdge, real_t * RESTRICT _data_kVertex, idx_t * RESTRICT _data_srcEdge, idx_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t _data_q_w [] = {0.02364442875776411, 0.0041220137385117907, 0.025486503738822306, 0.0091363578885896128, 0.0052827872295400091, 0.02526054964096567, 0.01949470769240411, 0.0094909452962763929, 0.019522077110586403, 0.018549261128954084, 0.0066770344442521412}; diff --git a/operators/k_mass/noarch/P2ToP1ElementwiseKMassAnnulusMap_apply_macro_2D.cpp b/operators/k_mass/noarch/P2ToP1ElementwiseKMassAnnulusMap_apply_P2ToP1ElementwiseKMassAnnulusMap_macro_2D.cpp similarity index 98% rename from operators/k_mass/noarch/P2ToP1ElementwiseKMassAnnulusMap_apply_macro_2D.cpp rename to operators/k_mass/noarch/P2ToP1ElementwiseKMassAnnulusMap_apply_P2ToP1ElementwiseKMassAnnulusMap_macro_2D.cpp index d4947a80e5f46d7da39fb1a8b6fe6a311782b615..195afddac451608f4b325e2e34868c9d7f3dd8d3 100644 --- a/operators/k_mass/noarch/P2ToP1ElementwiseKMassAnnulusMap_apply_macro_2D.cpp +++ b/operators/k_mass/noarch/P2ToP1ElementwiseKMassAnnulusMap_apply_P2ToP1ElementwiseKMassAnnulusMap_macro_2D.cpp @@ -45,6 +45,10 @@ + + + + @@ -58,7 +62,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ToP1ElementwiseKMassAnnulusMap::apply_macro_2D( real_t * RESTRICT _data_dst, real_t * RESTRICT _data_kEdge, real_t * RESTRICT _data_kVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const +void P2ToP1ElementwiseKMassAnnulusMap::apply_P2ToP1ElementwiseKMassAnnulusMap_macro_2D( real_t * RESTRICT _data_dst, real_t * RESTRICT _data_kEdge, real_t * RESTRICT _data_kVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const { { const real_t _data_q_w [] = {0.11169079483900581, 0.054975871827660949, 0.11169079483900581, 0.054975871827660949, 0.11169079483900581, 0.054975871827660949}; diff --git a/operators/k_mass/noarch/P2ToP1ElementwiseKMassAnnulusMap_toMatrix_macro_2D.cpp b/operators/k_mass/noarch/P2ToP1ElementwiseKMassAnnulusMap_toMatrix_P2ToP1ElementwiseKMassAnnulusMap_macro_2D.cpp similarity index 98% rename from operators/k_mass/noarch/P2ToP1ElementwiseKMassAnnulusMap_toMatrix_macro_2D.cpp rename to operators/k_mass/noarch/P2ToP1ElementwiseKMassAnnulusMap_toMatrix_P2ToP1ElementwiseKMassAnnulusMap_macro_2D.cpp index fd1d142373bd4058c6bba8f5a9f56dd108ca172e..071cafb278197c93a60145d82bb4c854baf98d40 100644 --- a/operators/k_mass/noarch/P2ToP1ElementwiseKMassAnnulusMap_toMatrix_macro_2D.cpp +++ b/operators/k_mass/noarch/P2ToP1ElementwiseKMassAnnulusMap_toMatrix_P2ToP1ElementwiseKMassAnnulusMap_macro_2D.cpp @@ -45,6 +45,10 @@ + + + + @@ -58,7 +62,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ToP1ElementwiseKMassAnnulusMap::toMatrix_macro_2D( idx_t * RESTRICT _data_dst, real_t * RESTRICT _data_kEdge, real_t * RESTRICT _data_kVertex, idx_t * RESTRICT _data_srcEdge, idx_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const +void P2ToP1ElementwiseKMassAnnulusMap::toMatrix_P2ToP1ElementwiseKMassAnnulusMap_macro_2D( idx_t * RESTRICT _data_dst, real_t * RESTRICT _data_kEdge, real_t * RESTRICT _data_kVertex, idx_t * RESTRICT _data_srcEdge, idx_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const { { const real_t _data_q_w [] = {0.11169079483900581, 0.054975871827660949, 0.11169079483900581, 0.054975871827660949, 0.11169079483900581, 0.054975871827660949}; diff --git a/operators/k_mass/noarch/P2ToP1ElementwiseKMassIcosahedralShellMap_apply_macro_3D.cpp b/operators/k_mass/noarch/P2ToP1ElementwiseKMassIcosahedralShellMap_apply_P2ToP1ElementwiseKMassIcosahedralShellMap_macro_3D.cpp similarity index 99% rename from operators/k_mass/noarch/P2ToP1ElementwiseKMassIcosahedralShellMap_apply_macro_3D.cpp rename to operators/k_mass/noarch/P2ToP1ElementwiseKMassIcosahedralShellMap_apply_P2ToP1ElementwiseKMassIcosahedralShellMap_macro_3D.cpp index f16047bcd5fccc8b74e330891c3de95cc6117120..e84fa7b7b46dec874ef662c7a42262862476d241 100644 --- a/operators/k_mass/noarch/P2ToP1ElementwiseKMassIcosahedralShellMap_apply_macro_3D.cpp +++ b/operators/k_mass/noarch/P2ToP1ElementwiseKMassIcosahedralShellMap_apply_P2ToP1ElementwiseKMassIcosahedralShellMap_macro_3D.cpp @@ -45,6 +45,10 @@ + + + + @@ -58,7 +62,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ToP1ElementwiseKMassIcosahedralShellMap::apply_macro_3D( real_t * RESTRICT _data_dst, real_t * RESTRICT _data_kEdge, real_t * RESTRICT _data_kVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const +void P2ToP1ElementwiseKMassIcosahedralShellMap::apply_P2ToP1ElementwiseKMassIcosahedralShellMap_macro_3D( real_t * RESTRICT _data_dst, real_t * RESTRICT _data_kEdge, real_t * RESTRICT _data_kVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const { { const real_t _data_q_w [] = {0.02364442875776411, 0.0041220137385117907, 0.025486503738822306, 0.0091363578885896128, 0.0052827872295400091, 0.02526054964096567, 0.01949470769240411, 0.0094909452962763929, 0.019522077110586403, 0.018549261128954084, 0.0066770344442521412}; diff --git a/operators/k_mass/noarch/P2ToP1ElementwiseKMassIcosahedralShellMap_toMatrix_macro_3D.cpp b/operators/k_mass/noarch/P2ToP1ElementwiseKMassIcosahedralShellMap_toMatrix_P2ToP1ElementwiseKMassIcosahedralShellMap_macro_3D.cpp similarity index 99% rename from operators/k_mass/noarch/P2ToP1ElementwiseKMassIcosahedralShellMap_toMatrix_macro_3D.cpp rename to operators/k_mass/noarch/P2ToP1ElementwiseKMassIcosahedralShellMap_toMatrix_P2ToP1ElementwiseKMassIcosahedralShellMap_macro_3D.cpp index 669f4f0a7043fc5d5691a25f21b106237d2eef86..7ba05ac7b3d8cc4a52342eef049bcb923b827da2 100644 --- a/operators/k_mass/noarch/P2ToP1ElementwiseKMassIcosahedralShellMap_toMatrix_macro_3D.cpp +++ b/operators/k_mass/noarch/P2ToP1ElementwiseKMassIcosahedralShellMap_toMatrix_P2ToP1ElementwiseKMassIcosahedralShellMap_macro_3D.cpp @@ -45,6 +45,10 @@ + + + + @@ -58,7 +62,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ToP1ElementwiseKMassIcosahedralShellMap::toMatrix_macro_3D( idx_t * RESTRICT _data_dst, real_t * RESTRICT _data_kEdge, real_t * RESTRICT _data_kVertex, idx_t * RESTRICT _data_srcEdge, idx_t * RESTRICT _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const +void P2ToP1ElementwiseKMassIcosahedralShellMap::toMatrix_P2ToP1ElementwiseKMassIcosahedralShellMap_macro_3D( idx_t * RESTRICT _data_dst, real_t * RESTRICT _data_kEdge, real_t * RESTRICT _data_kVertex, idx_t * RESTRICT _data_srcEdge, idx_t * RESTRICT _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const { { const real_t _data_q_w [] = {0.02364442875776411, 0.0041220137385117907, 0.025486503738822306, 0.0091363578885896128, 0.0052827872295400091, 0.02526054964096567, 0.01949470769240411, 0.0094909452962763929, 0.019522077110586403, 0.018549261128954084, 0.0066770344442521412}; diff --git a/operators/k_mass/noarch/P2ToP1ElementwiseKMass_apply_macro_2D.cpp b/operators/k_mass/noarch/P2ToP1ElementwiseKMass_apply_P2ToP1ElementwiseKMass_macro_2D.cpp similarity index 98% rename from operators/k_mass/noarch/P2ToP1ElementwiseKMass_apply_macro_2D.cpp rename to operators/k_mass/noarch/P2ToP1ElementwiseKMass_apply_P2ToP1ElementwiseKMass_macro_2D.cpp index 0b07e33354f862e2b58203a074dc069c00fb7240..24c2c68f129498f35b8101abfbf2b200005aaf7b 100644 --- a/operators/k_mass/noarch/P2ToP1ElementwiseKMass_apply_macro_2D.cpp +++ b/operators/k_mass/noarch/P2ToP1ElementwiseKMass_apply_P2ToP1ElementwiseKMass_macro_2D.cpp @@ -43,6 +43,10 @@ + + + + @@ -56,7 +60,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ToP1ElementwiseKMass::apply_macro_2D( real_t * RESTRICT _data_dst, real_t * RESTRICT _data_kEdge, real_t * RESTRICT _data_kVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P2ToP1ElementwiseKMass::apply_P2ToP1ElementwiseKMass_macro_2D( real_t * RESTRICT _data_dst, real_t * RESTRICT _data_kEdge, real_t * RESTRICT _data_kVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t _data_q_w [] = {0.11169079483900581, 0.054975871827660949, 0.11169079483900581, 0.054975871827660949, 0.11169079483900581, 0.054975871827660949}; diff --git a/operators/k_mass/noarch/P2ToP1ElementwiseKMass_apply_macro_3D.cpp b/operators/k_mass/noarch/P2ToP1ElementwiseKMass_apply_P2ToP1ElementwiseKMass_macro_3D.cpp similarity index 99% rename from operators/k_mass/noarch/P2ToP1ElementwiseKMass_apply_macro_3D.cpp rename to operators/k_mass/noarch/P2ToP1ElementwiseKMass_apply_P2ToP1ElementwiseKMass_macro_3D.cpp index be10ce61c5006a829b45041d9a932ef9a520279f..2e061eb20c466daf14cb35806f40c863f5db1462 100644 --- a/operators/k_mass/noarch/P2ToP1ElementwiseKMass_apply_macro_3D.cpp +++ b/operators/k_mass/noarch/P2ToP1ElementwiseKMass_apply_P2ToP1ElementwiseKMass_macro_3D.cpp @@ -43,6 +43,10 @@ + + + + @@ -56,7 +60,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ToP1ElementwiseKMass::apply_macro_3D( real_t * RESTRICT _data_dst, real_t * RESTRICT _data_kEdge, real_t * RESTRICT _data_kVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P2ToP1ElementwiseKMass::apply_P2ToP1ElementwiseKMass_macro_3D( real_t * RESTRICT _data_dst, real_t * RESTRICT _data_kEdge, real_t * RESTRICT _data_kVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t _data_q_w [] = {0.02364442875776411, 0.0041220137385117907, 0.025486503738822306, 0.0091363578885896128, 0.0052827872295400091, 0.02526054964096567, 0.01949470769240411, 0.0094909452962763929, 0.019522077110586403, 0.018549261128954084, 0.0066770344442521412}; diff --git a/operators/k_mass/noarch/P2ToP1ElementwiseKMass_toMatrix_macro_2D.cpp b/operators/k_mass/noarch/P2ToP1ElementwiseKMass_toMatrix_P2ToP1ElementwiseKMass_macro_2D.cpp similarity index 98% rename from operators/k_mass/noarch/P2ToP1ElementwiseKMass_toMatrix_macro_2D.cpp rename to operators/k_mass/noarch/P2ToP1ElementwiseKMass_toMatrix_P2ToP1ElementwiseKMass_macro_2D.cpp index 37bd3800bc0ea879fa6e136f2caf0a155154df1a..b896d870774e2db251d3599bea35635779ce65a6 100644 --- a/operators/k_mass/noarch/P2ToP1ElementwiseKMass_toMatrix_macro_2D.cpp +++ b/operators/k_mass/noarch/P2ToP1ElementwiseKMass_toMatrix_P2ToP1ElementwiseKMass_macro_2D.cpp @@ -43,6 +43,10 @@ + + + + @@ -56,7 +60,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ToP1ElementwiseKMass::toMatrix_macro_2D( idx_t * RESTRICT _data_dst, real_t * RESTRICT _data_kEdge, real_t * RESTRICT _data_kVertex, idx_t * RESTRICT _data_srcEdge, idx_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P2ToP1ElementwiseKMass::toMatrix_P2ToP1ElementwiseKMass_macro_2D( idx_t * RESTRICT _data_dst, real_t * RESTRICT _data_kEdge, real_t * RESTRICT _data_kVertex, idx_t * RESTRICT _data_srcEdge, idx_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t _data_q_w [] = {0.11169079483900581, 0.054975871827660949, 0.11169079483900581, 0.054975871827660949, 0.11169079483900581, 0.054975871827660949}; diff --git a/operators/k_mass/noarch/P2ToP1ElementwiseKMass_toMatrix_macro_3D.cpp b/operators/k_mass/noarch/P2ToP1ElementwiseKMass_toMatrix_P2ToP1ElementwiseKMass_macro_3D.cpp similarity index 99% rename from operators/k_mass/noarch/P2ToP1ElementwiseKMass_toMatrix_macro_3D.cpp rename to operators/k_mass/noarch/P2ToP1ElementwiseKMass_toMatrix_P2ToP1ElementwiseKMass_macro_3D.cpp index 38e31c2383a6f98da71c5a3f40bfd9143c19a42d..16fa83dc7003bd20395dba28cbed45138012faaf 100644 --- a/operators/k_mass/noarch/P2ToP1ElementwiseKMass_toMatrix_macro_3D.cpp +++ b/operators/k_mass/noarch/P2ToP1ElementwiseKMass_toMatrix_P2ToP1ElementwiseKMass_macro_3D.cpp @@ -43,6 +43,10 @@ + + + + @@ -56,7 +60,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ToP1ElementwiseKMass::toMatrix_macro_3D( idx_t * RESTRICT _data_dst, real_t * RESTRICT _data_kEdge, real_t * RESTRICT _data_kVertex, idx_t * RESTRICT _data_srcEdge, idx_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P2ToP1ElementwiseKMass::toMatrix_P2ToP1ElementwiseKMass_macro_3D( idx_t * RESTRICT _data_dst, real_t * RESTRICT _data_kEdge, real_t * RESTRICT _data_kVertex, idx_t * RESTRICT _data_srcEdge, idx_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t _data_q_w [] = {0.02364442875776411, 0.0041220137385117907, 0.025486503738822306, 0.0091363578885896128, 0.0052827872295400091, 0.02526054964096567, 0.01949470769240411, 0.0094909452962763929, 0.019522077110586403, 0.018549261128954084, 0.0066770344442521412}; diff --git a/operators/mass/CMakeLists.txt b/operators/mass/CMakeLists.txt index 999c1ae553d7739723b8f092309dd51624a4e5b7..9ed525d01bca723d249d6d69ba760ae769a6c4d3 100644 --- a/operators/mass/CMakeLists.txt +++ b/operators/mass/CMakeLists.txt @@ -13,40 +13,40 @@ add_library( opgen-mass if(HYTEG_BUILD_WITH_AVX AND WALBERLA_DOUBLE_ACCURACY) target_sources(opgen-mass PRIVATE - avx/P1ElementwiseMass_apply_macro_2D.cpp - avx/P1ElementwiseMass_apply_macro_3D.cpp - avx/P1ElementwiseMass_computeInverseDiagonalOperatorValues_macro_2D.cpp - avx/P1ElementwiseMass_computeInverseDiagonalOperatorValues_macro_3D.cpp - avx/P2ElementwiseMassAnnulusMap_apply_macro_2D.cpp - avx/P2ElementwiseMassAnnulusMap_computeInverseDiagonalOperatorValues_macro_2D.cpp - avx/P2ElementwiseMassIcosahedralShellMap_apply_macro_3D.cpp - avx/P2ElementwiseMassIcosahedralShellMap_computeInverseDiagonalOperatorValues_macro_3D.cpp - avx/P2ElementwiseMass_apply_macro_2D.cpp - avx/P2ElementwiseMass_apply_macro_3D.cpp - avx/P2ElementwiseMass_computeInverseDiagonalOperatorValues_macro_2D.cpp - avx/P2ElementwiseMass_computeInverseDiagonalOperatorValues_macro_3D.cpp - noarch/P1ElementwiseMass_toMatrix_macro_2D.cpp - noarch/P1ElementwiseMass_toMatrix_macro_3D.cpp - noarch/P2ElementwiseMassAnnulusMap_toMatrix_macro_2D.cpp - noarch/P2ElementwiseMassIcosahedralShellMap_toMatrix_macro_3D.cpp - noarch/P2ElementwiseMass_toMatrix_macro_2D.cpp - noarch/P2ElementwiseMass_toMatrix_macro_3D.cpp + avx/P1ElementwiseMass_apply_P1ElementwiseMass_macro_2D.cpp + avx/P1ElementwiseMass_apply_P1ElementwiseMass_macro_3D.cpp + avx/P1ElementwiseMass_computeInverseDiagonalOperatorValues_P1ElementwiseMass_macro_2D.cpp + avx/P1ElementwiseMass_computeInverseDiagonalOperatorValues_P1ElementwiseMass_macro_3D.cpp + avx/P2ElementwiseMassAnnulusMap_apply_P2ElementwiseMassAnnulusMap_macro_2D.cpp + avx/P2ElementwiseMassAnnulusMap_computeInverseDiagonalOperatorValues_P2ElementwiseMassAnnulusMap_macro_2D.cpp + avx/P2ElementwiseMassIcosahedralShellMap_apply_P2ElementwiseMassIcosahedralShellMap_macro_3D.cpp + avx/P2ElementwiseMassIcosahedralShellMap_computeInverseDiagonalOperatorValues_P2ElementwiseMassIcosahedralShellMap_macro_3D.cpp + avx/P2ElementwiseMass_apply_P2ElementwiseMass_macro_2D.cpp + avx/P2ElementwiseMass_apply_P2ElementwiseMass_macro_3D.cpp + avx/P2ElementwiseMass_computeInverseDiagonalOperatorValues_P2ElementwiseMass_macro_2D.cpp + avx/P2ElementwiseMass_computeInverseDiagonalOperatorValues_P2ElementwiseMass_macro_3D.cpp + noarch/P1ElementwiseMass_toMatrix_P1ElementwiseMass_macro_2D.cpp + noarch/P1ElementwiseMass_toMatrix_P1ElementwiseMass_macro_3D.cpp + noarch/P2ElementwiseMassAnnulusMap_toMatrix_P2ElementwiseMassAnnulusMap_macro_2D.cpp + noarch/P2ElementwiseMassIcosahedralShellMap_toMatrix_P2ElementwiseMassIcosahedralShellMap_macro_3D.cpp + noarch/P2ElementwiseMass_toMatrix_P2ElementwiseMass_macro_2D.cpp + noarch/P2ElementwiseMass_toMatrix_P2ElementwiseMass_macro_3D.cpp ) set_source_files_properties( - avx/P1ElementwiseMass_apply_macro_2D.cpp - avx/P1ElementwiseMass_apply_macro_3D.cpp - avx/P1ElementwiseMass_computeInverseDiagonalOperatorValues_macro_2D.cpp - avx/P1ElementwiseMass_computeInverseDiagonalOperatorValues_macro_3D.cpp - avx/P2ElementwiseMassAnnulusMap_apply_macro_2D.cpp - avx/P2ElementwiseMassAnnulusMap_computeInverseDiagonalOperatorValues_macro_2D.cpp - avx/P2ElementwiseMassIcosahedralShellMap_apply_macro_3D.cpp - avx/P2ElementwiseMassIcosahedralShellMap_computeInverseDiagonalOperatorValues_macro_3D.cpp - avx/P2ElementwiseMass_apply_macro_2D.cpp - avx/P2ElementwiseMass_apply_macro_3D.cpp - avx/P2ElementwiseMass_computeInverseDiagonalOperatorValues_macro_2D.cpp - avx/P2ElementwiseMass_computeInverseDiagonalOperatorValues_macro_3D.cpp + avx/P1ElementwiseMass_apply_P1ElementwiseMass_macro_2D.cpp + avx/P1ElementwiseMass_apply_P1ElementwiseMass_macro_3D.cpp + avx/P1ElementwiseMass_computeInverseDiagonalOperatorValues_P1ElementwiseMass_macro_2D.cpp + avx/P1ElementwiseMass_computeInverseDiagonalOperatorValues_P1ElementwiseMass_macro_3D.cpp + avx/P2ElementwiseMassAnnulusMap_apply_P2ElementwiseMassAnnulusMap_macro_2D.cpp + avx/P2ElementwiseMassAnnulusMap_computeInverseDiagonalOperatorValues_P2ElementwiseMassAnnulusMap_macro_2D.cpp + avx/P2ElementwiseMassIcosahedralShellMap_apply_P2ElementwiseMassIcosahedralShellMap_macro_3D.cpp + avx/P2ElementwiseMassIcosahedralShellMap_computeInverseDiagonalOperatorValues_P2ElementwiseMassIcosahedralShellMap_macro_3D.cpp + avx/P2ElementwiseMass_apply_P2ElementwiseMass_macro_2D.cpp + avx/P2ElementwiseMass_apply_P2ElementwiseMass_macro_3D.cpp + avx/P2ElementwiseMass_computeInverseDiagonalOperatorValues_P2ElementwiseMass_macro_2D.cpp + avx/P2ElementwiseMass_computeInverseDiagonalOperatorValues_P2ElementwiseMass_macro_3D.cpp PROPERTIES COMPILE_OPTIONS ${HYTEG_COMPILER_NATIVE_FLAGS} ) @@ -57,24 +57,24 @@ else() target_sources(opgen-mass PRIVATE - noarch/P1ElementwiseMass_apply_macro_2D.cpp - noarch/P1ElementwiseMass_apply_macro_3D.cpp - noarch/P1ElementwiseMass_computeInverseDiagonalOperatorValues_macro_2D.cpp - noarch/P1ElementwiseMass_computeInverseDiagonalOperatorValues_macro_3D.cpp - noarch/P1ElementwiseMass_toMatrix_macro_2D.cpp - noarch/P1ElementwiseMass_toMatrix_macro_3D.cpp - noarch/P2ElementwiseMassAnnulusMap_apply_macro_2D.cpp - noarch/P2ElementwiseMassAnnulusMap_computeInverseDiagonalOperatorValues_macro_2D.cpp - noarch/P2ElementwiseMassAnnulusMap_toMatrix_macro_2D.cpp - noarch/P2ElementwiseMassIcosahedralShellMap_apply_macro_3D.cpp - noarch/P2ElementwiseMassIcosahedralShellMap_computeInverseDiagonalOperatorValues_macro_3D.cpp - noarch/P2ElementwiseMassIcosahedralShellMap_toMatrix_macro_3D.cpp - noarch/P2ElementwiseMass_apply_macro_2D.cpp - noarch/P2ElementwiseMass_apply_macro_3D.cpp - noarch/P2ElementwiseMass_computeInverseDiagonalOperatorValues_macro_2D.cpp - noarch/P2ElementwiseMass_computeInverseDiagonalOperatorValues_macro_3D.cpp - noarch/P2ElementwiseMass_toMatrix_macro_2D.cpp - noarch/P2ElementwiseMass_toMatrix_macro_3D.cpp + noarch/P1ElementwiseMass_apply_P1ElementwiseMass_macro_2D.cpp + noarch/P1ElementwiseMass_apply_P1ElementwiseMass_macro_3D.cpp + noarch/P1ElementwiseMass_computeInverseDiagonalOperatorValues_P1ElementwiseMass_macro_2D.cpp + noarch/P1ElementwiseMass_computeInverseDiagonalOperatorValues_P1ElementwiseMass_macro_3D.cpp + noarch/P1ElementwiseMass_toMatrix_P1ElementwiseMass_macro_2D.cpp + noarch/P1ElementwiseMass_toMatrix_P1ElementwiseMass_macro_3D.cpp + noarch/P2ElementwiseMassAnnulusMap_apply_P2ElementwiseMassAnnulusMap_macro_2D.cpp + noarch/P2ElementwiseMassAnnulusMap_computeInverseDiagonalOperatorValues_P2ElementwiseMassAnnulusMap_macro_2D.cpp + noarch/P2ElementwiseMassAnnulusMap_toMatrix_P2ElementwiseMassAnnulusMap_macro_2D.cpp + noarch/P2ElementwiseMassIcosahedralShellMap_apply_P2ElementwiseMassIcosahedralShellMap_macro_3D.cpp + noarch/P2ElementwiseMassIcosahedralShellMap_computeInverseDiagonalOperatorValues_P2ElementwiseMassIcosahedralShellMap_macro_3D.cpp + noarch/P2ElementwiseMassIcosahedralShellMap_toMatrix_P2ElementwiseMassIcosahedralShellMap_macro_3D.cpp + noarch/P2ElementwiseMass_apply_P2ElementwiseMass_macro_2D.cpp + noarch/P2ElementwiseMass_apply_P2ElementwiseMass_macro_3D.cpp + noarch/P2ElementwiseMass_computeInverseDiagonalOperatorValues_P2ElementwiseMass_macro_2D.cpp + noarch/P2ElementwiseMass_computeInverseDiagonalOperatorValues_P2ElementwiseMass_macro_3D.cpp + noarch/P2ElementwiseMass_toMatrix_P2ElementwiseMass_macro_2D.cpp + noarch/P2ElementwiseMass_toMatrix_P2ElementwiseMass_macro_3D.cpp ) endif() diff --git a/operators/mass/P1ElementwiseMass.cpp b/operators/mass/P1ElementwiseMass.cpp index c0cb917236d20a5b6ed9d82aceb7bfe705545506..4d2d4680958146face984e2235d0ed1de5ea982e 100644 --- a/operators/mass/P1ElementwiseMass.cpp +++ b/operators/mass/P1ElementwiseMass.cpp @@ -124,7 +124,7 @@ void P1ElementwiseMass::apply( const P1Function< real_t >& src, this->timingTree_->start( "kernel" ); - apply_macro_3D( + apply_P1ElementwiseMass_macro_3D( _data_dst, _data_src, @@ -142,6 +142,7 @@ void P1ElementwiseMass::apply( const P1Function< real_t >& src, macro_vertex_coord_id_3comp2, micro_edges_per_macro_edge, micro_edges_per_macro_edge_float ); + this->timingTree_->stop( "kernel" ); } @@ -189,7 +190,7 @@ void P1ElementwiseMass::apply( const P1Function< real_t >& src, this->timingTree_->start( "kernel" ); - apply_macro_2D( + apply_P1ElementwiseMass_macro_2D( _data_dst, _data_src, @@ -201,6 +202,7 @@ void P1ElementwiseMass::apply( const P1Function< real_t >& src, macro_vertex_coord_id_2comp1, micro_edges_per_macro_edge, micro_edges_per_macro_edge_float ); + this->timingTree_->stop( "kernel" ); } @@ -261,7 +263,7 @@ void P1ElementwiseMass::toMatrix( const std::shared_ptr< SparseMatrixProxy >& ma this->timingTree_->start( "kernel" ); - toMatrix_macro_3D( + toMatrix_P1ElementwiseMass_macro_3D( _data_dst, _data_src, @@ -280,6 +282,7 @@ void P1ElementwiseMass::toMatrix( const std::shared_ptr< SparseMatrixProxy >& ma mat, micro_edges_per_macro_edge, micro_edges_per_macro_edge_float ); + this->timingTree_->stop( "kernel" ); } } @@ -308,7 +311,7 @@ void P1ElementwiseMass::toMatrix( const std::shared_ptr< SparseMatrixProxy >& ma this->timingTree_->start( "kernel" ); - toMatrix_macro_2D( + toMatrix_P1ElementwiseMass_macro_2D( _data_dst, _data_src, @@ -321,6 +324,7 @@ void P1ElementwiseMass::toMatrix( const std::shared_ptr< SparseMatrixProxy >& ma mat, micro_edges_per_macro_edge, micro_edges_per_macro_edge_float ); + this->timingTree_->stop( "kernel" ); } } @@ -369,7 +373,7 @@ void P1ElementwiseMass::computeInverseDiagonalOperatorValues() this->timingTree_->start( "kernel" ); - computeInverseDiagonalOperatorValues_macro_3D( + computeInverseDiagonalOperatorValues_P1ElementwiseMass_macro_3D( _data_invDiag_, macro_vertex_coord_id_0comp0, @@ -386,6 +390,7 @@ void P1ElementwiseMass::computeInverseDiagonalOperatorValues() macro_vertex_coord_id_3comp2, micro_edges_per_macro_edge, micro_edges_per_macro_edge_float ); + this->timingTree_->stop( "kernel" ); } @@ -424,7 +429,7 @@ void P1ElementwiseMass::computeInverseDiagonalOperatorValues() this->timingTree_->start( "kernel" ); - computeInverseDiagonalOperatorValues_macro_2D( + computeInverseDiagonalOperatorValues_P1ElementwiseMass_macro_2D( _data_invDiag_, macro_vertex_coord_id_0comp0, @@ -435,6 +440,7 @@ void P1ElementwiseMass::computeInverseDiagonalOperatorValues() macro_vertex_coord_id_2comp1, micro_edges_per_macro_edge, micro_edges_per_macro_edge_float ); + this->timingTree_->stop( "kernel" ); } diff --git a/operators/mass/P1ElementwiseMass.hpp b/operators/mass/P1ElementwiseMass.hpp index c9f4ea4f0fb7c5f2ab6e6ccfcbe82d219d0b8f01..cb592e21c5f4c9b701d5eadfb2e40d579c0155e0 100644 --- a/operators/mass/P1ElementwiseMass.hpp +++ b/operators/mass/P1ElementwiseMass.hpp @@ -29,6 +29,7 @@ #include "core/DataTypes.h" #include "hyteg/LikwidWrapper.hpp" +#include "hyteg/boundary/BoundaryConditions.hpp" #include "hyteg/communication/Syncing.hpp" #include "hyteg/edgedofspace/EdgeDoFMacroCell.hpp" #include "hyteg/operators/Operator.hpp" @@ -36,6 +37,7 @@ #include "hyteg/primitivestorage/PrimitiveStorage.hpp" #include "hyteg/solvers/Smoothables.hpp" #include "hyteg/sparseassembly/SparseMatrixProxy.hpp" +#include "hyteg/types/types.hpp" #define FUNC_PREFIX @@ -78,120 +80,149 @@ class P1ElementwiseMass : public Operator< P1Function< real_t >, P1Function< rea protected: private: - /// Kernel type: apply + /// Integral: P1ElementwiseMass + /// - volume element: triangle, dim: 2, vertices: 3, spacedim: 2 + /// - kernel type: apply + /// - loop strategy: SAWTOOTH /// - quadrature rule: Dunavant 2 | points: 3, degree: 2 + /// - blending map: IdentityMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 59 51 12 0 0 0 0 1 - void apply_macro_2D( real_t* RESTRICT _data_dst, - real_t* RESTRICT _data_src, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float ) const; - /// Kernel type: apply + void apply_P1ElementwiseMass_macro_2D( real_t* RESTRICT _data_dst, + real_t* RESTRICT _data_src, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float ) const; + + /// Integral: P1ElementwiseMass + /// - volume element: tetrahedron, dim: 3, vertices: 4, spacedim: 3 + /// - kernel type: apply + /// - loop strategy: SAWTOOTH /// - quadrature rule: Hammer-Marlowe-Stroud 1 | points: 4, degree: 2 + /// - blending map: IdentityMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 149 128 36 0 0 0 0 1 - void apply_macro_3D( real_t* RESTRICT _data_dst, - real_t* RESTRICT _data_src, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_0comp2, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_1comp2, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - real_t macro_vertex_coord_id_2comp2, - real_t macro_vertex_coord_id_3comp0, - real_t macro_vertex_coord_id_3comp1, - real_t macro_vertex_coord_id_3comp2, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float ) const; - /// Kernel type: toMatrix + void apply_P1ElementwiseMass_macro_3D( real_t* RESTRICT _data_dst, + real_t* RESTRICT _data_src, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_0comp2, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_1comp2, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + real_t macro_vertex_coord_id_2comp2, + real_t macro_vertex_coord_id_3comp0, + real_t macro_vertex_coord_id_3comp1, + real_t macro_vertex_coord_id_3comp2, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float ) const; + + /// Integral: P1ElementwiseMass + /// - volume element: triangle, dim: 2, vertices: 3, spacedim: 2 + /// - kernel type: toMatrix + /// - loop strategy: SAWTOOTH /// - quadrature rule: Dunavant 2 | points: 3, degree: 2 + /// - blending map: IdentityMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 50 42 12 0 0 0 0 4 - void toMatrix_macro_2D( idx_t* RESTRICT _data_dst, - idx_t* RESTRICT _data_src, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - std::shared_ptr< SparseMatrixProxy > mat, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float ) const; - /// Kernel type: toMatrix + void toMatrix_P1ElementwiseMass_macro_2D( idx_t* RESTRICT _data_dst, + idx_t* RESTRICT _data_src, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + std::shared_ptr< SparseMatrixProxy > mat, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float ) const; + + /// Integral: P1ElementwiseMass + /// - volume element: tetrahedron, dim: 3, vertices: 4, spacedim: 3 + /// - kernel type: toMatrix + /// - loop strategy: SAWTOOTH /// - quadrature rule: Hammer-Marlowe-Stroud 1 | points: 4, degree: 2 + /// - blending map: IdentityMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 133 112 36 0 0 0 0 4 - void toMatrix_macro_3D( idx_t* RESTRICT _data_dst, - idx_t* RESTRICT _data_src, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_0comp2, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_1comp2, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - real_t macro_vertex_coord_id_2comp2, - real_t macro_vertex_coord_id_3comp0, - real_t macro_vertex_coord_id_3comp1, - real_t macro_vertex_coord_id_3comp2, - std::shared_ptr< SparseMatrixProxy > mat, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float ) const; - /// Kernel type: computeInverseDiagonalOperatorValues + void toMatrix_P1ElementwiseMass_macro_3D( idx_t* RESTRICT _data_dst, + idx_t* RESTRICT _data_src, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_0comp2, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_1comp2, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + real_t macro_vertex_coord_id_2comp2, + real_t macro_vertex_coord_id_3comp0, + real_t macro_vertex_coord_id_3comp1, + real_t macro_vertex_coord_id_3comp2, + std::shared_ptr< SparseMatrixProxy > mat, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float ) const; + + /// Integral: P1ElementwiseMass + /// - volume element: triangle, dim: 2, vertices: 3, spacedim: 2 + /// - kernel type: computeInverseDiagonalOperatorValues + /// - loop strategy: SAWTOOTH /// - quadrature rule: Dunavant 2 | points: 3, degree: 2 + /// - blending map: IdentityMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 44 33 12 0 0 0 0 1 - void computeInverseDiagonalOperatorValues_macro_2D( real_t* RESTRICT _data_invDiag_, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float ) const; - /// Kernel type: computeInverseDiagonalOperatorValues + void computeInverseDiagonalOperatorValues_P1ElementwiseMass_macro_2D( real_t* RESTRICT _data_invDiag_, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float ) const; + + /// Integral: P1ElementwiseMass + /// - volume element: tetrahedron, dim: 3, vertices: 4, spacedim: 3 + /// - kernel type: computeInverseDiagonalOperatorValues + /// - loop strategy: SAWTOOTH /// - quadrature rule: Hammer-Marlowe-Stroud 1 | points: 4, degree: 2 + /// - blending map: IdentityMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 113 88 36 0 0 0 0 1 - void computeInverseDiagonalOperatorValues_macro_3D( real_t* RESTRICT _data_invDiag_, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_0comp2, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_1comp2, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - real_t macro_vertex_coord_id_2comp2, - real_t macro_vertex_coord_id_3comp0, - real_t macro_vertex_coord_id_3comp1, - real_t macro_vertex_coord_id_3comp2, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float ) const; + void computeInverseDiagonalOperatorValues_P1ElementwiseMass_macro_3D( real_t* RESTRICT _data_invDiag_, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_0comp2, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_1comp2, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + real_t macro_vertex_coord_id_2comp2, + real_t macro_vertex_coord_id_3comp0, + real_t macro_vertex_coord_id_3comp1, + real_t macro_vertex_coord_id_3comp2, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float ) const; std::shared_ptr< P1Function< real_t > > invDiag_; }; diff --git a/operators/mass/P2ElementwiseMass.cpp b/operators/mass/P2ElementwiseMass.cpp index b061b46498737792c83bad219111cabc38a3fd24..2cf057fefa8cc5a81ad56affb84521d3d1366b72 100644 --- a/operators/mass/P2ElementwiseMass.cpp +++ b/operators/mass/P2ElementwiseMass.cpp @@ -127,7 +127,7 @@ void P2ElementwiseMass::apply( const P2Function< real_t >& src, this->timingTree_->start( "kernel" ); - apply_macro_3D( + apply_P2ElementwiseMass_macro_3D( _data_dstEdge, _data_dstVertex, @@ -147,6 +147,7 @@ void P2ElementwiseMass::apply( const P2Function< real_t >& src, macro_vertex_coord_id_3comp2, micro_edges_per_macro_edge, micro_edges_per_macro_edge_float ); + this->timingTree_->stop( "kernel" ); } @@ -214,7 +215,7 @@ void P2ElementwiseMass::apply( const P2Function< real_t >& src, this->timingTree_->start( "kernel" ); - apply_macro_2D( + apply_P2ElementwiseMass_macro_2D( _data_dstEdge, _data_dstVertex, @@ -228,6 +229,7 @@ void P2ElementwiseMass::apply( const P2Function< real_t >& src, macro_vertex_coord_id_2comp1, micro_edges_per_macro_edge, micro_edges_per_macro_edge_float ); + this->timingTree_->stop( "kernel" ); } @@ -294,7 +296,7 @@ void P2ElementwiseMass::toMatrix( const std::shared_ptr< SparseMatrixProxy >& ma this->timingTree_->start( "kernel" ); - toMatrix_macro_3D( + toMatrix_P2ElementwiseMass_macro_3D( _data_dstEdge, _data_dstVertex, @@ -315,6 +317,7 @@ void P2ElementwiseMass::toMatrix( const std::shared_ptr< SparseMatrixProxy >& ma mat, micro_edges_per_macro_edge, micro_edges_per_macro_edge_float ); + this->timingTree_->stop( "kernel" ); } } @@ -345,7 +348,7 @@ void P2ElementwiseMass::toMatrix( const std::shared_ptr< SparseMatrixProxy >& ma this->timingTree_->start( "kernel" ); - toMatrix_macro_2D( + toMatrix_P2ElementwiseMass_macro_2D( _data_dstEdge, _data_dstVertex, @@ -360,6 +363,7 @@ void P2ElementwiseMass::toMatrix( const std::shared_ptr< SparseMatrixProxy >& ma mat, micro_edges_per_macro_edge, micro_edges_per_macro_edge_float ); + this->timingTree_->stop( "kernel" ); } } @@ -410,7 +414,7 @@ void P2ElementwiseMass::computeInverseDiagonalOperatorValues() this->timingTree_->start( "kernel" ); - computeInverseDiagonalOperatorValues_macro_3D( + computeInverseDiagonalOperatorValues_P2ElementwiseMass_macro_3D( _data_invDiag_Edge, _data_invDiag_Vertex, @@ -428,6 +432,7 @@ void P2ElementwiseMass::computeInverseDiagonalOperatorValues() macro_vertex_coord_id_3comp2, micro_edges_per_macro_edge, micro_edges_per_macro_edge_float ); + this->timingTree_->stop( "kernel" ); } @@ -470,7 +475,7 @@ void P2ElementwiseMass::computeInverseDiagonalOperatorValues() this->timingTree_->start( "kernel" ); - computeInverseDiagonalOperatorValues_macro_2D( + computeInverseDiagonalOperatorValues_P2ElementwiseMass_macro_2D( _data_invDiag_Edge, _data_invDiag_Vertex, @@ -482,6 +487,7 @@ void P2ElementwiseMass::computeInverseDiagonalOperatorValues() macro_vertex_coord_id_2comp1, micro_edges_per_macro_edge, micro_edges_per_macro_edge_float ); + this->timingTree_->stop( "kernel" ); } diff --git a/operators/mass/P2ElementwiseMass.hpp b/operators/mass/P2ElementwiseMass.hpp index 21eb0970687c9db85ad5c306e84d9bb0f1814284..acfec55913cfd6aba6c0c8bbd681dc5afc5c4904 100644 --- a/operators/mass/P2ElementwiseMass.hpp +++ b/operators/mass/P2ElementwiseMass.hpp @@ -29,6 +29,7 @@ #include "core/DataTypes.h" #include "hyteg/LikwidWrapper.hpp" +#include "hyteg/boundary/BoundaryConditions.hpp" #include "hyteg/communication/Syncing.hpp" #include "hyteg/edgedofspace/EdgeDoFMacroCell.hpp" #include "hyteg/operators/Operator.hpp" @@ -36,6 +37,7 @@ #include "hyteg/primitivestorage/PrimitiveStorage.hpp" #include "hyteg/solvers/Smoothables.hpp" #include "hyteg/sparseassembly/SparseMatrixProxy.hpp" +#include "hyteg/types/types.hpp" #define FUNC_PREFIX @@ -78,130 +80,159 @@ class P2ElementwiseMass : public Operator< P2Function< real_t >, P2Function< rea protected: private: - /// Kernel type: apply + /// Integral: P2ElementwiseMass + /// - volume element: triangle, dim: 2, vertices: 3, spacedim: 2 + /// - kernel type: apply + /// - loop strategy: SAWTOOTH /// - quadrature rule: Dunavant 4 | points: 6, degree: 4 + /// - blending map: IdentityMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 260 336 12 0 0 0 0 1 - void apply_macro_2D( real_t* RESTRICT _data_dstEdge, - real_t* RESTRICT _data_dstVertex, - real_t* RESTRICT _data_srcEdge, - real_t* RESTRICT _data_srcVertex, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float ) const; - /// Kernel type: apply + void apply_P2ElementwiseMass_macro_2D( real_t* RESTRICT _data_dstEdge, + real_t* RESTRICT _data_dstVertex, + real_t* RESTRICT _data_srcEdge, + real_t* RESTRICT _data_srcVertex, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float ) const; + + /// Integral: P2ElementwiseMass + /// - volume element: tetrahedron, dim: 3, vertices: 4, spacedim: 3 + /// - kernel type: apply + /// - loop strategy: SAWTOOTH /// - quadrature rule: Jaśkowiec-Sukumar 04 | points: 11, degree: 4 + /// - blending map: IdentityMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 1238 1327 36 0 0 0 0 1 - void apply_macro_3D( real_t* RESTRICT _data_dstEdge, - real_t* RESTRICT _data_dstVertex, - real_t* RESTRICT _data_srcEdge, - real_t* RESTRICT _data_srcVertex, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_0comp2, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_1comp2, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - real_t macro_vertex_coord_id_2comp2, - real_t macro_vertex_coord_id_3comp0, - real_t macro_vertex_coord_id_3comp1, - real_t macro_vertex_coord_id_3comp2, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float ) const; - /// Kernel type: toMatrix + void apply_P2ElementwiseMass_macro_3D( real_t* RESTRICT _data_dstEdge, + real_t* RESTRICT _data_dstVertex, + real_t* RESTRICT _data_srcEdge, + real_t* RESTRICT _data_srcVertex, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_0comp2, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_1comp2, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + real_t macro_vertex_coord_id_2comp2, + real_t macro_vertex_coord_id_3comp0, + real_t macro_vertex_coord_id_3comp1, + real_t macro_vertex_coord_id_3comp2, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float ) const; + + /// Integral: P2ElementwiseMass + /// - volume element: triangle, dim: 2, vertices: 3, spacedim: 2 + /// - kernel type: toMatrix + /// - loop strategy: SAWTOOTH /// - quadrature rule: Dunavant 4 | points: 6, degree: 4 + /// - blending map: IdentityMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 224 300 12 0 0 0 0 4 - void toMatrix_macro_2D( idx_t* RESTRICT _data_dstEdge, - idx_t* RESTRICT _data_dstVertex, - idx_t* RESTRICT _data_srcEdge, - idx_t* RESTRICT _data_srcVertex, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - std::shared_ptr< SparseMatrixProxy > mat, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float ) const; - /// Kernel type: toMatrix + void toMatrix_P2ElementwiseMass_macro_2D( idx_t* RESTRICT _data_dstEdge, + idx_t* RESTRICT _data_dstVertex, + idx_t* RESTRICT _data_srcEdge, + idx_t* RESTRICT _data_srcVertex, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + std::shared_ptr< SparseMatrixProxy > mat, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float ) const; + + /// Integral: P2ElementwiseMass + /// - volume element: tetrahedron, dim: 3, vertices: 4, spacedim: 3 + /// - kernel type: toMatrix + /// - loop strategy: SAWTOOTH /// - quadrature rule: Jaśkowiec-Sukumar 04 | points: 11, degree: 4 + /// - blending map: IdentityMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 1138 1227 36 0 0 0 0 4 - void toMatrix_macro_3D( idx_t* RESTRICT _data_dstEdge, - idx_t* RESTRICT _data_dstVertex, - idx_t* RESTRICT _data_srcEdge, - idx_t* RESTRICT _data_srcVertex, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_0comp2, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_1comp2, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - real_t macro_vertex_coord_id_2comp2, - real_t macro_vertex_coord_id_3comp0, - real_t macro_vertex_coord_id_3comp1, - real_t macro_vertex_coord_id_3comp2, - std::shared_ptr< SparseMatrixProxy > mat, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float ) const; - /// Kernel type: computeInverseDiagonalOperatorValues + void toMatrix_P2ElementwiseMass_macro_3D( idx_t* RESTRICT _data_dstEdge, + idx_t* RESTRICT _data_dstVertex, + idx_t* RESTRICT _data_srcEdge, + idx_t* RESTRICT _data_srcVertex, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_0comp2, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_1comp2, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + real_t macro_vertex_coord_id_2comp2, + real_t macro_vertex_coord_id_3comp0, + real_t macro_vertex_coord_id_3comp1, + real_t macro_vertex_coord_id_3comp2, + std::shared_ptr< SparseMatrixProxy > mat, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float ) const; + + /// Integral: P2ElementwiseMass + /// - volume element: triangle, dim: 2, vertices: 3, spacedim: 2 + /// - kernel type: computeInverseDiagonalOperatorValues + /// - loop strategy: SAWTOOTH /// - quadrature rule: Dunavant 4 | points: 6, degree: 4 + /// - blending map: IdentityMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 140 180 12 0 0 0 0 1 - void computeInverseDiagonalOperatorValues_macro_2D( real_t* RESTRICT _data_invDiag_Edge, - real_t* RESTRICT _data_invDiag_Vertex, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float ) const; - /// Kernel type: computeInverseDiagonalOperatorValues + void computeInverseDiagonalOperatorValues_P2ElementwiseMass_macro_2D( real_t* RESTRICT _data_invDiag_Edge, + real_t* RESTRICT _data_invDiag_Vertex, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float ) const; + + /// Integral: P2ElementwiseMass + /// - volume element: tetrahedron, dim: 3, vertices: 4, spacedim: 3 + /// - kernel type: computeInverseDiagonalOperatorValues + /// - loop strategy: SAWTOOTH /// - quadrature rule: Jaśkowiec-Sukumar 04 | points: 11, degree: 4 + /// - blending map: IdentityMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 433 479 36 0 0 0 0 1 - void computeInverseDiagonalOperatorValues_macro_3D( real_t* RESTRICT _data_invDiag_Edge, - real_t* RESTRICT _data_invDiag_Vertex, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_0comp2, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_1comp2, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - real_t macro_vertex_coord_id_2comp2, - real_t macro_vertex_coord_id_3comp0, - real_t macro_vertex_coord_id_3comp1, - real_t macro_vertex_coord_id_3comp2, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float ) const; + void computeInverseDiagonalOperatorValues_P2ElementwiseMass_macro_3D( real_t* RESTRICT _data_invDiag_Edge, + real_t* RESTRICT _data_invDiag_Vertex, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_0comp2, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_1comp2, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + real_t macro_vertex_coord_id_2comp2, + real_t macro_vertex_coord_id_3comp0, + real_t macro_vertex_coord_id_3comp1, + real_t macro_vertex_coord_id_3comp2, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float ) const; std::shared_ptr< P2Function< real_t > > invDiag_; }; diff --git a/operators/mass/P2ElementwiseMassAnnulusMap.cpp b/operators/mass/P2ElementwiseMassAnnulusMap.cpp index 5ed2042eec8a4db0534363ebcd4f66fc9476881c..c1240ffd859e36197c946f991ad524cc3bd105b4 100644 --- a/operators/mass/P2ElementwiseMassAnnulusMap.cpp +++ b/operators/mass/P2ElementwiseMassAnnulusMap.cpp @@ -144,7 +144,7 @@ void P2ElementwiseMassAnnulusMap::apply( const P2Function< real_t >& src, this->timingTree_->start( "kernel" ); - apply_macro_2D( + apply_P2ElementwiseMassAnnulusMap_macro_2D( _data_dstEdge, _data_dstVertex, @@ -166,6 +166,7 @@ void P2ElementwiseMassAnnulusMap::apply( const P2Function< real_t >& src, refVertex_1, thrVertex_0, thrVertex_1 ); + this->timingTree_->stop( "kernel" ); } @@ -245,7 +246,7 @@ void P2ElementwiseMassAnnulusMap::toMatrix( const std::shared_ptr< SparseMatrixP this->timingTree_->start( "kernel" ); - toMatrix_macro_2D( + toMatrix_P2ElementwiseMassAnnulusMap_macro_2D( _data_dstEdge, _data_dstVertex, @@ -268,6 +269,7 @@ void P2ElementwiseMassAnnulusMap::toMatrix( const std::shared_ptr< SparseMatrixP refVertex_1, thrVertex_0, thrVertex_1 ); + this->timingTree_->stop( "kernel" ); } } @@ -332,7 +334,7 @@ void P2ElementwiseMassAnnulusMap::computeInverseDiagonalOperatorValues() this->timingTree_->start( "kernel" ); - computeInverseDiagonalOperatorValues_macro_2D( + computeInverseDiagonalOperatorValues_P2ElementwiseMassAnnulusMap_macro_2D( _data_invDiag_Edge, _data_invDiag_Vertex, @@ -352,6 +354,7 @@ void P2ElementwiseMassAnnulusMap::computeInverseDiagonalOperatorValues() refVertex_1, thrVertex_0, thrVertex_1 ); + this->timingTree_->stop( "kernel" ); } diff --git a/operators/mass/P2ElementwiseMassAnnulusMap.hpp b/operators/mass/P2ElementwiseMassAnnulusMap.hpp index 94b5634d0481adc2d09ddcd2f35634c342e77a5f..f0f2aafeb5c2346d5369f940f1e6a710bad9dd20 100644 --- a/operators/mass/P2ElementwiseMassAnnulusMap.hpp +++ b/operators/mass/P2ElementwiseMassAnnulusMap.hpp @@ -29,6 +29,7 @@ #include "core/DataTypes.h" #include "hyteg/LikwidWrapper.hpp" +#include "hyteg/boundary/BoundaryConditions.hpp" #include "hyteg/communication/Syncing.hpp" #include "hyteg/edgedofspace/EdgeDoFMacroCell.hpp" #include "hyteg/geometry/AnnulusMap.hpp" @@ -37,6 +38,7 @@ #include "hyteg/primitivestorage/PrimitiveStorage.hpp" #include "hyteg/solvers/Smoothables.hpp" #include "hyteg/sparseassembly/SparseMatrixProxy.hpp" +#include "hyteg/types/types.hpp" #define FUNC_PREFIX @@ -79,83 +81,97 @@ class P2ElementwiseMassAnnulusMap : public Operator< P2Function< real_t >, P2Fun protected: private: - /// Kernel type: apply + /// Integral: P2ElementwiseMassAnnulusMap + /// - volume element: triangle, dim: 2, vertices: 3, spacedim: 2 + /// - kernel type: apply + /// - loop strategy: SAWTOOTH /// - quadrature rule: Dunavant 4 | points: 6, degree: 4 + /// - blending map: AnnulusMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 638 996 30 30 6 0 0 1 - void apply_macro_2D( real_t* RESTRICT _data_dstEdge, - real_t* RESTRICT _data_dstVertex, - real_t* RESTRICT _data_srcEdge, - real_t* RESTRICT _data_srcVertex, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float, - real_t radRayVertex, - real_t radRefVertex, - real_t rayVertex_0, - real_t rayVertex_1, - real_t refVertex_0, - real_t refVertex_1, - real_t thrVertex_0, - real_t thrVertex_1 ) const; - /// Kernel type: toMatrix + void apply_P2ElementwiseMassAnnulusMap_macro_2D( real_t* RESTRICT _data_dstEdge, + real_t* RESTRICT _data_dstVertex, + real_t* RESTRICT _data_srcEdge, + real_t* RESTRICT _data_srcVertex, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float, + real_t radRayVertex, + real_t radRefVertex, + real_t rayVertex_0, + real_t rayVertex_1, + real_t refVertex_0, + real_t refVertex_1, + real_t thrVertex_0, + real_t thrVertex_1 ) const; + + /// Integral: P2ElementwiseMassAnnulusMap + /// - volume element: triangle, dim: 2, vertices: 3, spacedim: 2 + /// - kernel type: toMatrix + /// - loop strategy: SAWTOOTH /// - quadrature rule: Dunavant 4 | points: 6, degree: 4 + /// - blending map: AnnulusMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 602 960 30 30 6 0 0 4 - void toMatrix_macro_2D( idx_t* RESTRICT _data_dstEdge, - idx_t* RESTRICT _data_dstVertex, - idx_t* RESTRICT _data_srcEdge, - idx_t* RESTRICT _data_srcVertex, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - std::shared_ptr< SparseMatrixProxy > mat, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float, - real_t radRayVertex, - real_t radRefVertex, - real_t rayVertex_0, - real_t rayVertex_1, - real_t refVertex_0, - real_t refVertex_1, - real_t thrVertex_0, - real_t thrVertex_1 ) const; - /// Kernel type: computeInverseDiagonalOperatorValues + void toMatrix_P2ElementwiseMassAnnulusMap_macro_2D( idx_t* RESTRICT _data_dstEdge, + idx_t* RESTRICT _data_dstVertex, + idx_t* RESTRICT _data_srcEdge, + idx_t* RESTRICT _data_srcVertex, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + std::shared_ptr< SparseMatrixProxy > mat, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float, + real_t radRayVertex, + real_t radRefVertex, + real_t rayVertex_0, + real_t rayVertex_1, + real_t refVertex_0, + real_t refVertex_1, + real_t thrVertex_0, + real_t thrVertex_1 ) const; + + /// Integral: P2ElementwiseMassAnnulusMap + /// - volume element: triangle, dim: 2, vertices: 3, spacedim: 2 + /// - kernel type: computeInverseDiagonalOperatorValues + /// - loop strategy: SAWTOOTH /// - quadrature rule: Dunavant 4 | points: 6, degree: 4 + /// - blending map: AnnulusMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 518 840 30 30 6 0 0 1 - void computeInverseDiagonalOperatorValues_macro_2D( real_t* RESTRICT _data_invDiag_Edge, - real_t* RESTRICT _data_invDiag_Vertex, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float, - real_t radRayVertex, - real_t radRefVertex, - real_t rayVertex_0, - real_t rayVertex_1, - real_t refVertex_0, - real_t refVertex_1, - real_t thrVertex_0, - real_t thrVertex_1 ) const; + void computeInverseDiagonalOperatorValues_P2ElementwiseMassAnnulusMap_macro_2D( real_t* RESTRICT _data_invDiag_Edge, + real_t* RESTRICT _data_invDiag_Vertex, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float, + real_t radRayVertex, + real_t radRefVertex, + real_t rayVertex_0, + real_t rayVertex_1, + real_t refVertex_0, + real_t refVertex_1, + real_t thrVertex_0, + real_t thrVertex_1 ) const; std::shared_ptr< P2Function< real_t > > invDiag_; }; diff --git a/operators/mass/P2ElementwiseMassIcosahedralShellMap.cpp b/operators/mass/P2ElementwiseMassIcosahedralShellMap.cpp index a2a18b33ebd5268aea0b181e75817f31d14dc57b..05302703756607db6a5c918f164e950e3055051a 100644 --- a/operators/mass/P2ElementwiseMassIcosahedralShellMap.cpp +++ b/operators/mass/P2ElementwiseMassIcosahedralShellMap.cpp @@ -146,7 +146,7 @@ void P2ElementwiseMassIcosahedralShellMap::apply( const P2Function< real_t >& sr this->timingTree_->start( "kernel" ); - apply_macro_3D( + apply_P2ElementwiseMassIcosahedralShellMap_macro_3D( _data_dstEdge, _data_dstVertex, @@ -180,6 +180,7 @@ void P2ElementwiseMassIcosahedralShellMap::apply( const P2Function< real_t >& sr thrVertex_0, thrVertex_1, thrVertex_2 ); + this->timingTree_->stop( "kernel" ); } @@ -271,7 +272,7 @@ void P2ElementwiseMassIcosahedralShellMap::toMatrix( const std::shared_ptr< Spar this->timingTree_->start( "kernel" ); - toMatrix_macro_3D( + toMatrix_P2ElementwiseMassIcosahedralShellMap_macro_3D( _data_dstEdge, _data_dstVertex, @@ -306,6 +307,7 @@ void P2ElementwiseMassIcosahedralShellMap::toMatrix( const std::shared_ptr< Spar thrVertex_0, thrVertex_1, thrVertex_2 ); + this->timingTree_->stop( "kernel" ); } } @@ -381,7 +383,7 @@ void P2ElementwiseMassIcosahedralShellMap::computeInverseDiagonalOperatorValues( this->timingTree_->start( "kernel" ); - computeInverseDiagonalOperatorValues_macro_3D( + computeInverseDiagonalOperatorValues_P2ElementwiseMassIcosahedralShellMap_macro_3D( _data_invDiag_Edge, _data_invDiag_Vertex, @@ -413,6 +415,7 @@ void P2ElementwiseMassIcosahedralShellMap::computeInverseDiagonalOperatorValues( thrVertex_0, thrVertex_1, thrVertex_2 ); + this->timingTree_->stop( "kernel" ); } diff --git a/operators/mass/P2ElementwiseMassIcosahedralShellMap.hpp b/operators/mass/P2ElementwiseMassIcosahedralShellMap.hpp index 5df746e504ab96f2f8da252597cafae5efb9d41a..67f5ee40e6f2994ffb61488b18ae08ffa621fb24 100644 --- a/operators/mass/P2ElementwiseMassIcosahedralShellMap.hpp +++ b/operators/mass/P2ElementwiseMassIcosahedralShellMap.hpp @@ -29,6 +29,7 @@ #include "core/DataTypes.h" #include "hyteg/LikwidWrapper.hpp" +#include "hyteg/boundary/BoundaryConditions.hpp" #include "hyteg/communication/Syncing.hpp" #include "hyteg/edgedofspace/EdgeDoFMacroCell.hpp" #include "hyteg/geometry/IcosahedralShellMap.hpp" @@ -37,6 +38,7 @@ #include "hyteg/primitivestorage/PrimitiveStorage.hpp" #include "hyteg/solvers/Smoothables.hpp" #include "hyteg/sparseassembly/SparseMatrixProxy.hpp" +#include "hyteg/types/types.hpp" #define FUNC_PREFIX @@ -79,119 +81,134 @@ class P2ElementwiseMassIcosahedralShellMap : public Operator< P2Function< real_t protected: private: - /// Kernel type: apply + /// Integral: P2ElementwiseMassIcosahedralShellMap + /// - volume element: tetrahedron, dim: 3, vertices: 4, spacedim: 3 + /// - kernel type: apply + /// - loop strategy: SAWTOOTH /// - quadrature rule: Jaśkowiec-Sukumar 04 | points: 11, degree: 4 + /// - blending map: IcosahedralShellMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 2811 4363 69 33 11 0 0 1 - void apply_macro_3D( real_t* RESTRICT _data_dstEdge, - real_t* RESTRICT _data_dstVertex, - real_t* RESTRICT _data_srcEdge, - real_t* RESTRICT _data_srcVertex, - real_t forVertex_0, - real_t forVertex_1, - real_t forVertex_2, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_0comp2, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_1comp2, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - real_t macro_vertex_coord_id_2comp2, - real_t macro_vertex_coord_id_3comp0, - real_t macro_vertex_coord_id_3comp1, - real_t macro_vertex_coord_id_3comp2, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float, - real_t radRayVertex, - real_t radRefVertex, - real_t rayVertex_0, - real_t rayVertex_1, - real_t rayVertex_2, - real_t refVertex_0, - real_t refVertex_1, - real_t refVertex_2, - real_t thrVertex_0, - real_t thrVertex_1, - real_t thrVertex_2 ) const; - /// Kernel type: toMatrix + void apply_P2ElementwiseMassIcosahedralShellMap_macro_3D( real_t* RESTRICT _data_dstEdge, + real_t* RESTRICT _data_dstVertex, + real_t* RESTRICT _data_srcEdge, + real_t* RESTRICT _data_srcVertex, + real_t forVertex_0, + real_t forVertex_1, + real_t forVertex_2, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_0comp2, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_1comp2, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + real_t macro_vertex_coord_id_2comp2, + real_t macro_vertex_coord_id_3comp0, + real_t macro_vertex_coord_id_3comp1, + real_t macro_vertex_coord_id_3comp2, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float, + real_t radRayVertex, + real_t radRefVertex, + real_t rayVertex_0, + real_t rayVertex_1, + real_t rayVertex_2, + real_t refVertex_0, + real_t refVertex_1, + real_t refVertex_2, + real_t thrVertex_0, + real_t thrVertex_1, + real_t thrVertex_2 ) const; + + /// Integral: P2ElementwiseMassIcosahedralShellMap + /// - volume element: tetrahedron, dim: 3, vertices: 4, spacedim: 3 + /// - kernel type: toMatrix + /// - loop strategy: SAWTOOTH /// - quadrature rule: Jaśkowiec-Sukumar 04 | points: 11, degree: 4 + /// - blending map: IcosahedralShellMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 2711 4263 69 33 11 0 0 4 - void toMatrix_macro_3D( idx_t* RESTRICT _data_dstEdge, - idx_t* RESTRICT _data_dstVertex, - idx_t* RESTRICT _data_srcEdge, - idx_t* RESTRICT _data_srcVertex, - real_t forVertex_0, - real_t forVertex_1, - real_t forVertex_2, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_0comp2, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_1comp2, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - real_t macro_vertex_coord_id_2comp2, - real_t macro_vertex_coord_id_3comp0, - real_t macro_vertex_coord_id_3comp1, - real_t macro_vertex_coord_id_3comp2, - std::shared_ptr< SparseMatrixProxy > mat, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float, - real_t radRayVertex, - real_t radRefVertex, - real_t rayVertex_0, - real_t rayVertex_1, - real_t rayVertex_2, - real_t refVertex_0, - real_t refVertex_1, - real_t refVertex_2, - real_t thrVertex_0, - real_t thrVertex_1, - real_t thrVertex_2 ) const; - /// Kernel type: computeInverseDiagonalOperatorValues + void toMatrix_P2ElementwiseMassIcosahedralShellMap_macro_3D( idx_t* RESTRICT _data_dstEdge, + idx_t* RESTRICT _data_dstVertex, + idx_t* RESTRICT _data_srcEdge, + idx_t* RESTRICT _data_srcVertex, + real_t forVertex_0, + real_t forVertex_1, + real_t forVertex_2, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_0comp2, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_1comp2, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + real_t macro_vertex_coord_id_2comp2, + real_t macro_vertex_coord_id_3comp0, + real_t macro_vertex_coord_id_3comp1, + real_t macro_vertex_coord_id_3comp2, + std::shared_ptr< SparseMatrixProxy > mat, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float, + real_t radRayVertex, + real_t radRefVertex, + real_t rayVertex_0, + real_t rayVertex_1, + real_t rayVertex_2, + real_t refVertex_0, + real_t refVertex_1, + real_t refVertex_2, + real_t thrVertex_0, + real_t thrVertex_1, + real_t thrVertex_2 ) const; + + /// Integral: P2ElementwiseMassIcosahedralShellMap + /// - volume element: tetrahedron, dim: 3, vertices: 4, spacedim: 3 + /// - kernel type: computeInverseDiagonalOperatorValues + /// - loop strategy: SAWTOOTH /// - quadrature rule: Jaśkowiec-Sukumar 04 | points: 11, degree: 4 + /// - blending map: IcosahedralShellMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 2006 3515 69 33 11 0 0 1 - void computeInverseDiagonalOperatorValues_macro_3D( real_t* RESTRICT _data_invDiag_Edge, - real_t* RESTRICT _data_invDiag_Vertex, - real_t forVertex_0, - real_t forVertex_1, - real_t forVertex_2, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_0comp2, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_1comp2, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - real_t macro_vertex_coord_id_2comp2, - real_t macro_vertex_coord_id_3comp0, - real_t macro_vertex_coord_id_3comp1, - real_t macro_vertex_coord_id_3comp2, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float, - real_t radRayVertex, - real_t radRefVertex, - real_t rayVertex_0, - real_t rayVertex_1, - real_t rayVertex_2, - real_t refVertex_0, - real_t refVertex_1, - real_t refVertex_2, - real_t thrVertex_0, - real_t thrVertex_1, - real_t thrVertex_2 ) const; + void computeInverseDiagonalOperatorValues_P2ElementwiseMassIcosahedralShellMap_macro_3D( + real_t* RESTRICT _data_invDiag_Edge, + real_t* RESTRICT _data_invDiag_Vertex, + real_t forVertex_0, + real_t forVertex_1, + real_t forVertex_2, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_0comp2, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_1comp2, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + real_t macro_vertex_coord_id_2comp2, + real_t macro_vertex_coord_id_3comp0, + real_t macro_vertex_coord_id_3comp1, + real_t macro_vertex_coord_id_3comp2, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float, + real_t radRayVertex, + real_t radRefVertex, + real_t rayVertex_0, + real_t rayVertex_1, + real_t rayVertex_2, + real_t refVertex_0, + real_t refVertex_1, + real_t refVertex_2, + real_t thrVertex_0, + real_t thrVertex_1, + real_t thrVertex_2 ) const; std::shared_ptr< P2Function< real_t > > invDiag_; }; diff --git a/operators/mass/avx/P1ElementwiseMass_apply_macro_2D.cpp b/operators/mass/avx/P1ElementwiseMass_apply_P1ElementwiseMass_macro_2D.cpp similarity index 99% rename from operators/mass/avx/P1ElementwiseMass_apply_macro_2D.cpp rename to operators/mass/avx/P1ElementwiseMass_apply_P1ElementwiseMass_macro_2D.cpp index 9c0c900438a68ebe589fa83df58bf896a809d805..ec947a3809487298b85fa6ba4e0975383cb404e1 100644 --- a/operators/mass/avx/P1ElementwiseMass_apply_macro_2D.cpp +++ b/operators/mass/avx/P1ElementwiseMass_apply_P1ElementwiseMass_macro_2D.cpp @@ -43,6 +43,10 @@ + + + + @@ -56,7 +60,7 @@ namespace hyteg { namespace operatorgeneration { -void P1ElementwiseMass::apply_macro_2D( real_t * RESTRICT _data_dst, real_t * RESTRICT _data_src, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P1ElementwiseMass::apply_P1ElementwiseMass_macro_2D( real_t * RESTRICT _data_dst, real_t * RESTRICT _data_src, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t _data_q_w [] = {0.16666666666666666, 0.16666666666666666, 0.16666666666666666}; diff --git a/operators/mass/avx/P1ElementwiseMass_apply_macro_3D.cpp b/operators/mass/avx/P1ElementwiseMass_apply_P1ElementwiseMass_macro_3D.cpp similarity index 99% rename from operators/mass/avx/P1ElementwiseMass_apply_macro_3D.cpp rename to operators/mass/avx/P1ElementwiseMass_apply_P1ElementwiseMass_macro_3D.cpp index 509bbba72380de4df5464d8c792dfa1af19e9ef2..0087945fc3cc201e1b74b8d49c649aeef145e084 100644 --- a/operators/mass/avx/P1ElementwiseMass_apply_macro_3D.cpp +++ b/operators/mass/avx/P1ElementwiseMass_apply_P1ElementwiseMass_macro_3D.cpp @@ -43,6 +43,10 @@ + + + + @@ -56,7 +60,7 @@ namespace hyteg { namespace operatorgeneration { -void P1ElementwiseMass::apply_macro_3D( real_t * RESTRICT _data_dst, real_t * RESTRICT _data_src, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P1ElementwiseMass::apply_P1ElementwiseMass_macro_3D( real_t * RESTRICT _data_dst, real_t * RESTRICT _data_src, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t _data_q_w [] = {0.041666666666666657, 0.041666666666666657, 0.041666666666666657, 0.041666666666666657}; diff --git a/operators/mass/avx/P1ElementwiseMass_computeInverseDiagonalOperatorValues_macro_2D.cpp b/operators/mass/avx/P1ElementwiseMass_computeInverseDiagonalOperatorValues_P1ElementwiseMass_macro_2D.cpp similarity index 98% rename from operators/mass/avx/P1ElementwiseMass_computeInverseDiagonalOperatorValues_macro_2D.cpp rename to operators/mass/avx/P1ElementwiseMass_computeInverseDiagonalOperatorValues_P1ElementwiseMass_macro_2D.cpp index d3c90d05d4c445cafc1c2a1e4660435ecc5eb978..76afd8ad54cfa64d4b5b7eb15edd14c5dfcbcf2e 100644 --- a/operators/mass/avx/P1ElementwiseMass_computeInverseDiagonalOperatorValues_macro_2D.cpp +++ b/operators/mass/avx/P1ElementwiseMass_computeInverseDiagonalOperatorValues_P1ElementwiseMass_macro_2D.cpp @@ -43,6 +43,10 @@ + + + + @@ -56,7 +60,7 @@ namespace hyteg { namespace operatorgeneration { -void P1ElementwiseMass::computeInverseDiagonalOperatorValues_macro_2D( real_t * RESTRICT _data_invDiag_, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P1ElementwiseMass::computeInverseDiagonalOperatorValues_P1ElementwiseMass_macro_2D( real_t * RESTRICT _data_invDiag_, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t _data_q_w [] = {0.16666666666666666, 0.16666666666666666, 0.16666666666666666}; diff --git a/operators/mass/avx/P1ElementwiseMass_computeInverseDiagonalOperatorValues_macro_3D.cpp b/operators/mass/avx/P1ElementwiseMass_computeInverseDiagonalOperatorValues_P1ElementwiseMass_macro_3D.cpp similarity index 99% rename from operators/mass/avx/P1ElementwiseMass_computeInverseDiagonalOperatorValues_macro_3D.cpp rename to operators/mass/avx/P1ElementwiseMass_computeInverseDiagonalOperatorValues_P1ElementwiseMass_macro_3D.cpp index 23b54bc380fa7f140591d616eedd5ba9023d5856..c0c94466bb0389b11ffffe11516dbb4e36fd3072 100644 --- a/operators/mass/avx/P1ElementwiseMass_computeInverseDiagonalOperatorValues_macro_3D.cpp +++ b/operators/mass/avx/P1ElementwiseMass_computeInverseDiagonalOperatorValues_P1ElementwiseMass_macro_3D.cpp @@ -43,6 +43,10 @@ + + + + @@ -56,7 +60,7 @@ namespace hyteg { namespace operatorgeneration { -void P1ElementwiseMass::computeInverseDiagonalOperatorValues_macro_3D( real_t * RESTRICT _data_invDiag_, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P1ElementwiseMass::computeInverseDiagonalOperatorValues_P1ElementwiseMass_macro_3D( real_t * RESTRICT _data_invDiag_, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t _data_q_w [] = {0.041666666666666657, 0.041666666666666657, 0.041666666666666657, 0.041666666666666657}; diff --git a/operators/mass/avx/P2ElementwiseMassAnnulusMap_apply_macro_2D.cpp b/operators/mass/avx/P2ElementwiseMassAnnulusMap_apply_P2ElementwiseMassAnnulusMap_macro_2D.cpp similarity index 99% rename from operators/mass/avx/P2ElementwiseMassAnnulusMap_apply_macro_2D.cpp rename to operators/mass/avx/P2ElementwiseMassAnnulusMap_apply_P2ElementwiseMassAnnulusMap_macro_2D.cpp index 1894c2e7b42bca80a16e1cb3294d5f30ebfca751..356847c2472aeaa7c0a095394d9fb7a5ea09be0e 100644 --- a/operators/mass/avx/P2ElementwiseMassAnnulusMap_apply_macro_2D.cpp +++ b/operators/mass/avx/P2ElementwiseMassAnnulusMap_apply_P2ElementwiseMassAnnulusMap_macro_2D.cpp @@ -45,6 +45,10 @@ + + + + @@ -58,7 +62,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseMassAnnulusMap::apply_macro_2D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const +void P2ElementwiseMassAnnulusMap::apply_P2ElementwiseMassAnnulusMap_macro_2D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const { { const real_t _data_q_w [] = {0.11169079483900581, 0.054975871827660949, 0.11169079483900581, 0.054975871827660949, 0.11169079483900581, 0.054975871827660949}; diff --git a/operators/mass/avx/P2ElementwiseMassAnnulusMap_computeInverseDiagonalOperatorValues_macro_2D.cpp b/operators/mass/avx/P2ElementwiseMassAnnulusMap_computeInverseDiagonalOperatorValues_P2ElementwiseMassAnnulusMap_macro_2D.cpp similarity index 99% rename from operators/mass/avx/P2ElementwiseMassAnnulusMap_computeInverseDiagonalOperatorValues_macro_2D.cpp rename to operators/mass/avx/P2ElementwiseMassAnnulusMap_computeInverseDiagonalOperatorValues_P2ElementwiseMassAnnulusMap_macro_2D.cpp index 472f55684f0055eef490556086894eeae5d7512e..ae52b27075c58d8e43094a520b49f60e5f4a658f 100644 --- a/operators/mass/avx/P2ElementwiseMassAnnulusMap_computeInverseDiagonalOperatorValues_macro_2D.cpp +++ b/operators/mass/avx/P2ElementwiseMassAnnulusMap_computeInverseDiagonalOperatorValues_P2ElementwiseMassAnnulusMap_macro_2D.cpp @@ -45,6 +45,10 @@ + + + + @@ -58,7 +62,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseMassAnnulusMap::computeInverseDiagonalOperatorValues_macro_2D( real_t * RESTRICT _data_invDiag_Edge, real_t * RESTRICT _data_invDiag_Vertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const +void P2ElementwiseMassAnnulusMap::computeInverseDiagonalOperatorValues_P2ElementwiseMassAnnulusMap_macro_2D( real_t * RESTRICT _data_invDiag_Edge, real_t * RESTRICT _data_invDiag_Vertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const { { const real_t _data_q_w [] = {0.11169079483900581, 0.054975871827660949, 0.11169079483900581, 0.054975871827660949, 0.11169079483900581, 0.054975871827660949}; diff --git a/operators/mass/avx/P2ElementwiseMassIcosahedralShellMap_apply_macro_3D.cpp b/operators/mass/avx/P2ElementwiseMassIcosahedralShellMap_apply_P2ElementwiseMassIcosahedralShellMap_macro_3D.cpp similarity index 99% rename from operators/mass/avx/P2ElementwiseMassIcosahedralShellMap_apply_macro_3D.cpp rename to operators/mass/avx/P2ElementwiseMassIcosahedralShellMap_apply_P2ElementwiseMassIcosahedralShellMap_macro_3D.cpp index 618488dd8cb338500a761152b559632cb1d152cc..a2dc6f497a7fd15b24b3861b1ae83417275ea62e 100644 --- a/operators/mass/avx/P2ElementwiseMassIcosahedralShellMap_apply_macro_3D.cpp +++ b/operators/mass/avx/P2ElementwiseMassIcosahedralShellMap_apply_P2ElementwiseMassIcosahedralShellMap_macro_3D.cpp @@ -45,6 +45,10 @@ + + + + @@ -58,7 +62,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseMassIcosahedralShellMap::apply_macro_3D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const +void P2ElementwiseMassIcosahedralShellMap::apply_P2ElementwiseMassIcosahedralShellMap_macro_3D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const { { const real_t _data_q_w [] = {0.02364442875776411, 0.0041220137385117907, 0.025486503738822306, 0.0091363578885896128, 0.0052827872295400091, 0.02526054964096567, 0.01949470769240411, 0.0094909452962763929, 0.019522077110586403, 0.018549261128954084, 0.0066770344442521412}; diff --git a/operators/mass/avx/P2ElementwiseMassIcosahedralShellMap_computeInverseDiagonalOperatorValues_macro_3D.cpp b/operators/mass/avx/P2ElementwiseMassIcosahedralShellMap_computeInverseDiagonalOperatorValues_P2ElementwiseMassIcosahedralShellMap_macro_3D.cpp similarity index 99% rename from operators/mass/avx/P2ElementwiseMassIcosahedralShellMap_computeInverseDiagonalOperatorValues_macro_3D.cpp rename to operators/mass/avx/P2ElementwiseMassIcosahedralShellMap_computeInverseDiagonalOperatorValues_P2ElementwiseMassIcosahedralShellMap_macro_3D.cpp index 543d0fc8fea03bc448d6dc5c29cab2b2f8ec1e7a..bd9c7f803ff923b863bb44b4213752ea4173e676 100644 --- a/operators/mass/avx/P2ElementwiseMassIcosahedralShellMap_computeInverseDiagonalOperatorValues_macro_3D.cpp +++ b/operators/mass/avx/P2ElementwiseMassIcosahedralShellMap_computeInverseDiagonalOperatorValues_P2ElementwiseMassIcosahedralShellMap_macro_3D.cpp @@ -45,6 +45,10 @@ + + + + @@ -58,7 +62,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseMassIcosahedralShellMap::computeInverseDiagonalOperatorValues_macro_3D( real_t * RESTRICT _data_invDiag_Edge, real_t * RESTRICT _data_invDiag_Vertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const +void P2ElementwiseMassIcosahedralShellMap::computeInverseDiagonalOperatorValues_P2ElementwiseMassIcosahedralShellMap_macro_3D( real_t * RESTRICT _data_invDiag_Edge, real_t * RESTRICT _data_invDiag_Vertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const { { const real_t _data_q_w [] = {0.02364442875776411, 0.0041220137385117907, 0.025486503738822306, 0.0091363578885896128, 0.0052827872295400091, 0.02526054964096567, 0.01949470769240411, 0.0094909452962763929, 0.019522077110586403, 0.018549261128954084, 0.0066770344442521412}; diff --git a/operators/mass/avx/P2ElementwiseMass_apply_macro_2D.cpp b/operators/mass/avx/P2ElementwiseMass_apply_P2ElementwiseMass_macro_2D.cpp similarity index 99% rename from operators/mass/avx/P2ElementwiseMass_apply_macro_2D.cpp rename to operators/mass/avx/P2ElementwiseMass_apply_P2ElementwiseMass_macro_2D.cpp index 6ae1b62a2f4e7358e3ee90135fa8524d1278b0d5..cf949b84a5e256e9a3690d30bb2967b88e5b1b23 100644 --- a/operators/mass/avx/P2ElementwiseMass_apply_macro_2D.cpp +++ b/operators/mass/avx/P2ElementwiseMass_apply_P2ElementwiseMass_macro_2D.cpp @@ -43,6 +43,10 @@ + + + + @@ -56,7 +60,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseMass::apply_macro_2D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P2ElementwiseMass::apply_P2ElementwiseMass_macro_2D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t _data_q_w [] = {0.11169079483900581, 0.054975871827660949, 0.11169079483900581, 0.054975871827660949, 0.11169079483900581, 0.054975871827660949}; diff --git a/operators/mass/avx/P2ElementwiseMass_apply_macro_3D.cpp b/operators/mass/avx/P2ElementwiseMass_apply_P2ElementwiseMass_macro_3D.cpp similarity index 99% rename from operators/mass/avx/P2ElementwiseMass_apply_macro_3D.cpp rename to operators/mass/avx/P2ElementwiseMass_apply_P2ElementwiseMass_macro_3D.cpp index 12520616ff08b4cd07f3389effd68370ebf4e518..45369cfdce44894b698e7b2ee1b949dfcd110a26 100644 --- a/operators/mass/avx/P2ElementwiseMass_apply_macro_3D.cpp +++ b/operators/mass/avx/P2ElementwiseMass_apply_P2ElementwiseMass_macro_3D.cpp @@ -43,6 +43,10 @@ + + + + @@ -56,7 +60,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseMass::apply_macro_3D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P2ElementwiseMass::apply_P2ElementwiseMass_macro_3D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t _data_q_w [] = {0.02364442875776411, 0.0041220137385117907, 0.025486503738822306, 0.0091363578885896128, 0.0052827872295400091, 0.02526054964096567, 0.01949470769240411, 0.0094909452962763929, 0.019522077110586403, 0.018549261128954084, 0.0066770344442521412}; diff --git a/operators/mass/avx/P2ElementwiseMass_computeInverseDiagonalOperatorValues_macro_2D.cpp b/operators/mass/avx/P2ElementwiseMass_computeInverseDiagonalOperatorValues_P2ElementwiseMass_macro_2D.cpp similarity index 99% rename from operators/mass/avx/P2ElementwiseMass_computeInverseDiagonalOperatorValues_macro_2D.cpp rename to operators/mass/avx/P2ElementwiseMass_computeInverseDiagonalOperatorValues_P2ElementwiseMass_macro_2D.cpp index e99cef7b032a08af58661d51a94e01da78957b5a..6325548010eb76918fe388757a02a75c34fb62aa 100644 --- a/operators/mass/avx/P2ElementwiseMass_computeInverseDiagonalOperatorValues_macro_2D.cpp +++ b/operators/mass/avx/P2ElementwiseMass_computeInverseDiagonalOperatorValues_P2ElementwiseMass_macro_2D.cpp @@ -43,6 +43,10 @@ + + + + @@ -56,7 +60,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseMass::computeInverseDiagonalOperatorValues_macro_2D( real_t * RESTRICT _data_invDiag_Edge, real_t * RESTRICT _data_invDiag_Vertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P2ElementwiseMass::computeInverseDiagonalOperatorValues_P2ElementwiseMass_macro_2D( real_t * RESTRICT _data_invDiag_Edge, real_t * RESTRICT _data_invDiag_Vertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t _data_q_w [] = {0.11169079483900581, 0.054975871827660949, 0.11169079483900581, 0.054975871827660949, 0.11169079483900581, 0.054975871827660949}; diff --git a/operators/mass/avx/P2ElementwiseMass_computeInverseDiagonalOperatorValues_macro_3D.cpp b/operators/mass/avx/P2ElementwiseMass_computeInverseDiagonalOperatorValues_P2ElementwiseMass_macro_3D.cpp similarity index 99% rename from operators/mass/avx/P2ElementwiseMass_computeInverseDiagonalOperatorValues_macro_3D.cpp rename to operators/mass/avx/P2ElementwiseMass_computeInverseDiagonalOperatorValues_P2ElementwiseMass_macro_3D.cpp index f27a0a3df71da7477ab14ae2273f3cb62fb62fca..4cfb13011fd8d3405769df0011cc3d425aedf5f1 100644 --- a/operators/mass/avx/P2ElementwiseMass_computeInverseDiagonalOperatorValues_macro_3D.cpp +++ b/operators/mass/avx/P2ElementwiseMass_computeInverseDiagonalOperatorValues_P2ElementwiseMass_macro_3D.cpp @@ -43,6 +43,10 @@ + + + + @@ -56,7 +60,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseMass::computeInverseDiagonalOperatorValues_macro_3D( real_t * RESTRICT _data_invDiag_Edge, real_t * RESTRICT _data_invDiag_Vertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P2ElementwiseMass::computeInverseDiagonalOperatorValues_P2ElementwiseMass_macro_3D( real_t * RESTRICT _data_invDiag_Edge, real_t * RESTRICT _data_invDiag_Vertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t _data_q_w [] = {0.02364442875776411, 0.0041220137385117907, 0.025486503738822306, 0.0091363578885896128, 0.0052827872295400091, 0.02526054964096567, 0.01949470769240411, 0.0094909452962763929, 0.019522077110586403, 0.018549261128954084, 0.0066770344442521412}; diff --git a/operators/mass/noarch/P1ElementwiseMass_apply_macro_2D.cpp b/operators/mass/noarch/P1ElementwiseMass_apply_P1ElementwiseMass_macro_2D.cpp similarity index 97% rename from operators/mass/noarch/P1ElementwiseMass_apply_macro_2D.cpp rename to operators/mass/noarch/P1ElementwiseMass_apply_P1ElementwiseMass_macro_2D.cpp index 50d93e68840b0b242d30a1d17ab6f982394fcc42..285c5a3e868a3f2f4d909ad6a04da9e9c1e8d49d 100644 --- a/operators/mass/noarch/P1ElementwiseMass_apply_macro_2D.cpp +++ b/operators/mass/noarch/P1ElementwiseMass_apply_P1ElementwiseMass_macro_2D.cpp @@ -43,6 +43,10 @@ + + + + @@ -56,7 +60,7 @@ namespace hyteg { namespace operatorgeneration { -void P1ElementwiseMass::apply_macro_2D( real_t * RESTRICT _data_dst, real_t * RESTRICT _data_src, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P1ElementwiseMass::apply_P1ElementwiseMass_macro_2D( real_t * RESTRICT _data_dst, real_t * RESTRICT _data_src, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t _data_q_w [] = {0.16666666666666666, 0.16666666666666666, 0.16666666666666666}; diff --git a/operators/mass/noarch/P1ElementwiseMass_apply_macro_3D.cpp b/operators/mass/noarch/P1ElementwiseMass_apply_P1ElementwiseMass_macro_3D.cpp similarity index 99% rename from operators/mass/noarch/P1ElementwiseMass_apply_macro_3D.cpp rename to operators/mass/noarch/P1ElementwiseMass_apply_P1ElementwiseMass_macro_3D.cpp index 6be52e21e9a44e944812e084c18001b8eae48228..aecb86323d603c0136c8f38913688c30e2c35127 100644 --- a/operators/mass/noarch/P1ElementwiseMass_apply_macro_3D.cpp +++ b/operators/mass/noarch/P1ElementwiseMass_apply_P1ElementwiseMass_macro_3D.cpp @@ -43,6 +43,10 @@ + + + + @@ -56,7 +60,7 @@ namespace hyteg { namespace operatorgeneration { -void P1ElementwiseMass::apply_macro_3D( real_t * RESTRICT _data_dst, real_t * RESTRICT _data_src, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P1ElementwiseMass::apply_P1ElementwiseMass_macro_3D( real_t * RESTRICT _data_dst, real_t * RESTRICT _data_src, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t _data_q_w [] = {0.041666666666666657, 0.041666666666666657, 0.041666666666666657, 0.041666666666666657}; diff --git a/operators/mass/noarch/P1ElementwiseMass_computeInverseDiagonalOperatorValues_macro_2D.cpp b/operators/mass/noarch/P1ElementwiseMass_computeInverseDiagonalOperatorValues_P1ElementwiseMass_macro_2D.cpp similarity index 97% rename from operators/mass/noarch/P1ElementwiseMass_computeInverseDiagonalOperatorValues_macro_2D.cpp rename to operators/mass/noarch/P1ElementwiseMass_computeInverseDiagonalOperatorValues_P1ElementwiseMass_macro_2D.cpp index 39fda7816024596c2990ab1bea323aa0553367cf..739e5dcc82c8307fe97b2967206973839f314b6a 100644 --- a/operators/mass/noarch/P1ElementwiseMass_computeInverseDiagonalOperatorValues_macro_2D.cpp +++ b/operators/mass/noarch/P1ElementwiseMass_computeInverseDiagonalOperatorValues_P1ElementwiseMass_macro_2D.cpp @@ -43,6 +43,10 @@ + + + + @@ -56,7 +60,7 @@ namespace hyteg { namespace operatorgeneration { -void P1ElementwiseMass::computeInverseDiagonalOperatorValues_macro_2D( real_t * RESTRICT _data_invDiag_, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P1ElementwiseMass::computeInverseDiagonalOperatorValues_P1ElementwiseMass_macro_2D( real_t * RESTRICT _data_invDiag_, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t _data_q_w [] = {0.16666666666666666, 0.16666666666666666, 0.16666666666666666}; diff --git a/operators/mass/noarch/P1ElementwiseMass_computeInverseDiagonalOperatorValues_macro_3D.cpp b/operators/mass/noarch/P1ElementwiseMass_computeInverseDiagonalOperatorValues_P1ElementwiseMass_macro_3D.cpp similarity index 99% rename from operators/mass/noarch/P1ElementwiseMass_computeInverseDiagonalOperatorValues_macro_3D.cpp rename to operators/mass/noarch/P1ElementwiseMass_computeInverseDiagonalOperatorValues_P1ElementwiseMass_macro_3D.cpp index 48fe294153312b35c2a1c712796d6dd699bc88b3..7a311bcda634814a2ae1d652c8c8a259e25d3070 100644 --- a/operators/mass/noarch/P1ElementwiseMass_computeInverseDiagonalOperatorValues_macro_3D.cpp +++ b/operators/mass/noarch/P1ElementwiseMass_computeInverseDiagonalOperatorValues_P1ElementwiseMass_macro_3D.cpp @@ -43,6 +43,10 @@ + + + + @@ -56,7 +60,7 @@ namespace hyteg { namespace operatorgeneration { -void P1ElementwiseMass::computeInverseDiagonalOperatorValues_macro_3D( real_t * RESTRICT _data_invDiag_, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P1ElementwiseMass::computeInverseDiagonalOperatorValues_P1ElementwiseMass_macro_3D( real_t * RESTRICT _data_invDiag_, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t _data_q_w [] = {0.041666666666666657, 0.041666666666666657, 0.041666666666666657, 0.041666666666666657}; diff --git a/operators/mass/noarch/P1ElementwiseMass_toMatrix_macro_2D.cpp b/operators/mass/noarch/P1ElementwiseMass_toMatrix_P1ElementwiseMass_macro_2D.cpp similarity index 97% rename from operators/mass/noarch/P1ElementwiseMass_toMatrix_macro_2D.cpp rename to operators/mass/noarch/P1ElementwiseMass_toMatrix_P1ElementwiseMass_macro_2D.cpp index 67fe128fef635456682b5cc912ee2aaee3a7e218..2997ba3aa3a5db214e6f4aab9d10c0934027775d 100644 --- a/operators/mass/noarch/P1ElementwiseMass_toMatrix_macro_2D.cpp +++ b/operators/mass/noarch/P1ElementwiseMass_toMatrix_P1ElementwiseMass_macro_2D.cpp @@ -43,6 +43,10 @@ + + + + @@ -56,7 +60,7 @@ namespace hyteg { namespace operatorgeneration { -void P1ElementwiseMass::toMatrix_macro_2D( idx_t * RESTRICT _data_dst, idx_t * RESTRICT _data_src, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P1ElementwiseMass::toMatrix_P1ElementwiseMass_macro_2D( idx_t * RESTRICT _data_dst, idx_t * RESTRICT _data_src, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t _data_q_w [] = {0.16666666666666666, 0.16666666666666666, 0.16666666666666666}; diff --git a/operators/mass/noarch/P1ElementwiseMass_toMatrix_macro_3D.cpp b/operators/mass/noarch/P1ElementwiseMass_toMatrix_P1ElementwiseMass_macro_3D.cpp similarity index 99% rename from operators/mass/noarch/P1ElementwiseMass_toMatrix_macro_3D.cpp rename to operators/mass/noarch/P1ElementwiseMass_toMatrix_P1ElementwiseMass_macro_3D.cpp index aab7834054d09d8f8ac5476adaf119b065ad7ba6..c264411cd56b63bbab3046e0d918ca8c5f484483 100644 --- a/operators/mass/noarch/P1ElementwiseMass_toMatrix_macro_3D.cpp +++ b/operators/mass/noarch/P1ElementwiseMass_toMatrix_P1ElementwiseMass_macro_3D.cpp @@ -43,6 +43,10 @@ + + + + @@ -56,7 +60,7 @@ namespace hyteg { namespace operatorgeneration { -void P1ElementwiseMass::toMatrix_macro_3D( idx_t * RESTRICT _data_dst, idx_t * RESTRICT _data_src, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P1ElementwiseMass::toMatrix_P1ElementwiseMass_macro_3D( idx_t * RESTRICT _data_dst, idx_t * RESTRICT _data_src, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t _data_q_w [] = {0.041666666666666657, 0.041666666666666657, 0.041666666666666657, 0.041666666666666657}; diff --git a/operators/mass/noarch/P2ElementwiseMassAnnulusMap_apply_macro_2D.cpp b/operators/mass/noarch/P2ElementwiseMassAnnulusMap_apply_P2ElementwiseMassAnnulusMap_macro_2D.cpp similarity index 98% rename from operators/mass/noarch/P2ElementwiseMassAnnulusMap_apply_macro_2D.cpp rename to operators/mass/noarch/P2ElementwiseMassAnnulusMap_apply_P2ElementwiseMassAnnulusMap_macro_2D.cpp index f9590fda46ef662bb56dea30e51a3b3b8adef75d..81da3f28e7d20d45964000f5ec323df564ae8c0c 100644 --- a/operators/mass/noarch/P2ElementwiseMassAnnulusMap_apply_macro_2D.cpp +++ b/operators/mass/noarch/P2ElementwiseMassAnnulusMap_apply_P2ElementwiseMassAnnulusMap_macro_2D.cpp @@ -45,6 +45,10 @@ + + + + @@ -58,7 +62,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseMassAnnulusMap::apply_macro_2D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const +void P2ElementwiseMassAnnulusMap::apply_P2ElementwiseMassAnnulusMap_macro_2D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const { { const real_t _data_q_w [] = {0.11169079483900581, 0.054975871827660949, 0.11169079483900581, 0.054975871827660949, 0.11169079483900581, 0.054975871827660949}; diff --git a/operators/mass/noarch/P2ElementwiseMassAnnulusMap_computeInverseDiagonalOperatorValues_macro_2D.cpp b/operators/mass/noarch/P2ElementwiseMassAnnulusMap_computeInverseDiagonalOperatorValues_P2ElementwiseMassAnnulusMap_macro_2D.cpp similarity index 98% rename from operators/mass/noarch/P2ElementwiseMassAnnulusMap_computeInverseDiagonalOperatorValues_macro_2D.cpp rename to operators/mass/noarch/P2ElementwiseMassAnnulusMap_computeInverseDiagonalOperatorValues_P2ElementwiseMassAnnulusMap_macro_2D.cpp index 66950b7c7ceaf39657dd37ffe0312ddc2f995622..e48f504bf66ea5aad0302dfd1bcaa09ba9f7e22b 100644 --- a/operators/mass/noarch/P2ElementwiseMassAnnulusMap_computeInverseDiagonalOperatorValues_macro_2D.cpp +++ b/operators/mass/noarch/P2ElementwiseMassAnnulusMap_computeInverseDiagonalOperatorValues_P2ElementwiseMassAnnulusMap_macro_2D.cpp @@ -45,6 +45,10 @@ + + + + @@ -58,7 +62,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseMassAnnulusMap::computeInverseDiagonalOperatorValues_macro_2D( real_t * RESTRICT _data_invDiag_Edge, real_t * RESTRICT _data_invDiag_Vertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const +void P2ElementwiseMassAnnulusMap::computeInverseDiagonalOperatorValues_P2ElementwiseMassAnnulusMap_macro_2D( real_t * RESTRICT _data_invDiag_Edge, real_t * RESTRICT _data_invDiag_Vertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const { { const real_t _data_q_w [] = {0.11169079483900581, 0.054975871827660949, 0.11169079483900581, 0.054975871827660949, 0.11169079483900581, 0.054975871827660949}; diff --git a/operators/mass/noarch/P2ElementwiseMassAnnulusMap_toMatrix_macro_2D.cpp b/operators/mass/noarch/P2ElementwiseMassAnnulusMap_toMatrix_P2ElementwiseMassAnnulusMap_macro_2D.cpp similarity index 98% rename from operators/mass/noarch/P2ElementwiseMassAnnulusMap_toMatrix_macro_2D.cpp rename to operators/mass/noarch/P2ElementwiseMassAnnulusMap_toMatrix_P2ElementwiseMassAnnulusMap_macro_2D.cpp index c6b091cc7ce77c75be95bd7ee40316b8dd2fd0fd..a44577fcb3edef20c4dd7e40988ff852b9e364d0 100644 --- a/operators/mass/noarch/P2ElementwiseMassAnnulusMap_toMatrix_macro_2D.cpp +++ b/operators/mass/noarch/P2ElementwiseMassAnnulusMap_toMatrix_P2ElementwiseMassAnnulusMap_macro_2D.cpp @@ -45,6 +45,10 @@ + + + + @@ -58,7 +62,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseMassAnnulusMap::toMatrix_macro_2D( idx_t * RESTRICT _data_dstEdge, idx_t * RESTRICT _data_dstVertex, idx_t * RESTRICT _data_srcEdge, idx_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const +void P2ElementwiseMassAnnulusMap::toMatrix_P2ElementwiseMassAnnulusMap_macro_2D( idx_t * RESTRICT _data_dstEdge, idx_t * RESTRICT _data_dstVertex, idx_t * RESTRICT _data_srcEdge, idx_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const { { const real_t _data_q_w [] = {0.11169079483900581, 0.054975871827660949, 0.11169079483900581, 0.054975871827660949, 0.11169079483900581, 0.054975871827660949}; diff --git a/operators/mass/noarch/P2ElementwiseMassIcosahedralShellMap_apply_macro_3D.cpp b/operators/mass/noarch/P2ElementwiseMassIcosahedralShellMap_apply_P2ElementwiseMassIcosahedralShellMap_macro_3D.cpp similarity index 99% rename from operators/mass/noarch/P2ElementwiseMassIcosahedralShellMap_apply_macro_3D.cpp rename to operators/mass/noarch/P2ElementwiseMassIcosahedralShellMap_apply_P2ElementwiseMassIcosahedralShellMap_macro_3D.cpp index 27a6f56ae104e7d49938c58c4ffadccee2b265a1..9d68eacf02c2ce4c5f5b8144cd541d028eb83dd6 100644 --- a/operators/mass/noarch/P2ElementwiseMassIcosahedralShellMap_apply_macro_3D.cpp +++ b/operators/mass/noarch/P2ElementwiseMassIcosahedralShellMap_apply_P2ElementwiseMassIcosahedralShellMap_macro_3D.cpp @@ -45,6 +45,10 @@ + + + + @@ -58,7 +62,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseMassIcosahedralShellMap::apply_macro_3D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const +void P2ElementwiseMassIcosahedralShellMap::apply_P2ElementwiseMassIcosahedralShellMap_macro_3D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const { { const real_t _data_q_w [] = {0.02364442875776411, 0.0041220137385117907, 0.025486503738822306, 0.0091363578885896128, 0.0052827872295400091, 0.02526054964096567, 0.01949470769240411, 0.0094909452962763929, 0.019522077110586403, 0.018549261128954084, 0.0066770344442521412}; diff --git a/operators/mass/noarch/P2ElementwiseMassIcosahedralShellMap_computeInverseDiagonalOperatorValues_macro_3D.cpp b/operators/mass/noarch/P2ElementwiseMassIcosahedralShellMap_computeInverseDiagonalOperatorValues_P2ElementwiseMassIcosahedralShellMap_macro_3D.cpp similarity index 99% rename from operators/mass/noarch/P2ElementwiseMassIcosahedralShellMap_computeInverseDiagonalOperatorValues_macro_3D.cpp rename to operators/mass/noarch/P2ElementwiseMassIcosahedralShellMap_computeInverseDiagonalOperatorValues_P2ElementwiseMassIcosahedralShellMap_macro_3D.cpp index 1a772c200d07c1295f6755701301b51b1f312efb..c72291d373cbbaac15eaa371ead2cbbbaab9ee23 100644 --- a/operators/mass/noarch/P2ElementwiseMassIcosahedralShellMap_computeInverseDiagonalOperatorValues_macro_3D.cpp +++ b/operators/mass/noarch/P2ElementwiseMassIcosahedralShellMap_computeInverseDiagonalOperatorValues_P2ElementwiseMassIcosahedralShellMap_macro_3D.cpp @@ -45,6 +45,10 @@ + + + + @@ -58,7 +62,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseMassIcosahedralShellMap::computeInverseDiagonalOperatorValues_macro_3D( real_t * RESTRICT _data_invDiag_Edge, real_t * RESTRICT _data_invDiag_Vertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const +void P2ElementwiseMassIcosahedralShellMap::computeInverseDiagonalOperatorValues_P2ElementwiseMassIcosahedralShellMap_macro_3D( real_t * RESTRICT _data_invDiag_Edge, real_t * RESTRICT _data_invDiag_Vertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const { { const real_t _data_q_w [] = {0.02364442875776411, 0.0041220137385117907, 0.025486503738822306, 0.0091363578885896128, 0.0052827872295400091, 0.02526054964096567, 0.01949470769240411, 0.0094909452962763929, 0.019522077110586403, 0.018549261128954084, 0.0066770344442521412}; diff --git a/operators/mass/noarch/P2ElementwiseMassIcosahedralShellMap_toMatrix_macro_3D.cpp b/operators/mass/noarch/P2ElementwiseMassIcosahedralShellMap_toMatrix_P2ElementwiseMassIcosahedralShellMap_macro_3D.cpp similarity index 99% rename from operators/mass/noarch/P2ElementwiseMassIcosahedralShellMap_toMatrix_macro_3D.cpp rename to operators/mass/noarch/P2ElementwiseMassIcosahedralShellMap_toMatrix_P2ElementwiseMassIcosahedralShellMap_macro_3D.cpp index 22d4b441c3ee25c5af834a13d510a0055bd0a3db..c847625813ea10cc07520a060aa07fd887fe5aa9 100644 --- a/operators/mass/noarch/P2ElementwiseMassIcosahedralShellMap_toMatrix_macro_3D.cpp +++ b/operators/mass/noarch/P2ElementwiseMassIcosahedralShellMap_toMatrix_P2ElementwiseMassIcosahedralShellMap_macro_3D.cpp @@ -45,6 +45,10 @@ + + + + @@ -58,7 +62,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseMassIcosahedralShellMap::toMatrix_macro_3D( idx_t * RESTRICT _data_dstEdge, idx_t * RESTRICT _data_dstVertex, idx_t * RESTRICT _data_srcEdge, idx_t * RESTRICT _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const +void P2ElementwiseMassIcosahedralShellMap::toMatrix_P2ElementwiseMassIcosahedralShellMap_macro_3D( idx_t * RESTRICT _data_dstEdge, idx_t * RESTRICT _data_dstVertex, idx_t * RESTRICT _data_srcEdge, idx_t * RESTRICT _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const { { const real_t _data_q_w [] = {0.02364442875776411, 0.0041220137385117907, 0.025486503738822306, 0.0091363578885896128, 0.0052827872295400091, 0.02526054964096567, 0.01949470769240411, 0.0094909452962763929, 0.019522077110586403, 0.018549261128954084, 0.0066770344442521412}; diff --git a/operators/mass/noarch/P2ElementwiseMass_apply_macro_2D.cpp b/operators/mass/noarch/P2ElementwiseMass_apply_P2ElementwiseMass_macro_2D.cpp similarity index 98% rename from operators/mass/noarch/P2ElementwiseMass_apply_macro_2D.cpp rename to operators/mass/noarch/P2ElementwiseMass_apply_P2ElementwiseMass_macro_2D.cpp index 2e75a97db51968e3595def1b3fd6c472587210d0..ec2ae4c00460e82f39639090957cf9f01ce6c0ce 100644 --- a/operators/mass/noarch/P2ElementwiseMass_apply_macro_2D.cpp +++ b/operators/mass/noarch/P2ElementwiseMass_apply_P2ElementwiseMass_macro_2D.cpp @@ -43,6 +43,10 @@ + + + + @@ -56,7 +60,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseMass::apply_macro_2D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P2ElementwiseMass::apply_P2ElementwiseMass_macro_2D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t _data_q_w [] = {0.11169079483900581, 0.054975871827660949, 0.11169079483900581, 0.054975871827660949, 0.11169079483900581, 0.054975871827660949}; diff --git a/operators/mass/noarch/P2ElementwiseMass_apply_macro_3D.cpp b/operators/mass/noarch/P2ElementwiseMass_apply_P2ElementwiseMass_macro_3D.cpp similarity index 99% rename from operators/mass/noarch/P2ElementwiseMass_apply_macro_3D.cpp rename to operators/mass/noarch/P2ElementwiseMass_apply_P2ElementwiseMass_macro_3D.cpp index 4ec9c161fde70c0a3f2817b1fb278860d8b5124d..6cf210a4cb109288ea9efe74f683c4463b70c55c 100644 --- a/operators/mass/noarch/P2ElementwiseMass_apply_macro_3D.cpp +++ b/operators/mass/noarch/P2ElementwiseMass_apply_P2ElementwiseMass_macro_3D.cpp @@ -43,6 +43,10 @@ + + + + @@ -56,7 +60,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseMass::apply_macro_3D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P2ElementwiseMass::apply_P2ElementwiseMass_macro_3D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t _data_q_w [] = {0.02364442875776411, 0.0041220137385117907, 0.025486503738822306, 0.0091363578885896128, 0.0052827872295400091, 0.02526054964096567, 0.01949470769240411, 0.0094909452962763929, 0.019522077110586403, 0.018549261128954084, 0.0066770344442521412}; diff --git a/operators/mass/noarch/P2ElementwiseMass_computeInverseDiagonalOperatorValues_macro_2D.cpp b/operators/mass/noarch/P2ElementwiseMass_computeInverseDiagonalOperatorValues_P2ElementwiseMass_macro_2D.cpp similarity index 97% rename from operators/mass/noarch/P2ElementwiseMass_computeInverseDiagonalOperatorValues_macro_2D.cpp rename to operators/mass/noarch/P2ElementwiseMass_computeInverseDiagonalOperatorValues_P2ElementwiseMass_macro_2D.cpp index 834628779af039a2071ae8c42d1a7311da80fcf9..cf9a8dd1b5ef767cb5e67ef7fb852e5f3d082b99 100644 --- a/operators/mass/noarch/P2ElementwiseMass_computeInverseDiagonalOperatorValues_macro_2D.cpp +++ b/operators/mass/noarch/P2ElementwiseMass_computeInverseDiagonalOperatorValues_P2ElementwiseMass_macro_2D.cpp @@ -43,6 +43,10 @@ + + + + @@ -56,7 +60,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseMass::computeInverseDiagonalOperatorValues_macro_2D( real_t * RESTRICT _data_invDiag_Edge, real_t * RESTRICT _data_invDiag_Vertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P2ElementwiseMass::computeInverseDiagonalOperatorValues_P2ElementwiseMass_macro_2D( real_t * RESTRICT _data_invDiag_Edge, real_t * RESTRICT _data_invDiag_Vertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t _data_q_w [] = {0.11169079483900581, 0.054975871827660949, 0.11169079483900581, 0.054975871827660949, 0.11169079483900581, 0.054975871827660949}; diff --git a/operators/mass/noarch/P2ElementwiseMass_computeInverseDiagonalOperatorValues_macro_3D.cpp b/operators/mass/noarch/P2ElementwiseMass_computeInverseDiagonalOperatorValues_P2ElementwiseMass_macro_3D.cpp similarity index 99% rename from operators/mass/noarch/P2ElementwiseMass_computeInverseDiagonalOperatorValues_macro_3D.cpp rename to operators/mass/noarch/P2ElementwiseMass_computeInverseDiagonalOperatorValues_P2ElementwiseMass_macro_3D.cpp index 2c456b576c922c154e908ce5c84e21ffeae00b9d..49ad0528e3ad43662f10f682542d22d7172e67c3 100644 --- a/operators/mass/noarch/P2ElementwiseMass_computeInverseDiagonalOperatorValues_macro_3D.cpp +++ b/operators/mass/noarch/P2ElementwiseMass_computeInverseDiagonalOperatorValues_P2ElementwiseMass_macro_3D.cpp @@ -43,6 +43,10 @@ + + + + @@ -56,7 +60,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseMass::computeInverseDiagonalOperatorValues_macro_3D( real_t * RESTRICT _data_invDiag_Edge, real_t * RESTRICT _data_invDiag_Vertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P2ElementwiseMass::computeInverseDiagonalOperatorValues_P2ElementwiseMass_macro_3D( real_t * RESTRICT _data_invDiag_Edge, real_t * RESTRICT _data_invDiag_Vertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t _data_q_w [] = {0.02364442875776411, 0.0041220137385117907, 0.025486503738822306, 0.0091363578885896128, 0.0052827872295400091, 0.02526054964096567, 0.01949470769240411, 0.0094909452962763929, 0.019522077110586403, 0.018549261128954084, 0.0066770344442521412}; diff --git a/operators/mass/noarch/P2ElementwiseMass_toMatrix_macro_2D.cpp b/operators/mass/noarch/P2ElementwiseMass_toMatrix_P2ElementwiseMass_macro_2D.cpp similarity index 98% rename from operators/mass/noarch/P2ElementwiseMass_toMatrix_macro_2D.cpp rename to operators/mass/noarch/P2ElementwiseMass_toMatrix_P2ElementwiseMass_macro_2D.cpp index 134890c593be40bc6226ce5b5e451a419db8d430..e0e35dbc4b2edfff4fc12988b25a551960e76b28 100644 --- a/operators/mass/noarch/P2ElementwiseMass_toMatrix_macro_2D.cpp +++ b/operators/mass/noarch/P2ElementwiseMass_toMatrix_P2ElementwiseMass_macro_2D.cpp @@ -43,6 +43,10 @@ + + + + @@ -56,7 +60,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseMass::toMatrix_macro_2D( idx_t * RESTRICT _data_dstEdge, idx_t * RESTRICT _data_dstVertex, idx_t * RESTRICT _data_srcEdge, idx_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P2ElementwiseMass::toMatrix_P2ElementwiseMass_macro_2D( idx_t * RESTRICT _data_dstEdge, idx_t * RESTRICT _data_dstVertex, idx_t * RESTRICT _data_srcEdge, idx_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t _data_q_w [] = {0.11169079483900581, 0.054975871827660949, 0.11169079483900581, 0.054975871827660949, 0.11169079483900581, 0.054975871827660949}; diff --git a/operators/mass/noarch/P2ElementwiseMass_toMatrix_macro_3D.cpp b/operators/mass/noarch/P2ElementwiseMass_toMatrix_P2ElementwiseMass_macro_3D.cpp similarity index 99% rename from operators/mass/noarch/P2ElementwiseMass_toMatrix_macro_3D.cpp rename to operators/mass/noarch/P2ElementwiseMass_toMatrix_P2ElementwiseMass_macro_3D.cpp index 4bdda5f7e80dcc3b7b2c239c415efe59ff45464d..bf026d00369fe72843a4564cbc822219dc9f2c2e 100644 --- a/operators/mass/noarch/P2ElementwiseMass_toMatrix_macro_3D.cpp +++ b/operators/mass/noarch/P2ElementwiseMass_toMatrix_P2ElementwiseMass_macro_3D.cpp @@ -43,6 +43,10 @@ + + + + @@ -56,7 +60,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseMass::toMatrix_macro_3D( idx_t * RESTRICT _data_dstEdge, idx_t * RESTRICT _data_dstVertex, idx_t * RESTRICT _data_srcEdge, idx_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P2ElementwiseMass::toMatrix_P2ElementwiseMass_macro_3D( idx_t * RESTRICT _data_dstEdge, idx_t * RESTRICT _data_dstVertex, idx_t * RESTRICT _data_srcEdge, idx_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t _data_q_w [] = {0.02364442875776411, 0.0041220137385117907, 0.025486503738822306, 0.0091363578885896128, 0.0052827872295400091, 0.02526054964096567, 0.01949470769240411, 0.0094909452962763929, 0.019522077110586403, 0.018549261128954084, 0.0066770344442521412}; diff --git a/operators/shear_heating/CMakeLists.txt b/operators/shear_heating/CMakeLists.txt index 8c5a4622270950b7b4470f1d317923274b909c80..c60a8feee48d84b659bb144b0b56a726fdabf85c 100644 --- a/operators/shear_heating/CMakeLists.txt +++ b/operators/shear_heating/CMakeLists.txt @@ -11,30 +11,30 @@ add_library( opgen-shear_heating if(HYTEG_BUILD_WITH_AVX AND WALBERLA_DOUBLE_ACCURACY) target_sources(opgen-shear_heating PRIVATE - avx/P2ElementwiseShearHeatingAnnulusMap_apply_macro_2D.cpp - avx/P2ElementwiseShearHeatingAnnulusMap_computeInverseDiagonalOperatorValues_macro_2D.cpp - avx/P2ElementwiseShearHeatingIcosahedralShellMap_apply_macro_3D.cpp - avx/P2ElementwiseShearHeatingIcosahedralShellMap_computeInverseDiagonalOperatorValues_macro_3D.cpp - avx/P2ElementwiseShearHeating_apply_macro_2D.cpp - avx/P2ElementwiseShearHeating_apply_macro_3D.cpp - avx/P2ElementwiseShearHeating_computeInverseDiagonalOperatorValues_macro_2D.cpp - avx/P2ElementwiseShearHeating_computeInverseDiagonalOperatorValues_macro_3D.cpp - noarch/P2ElementwiseShearHeatingAnnulusMap_toMatrix_macro_2D.cpp - noarch/P2ElementwiseShearHeatingIcosahedralShellMap_toMatrix_macro_3D.cpp - noarch/P2ElementwiseShearHeating_toMatrix_macro_2D.cpp - noarch/P2ElementwiseShearHeating_toMatrix_macro_3D.cpp + avx/P2ElementwiseShearHeatingAnnulusMap_apply_P2ElementwiseShearHeatingAnnulusMap_macro_2D.cpp + avx/P2ElementwiseShearHeatingAnnulusMap_computeInverseDiagonalOperatorValues_P2ElementwiseShearHeatingAnnulusMap_macro_2D.cpp + avx/P2ElementwiseShearHeatingIcosahedralShellMap_apply_P2ElementwiseShearHeatingIcosahedralShellMap_macro_3D.cpp + avx/P2ElementwiseShearHeatingIcosahedralShellMap_computeInverseDiagonalOperatorValues_P2ElementwiseShearHeatingIcosahedralShellMap_macro_3D.cpp + avx/P2ElementwiseShearHeating_apply_P2ElementwiseShearHeating_macro_2D.cpp + avx/P2ElementwiseShearHeating_apply_P2ElementwiseShearHeating_macro_3D.cpp + avx/P2ElementwiseShearHeating_computeInverseDiagonalOperatorValues_P2ElementwiseShearHeating_macro_2D.cpp + avx/P2ElementwiseShearHeating_computeInverseDiagonalOperatorValues_P2ElementwiseShearHeating_macro_3D.cpp + noarch/P2ElementwiseShearHeatingAnnulusMap_toMatrix_P2ElementwiseShearHeatingAnnulusMap_macro_2D.cpp + noarch/P2ElementwiseShearHeatingIcosahedralShellMap_toMatrix_P2ElementwiseShearHeatingIcosahedralShellMap_macro_3D.cpp + noarch/P2ElementwiseShearHeating_toMatrix_P2ElementwiseShearHeating_macro_2D.cpp + noarch/P2ElementwiseShearHeating_toMatrix_P2ElementwiseShearHeating_macro_3D.cpp ) set_source_files_properties( - avx/P2ElementwiseShearHeatingAnnulusMap_apply_macro_2D.cpp - avx/P2ElementwiseShearHeatingAnnulusMap_computeInverseDiagonalOperatorValues_macro_2D.cpp - avx/P2ElementwiseShearHeatingIcosahedralShellMap_apply_macro_3D.cpp - avx/P2ElementwiseShearHeatingIcosahedralShellMap_computeInverseDiagonalOperatorValues_macro_3D.cpp - avx/P2ElementwiseShearHeating_apply_macro_2D.cpp - avx/P2ElementwiseShearHeating_apply_macro_3D.cpp - avx/P2ElementwiseShearHeating_computeInverseDiagonalOperatorValues_macro_2D.cpp - avx/P2ElementwiseShearHeating_computeInverseDiagonalOperatorValues_macro_3D.cpp + avx/P2ElementwiseShearHeatingAnnulusMap_apply_P2ElementwiseShearHeatingAnnulusMap_macro_2D.cpp + avx/P2ElementwiseShearHeatingAnnulusMap_computeInverseDiagonalOperatorValues_P2ElementwiseShearHeatingAnnulusMap_macro_2D.cpp + avx/P2ElementwiseShearHeatingIcosahedralShellMap_apply_P2ElementwiseShearHeatingIcosahedralShellMap_macro_3D.cpp + avx/P2ElementwiseShearHeatingIcosahedralShellMap_computeInverseDiagonalOperatorValues_P2ElementwiseShearHeatingIcosahedralShellMap_macro_3D.cpp + avx/P2ElementwiseShearHeating_apply_P2ElementwiseShearHeating_macro_2D.cpp + avx/P2ElementwiseShearHeating_apply_P2ElementwiseShearHeating_macro_3D.cpp + avx/P2ElementwiseShearHeating_computeInverseDiagonalOperatorValues_P2ElementwiseShearHeating_macro_2D.cpp + avx/P2ElementwiseShearHeating_computeInverseDiagonalOperatorValues_P2ElementwiseShearHeating_macro_3D.cpp PROPERTIES COMPILE_OPTIONS ${HYTEG_COMPILER_NATIVE_FLAGS} ) @@ -45,18 +45,18 @@ else() target_sources(opgen-shear_heating PRIVATE - noarch/P2ElementwiseShearHeatingAnnulusMap_apply_macro_2D.cpp - noarch/P2ElementwiseShearHeatingAnnulusMap_computeInverseDiagonalOperatorValues_macro_2D.cpp - noarch/P2ElementwiseShearHeatingAnnulusMap_toMatrix_macro_2D.cpp - noarch/P2ElementwiseShearHeatingIcosahedralShellMap_apply_macro_3D.cpp - noarch/P2ElementwiseShearHeatingIcosahedralShellMap_computeInverseDiagonalOperatorValues_macro_3D.cpp - noarch/P2ElementwiseShearHeatingIcosahedralShellMap_toMatrix_macro_3D.cpp - noarch/P2ElementwiseShearHeating_apply_macro_2D.cpp - noarch/P2ElementwiseShearHeating_apply_macro_3D.cpp - noarch/P2ElementwiseShearHeating_computeInverseDiagonalOperatorValues_macro_2D.cpp - noarch/P2ElementwiseShearHeating_computeInverseDiagonalOperatorValues_macro_3D.cpp - noarch/P2ElementwiseShearHeating_toMatrix_macro_2D.cpp - noarch/P2ElementwiseShearHeating_toMatrix_macro_3D.cpp + noarch/P2ElementwiseShearHeatingAnnulusMap_apply_P2ElementwiseShearHeatingAnnulusMap_macro_2D.cpp + noarch/P2ElementwiseShearHeatingAnnulusMap_computeInverseDiagonalOperatorValues_P2ElementwiseShearHeatingAnnulusMap_macro_2D.cpp + noarch/P2ElementwiseShearHeatingAnnulusMap_toMatrix_P2ElementwiseShearHeatingAnnulusMap_macro_2D.cpp + noarch/P2ElementwiseShearHeatingIcosahedralShellMap_apply_P2ElementwiseShearHeatingIcosahedralShellMap_macro_3D.cpp + noarch/P2ElementwiseShearHeatingIcosahedralShellMap_computeInverseDiagonalOperatorValues_P2ElementwiseShearHeatingIcosahedralShellMap_macro_3D.cpp + noarch/P2ElementwiseShearHeatingIcosahedralShellMap_toMatrix_P2ElementwiseShearHeatingIcosahedralShellMap_macro_3D.cpp + noarch/P2ElementwiseShearHeating_apply_P2ElementwiseShearHeating_macro_2D.cpp + noarch/P2ElementwiseShearHeating_apply_P2ElementwiseShearHeating_macro_3D.cpp + noarch/P2ElementwiseShearHeating_computeInverseDiagonalOperatorValues_P2ElementwiseShearHeating_macro_2D.cpp + noarch/P2ElementwiseShearHeating_computeInverseDiagonalOperatorValues_P2ElementwiseShearHeating_macro_3D.cpp + noarch/P2ElementwiseShearHeating_toMatrix_P2ElementwiseShearHeating_macro_2D.cpp + noarch/P2ElementwiseShearHeating_toMatrix_P2ElementwiseShearHeating_macro_3D.cpp ) endif() diff --git a/operators/shear_heating/P2ElementwiseShearHeating.cpp b/operators/shear_heating/P2ElementwiseShearHeating.cpp index c862b806dddf1e02deb97cf97c508f70e581bedb..bb2058722e34d505987d98d788b0755b03901ba3 100644 --- a/operators/shear_heating/P2ElementwiseShearHeating.cpp +++ b/operators/shear_heating/P2ElementwiseShearHeating.cpp @@ -161,7 +161,7 @@ void P2ElementwiseShearHeating::apply( const P2Function< real_t >& src, this->timingTree_->start( "kernel" ); - apply_macro_3D( + apply_P2ElementwiseShearHeating_macro_3D( _data_dstEdge, _data_dstVertex, @@ -189,6 +189,7 @@ void P2ElementwiseShearHeating::apply( const P2Function< real_t >& src, macro_vertex_coord_id_3comp2, micro_edges_per_macro_edge, micro_edges_per_macro_edge_float ); + this->timingTree_->stop( "kernel" ); } @@ -262,7 +263,7 @@ void P2ElementwiseShearHeating::apply( const P2Function< real_t >& src, this->timingTree_->start( "kernel" ); - apply_macro_2D( + apply_P2ElementwiseShearHeating_macro_2D( _data_dstEdge, _data_dstVertex, @@ -282,6 +283,7 @@ void P2ElementwiseShearHeating::apply( const P2Function< real_t >& src, macro_vertex_coord_id_2comp1, micro_edges_per_macro_edge, micro_edges_per_macro_edge_float ); + this->timingTree_->stop( "kernel" ); } @@ -367,7 +369,7 @@ void P2ElementwiseShearHeating::toMatrix( const std::shared_ptr< SparseMatrixPro this->timingTree_->start( "kernel" ); - toMatrix_macro_3D( + toMatrix_P2ElementwiseShearHeating_macro_3D( _data_dstEdge, _data_dstVertex, @@ -396,6 +398,7 @@ void P2ElementwiseShearHeating::toMatrix( const std::shared_ptr< SparseMatrixPro mat, micro_edges_per_macro_edge, micro_edges_per_macro_edge_float ); + this->timingTree_->stop( "kernel" ); } } @@ -437,7 +440,7 @@ void P2ElementwiseShearHeating::toMatrix( const std::shared_ptr< SparseMatrixPro this->timingTree_->start( "kernel" ); - toMatrix_macro_2D( + toMatrix_P2ElementwiseShearHeating_macro_2D( _data_dstEdge, _data_dstVertex, @@ -458,6 +461,7 @@ void P2ElementwiseShearHeating::toMatrix( const std::shared_ptr< SparseMatrixPro mat, micro_edges_per_macro_edge, micro_edges_per_macro_edge_float ); + this->timingTree_->stop( "kernel" ); } } @@ -527,7 +531,7 @@ void P2ElementwiseShearHeating::computeInverseDiagonalOperatorValues() this->timingTree_->start( "kernel" ); - computeInverseDiagonalOperatorValues_macro_3D( + computeInverseDiagonalOperatorValues_P2ElementwiseShearHeating_macro_3D( _data_invDiag_Edge, _data_invDiag_Vertex, @@ -553,6 +557,7 @@ void P2ElementwiseShearHeating::computeInverseDiagonalOperatorValues() macro_vertex_coord_id_3comp2, micro_edges_per_macro_edge, micro_edges_per_macro_edge_float ); + this->timingTree_->stop( "kernel" ); } @@ -606,7 +611,7 @@ void P2ElementwiseShearHeating::computeInverseDiagonalOperatorValues() this->timingTree_->start( "kernel" ); - computeInverseDiagonalOperatorValues_macro_2D( + computeInverseDiagonalOperatorValues_P2ElementwiseShearHeating_macro_2D( _data_invDiag_Edge, _data_invDiag_Vertex, @@ -624,6 +629,7 @@ void P2ElementwiseShearHeating::computeInverseDiagonalOperatorValues() macro_vertex_coord_id_2comp1, micro_edges_per_macro_edge, micro_edges_per_macro_edge_float ); + this->timingTree_->stop( "kernel" ); } diff --git a/operators/shear_heating/P2ElementwiseShearHeating.hpp b/operators/shear_heating/P2ElementwiseShearHeating.hpp index 1fd9f687891caf1b5e344bfc7745986b241ba4ed..cda1dd4c24ea107c942c433e9512555bf16e3594 100644 --- a/operators/shear_heating/P2ElementwiseShearHeating.hpp +++ b/operators/shear_heating/P2ElementwiseShearHeating.hpp @@ -29,6 +29,7 @@ #include "core/DataTypes.h" #include "hyteg/LikwidWrapper.hpp" +#include "hyteg/boundary/BoundaryConditions.hpp" #include "hyteg/communication/Syncing.hpp" #include "hyteg/edgedofspace/EdgeDoFMacroCell.hpp" #include "hyteg/operators/Operator.hpp" @@ -36,6 +37,7 @@ #include "hyteg/primitivestorage/PrimitiveStorage.hpp" #include "hyteg/solvers/Smoothables.hpp" #include "hyteg/sparseassembly/SparseMatrixProxy.hpp" +#include "hyteg/types/types.hpp" #define FUNC_PREFIX @@ -107,172 +109,201 @@ class P2ElementwiseShearHeating : public Operator< P2Function< real_t >, P2Funct protected: private: - /// Kernel type: apply + /// Integral: P2ElementwiseShearHeating + /// - volume element: triangle, dim: 2, vertices: 3, spacedim: 2 + /// - kernel type: apply + /// - loop strategy: SAWTOOTH /// - quadrature rule: Dunavant 3 | points: 4, degree: 3 + /// - blending map: IdentityMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 352 400 12 0 0 0 0 1 - void apply_macro_2D( real_t* RESTRICT _data_dstEdge, - real_t* RESTRICT _data_dstVertex, - real_t* RESTRICT _data_muEdge, - real_t* RESTRICT _data_muVertex, - real_t* RESTRICT _data_srcEdge, - real_t* RESTRICT _data_srcVertex, - real_t* RESTRICT _data_uxEdge, - real_t* RESTRICT _data_uxVertex, - real_t* RESTRICT _data_uyEdge, - real_t* RESTRICT _data_uyVertex, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float ) const; - /// Kernel type: apply + void apply_P2ElementwiseShearHeating_macro_2D( real_t* RESTRICT _data_dstEdge, + real_t* RESTRICT _data_dstVertex, + real_t* RESTRICT _data_muEdge, + real_t* RESTRICT _data_muVertex, + real_t* RESTRICT _data_srcEdge, + real_t* RESTRICT _data_srcVertex, + real_t* RESTRICT _data_uxEdge, + real_t* RESTRICT _data_uxVertex, + real_t* RESTRICT _data_uyEdge, + real_t* RESTRICT _data_uyVertex, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float ) const; + + /// Integral: P2ElementwiseShearHeating + /// - volume element: tetrahedron, dim: 3, vertices: 4, spacedim: 3 + /// - kernel type: apply + /// - loop strategy: SAWTOOTH /// - quadrature rule: Hammer-Marlowe-Stroud 3 | points: 5, degree: 3 + /// - blending map: IdentityMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 1268 1327 36 0 0 0 0 1 - void apply_macro_3D( real_t* RESTRICT _data_dstEdge, - real_t* RESTRICT _data_dstVertex, - real_t* RESTRICT _data_muEdge, - real_t* RESTRICT _data_muVertex, - real_t* RESTRICT _data_srcEdge, - real_t* RESTRICT _data_srcVertex, - real_t* RESTRICT _data_uxEdge, - real_t* RESTRICT _data_uxVertex, - real_t* RESTRICT _data_uyEdge, - real_t* RESTRICT _data_uyVertex, - real_t* RESTRICT _data_uzEdge, - real_t* RESTRICT _data_uzVertex, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_0comp2, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_1comp2, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - real_t macro_vertex_coord_id_2comp2, - real_t macro_vertex_coord_id_3comp0, - real_t macro_vertex_coord_id_3comp1, - real_t macro_vertex_coord_id_3comp2, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float ) const; - /// Kernel type: toMatrix + void apply_P2ElementwiseShearHeating_macro_3D( real_t* RESTRICT _data_dstEdge, + real_t* RESTRICT _data_dstVertex, + real_t* RESTRICT _data_muEdge, + real_t* RESTRICT _data_muVertex, + real_t* RESTRICT _data_srcEdge, + real_t* RESTRICT _data_srcVertex, + real_t* RESTRICT _data_uxEdge, + real_t* RESTRICT _data_uxVertex, + real_t* RESTRICT _data_uyEdge, + real_t* RESTRICT _data_uyVertex, + real_t* RESTRICT _data_uzEdge, + real_t* RESTRICT _data_uzVertex, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_0comp2, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_1comp2, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + real_t macro_vertex_coord_id_2comp2, + real_t macro_vertex_coord_id_3comp0, + real_t macro_vertex_coord_id_3comp1, + real_t macro_vertex_coord_id_3comp2, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float ) const; + + /// Integral: P2ElementwiseShearHeating + /// - volume element: triangle, dim: 2, vertices: 3, spacedim: 2 + /// - kernel type: toMatrix + /// - loop strategy: SAWTOOTH /// - quadrature rule: Dunavant 3 | points: 4, degree: 3 + /// - blending map: IdentityMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 316 364 12 0 0 0 0 4 - void toMatrix_macro_2D( idx_t* RESTRICT _data_dstEdge, - idx_t* RESTRICT _data_dstVertex, - real_t* RESTRICT _data_muEdge, - real_t* RESTRICT _data_muVertex, - idx_t* RESTRICT _data_srcEdge, - idx_t* RESTRICT _data_srcVertex, - real_t* RESTRICT _data_uxEdge, - real_t* RESTRICT _data_uxVertex, - real_t* RESTRICT _data_uyEdge, - real_t* RESTRICT _data_uyVertex, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - std::shared_ptr< SparseMatrixProxy > mat, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float ) const; - /// Kernel type: toMatrix + void toMatrix_P2ElementwiseShearHeating_macro_2D( idx_t* RESTRICT _data_dstEdge, + idx_t* RESTRICT _data_dstVertex, + real_t* RESTRICT _data_muEdge, + real_t* RESTRICT _data_muVertex, + idx_t* RESTRICT _data_srcEdge, + idx_t* RESTRICT _data_srcVertex, + real_t* RESTRICT _data_uxEdge, + real_t* RESTRICT _data_uxVertex, + real_t* RESTRICT _data_uyEdge, + real_t* RESTRICT _data_uyVertex, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + std::shared_ptr< SparseMatrixProxy > mat, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float ) const; + + /// Integral: P2ElementwiseShearHeating + /// - volume element: tetrahedron, dim: 3, vertices: 4, spacedim: 3 + /// - kernel type: toMatrix + /// - loop strategy: SAWTOOTH /// - quadrature rule: Hammer-Marlowe-Stroud 3 | points: 5, degree: 3 + /// - blending map: IdentityMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 1168 1227 36 0 0 0 0 4 - void toMatrix_macro_3D( idx_t* RESTRICT _data_dstEdge, - idx_t* RESTRICT _data_dstVertex, - real_t* RESTRICT _data_muEdge, - real_t* RESTRICT _data_muVertex, - idx_t* RESTRICT _data_srcEdge, - idx_t* RESTRICT _data_srcVertex, - real_t* RESTRICT _data_uxEdge, - real_t* RESTRICT _data_uxVertex, - real_t* RESTRICT _data_uyEdge, - real_t* RESTRICT _data_uyVertex, - real_t* RESTRICT _data_uzEdge, - real_t* RESTRICT _data_uzVertex, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_0comp2, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_1comp2, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - real_t macro_vertex_coord_id_2comp2, - real_t macro_vertex_coord_id_3comp0, - real_t macro_vertex_coord_id_3comp1, - real_t macro_vertex_coord_id_3comp2, - std::shared_ptr< SparseMatrixProxy > mat, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float ) const; - /// Kernel type: computeInverseDiagonalOperatorValues + void toMatrix_P2ElementwiseShearHeating_macro_3D( idx_t* RESTRICT _data_dstEdge, + idx_t* RESTRICT _data_dstVertex, + real_t* RESTRICT _data_muEdge, + real_t* RESTRICT _data_muVertex, + idx_t* RESTRICT _data_srcEdge, + idx_t* RESTRICT _data_srcVertex, + real_t* RESTRICT _data_uxEdge, + real_t* RESTRICT _data_uxVertex, + real_t* RESTRICT _data_uyEdge, + real_t* RESTRICT _data_uyVertex, + real_t* RESTRICT _data_uzEdge, + real_t* RESTRICT _data_uzVertex, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_0comp2, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_1comp2, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + real_t macro_vertex_coord_id_2comp2, + real_t macro_vertex_coord_id_3comp0, + real_t macro_vertex_coord_id_3comp1, + real_t macro_vertex_coord_id_3comp2, + std::shared_ptr< SparseMatrixProxy > mat, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float ) const; + + /// Integral: P2ElementwiseShearHeating + /// - volume element: triangle, dim: 2, vertices: 3, spacedim: 2 + /// - kernel type: computeInverseDiagonalOperatorValues + /// - loop strategy: SAWTOOTH /// - quadrature rule: Dunavant 3 | points: 4, degree: 3 + /// - blending map: IdentityMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 262 284 12 0 0 0 0 1 - void computeInverseDiagonalOperatorValues_macro_2D( real_t* RESTRICT _data_invDiag_Edge, - real_t* RESTRICT _data_invDiag_Vertex, - real_t* RESTRICT _data_muEdge, - real_t* RESTRICT _data_muVertex, - real_t* RESTRICT _data_uxEdge, - real_t* RESTRICT _data_uxVertex, - real_t* RESTRICT _data_uyEdge, - real_t* RESTRICT _data_uyVertex, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float ) const; - /// Kernel type: computeInverseDiagonalOperatorValues + void computeInverseDiagonalOperatorValues_P2ElementwiseShearHeating_macro_2D( real_t* RESTRICT _data_invDiag_Edge, + real_t* RESTRICT _data_invDiag_Vertex, + real_t* RESTRICT _data_muEdge, + real_t* RESTRICT _data_muVertex, + real_t* RESTRICT _data_uxEdge, + real_t* RESTRICT _data_uxVertex, + real_t* RESTRICT _data_uyEdge, + real_t* RESTRICT _data_uyVertex, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float ) const; + + /// Integral: P2ElementwiseShearHeating + /// - volume element: tetrahedron, dim: 3, vertices: 4, spacedim: 3 + /// - kernel type: computeInverseDiagonalOperatorValues + /// - loop strategy: SAWTOOTH /// - quadrature rule: Hammer-Marlowe-Stroud 3 | points: 5, degree: 3 + /// - blending map: IdentityMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 953 962 36 0 0 0 0 1 - void computeInverseDiagonalOperatorValues_macro_3D( real_t* RESTRICT _data_invDiag_Edge, - real_t* RESTRICT _data_invDiag_Vertex, - real_t* RESTRICT _data_muEdge, - real_t* RESTRICT _data_muVertex, - real_t* RESTRICT _data_uxEdge, - real_t* RESTRICT _data_uxVertex, - real_t* RESTRICT _data_uyEdge, - real_t* RESTRICT _data_uyVertex, - real_t* RESTRICT _data_uzEdge, - real_t* RESTRICT _data_uzVertex, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_0comp2, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_1comp2, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - real_t macro_vertex_coord_id_2comp2, - real_t macro_vertex_coord_id_3comp0, - real_t macro_vertex_coord_id_3comp1, - real_t macro_vertex_coord_id_3comp2, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float ) const; + void computeInverseDiagonalOperatorValues_P2ElementwiseShearHeating_macro_3D( real_t* RESTRICT _data_invDiag_Edge, + real_t* RESTRICT _data_invDiag_Vertex, + real_t* RESTRICT _data_muEdge, + real_t* RESTRICT _data_muVertex, + real_t* RESTRICT _data_uxEdge, + real_t* RESTRICT _data_uxVertex, + real_t* RESTRICT _data_uyEdge, + real_t* RESTRICT _data_uyVertex, + real_t* RESTRICT _data_uzEdge, + real_t* RESTRICT _data_uzVertex, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_0comp2, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_1comp2, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + real_t macro_vertex_coord_id_2comp2, + real_t macro_vertex_coord_id_3comp0, + real_t macro_vertex_coord_id_3comp1, + real_t macro_vertex_coord_id_3comp2, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float ) const; std::shared_ptr< P2Function< real_t > > invDiag_; P2Function< real_t > mu; diff --git a/operators/shear_heating/P2ElementwiseShearHeatingAnnulusMap.cpp b/operators/shear_heating/P2ElementwiseShearHeatingAnnulusMap.cpp index 04eeb81d420b0268ab73a25586cda530c30b2b0c..32da46ef056505bdaa21777161ece0c2f8258337 100644 --- a/operators/shear_heating/P2ElementwiseShearHeatingAnnulusMap.cpp +++ b/operators/shear_heating/P2ElementwiseShearHeatingAnnulusMap.cpp @@ -159,7 +159,7 @@ void P2ElementwiseShearHeatingAnnulusMap::apply( const P2Function< real_t >& src this->timingTree_->start( "kernel" ); - apply_macro_2D( + apply_P2ElementwiseShearHeatingAnnulusMap_macro_2D( _data_dstEdge, _data_dstVertex, @@ -187,6 +187,7 @@ void P2ElementwiseShearHeatingAnnulusMap::apply( const P2Function< real_t >& src refVertex_1, thrVertex_0, thrVertex_1 ); + this->timingTree_->stop( "kernel" ); } @@ -282,7 +283,7 @@ void P2ElementwiseShearHeatingAnnulusMap::toMatrix( const std::shared_ptr< Spars this->timingTree_->start( "kernel" ); - toMatrix_macro_2D( + toMatrix_P2ElementwiseShearHeatingAnnulusMap_macro_2D( _data_dstEdge, _data_dstVertex, @@ -311,6 +312,7 @@ void P2ElementwiseShearHeatingAnnulusMap::toMatrix( const std::shared_ptr< Spars refVertex_1, thrVertex_0, thrVertex_1 ); + this->timingTree_->stop( "kernel" ); } } @@ -391,7 +393,7 @@ void P2ElementwiseShearHeatingAnnulusMap::computeInverseDiagonalOperatorValues() this->timingTree_->start( "kernel" ); - computeInverseDiagonalOperatorValues_macro_2D( + computeInverseDiagonalOperatorValues_P2ElementwiseShearHeatingAnnulusMap_macro_2D( _data_invDiag_Edge, _data_invDiag_Vertex, @@ -417,6 +419,7 @@ void P2ElementwiseShearHeatingAnnulusMap::computeInverseDiagonalOperatorValues() refVertex_1, thrVertex_0, thrVertex_1 ); + this->timingTree_->stop( "kernel" ); } diff --git a/operators/shear_heating/P2ElementwiseShearHeatingAnnulusMap.hpp b/operators/shear_heating/P2ElementwiseShearHeatingAnnulusMap.hpp index 617e2c697b69efdf55e3e117b1b4095a88a4a9de..690d3bba79c6f987dda2f303357dd798d7b0e58e 100644 --- a/operators/shear_heating/P2ElementwiseShearHeatingAnnulusMap.hpp +++ b/operators/shear_heating/P2ElementwiseShearHeatingAnnulusMap.hpp @@ -29,6 +29,7 @@ #include "core/DataTypes.h" #include "hyteg/LikwidWrapper.hpp" +#include "hyteg/boundary/BoundaryConditions.hpp" #include "hyteg/communication/Syncing.hpp" #include "hyteg/edgedofspace/EdgeDoFMacroCell.hpp" #include "hyteg/geometry/AnnulusMap.hpp" @@ -37,6 +38,7 @@ #include "hyteg/primitivestorage/PrimitiveStorage.hpp" #include "hyteg/solvers/Smoothables.hpp" #include "hyteg/sparseassembly/SparseMatrixProxy.hpp" +#include "hyteg/types/types.hpp" #define FUNC_PREFIX @@ -107,101 +109,116 @@ class P2ElementwiseShearHeatingAnnulusMap : public Operator< P2Function< real_t protected: private: - /// Kernel type: apply + /// Integral: P2ElementwiseShearHeatingAnnulusMap + /// - volume element: triangle, dim: 2, vertices: 3, spacedim: 2 + /// - kernel type: apply + /// - loop strategy: SAWTOOTH /// - quadrature rule: Dunavant 3 | points: 4, degree: 3 + /// - blending map: AnnulusMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 556 756 20 12 0 0 0 1 - void apply_macro_2D( real_t* RESTRICT _data_dstEdge, - real_t* RESTRICT _data_dstVertex, - real_t* RESTRICT _data_muEdge, - real_t* RESTRICT _data_muVertex, - real_t* RESTRICT _data_srcEdge, - real_t* RESTRICT _data_srcVertex, - real_t* RESTRICT _data_uxEdge, - real_t* RESTRICT _data_uxVertex, - real_t* RESTRICT _data_uyEdge, - real_t* RESTRICT _data_uyVertex, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float, - real_t radRayVertex, - real_t radRefVertex, - real_t rayVertex_0, - real_t rayVertex_1, - real_t refVertex_0, - real_t refVertex_1, - real_t thrVertex_0, - real_t thrVertex_1 ) const; - /// Kernel type: toMatrix + void apply_P2ElementwiseShearHeatingAnnulusMap_macro_2D( real_t* RESTRICT _data_dstEdge, + real_t* RESTRICT _data_dstVertex, + real_t* RESTRICT _data_muEdge, + real_t* RESTRICT _data_muVertex, + real_t* RESTRICT _data_srcEdge, + real_t* RESTRICT _data_srcVertex, + real_t* RESTRICT _data_uxEdge, + real_t* RESTRICT _data_uxVertex, + real_t* RESTRICT _data_uyEdge, + real_t* RESTRICT _data_uyVertex, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float, + real_t radRayVertex, + real_t radRefVertex, + real_t rayVertex_0, + real_t rayVertex_1, + real_t refVertex_0, + real_t refVertex_1, + real_t thrVertex_0, + real_t thrVertex_1 ) const; + + /// Integral: P2ElementwiseShearHeatingAnnulusMap + /// - volume element: triangle, dim: 2, vertices: 3, spacedim: 2 + /// - kernel type: toMatrix + /// - loop strategy: SAWTOOTH /// - quadrature rule: Dunavant 3 | points: 4, degree: 3 + /// - blending map: AnnulusMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 520 720 20 12 0 0 0 4 - void toMatrix_macro_2D( idx_t* RESTRICT _data_dstEdge, - idx_t* RESTRICT _data_dstVertex, - real_t* RESTRICT _data_muEdge, - real_t* RESTRICT _data_muVertex, - idx_t* RESTRICT _data_srcEdge, - idx_t* RESTRICT _data_srcVertex, - real_t* RESTRICT _data_uxEdge, - real_t* RESTRICT _data_uxVertex, - real_t* RESTRICT _data_uyEdge, - real_t* RESTRICT _data_uyVertex, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - std::shared_ptr< SparseMatrixProxy > mat, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float, - real_t radRayVertex, - real_t radRefVertex, - real_t rayVertex_0, - real_t rayVertex_1, - real_t refVertex_0, - real_t refVertex_1, - real_t thrVertex_0, - real_t thrVertex_1 ) const; - /// Kernel type: computeInverseDiagonalOperatorValues + void toMatrix_P2ElementwiseShearHeatingAnnulusMap_macro_2D( idx_t* RESTRICT _data_dstEdge, + idx_t* RESTRICT _data_dstVertex, + real_t* RESTRICT _data_muEdge, + real_t* RESTRICT _data_muVertex, + idx_t* RESTRICT _data_srcEdge, + idx_t* RESTRICT _data_srcVertex, + real_t* RESTRICT _data_uxEdge, + real_t* RESTRICT _data_uxVertex, + real_t* RESTRICT _data_uyEdge, + real_t* RESTRICT _data_uyVertex, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + std::shared_ptr< SparseMatrixProxy > mat, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float, + real_t radRayVertex, + real_t radRefVertex, + real_t rayVertex_0, + real_t rayVertex_1, + real_t refVertex_0, + real_t refVertex_1, + real_t thrVertex_0, + real_t thrVertex_1 ) const; + + /// Integral: P2ElementwiseShearHeatingAnnulusMap + /// - volume element: triangle, dim: 2, vertices: 3, spacedim: 2 + /// - kernel type: computeInverseDiagonalOperatorValues + /// - loop strategy: SAWTOOTH /// - quadrature rule: Dunavant 3 | points: 4, degree: 3 + /// - blending map: AnnulusMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 466 640 20 12 0 0 0 1 - void computeInverseDiagonalOperatorValues_macro_2D( real_t* RESTRICT _data_invDiag_Edge, - real_t* RESTRICT _data_invDiag_Vertex, - real_t* RESTRICT _data_muEdge, - real_t* RESTRICT _data_muVertex, - real_t* RESTRICT _data_uxEdge, - real_t* RESTRICT _data_uxVertex, - real_t* RESTRICT _data_uyEdge, - real_t* RESTRICT _data_uyVertex, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float, - real_t radRayVertex, - real_t radRefVertex, - real_t rayVertex_0, - real_t rayVertex_1, - real_t refVertex_0, - real_t refVertex_1, - real_t thrVertex_0, - real_t thrVertex_1 ) const; + void + computeInverseDiagonalOperatorValues_P2ElementwiseShearHeatingAnnulusMap_macro_2D( real_t* RESTRICT _data_invDiag_Edge, + real_t* RESTRICT _data_invDiag_Vertex, + real_t* RESTRICT _data_muEdge, + real_t* RESTRICT _data_muVertex, + real_t* RESTRICT _data_uxEdge, + real_t* RESTRICT _data_uxVertex, + real_t* RESTRICT _data_uyEdge, + real_t* RESTRICT _data_uyVertex, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float, + real_t radRayVertex, + real_t radRefVertex, + real_t rayVertex_0, + real_t rayVertex_1, + real_t refVertex_0, + real_t refVertex_1, + real_t thrVertex_0, + real_t thrVertex_1 ) const; std::shared_ptr< P2Function< real_t > > invDiag_; P2Function< real_t > mu; diff --git a/operators/shear_heating/P2ElementwiseShearHeatingIcosahedralShellMap.cpp b/operators/shear_heating/P2ElementwiseShearHeatingIcosahedralShellMap.cpp index 95c61d85082cc742ba42ea71528d059b75ca17ab..12969d32b6568f148c7173ce64186d39742d5039 100644 --- a/operators/shear_heating/P2ElementwiseShearHeatingIcosahedralShellMap.cpp +++ b/operators/shear_heating/P2ElementwiseShearHeatingIcosahedralShellMap.cpp @@ -175,7 +175,7 @@ void P2ElementwiseShearHeatingIcosahedralShellMap::apply( const P2Function< real this->timingTree_->start( "kernel" ); - apply_macro_3D( + apply_P2ElementwiseShearHeatingIcosahedralShellMap_macro_3D( _data_dstEdge, _data_dstVertex, @@ -217,6 +217,7 @@ void P2ElementwiseShearHeatingIcosahedralShellMap::apply( const P2Function< real thrVertex_0, thrVertex_1, thrVertex_2 ); + this->timingTree_->stop( "kernel" ); } @@ -327,7 +328,7 @@ void P2ElementwiseShearHeatingIcosahedralShellMap::toMatrix( const std::shared_p this->timingTree_->start( "kernel" ); - toMatrix_macro_3D( + toMatrix_P2ElementwiseShearHeatingIcosahedralShellMap_macro_3D( _data_dstEdge, _data_dstVertex, @@ -370,6 +371,7 @@ void P2ElementwiseShearHeatingIcosahedralShellMap::toMatrix( const std::shared_p thrVertex_0, thrVertex_1, thrVertex_2 ); + this->timingTree_->stop( "kernel" ); } } @@ -467,7 +469,7 @@ void P2ElementwiseShearHeatingIcosahedralShellMap::computeInverseDiagonalOperato this->timingTree_->start( "kernel" ); - computeInverseDiagonalOperatorValues_macro_3D( + computeInverseDiagonalOperatorValues_P2ElementwiseShearHeatingIcosahedralShellMap_macro_3D( _data_invDiag_Edge, _data_invDiag_Vertex, @@ -507,6 +509,7 @@ void P2ElementwiseShearHeatingIcosahedralShellMap::computeInverseDiagonalOperato thrVertex_0, thrVertex_1, thrVertex_2 ); + this->timingTree_->stop( "kernel" ); } diff --git a/operators/shear_heating/P2ElementwiseShearHeatingIcosahedralShellMap.hpp b/operators/shear_heating/P2ElementwiseShearHeatingIcosahedralShellMap.hpp index 4dbc9213ade4cfc4e6d5083d3f6fb163a1628c13..a4f62adeb0ebde39f4bb18cc612c79e127a1ddbc 100644 --- a/operators/shear_heating/P2ElementwiseShearHeatingIcosahedralShellMap.hpp +++ b/operators/shear_heating/P2ElementwiseShearHeatingIcosahedralShellMap.hpp @@ -29,6 +29,7 @@ #include "core/DataTypes.h" #include "hyteg/LikwidWrapper.hpp" +#include "hyteg/boundary/BoundaryConditions.hpp" #include "hyteg/communication/Syncing.hpp" #include "hyteg/edgedofspace/EdgeDoFMacroCell.hpp" #include "hyteg/geometry/IcosahedralShellMap.hpp" @@ -37,6 +38,7 @@ #include "hyteg/primitivestorage/PrimitiveStorage.hpp" #include "hyteg/solvers/Smoothables.hpp" #include "hyteg/sparseassembly/SparseMatrixProxy.hpp" +#include "hyteg/types/types.hpp" #define FUNC_PREFIX @@ -108,143 +110,158 @@ class P2ElementwiseShearHeatingIcosahedralShellMap : public Operator< P2Function protected: private: - /// Kernel type: apply + /// Integral: P2ElementwiseShearHeatingIcosahedralShellMap + /// - volume element: tetrahedron, dim: 3, vertices: 4, spacedim: 3 + /// - kernel type: apply + /// - loop strategy: SAWTOOTH /// - quadrature rule: Hammer-Marlowe-Stroud 3 | points: 5, degree: 3 + /// - blending map: IcosahedralShellMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 2053 2707 46 10 0 0 0 1 - void apply_macro_3D( real_t* RESTRICT _data_dstEdge, - real_t* RESTRICT _data_dstVertex, - real_t* RESTRICT _data_muEdge, - real_t* RESTRICT _data_muVertex, - real_t* RESTRICT _data_srcEdge, - real_t* RESTRICT _data_srcVertex, - real_t* RESTRICT _data_uxEdge, - real_t* RESTRICT _data_uxVertex, - real_t* RESTRICT _data_uyEdge, - real_t* RESTRICT _data_uyVertex, - real_t* RESTRICT _data_uzEdge, - real_t* RESTRICT _data_uzVertex, - real_t forVertex_0, - real_t forVertex_1, - real_t forVertex_2, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_0comp2, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_1comp2, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - real_t macro_vertex_coord_id_2comp2, - real_t macro_vertex_coord_id_3comp0, - real_t macro_vertex_coord_id_3comp1, - real_t macro_vertex_coord_id_3comp2, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float, - real_t radRayVertex, - real_t radRefVertex, - real_t rayVertex_0, - real_t rayVertex_1, - real_t rayVertex_2, - real_t refVertex_0, - real_t refVertex_1, - real_t refVertex_2, - real_t thrVertex_0, - real_t thrVertex_1, - real_t thrVertex_2 ) const; - /// Kernel type: toMatrix + void apply_P2ElementwiseShearHeatingIcosahedralShellMap_macro_3D( real_t* RESTRICT _data_dstEdge, + real_t* RESTRICT _data_dstVertex, + real_t* RESTRICT _data_muEdge, + real_t* RESTRICT _data_muVertex, + real_t* RESTRICT _data_srcEdge, + real_t* RESTRICT _data_srcVertex, + real_t* RESTRICT _data_uxEdge, + real_t* RESTRICT _data_uxVertex, + real_t* RESTRICT _data_uyEdge, + real_t* RESTRICT _data_uyVertex, + real_t* RESTRICT _data_uzEdge, + real_t* RESTRICT _data_uzVertex, + real_t forVertex_0, + real_t forVertex_1, + real_t forVertex_2, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_0comp2, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_1comp2, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + real_t macro_vertex_coord_id_2comp2, + real_t macro_vertex_coord_id_3comp0, + real_t macro_vertex_coord_id_3comp1, + real_t macro_vertex_coord_id_3comp2, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float, + real_t radRayVertex, + real_t radRefVertex, + real_t rayVertex_0, + real_t rayVertex_1, + real_t rayVertex_2, + real_t refVertex_0, + real_t refVertex_1, + real_t refVertex_2, + real_t thrVertex_0, + real_t thrVertex_1, + real_t thrVertex_2 ) const; + + /// Integral: P2ElementwiseShearHeatingIcosahedralShellMap + /// - volume element: tetrahedron, dim: 3, vertices: 4, spacedim: 3 + /// - kernel type: toMatrix + /// - loop strategy: SAWTOOTH /// - quadrature rule: Hammer-Marlowe-Stroud 3 | points: 5, degree: 3 + /// - blending map: IcosahedralShellMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 1953 2607 46 10 0 0 0 4 - void toMatrix_macro_3D( idx_t* RESTRICT _data_dstEdge, - idx_t* RESTRICT _data_dstVertex, - real_t* RESTRICT _data_muEdge, - real_t* RESTRICT _data_muVertex, - idx_t* RESTRICT _data_srcEdge, - idx_t* RESTRICT _data_srcVertex, - real_t* RESTRICT _data_uxEdge, - real_t* RESTRICT _data_uxVertex, - real_t* RESTRICT _data_uyEdge, - real_t* RESTRICT _data_uyVertex, - real_t* RESTRICT _data_uzEdge, - real_t* RESTRICT _data_uzVertex, - real_t forVertex_0, - real_t forVertex_1, - real_t forVertex_2, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_0comp2, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_1comp2, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - real_t macro_vertex_coord_id_2comp2, - real_t macro_vertex_coord_id_3comp0, - real_t macro_vertex_coord_id_3comp1, - real_t macro_vertex_coord_id_3comp2, - std::shared_ptr< SparseMatrixProxy > mat, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float, - real_t radRayVertex, - real_t radRefVertex, - real_t rayVertex_0, - real_t rayVertex_1, - real_t rayVertex_2, - real_t refVertex_0, - real_t refVertex_1, - real_t refVertex_2, - real_t thrVertex_0, - real_t thrVertex_1, - real_t thrVertex_2 ) const; - /// Kernel type: computeInverseDiagonalOperatorValues + void toMatrix_P2ElementwiseShearHeatingIcosahedralShellMap_macro_3D( idx_t* RESTRICT _data_dstEdge, + idx_t* RESTRICT _data_dstVertex, + real_t* RESTRICT _data_muEdge, + real_t* RESTRICT _data_muVertex, + idx_t* RESTRICT _data_srcEdge, + idx_t* RESTRICT _data_srcVertex, + real_t* RESTRICT _data_uxEdge, + real_t* RESTRICT _data_uxVertex, + real_t* RESTRICT _data_uyEdge, + real_t* RESTRICT _data_uyVertex, + real_t* RESTRICT _data_uzEdge, + real_t* RESTRICT _data_uzVertex, + real_t forVertex_0, + real_t forVertex_1, + real_t forVertex_2, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_0comp2, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_1comp2, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + real_t macro_vertex_coord_id_2comp2, + real_t macro_vertex_coord_id_3comp0, + real_t macro_vertex_coord_id_3comp1, + real_t macro_vertex_coord_id_3comp2, + std::shared_ptr< SparseMatrixProxy > mat, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float, + real_t radRayVertex, + real_t radRefVertex, + real_t rayVertex_0, + real_t rayVertex_1, + real_t rayVertex_2, + real_t refVertex_0, + real_t refVertex_1, + real_t refVertex_2, + real_t thrVertex_0, + real_t thrVertex_1, + real_t thrVertex_2 ) const; + + /// Integral: P2ElementwiseShearHeatingIcosahedralShellMap + /// - volume element: tetrahedron, dim: 3, vertices: 4, spacedim: 3 + /// - kernel type: computeInverseDiagonalOperatorValues + /// - loop strategy: SAWTOOTH /// - quadrature rule: Hammer-Marlowe-Stroud 3 | points: 5, degree: 3 + /// - blending map: IcosahedralShellMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 1738 2342 46 10 0 0 0 1 - void computeInverseDiagonalOperatorValues_macro_3D( real_t* RESTRICT _data_invDiag_Edge, - real_t* RESTRICT _data_invDiag_Vertex, - real_t* RESTRICT _data_muEdge, - real_t* RESTRICT _data_muVertex, - real_t* RESTRICT _data_uxEdge, - real_t* RESTRICT _data_uxVertex, - real_t* RESTRICT _data_uyEdge, - real_t* RESTRICT _data_uyVertex, - real_t* RESTRICT _data_uzEdge, - real_t* RESTRICT _data_uzVertex, - real_t forVertex_0, - real_t forVertex_1, - real_t forVertex_2, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_0comp2, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_1comp2, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - real_t macro_vertex_coord_id_2comp2, - real_t macro_vertex_coord_id_3comp0, - real_t macro_vertex_coord_id_3comp1, - real_t macro_vertex_coord_id_3comp2, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float, - real_t radRayVertex, - real_t radRefVertex, - real_t rayVertex_0, - real_t rayVertex_1, - real_t rayVertex_2, - real_t refVertex_0, - real_t refVertex_1, - real_t refVertex_2, - real_t thrVertex_0, - real_t thrVertex_1, - real_t thrVertex_2 ) const; + void computeInverseDiagonalOperatorValues_P2ElementwiseShearHeatingIcosahedralShellMap_macro_3D( + real_t* RESTRICT _data_invDiag_Edge, + real_t* RESTRICT _data_invDiag_Vertex, + real_t* RESTRICT _data_muEdge, + real_t* RESTRICT _data_muVertex, + real_t* RESTRICT _data_uxEdge, + real_t* RESTRICT _data_uxVertex, + real_t* RESTRICT _data_uyEdge, + real_t* RESTRICT _data_uyVertex, + real_t* RESTRICT _data_uzEdge, + real_t* RESTRICT _data_uzVertex, + real_t forVertex_0, + real_t forVertex_1, + real_t forVertex_2, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_0comp2, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_1comp2, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + real_t macro_vertex_coord_id_2comp2, + real_t macro_vertex_coord_id_3comp0, + real_t macro_vertex_coord_id_3comp1, + real_t macro_vertex_coord_id_3comp2, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float, + real_t radRayVertex, + real_t radRefVertex, + real_t rayVertex_0, + real_t rayVertex_1, + real_t rayVertex_2, + real_t refVertex_0, + real_t refVertex_1, + real_t refVertex_2, + real_t thrVertex_0, + real_t thrVertex_1, + real_t thrVertex_2 ) const; std::shared_ptr< P2Function< real_t > > invDiag_; P2Function< real_t > mu; diff --git a/operators/shear_heating/avx/P2ElementwiseShearHeatingAnnulusMap_apply_macro_2D.cpp b/operators/shear_heating/avx/P2ElementwiseShearHeatingAnnulusMap_apply_P2ElementwiseShearHeatingAnnulusMap_macro_2D.cpp similarity index 99% rename from operators/shear_heating/avx/P2ElementwiseShearHeatingAnnulusMap_apply_macro_2D.cpp rename to operators/shear_heating/avx/P2ElementwiseShearHeatingAnnulusMap_apply_P2ElementwiseShearHeatingAnnulusMap_macro_2D.cpp index e8bae39b700b8a35755e167e6be829de64102f7a..c30cf5f40111afdbc615ce761c33434f9e77eddb 100644 --- a/operators/shear_heating/avx/P2ElementwiseShearHeatingAnnulusMap_apply_macro_2D.cpp +++ b/operators/shear_heating/avx/P2ElementwiseShearHeatingAnnulusMap_apply_P2ElementwiseShearHeatingAnnulusMap_macro_2D.cpp @@ -45,6 +45,10 @@ + + + + @@ -58,7 +62,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseShearHeatingAnnulusMap::apply_macro_2D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t * RESTRICT _data_uxEdge, real_t * RESTRICT _data_uxVertex, real_t * RESTRICT _data_uyEdge, real_t * RESTRICT _data_uyVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const +void P2ElementwiseShearHeatingAnnulusMap::apply_P2ElementwiseShearHeatingAnnulusMap_macro_2D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t * RESTRICT _data_uxEdge, real_t * RESTRICT _data_uxVertex, real_t * RESTRICT _data_uyEdge, real_t * RESTRICT _data_uyVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const { { const real_t _data_q_w [] = {-0.28125, 0.26041666666666669, 0.26041666666666669, 0.26041666666666669}; diff --git a/operators/shear_heating/avx/P2ElementwiseShearHeatingAnnulusMap_computeInverseDiagonalOperatorValues_macro_2D.cpp b/operators/shear_heating/avx/P2ElementwiseShearHeatingAnnulusMap_computeInverseDiagonalOperatorValues_P2ElementwiseShearHeatingAnnulusMap_macro_2D.cpp similarity index 99% rename from operators/shear_heating/avx/P2ElementwiseShearHeatingAnnulusMap_computeInverseDiagonalOperatorValues_macro_2D.cpp rename to operators/shear_heating/avx/P2ElementwiseShearHeatingAnnulusMap_computeInverseDiagonalOperatorValues_P2ElementwiseShearHeatingAnnulusMap_macro_2D.cpp index 681c43369debf236892486650760f3357c02f6ef..ed6c10a0c9f22673d89022a6c39956c4e91655d9 100644 --- a/operators/shear_heating/avx/P2ElementwiseShearHeatingAnnulusMap_computeInverseDiagonalOperatorValues_macro_2D.cpp +++ b/operators/shear_heating/avx/P2ElementwiseShearHeatingAnnulusMap_computeInverseDiagonalOperatorValues_P2ElementwiseShearHeatingAnnulusMap_macro_2D.cpp @@ -45,6 +45,10 @@ + + + + @@ -58,7 +62,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseShearHeatingAnnulusMap::computeInverseDiagonalOperatorValues_macro_2D( real_t * RESTRICT _data_invDiag_Edge, real_t * RESTRICT _data_invDiag_Vertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_uxEdge, real_t * RESTRICT _data_uxVertex, real_t * RESTRICT _data_uyEdge, real_t * RESTRICT _data_uyVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const +void P2ElementwiseShearHeatingAnnulusMap::computeInverseDiagonalOperatorValues_P2ElementwiseShearHeatingAnnulusMap_macro_2D( real_t * RESTRICT _data_invDiag_Edge, real_t * RESTRICT _data_invDiag_Vertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_uxEdge, real_t * RESTRICT _data_uxVertex, real_t * RESTRICT _data_uyEdge, real_t * RESTRICT _data_uyVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const { { const real_t _data_q_w [] = {-0.28125, 0.26041666666666669, 0.26041666666666669, 0.26041666666666669}; diff --git a/operators/shear_heating/avx/P2ElementwiseShearHeatingIcosahedralShellMap_apply_macro_3D.cpp b/operators/shear_heating/avx/P2ElementwiseShearHeatingIcosahedralShellMap_apply_P2ElementwiseShearHeatingIcosahedralShellMap_macro_3D.cpp similarity index 99% rename from operators/shear_heating/avx/P2ElementwiseShearHeatingIcosahedralShellMap_apply_macro_3D.cpp rename to operators/shear_heating/avx/P2ElementwiseShearHeatingIcosahedralShellMap_apply_P2ElementwiseShearHeatingIcosahedralShellMap_macro_3D.cpp index e26684a90ba13d916073531576a6d3c56432132b..8316560dd9a67e2d9bf19b2aafdb7c7890b3e2a4 100644 --- a/operators/shear_heating/avx/P2ElementwiseShearHeatingIcosahedralShellMap_apply_macro_3D.cpp +++ b/operators/shear_heating/avx/P2ElementwiseShearHeatingIcosahedralShellMap_apply_P2ElementwiseShearHeatingIcosahedralShellMap_macro_3D.cpp @@ -45,6 +45,10 @@ + + + + @@ -58,7 +62,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseShearHeatingIcosahedralShellMap::apply_macro_3D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t * RESTRICT _data_uxEdge, real_t * RESTRICT _data_uxVertex, real_t * RESTRICT _data_uyEdge, real_t * RESTRICT _data_uyVertex, real_t * RESTRICT _data_uzEdge, real_t * RESTRICT _data_uzVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const +void P2ElementwiseShearHeatingIcosahedralShellMap::apply_P2ElementwiseShearHeatingIcosahedralShellMap_macro_3D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t * RESTRICT _data_uxEdge, real_t * RESTRICT _data_uxVertex, real_t * RESTRICT _data_uyEdge, real_t * RESTRICT _data_uyVertex, real_t * RESTRICT _data_uzEdge, real_t * RESTRICT _data_uzVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const { { const real_t _data_q_w [] = {-0.1333333333333333, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983}; diff --git a/operators/shear_heating/avx/P2ElementwiseShearHeatingIcosahedralShellMap_computeInverseDiagonalOperatorValues_macro_3D.cpp b/operators/shear_heating/avx/P2ElementwiseShearHeatingIcosahedralShellMap_computeInverseDiagonalOperatorValues_P2ElementwiseShearHeatingIcosahedralShellMap_macro_3D.cpp similarity index 99% rename from operators/shear_heating/avx/P2ElementwiseShearHeatingIcosahedralShellMap_computeInverseDiagonalOperatorValues_macro_3D.cpp rename to operators/shear_heating/avx/P2ElementwiseShearHeatingIcosahedralShellMap_computeInverseDiagonalOperatorValues_P2ElementwiseShearHeatingIcosahedralShellMap_macro_3D.cpp index c41091c7d6ce4164a2fa67ff03b290254b50f900..588a23158f6cfd2d9a63becd4136c650d2a76d76 100644 --- a/operators/shear_heating/avx/P2ElementwiseShearHeatingIcosahedralShellMap_computeInverseDiagonalOperatorValues_macro_3D.cpp +++ b/operators/shear_heating/avx/P2ElementwiseShearHeatingIcosahedralShellMap_computeInverseDiagonalOperatorValues_P2ElementwiseShearHeatingIcosahedralShellMap_macro_3D.cpp @@ -45,6 +45,10 @@ + + + + @@ -58,7 +62,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseShearHeatingIcosahedralShellMap::computeInverseDiagonalOperatorValues_macro_3D( real_t * RESTRICT _data_invDiag_Edge, real_t * RESTRICT _data_invDiag_Vertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_uxEdge, real_t * RESTRICT _data_uxVertex, real_t * RESTRICT _data_uyEdge, real_t * RESTRICT _data_uyVertex, real_t * RESTRICT _data_uzEdge, real_t * RESTRICT _data_uzVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const +void P2ElementwiseShearHeatingIcosahedralShellMap::computeInverseDiagonalOperatorValues_P2ElementwiseShearHeatingIcosahedralShellMap_macro_3D( real_t * RESTRICT _data_invDiag_Edge, real_t * RESTRICT _data_invDiag_Vertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_uxEdge, real_t * RESTRICT _data_uxVertex, real_t * RESTRICT _data_uyEdge, real_t * RESTRICT _data_uyVertex, real_t * RESTRICT _data_uzEdge, real_t * RESTRICT _data_uzVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const { { const real_t _data_q_w [] = {-0.1333333333333333, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983}; diff --git a/operators/shear_heating/avx/P2ElementwiseShearHeating_apply_macro_2D.cpp b/operators/shear_heating/avx/P2ElementwiseShearHeating_apply_P2ElementwiseShearHeating_macro_2D.cpp similarity index 99% rename from operators/shear_heating/avx/P2ElementwiseShearHeating_apply_macro_2D.cpp rename to operators/shear_heating/avx/P2ElementwiseShearHeating_apply_P2ElementwiseShearHeating_macro_2D.cpp index 45e6f163ca59c329e09c2bceeb4fc3b0cbbb30b7..5fdd5726234a88594aab284e6e0e7741a79f0ad2 100644 --- a/operators/shear_heating/avx/P2ElementwiseShearHeating_apply_macro_2D.cpp +++ b/operators/shear_heating/avx/P2ElementwiseShearHeating_apply_P2ElementwiseShearHeating_macro_2D.cpp @@ -43,6 +43,10 @@ + + + + @@ -56,7 +60,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseShearHeating::apply_macro_2D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t * RESTRICT _data_uxEdge, real_t * RESTRICT _data_uxVertex, real_t * RESTRICT _data_uyEdge, real_t * RESTRICT _data_uyVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P2ElementwiseShearHeating::apply_P2ElementwiseShearHeating_macro_2D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t * RESTRICT _data_uxEdge, real_t * RESTRICT _data_uxVertex, real_t * RESTRICT _data_uyEdge, real_t * RESTRICT _data_uyVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t _data_q_w [] = {-0.28125, 0.26041666666666669, 0.26041666666666669, 0.26041666666666669}; diff --git a/operators/shear_heating/avx/P2ElementwiseShearHeating_apply_macro_3D.cpp b/operators/shear_heating/avx/P2ElementwiseShearHeating_apply_P2ElementwiseShearHeating_macro_3D.cpp similarity index 99% rename from operators/shear_heating/avx/P2ElementwiseShearHeating_apply_macro_3D.cpp rename to operators/shear_heating/avx/P2ElementwiseShearHeating_apply_P2ElementwiseShearHeating_macro_3D.cpp index 454f70e4b5d313af824a06d1024ee699da065031..87e3c1125683b8c5ba6d8d279cb4e0e70fdaeb20 100644 --- a/operators/shear_heating/avx/P2ElementwiseShearHeating_apply_macro_3D.cpp +++ b/operators/shear_heating/avx/P2ElementwiseShearHeating_apply_P2ElementwiseShearHeating_macro_3D.cpp @@ -43,6 +43,10 @@ + + + + @@ -56,7 +60,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseShearHeating::apply_macro_3D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t * RESTRICT _data_uxEdge, real_t * RESTRICT _data_uxVertex, real_t * RESTRICT _data_uyEdge, real_t * RESTRICT _data_uyVertex, real_t * RESTRICT _data_uzEdge, real_t * RESTRICT _data_uzVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P2ElementwiseShearHeating::apply_P2ElementwiseShearHeating_macro_3D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t * RESTRICT _data_uxEdge, real_t * RESTRICT _data_uxVertex, real_t * RESTRICT _data_uyEdge, real_t * RESTRICT _data_uyVertex, real_t * RESTRICT _data_uzEdge, real_t * RESTRICT _data_uzVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t _data_q_w [] = {-0.1333333333333333, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983}; diff --git a/operators/shear_heating/avx/P2ElementwiseShearHeating_computeInverseDiagonalOperatorValues_macro_2D.cpp b/operators/shear_heating/avx/P2ElementwiseShearHeating_computeInverseDiagonalOperatorValues_P2ElementwiseShearHeating_macro_2D.cpp similarity index 99% rename from operators/shear_heating/avx/P2ElementwiseShearHeating_computeInverseDiagonalOperatorValues_macro_2D.cpp rename to operators/shear_heating/avx/P2ElementwiseShearHeating_computeInverseDiagonalOperatorValues_P2ElementwiseShearHeating_macro_2D.cpp index 5751d999821859fe5bffd5e72686c0e2202451fc..579af214d85b766b780c1cb2fac10857db4e4c19 100644 --- a/operators/shear_heating/avx/P2ElementwiseShearHeating_computeInverseDiagonalOperatorValues_macro_2D.cpp +++ b/operators/shear_heating/avx/P2ElementwiseShearHeating_computeInverseDiagonalOperatorValues_P2ElementwiseShearHeating_macro_2D.cpp @@ -43,6 +43,10 @@ + + + + @@ -56,7 +60,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseShearHeating::computeInverseDiagonalOperatorValues_macro_2D( real_t * RESTRICT _data_invDiag_Edge, real_t * RESTRICT _data_invDiag_Vertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_uxEdge, real_t * RESTRICT _data_uxVertex, real_t * RESTRICT _data_uyEdge, real_t * RESTRICT _data_uyVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P2ElementwiseShearHeating::computeInverseDiagonalOperatorValues_P2ElementwiseShearHeating_macro_2D( real_t * RESTRICT _data_invDiag_Edge, real_t * RESTRICT _data_invDiag_Vertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_uxEdge, real_t * RESTRICT _data_uxVertex, real_t * RESTRICT _data_uyEdge, real_t * RESTRICT _data_uyVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t _data_q_w [] = {-0.28125, 0.26041666666666669, 0.26041666666666669, 0.26041666666666669}; diff --git a/operators/shear_heating/avx/P2ElementwiseShearHeating_computeInverseDiagonalOperatorValues_macro_3D.cpp b/operators/shear_heating/avx/P2ElementwiseShearHeating_computeInverseDiagonalOperatorValues_P2ElementwiseShearHeating_macro_3D.cpp similarity index 99% rename from operators/shear_heating/avx/P2ElementwiseShearHeating_computeInverseDiagonalOperatorValues_macro_3D.cpp rename to operators/shear_heating/avx/P2ElementwiseShearHeating_computeInverseDiagonalOperatorValues_P2ElementwiseShearHeating_macro_3D.cpp index 319df79c5447435ce546edf811b98674df21374c..53ac3d65e562a39ef1d4b4c7828c7f312695757f 100644 --- a/operators/shear_heating/avx/P2ElementwiseShearHeating_computeInverseDiagonalOperatorValues_macro_3D.cpp +++ b/operators/shear_heating/avx/P2ElementwiseShearHeating_computeInverseDiagonalOperatorValues_P2ElementwiseShearHeating_macro_3D.cpp @@ -43,6 +43,10 @@ + + + + @@ -56,7 +60,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseShearHeating::computeInverseDiagonalOperatorValues_macro_3D( real_t * RESTRICT _data_invDiag_Edge, real_t * RESTRICT _data_invDiag_Vertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_uxEdge, real_t * RESTRICT _data_uxVertex, real_t * RESTRICT _data_uyEdge, real_t * RESTRICT _data_uyVertex, real_t * RESTRICT _data_uzEdge, real_t * RESTRICT _data_uzVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P2ElementwiseShearHeating::computeInverseDiagonalOperatorValues_P2ElementwiseShearHeating_macro_3D( real_t * RESTRICT _data_invDiag_Edge, real_t * RESTRICT _data_invDiag_Vertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_uxEdge, real_t * RESTRICT _data_uxVertex, real_t * RESTRICT _data_uyEdge, real_t * RESTRICT _data_uyVertex, real_t * RESTRICT _data_uzEdge, real_t * RESTRICT _data_uzVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t _data_q_w [] = {-0.1333333333333333, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983}; diff --git a/operators/shear_heating/noarch/P2ElementwiseShearHeatingAnnulusMap_apply_macro_2D.cpp b/operators/shear_heating/noarch/P2ElementwiseShearHeatingAnnulusMap_apply_P2ElementwiseShearHeatingAnnulusMap_macro_2D.cpp similarity index 98% rename from operators/shear_heating/noarch/P2ElementwiseShearHeatingAnnulusMap_apply_macro_2D.cpp rename to operators/shear_heating/noarch/P2ElementwiseShearHeatingAnnulusMap_apply_P2ElementwiseShearHeatingAnnulusMap_macro_2D.cpp index 66b9a9e65243e245b7c4d3c64115727675121a99..77ffc3eb0c7ecfa37a69ad51657bbbf3833bc223 100644 --- a/operators/shear_heating/noarch/P2ElementwiseShearHeatingAnnulusMap_apply_macro_2D.cpp +++ b/operators/shear_heating/noarch/P2ElementwiseShearHeatingAnnulusMap_apply_P2ElementwiseShearHeatingAnnulusMap_macro_2D.cpp @@ -45,6 +45,10 @@ + + + + @@ -58,7 +62,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseShearHeatingAnnulusMap::apply_macro_2D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t * RESTRICT _data_uxEdge, real_t * RESTRICT _data_uxVertex, real_t * RESTRICT _data_uyEdge, real_t * RESTRICT _data_uyVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const +void P2ElementwiseShearHeatingAnnulusMap::apply_P2ElementwiseShearHeatingAnnulusMap_macro_2D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t * RESTRICT _data_uxEdge, real_t * RESTRICT _data_uxVertex, real_t * RESTRICT _data_uyEdge, real_t * RESTRICT _data_uyVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const { { const real_t _data_q_w [] = {-0.28125, 0.26041666666666669, 0.26041666666666669, 0.26041666666666669}; diff --git a/operators/shear_heating/noarch/P2ElementwiseShearHeatingAnnulusMap_computeInverseDiagonalOperatorValues_macro_2D.cpp b/operators/shear_heating/noarch/P2ElementwiseShearHeatingAnnulusMap_computeInverseDiagonalOperatorValues_P2ElementwiseShearHeatingAnnulusMap_macro_2D.cpp similarity index 98% rename from operators/shear_heating/noarch/P2ElementwiseShearHeatingAnnulusMap_computeInverseDiagonalOperatorValues_macro_2D.cpp rename to operators/shear_heating/noarch/P2ElementwiseShearHeatingAnnulusMap_computeInverseDiagonalOperatorValues_P2ElementwiseShearHeatingAnnulusMap_macro_2D.cpp index 9a0ec572178bfeed550dadb627922efcfdb97979..51d93ee142d1b926556c7a1a10a680879d15056a 100644 --- a/operators/shear_heating/noarch/P2ElementwiseShearHeatingAnnulusMap_computeInverseDiagonalOperatorValues_macro_2D.cpp +++ b/operators/shear_heating/noarch/P2ElementwiseShearHeatingAnnulusMap_computeInverseDiagonalOperatorValues_P2ElementwiseShearHeatingAnnulusMap_macro_2D.cpp @@ -45,6 +45,10 @@ + + + + @@ -58,7 +62,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseShearHeatingAnnulusMap::computeInverseDiagonalOperatorValues_macro_2D( real_t * RESTRICT _data_invDiag_Edge, real_t * RESTRICT _data_invDiag_Vertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_uxEdge, real_t * RESTRICT _data_uxVertex, real_t * RESTRICT _data_uyEdge, real_t * RESTRICT _data_uyVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const +void P2ElementwiseShearHeatingAnnulusMap::computeInverseDiagonalOperatorValues_P2ElementwiseShearHeatingAnnulusMap_macro_2D( real_t * RESTRICT _data_invDiag_Edge, real_t * RESTRICT _data_invDiag_Vertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_uxEdge, real_t * RESTRICT _data_uxVertex, real_t * RESTRICT _data_uyEdge, real_t * RESTRICT _data_uyVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const { { const real_t _data_q_w [] = {-0.28125, 0.26041666666666669, 0.26041666666666669, 0.26041666666666669}; diff --git a/operators/shear_heating/noarch/P2ElementwiseShearHeatingAnnulusMap_toMatrix_macro_2D.cpp b/operators/shear_heating/noarch/P2ElementwiseShearHeatingAnnulusMap_toMatrix_P2ElementwiseShearHeatingAnnulusMap_macro_2D.cpp similarity index 98% rename from operators/shear_heating/noarch/P2ElementwiseShearHeatingAnnulusMap_toMatrix_macro_2D.cpp rename to operators/shear_heating/noarch/P2ElementwiseShearHeatingAnnulusMap_toMatrix_P2ElementwiseShearHeatingAnnulusMap_macro_2D.cpp index e0b1f2a30b486559e3e0ea7fe805d487ffd4541f..ae71a6199bd3a9423e0c964ba558b2b8c938fac1 100644 --- a/operators/shear_heating/noarch/P2ElementwiseShearHeatingAnnulusMap_toMatrix_macro_2D.cpp +++ b/operators/shear_heating/noarch/P2ElementwiseShearHeatingAnnulusMap_toMatrix_P2ElementwiseShearHeatingAnnulusMap_macro_2D.cpp @@ -45,6 +45,10 @@ + + + + @@ -58,7 +62,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseShearHeatingAnnulusMap::toMatrix_macro_2D( idx_t * RESTRICT _data_dstEdge, idx_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, idx_t * RESTRICT _data_srcEdge, idx_t * RESTRICT _data_srcVertex, real_t * RESTRICT _data_uxEdge, real_t * RESTRICT _data_uxVertex, real_t * RESTRICT _data_uyEdge, real_t * RESTRICT _data_uyVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const +void P2ElementwiseShearHeatingAnnulusMap::toMatrix_P2ElementwiseShearHeatingAnnulusMap_macro_2D( idx_t * RESTRICT _data_dstEdge, idx_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, idx_t * RESTRICT _data_srcEdge, idx_t * RESTRICT _data_srcVertex, real_t * RESTRICT _data_uxEdge, real_t * RESTRICT _data_uxVertex, real_t * RESTRICT _data_uyEdge, real_t * RESTRICT _data_uyVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const { { const real_t _data_q_w [] = {-0.28125, 0.26041666666666669, 0.26041666666666669, 0.26041666666666669}; diff --git a/operators/shear_heating/noarch/P2ElementwiseShearHeatingIcosahedralShellMap_apply_macro_3D.cpp b/operators/shear_heating/noarch/P2ElementwiseShearHeatingIcosahedralShellMap_apply_P2ElementwiseShearHeatingIcosahedralShellMap_macro_3D.cpp similarity index 99% rename from operators/shear_heating/noarch/P2ElementwiseShearHeatingIcosahedralShellMap_apply_macro_3D.cpp rename to operators/shear_heating/noarch/P2ElementwiseShearHeatingIcosahedralShellMap_apply_P2ElementwiseShearHeatingIcosahedralShellMap_macro_3D.cpp index 3d2aa7f6d152b4f4ca7dab17c6b80782f03ab4be..616b61f27fe960ceff8228a801c589f133d4e7b3 100644 --- a/operators/shear_heating/noarch/P2ElementwiseShearHeatingIcosahedralShellMap_apply_macro_3D.cpp +++ b/operators/shear_heating/noarch/P2ElementwiseShearHeatingIcosahedralShellMap_apply_P2ElementwiseShearHeatingIcosahedralShellMap_macro_3D.cpp @@ -45,6 +45,10 @@ + + + + @@ -58,7 +62,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseShearHeatingIcosahedralShellMap::apply_macro_3D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t * RESTRICT _data_uxEdge, real_t * RESTRICT _data_uxVertex, real_t * RESTRICT _data_uyEdge, real_t * RESTRICT _data_uyVertex, real_t * RESTRICT _data_uzEdge, real_t * RESTRICT _data_uzVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const +void P2ElementwiseShearHeatingIcosahedralShellMap::apply_P2ElementwiseShearHeatingIcosahedralShellMap_macro_3D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t * RESTRICT _data_uxEdge, real_t * RESTRICT _data_uxVertex, real_t * RESTRICT _data_uyEdge, real_t * RESTRICT _data_uyVertex, real_t * RESTRICT _data_uzEdge, real_t * RESTRICT _data_uzVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const { { const real_t _data_q_w [] = {-0.1333333333333333, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983}; diff --git a/operators/shear_heating/noarch/P2ElementwiseShearHeatingIcosahedralShellMap_computeInverseDiagonalOperatorValues_macro_3D.cpp b/operators/shear_heating/noarch/P2ElementwiseShearHeatingIcosahedralShellMap_computeInverseDiagonalOperatorValues_P2ElementwiseShearHeatingIcosahedralShellMap_macro_3D.cpp similarity index 99% rename from operators/shear_heating/noarch/P2ElementwiseShearHeatingIcosahedralShellMap_computeInverseDiagonalOperatorValues_macro_3D.cpp rename to operators/shear_heating/noarch/P2ElementwiseShearHeatingIcosahedralShellMap_computeInverseDiagonalOperatorValues_P2ElementwiseShearHeatingIcosahedralShellMap_macro_3D.cpp index 2ba79ecd3919bdf1d02afd9a8e326d97773a07eb..8fec87bfa5ead371be615989a9e986eea775de31 100644 --- a/operators/shear_heating/noarch/P2ElementwiseShearHeatingIcosahedralShellMap_computeInverseDiagonalOperatorValues_macro_3D.cpp +++ b/operators/shear_heating/noarch/P2ElementwiseShearHeatingIcosahedralShellMap_computeInverseDiagonalOperatorValues_P2ElementwiseShearHeatingIcosahedralShellMap_macro_3D.cpp @@ -45,6 +45,10 @@ + + + + @@ -58,7 +62,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseShearHeatingIcosahedralShellMap::computeInverseDiagonalOperatorValues_macro_3D( real_t * RESTRICT _data_invDiag_Edge, real_t * RESTRICT _data_invDiag_Vertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_uxEdge, real_t * RESTRICT _data_uxVertex, real_t * RESTRICT _data_uyEdge, real_t * RESTRICT _data_uyVertex, real_t * RESTRICT _data_uzEdge, real_t * RESTRICT _data_uzVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const +void P2ElementwiseShearHeatingIcosahedralShellMap::computeInverseDiagonalOperatorValues_P2ElementwiseShearHeatingIcosahedralShellMap_macro_3D( real_t * RESTRICT _data_invDiag_Edge, real_t * RESTRICT _data_invDiag_Vertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_uxEdge, real_t * RESTRICT _data_uxVertex, real_t * RESTRICT _data_uyEdge, real_t * RESTRICT _data_uyVertex, real_t * RESTRICT _data_uzEdge, real_t * RESTRICT _data_uzVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const { { const real_t _data_q_w [] = {-0.1333333333333333, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983}; diff --git a/operators/shear_heating/noarch/P2ElementwiseShearHeatingIcosahedralShellMap_toMatrix_macro_3D.cpp b/operators/shear_heating/noarch/P2ElementwiseShearHeatingIcosahedralShellMap_toMatrix_P2ElementwiseShearHeatingIcosahedralShellMap_macro_3D.cpp similarity index 99% rename from operators/shear_heating/noarch/P2ElementwiseShearHeatingIcosahedralShellMap_toMatrix_macro_3D.cpp rename to operators/shear_heating/noarch/P2ElementwiseShearHeatingIcosahedralShellMap_toMatrix_P2ElementwiseShearHeatingIcosahedralShellMap_macro_3D.cpp index 3948bfe6b19ce524704639107b43c7282b796301..53d7d217ab90e96bb9369910e8e004a7e2659012 100644 --- a/operators/shear_heating/noarch/P2ElementwiseShearHeatingIcosahedralShellMap_toMatrix_macro_3D.cpp +++ b/operators/shear_heating/noarch/P2ElementwiseShearHeatingIcosahedralShellMap_toMatrix_P2ElementwiseShearHeatingIcosahedralShellMap_macro_3D.cpp @@ -45,6 +45,10 @@ + + + + @@ -58,7 +62,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseShearHeatingIcosahedralShellMap::toMatrix_macro_3D( idx_t * RESTRICT _data_dstEdge, idx_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, idx_t * RESTRICT _data_srcEdge, idx_t * RESTRICT _data_srcVertex, real_t * RESTRICT _data_uxEdge, real_t * RESTRICT _data_uxVertex, real_t * RESTRICT _data_uyEdge, real_t * RESTRICT _data_uyVertex, real_t * RESTRICT _data_uzEdge, real_t * RESTRICT _data_uzVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const +void P2ElementwiseShearHeatingIcosahedralShellMap::toMatrix_P2ElementwiseShearHeatingIcosahedralShellMap_macro_3D( idx_t * RESTRICT _data_dstEdge, idx_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, idx_t * RESTRICT _data_srcEdge, idx_t * RESTRICT _data_srcVertex, real_t * RESTRICT _data_uxEdge, real_t * RESTRICT _data_uxVertex, real_t * RESTRICT _data_uyEdge, real_t * RESTRICT _data_uyVertex, real_t * RESTRICT _data_uzEdge, real_t * RESTRICT _data_uzVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const { { const real_t _data_q_w [] = {-0.1333333333333333, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983}; diff --git a/operators/shear_heating/noarch/P2ElementwiseShearHeating_apply_macro_2D.cpp b/operators/shear_heating/noarch/P2ElementwiseShearHeating_apply_P2ElementwiseShearHeating_macro_2D.cpp similarity index 98% rename from operators/shear_heating/noarch/P2ElementwiseShearHeating_apply_macro_2D.cpp rename to operators/shear_heating/noarch/P2ElementwiseShearHeating_apply_P2ElementwiseShearHeating_macro_2D.cpp index 42127512a4e748abf2e5aa484109c1085830c1ca..8e145f3f072bae2a20ab68ac0a383b2508add7f2 100644 --- a/operators/shear_heating/noarch/P2ElementwiseShearHeating_apply_macro_2D.cpp +++ b/operators/shear_heating/noarch/P2ElementwiseShearHeating_apply_P2ElementwiseShearHeating_macro_2D.cpp @@ -43,6 +43,10 @@ + + + + @@ -56,7 +60,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseShearHeating::apply_macro_2D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t * RESTRICT _data_uxEdge, real_t * RESTRICT _data_uxVertex, real_t * RESTRICT _data_uyEdge, real_t * RESTRICT _data_uyVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P2ElementwiseShearHeating::apply_P2ElementwiseShearHeating_macro_2D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t * RESTRICT _data_uxEdge, real_t * RESTRICT _data_uxVertex, real_t * RESTRICT _data_uyEdge, real_t * RESTRICT _data_uyVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t _data_q_w [] = {-0.28125, 0.26041666666666669, 0.26041666666666669, 0.26041666666666669}; diff --git a/operators/shear_heating/noarch/P2ElementwiseShearHeating_apply_macro_3D.cpp b/operators/shear_heating/noarch/P2ElementwiseShearHeating_apply_P2ElementwiseShearHeating_macro_3D.cpp similarity index 99% rename from operators/shear_heating/noarch/P2ElementwiseShearHeating_apply_macro_3D.cpp rename to operators/shear_heating/noarch/P2ElementwiseShearHeating_apply_P2ElementwiseShearHeating_macro_3D.cpp index bfa2f45d610e08331dd144a913dbd6e9f24723c6..b7fd6027275d18693504fd8a91479eba4443dd72 100644 --- a/operators/shear_heating/noarch/P2ElementwiseShearHeating_apply_macro_3D.cpp +++ b/operators/shear_heating/noarch/P2ElementwiseShearHeating_apply_P2ElementwiseShearHeating_macro_3D.cpp @@ -43,6 +43,10 @@ + + + + @@ -56,7 +60,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseShearHeating::apply_macro_3D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t * RESTRICT _data_uxEdge, real_t * RESTRICT _data_uxVertex, real_t * RESTRICT _data_uyEdge, real_t * RESTRICT _data_uyVertex, real_t * RESTRICT _data_uzEdge, real_t * RESTRICT _data_uzVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P2ElementwiseShearHeating::apply_P2ElementwiseShearHeating_macro_3D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t * RESTRICT _data_uxEdge, real_t * RESTRICT _data_uxVertex, real_t * RESTRICT _data_uyEdge, real_t * RESTRICT _data_uyVertex, real_t * RESTRICT _data_uzEdge, real_t * RESTRICT _data_uzVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t _data_q_w [] = {-0.1333333333333333, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983}; diff --git a/operators/shear_heating/noarch/P2ElementwiseShearHeating_computeInverseDiagonalOperatorValues_macro_2D.cpp b/operators/shear_heating/noarch/P2ElementwiseShearHeating_computeInverseDiagonalOperatorValues_P2ElementwiseShearHeating_macro_2D.cpp similarity index 97% rename from operators/shear_heating/noarch/P2ElementwiseShearHeating_computeInverseDiagonalOperatorValues_macro_2D.cpp rename to operators/shear_heating/noarch/P2ElementwiseShearHeating_computeInverseDiagonalOperatorValues_P2ElementwiseShearHeating_macro_2D.cpp index 6f8bbe9dc89333c698742b93599ab84199a5baa8..ba3e6b0967556fc661bd230c5fe2b7d5d6514e79 100644 --- a/operators/shear_heating/noarch/P2ElementwiseShearHeating_computeInverseDiagonalOperatorValues_macro_2D.cpp +++ b/operators/shear_heating/noarch/P2ElementwiseShearHeating_computeInverseDiagonalOperatorValues_P2ElementwiseShearHeating_macro_2D.cpp @@ -43,6 +43,10 @@ + + + + @@ -56,7 +60,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseShearHeating::computeInverseDiagonalOperatorValues_macro_2D( real_t * RESTRICT _data_invDiag_Edge, real_t * RESTRICT _data_invDiag_Vertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_uxEdge, real_t * RESTRICT _data_uxVertex, real_t * RESTRICT _data_uyEdge, real_t * RESTRICT _data_uyVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P2ElementwiseShearHeating::computeInverseDiagonalOperatorValues_P2ElementwiseShearHeating_macro_2D( real_t * RESTRICT _data_invDiag_Edge, real_t * RESTRICT _data_invDiag_Vertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_uxEdge, real_t * RESTRICT _data_uxVertex, real_t * RESTRICT _data_uyEdge, real_t * RESTRICT _data_uyVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t _data_q_w [] = {-0.28125, 0.26041666666666669, 0.26041666666666669, 0.26041666666666669}; diff --git a/operators/shear_heating/noarch/P2ElementwiseShearHeating_computeInverseDiagonalOperatorValues_macro_3D.cpp b/operators/shear_heating/noarch/P2ElementwiseShearHeating_computeInverseDiagonalOperatorValues_P2ElementwiseShearHeating_macro_3D.cpp similarity index 99% rename from operators/shear_heating/noarch/P2ElementwiseShearHeating_computeInverseDiagonalOperatorValues_macro_3D.cpp rename to operators/shear_heating/noarch/P2ElementwiseShearHeating_computeInverseDiagonalOperatorValues_P2ElementwiseShearHeating_macro_3D.cpp index f10f8d9bde3dce0fba77d3f5d7e89aa662a0fe97..081669dddbc8d57adb6acfbe99253c28ec905cac 100644 --- a/operators/shear_heating/noarch/P2ElementwiseShearHeating_computeInverseDiagonalOperatorValues_macro_3D.cpp +++ b/operators/shear_heating/noarch/P2ElementwiseShearHeating_computeInverseDiagonalOperatorValues_P2ElementwiseShearHeating_macro_3D.cpp @@ -43,6 +43,10 @@ + + + + @@ -56,7 +60,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseShearHeating::computeInverseDiagonalOperatorValues_macro_3D( real_t * RESTRICT _data_invDiag_Edge, real_t * RESTRICT _data_invDiag_Vertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_uxEdge, real_t * RESTRICT _data_uxVertex, real_t * RESTRICT _data_uyEdge, real_t * RESTRICT _data_uyVertex, real_t * RESTRICT _data_uzEdge, real_t * RESTRICT _data_uzVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P2ElementwiseShearHeating::computeInverseDiagonalOperatorValues_P2ElementwiseShearHeating_macro_3D( real_t * RESTRICT _data_invDiag_Edge, real_t * RESTRICT _data_invDiag_Vertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_uxEdge, real_t * RESTRICT _data_uxVertex, real_t * RESTRICT _data_uyEdge, real_t * RESTRICT _data_uyVertex, real_t * RESTRICT _data_uzEdge, real_t * RESTRICT _data_uzVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t _data_q_w [] = {-0.1333333333333333, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983}; diff --git a/operators/shear_heating/noarch/P2ElementwiseShearHeating_toMatrix_macro_2D.cpp b/operators/shear_heating/noarch/P2ElementwiseShearHeating_toMatrix_P2ElementwiseShearHeating_macro_2D.cpp similarity index 98% rename from operators/shear_heating/noarch/P2ElementwiseShearHeating_toMatrix_macro_2D.cpp rename to operators/shear_heating/noarch/P2ElementwiseShearHeating_toMatrix_P2ElementwiseShearHeating_macro_2D.cpp index 9808520359cd54b50a6a7c41256a7c3a499255b0..869746811de02b29eab4ab09c746a0a9216bbd8b 100644 --- a/operators/shear_heating/noarch/P2ElementwiseShearHeating_toMatrix_macro_2D.cpp +++ b/operators/shear_heating/noarch/P2ElementwiseShearHeating_toMatrix_P2ElementwiseShearHeating_macro_2D.cpp @@ -43,6 +43,10 @@ + + + + @@ -56,7 +60,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseShearHeating::toMatrix_macro_2D( idx_t * RESTRICT _data_dstEdge, idx_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, idx_t * RESTRICT _data_srcEdge, idx_t * RESTRICT _data_srcVertex, real_t * RESTRICT _data_uxEdge, real_t * RESTRICT _data_uxVertex, real_t * RESTRICT _data_uyEdge, real_t * RESTRICT _data_uyVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P2ElementwiseShearHeating::toMatrix_P2ElementwiseShearHeating_macro_2D( idx_t * RESTRICT _data_dstEdge, idx_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, idx_t * RESTRICT _data_srcEdge, idx_t * RESTRICT _data_srcVertex, real_t * RESTRICT _data_uxEdge, real_t * RESTRICT _data_uxVertex, real_t * RESTRICT _data_uyEdge, real_t * RESTRICT _data_uyVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t _data_q_w [] = {-0.28125, 0.26041666666666669, 0.26041666666666669, 0.26041666666666669}; diff --git a/operators/shear_heating/noarch/P2ElementwiseShearHeating_toMatrix_macro_3D.cpp b/operators/shear_heating/noarch/P2ElementwiseShearHeating_toMatrix_P2ElementwiseShearHeating_macro_3D.cpp similarity index 99% rename from operators/shear_heating/noarch/P2ElementwiseShearHeating_toMatrix_macro_3D.cpp rename to operators/shear_heating/noarch/P2ElementwiseShearHeating_toMatrix_P2ElementwiseShearHeating_macro_3D.cpp index e6b13d44316e3bd4e9e6bcfe01844a16a0a870b4..b896d935437763eac8ab40dbff660faddd11051a 100644 --- a/operators/shear_heating/noarch/P2ElementwiseShearHeating_toMatrix_macro_3D.cpp +++ b/operators/shear_heating/noarch/P2ElementwiseShearHeating_toMatrix_P2ElementwiseShearHeating_macro_3D.cpp @@ -43,6 +43,10 @@ + + + + @@ -56,7 +60,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseShearHeating::toMatrix_macro_3D( idx_t * RESTRICT _data_dstEdge, idx_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, idx_t * RESTRICT _data_srcEdge, idx_t * RESTRICT _data_srcVertex, real_t * RESTRICT _data_uxEdge, real_t * RESTRICT _data_uxVertex, real_t * RESTRICT _data_uyEdge, real_t * RESTRICT _data_uyVertex, real_t * RESTRICT _data_uzEdge, real_t * RESTRICT _data_uzVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P2ElementwiseShearHeating::toMatrix_P2ElementwiseShearHeating_macro_3D( idx_t * RESTRICT _data_dstEdge, idx_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, idx_t * RESTRICT _data_srcEdge, idx_t * RESTRICT _data_srcVertex, real_t * RESTRICT _data_uxEdge, real_t * RESTRICT _data_uxVertex, real_t * RESTRICT _data_uyEdge, real_t * RESTRICT _data_uyVertex, real_t * RESTRICT _data_uzEdge, real_t * RESTRICT _data_uzVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t _data_q_w [] = {-0.1333333333333333, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983};