Skip to content
Snippets Groups Projects
Commit 3567a180 authored by Nils Kohl's avatar Nils Kohl :full_moon_with_face:
Browse files

Merge branch 'kohl/k-mass' into 'main'

Scaled mass (k_mass)

See merge request !4
parents 68a86e41 96d31cb0
Branches
No related tags found
1 merge request!4Scaled mass (k_mass)
Pipeline #64145 passed
Showing
with 10261 additions and 2 deletions
...@@ -96,7 +96,8 @@ def main() -> None: ...@@ -96,7 +96,8 @@ def main() -> None:
with Pool(args.processes) as pool: with Pool(args.processes) as pool:
for form_str, operators in toml_dict.items(): for form_str, operators in toml_dict.items():
for spec in operators: for spec in operators:
pool.apply_async(generate_operator, (args, form_str, spec)) ret = pool.apply_async(generate_operator, (args, form_str, spec))
ret.get()
pool.close() pool.close()
pool.join() pool.join()
......
--extra-index-url https://test.pypi.org/simple/ --extra-index-url https://test.pypi.org/simple/
hfg @ git+ssh://git@i10git.cs.fau.de/terraneo/hyteg-form-generator.git@d240fe6b8633d6ab4cf6fa741f3e7dedc74687f6 hfg @ git+ssh://git@i10git.cs.fau.de/terraneo/hyteg-form-generator.git@5df6dbee6f366632bc5c20d91597172a653a37b3
tomli >= 1.1.0 ; python_version < "3.11" tomli >= 1.1.0 ; python_version < "3.11"
clang-format clang-format
...@@ -57,6 +57,25 @@ quadrature = 2 ...@@ -57,6 +57,25 @@ quadrature = 2
loop-strategy = "sawtooth" loop-strategy = "sawtooth"
optimizations = ["moveconstants", "vectorize", "quadloops", "tabulate"] optimizations = ["moveconstants", "vectorize", "quadloops", "tabulate"]
# Scaled mass form (k_mass): P1 trial/test space with a P1 coefficient function.
# Requested for 2D and 3D without a blending entry, using quadrature setting 3.
[[k_mass]]
trial-space = "P1"
test-space = "P1"
form-space-args.coefficient_function_space = "P1"
dimensions = [2, 3]
quadrature = 3
loop-strategy = "sawtooth"
optimizations = ["moveconstants", "vectorize", "quadloops", "tabulate"]
# Same form, restricted to 3D and blended with the IcosahedralShellMap.
# Uses quadrature setting 4 instead of 3.
[[k_mass]]
trial-space = "P1"
test-space = "P1"
form-space-args.coefficient_function_space = "P1"
dimensions = [3]
quadrature = 4
loop-strategy = "sawtooth"
optimizations = ["moveconstants", "vectorize", "quadloops", "tabulate"]
blending = "IcosahedralShellMap"
[[divergence]] [[divergence]]
trial-space = "P2" trial-space = "P2"
test-space = "P1" test-space = "P1"
......
...@@ -12,4 +12,5 @@ add_subdirectory(divergence) ...@@ -12,4 +12,5 @@ add_subdirectory(divergence)
add_subdirectory(epsilon) add_subdirectory(epsilon)
add_subdirectory(full_stokes) add_subdirectory(full_stokes)
add_subdirectory(gradient) add_subdirectory(gradient)
add_subdirectory(k_mass)
add_subdirectory(mass) add_subdirectory(mass)
# Library with the generated k_mass operators (P1ElementwiseKMass and its
# IcosahedralShellMap variant). The operator classes are always built; the
# kernel sources are selected below depending on the build configuration.
add_library( opgen-k_mass
   P1ElementwiseKMass.cpp
   P1ElementwiseKMass.hpp
   P1ElementwiseKMassIcosahedralShellMap.cpp
   P1ElementwiseKMassIcosahedralShellMap.hpp
)

if(HYTEG_BUILD_WITH_AVX AND WALBERLA_DOUBLE_ACCURACY)
   # AVX kernels. Kept in a single list so the sources added to the target and
   # the sources receiving the native compiler flags cannot drift apart.
   set(OPGEN_K_MASS_AVX_SOURCES
      avx/P1ElementwiseKMassIcosahedralShellMap_apply_macro_3D.cpp
      avx/P1ElementwiseKMassIcosahedralShellMap_computeInverseDiagonalOperatorValues_macro_3D.cpp
      avx/P1ElementwiseKMass_apply_macro_2D.cpp
      avx/P1ElementwiseKMass_apply_macro_3D.cpp
      avx/P1ElementwiseKMass_computeInverseDiagonalOperatorValues_macro_2D.cpp
      avx/P1ElementwiseKMass_computeInverseDiagonalOperatorValues_macro_3D.cpp
   )

   # The toMatrix kernels are only provided as scalar (noarch) sources.
   target_sources(opgen-k_mass PRIVATE
      ${OPGEN_K_MASS_AVX_SOURCES}
      noarch/P1ElementwiseKMassIcosahedralShellMap_toMatrix_macro_3D.cpp
      noarch/P1ElementwiseKMass_toMatrix_macro_2D.cpp
      noarch/P1ElementwiseKMass_toMatrix_macro_3D.cpp
   )

   # The flags variable is quoted on purpose: an empty or multi-element value
   # would otherwise break the "PROPERTIES <name> <value>" pairing and abort
   # the configure step. COMPILE_OPTIONS accepts a semicolon-separated list.
   set_source_files_properties(
      ${OPGEN_K_MASS_AVX_SOURCES}
      PROPERTIES COMPILE_OPTIONS "${HYTEG_COMPILER_NATIVE_FLAGS}"
   )
else()
   # AVX was requested but cannot be used in non-double precision builds.
   if(HYTEG_BUILD_WITH_AVX AND NOT WALBERLA_DOUBLE_ACCURACY)
      message(WARNING "AVX vectorization only available in double precision. Using scalar kernels.")
   endif()

   # Scalar fallback: every kernel comes from the noarch directory.
   target_sources(opgen-k_mass PRIVATE
      noarch/P1ElementwiseKMassIcosahedralShellMap_apply_macro_3D.cpp
      noarch/P1ElementwiseKMassIcosahedralShellMap_computeInverseDiagonalOperatorValues_macro_3D.cpp
      noarch/P1ElementwiseKMassIcosahedralShellMap_toMatrix_macro_3D.cpp
      noarch/P1ElementwiseKMass_apply_macro_2D.cpp
      noarch/P1ElementwiseKMass_apply_macro_3D.cpp
      noarch/P1ElementwiseKMass_computeInverseDiagonalOperatorValues_macro_2D.cpp
      noarch/P1ElementwiseKMass_computeInverseDiagonalOperatorValues_macro_3D.cpp
      noarch/P1ElementwiseKMass_toMatrix_macro_2D.cpp
      noarch/P1ElementwiseKMass_toMatrix_macro_3D.cpp
   )
endif()

# PETSc is linked publicly only when HyTeG itself is built with PETSc.
if (HYTEG_BUILD_WITH_PETSC)
   target_link_libraries(opgen-k_mass PUBLIC PETSc::PETSc)
endif ()

# Half precision support raises the required language standard to C++23.
if (WALBERLA_BUILD_WITH_HALF_PRECISION_SUPPORT)
   target_compile_features(opgen-k_mass PUBLIC cxx_std_23)
else ()
   target_compile_features(opgen-k_mass PUBLIC cxx_std_17)
endif ()
This diff is collapsed.
/*
* Copyright (c) 2017-2024 Nils Kohl, Daniel Bauer, Fabian Böhm.
*
* This file is part of HyTeG
* (see https://i10git.cs.fau.de/hyteg/hyteg).
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
/*
* The entire file was generated with the HyTeG form generator.
*
* Avoid modifying this file. If buggy, consider fixing the generator itself.
*/
#pragma once
#include "core/DataTypes.h"
#include "hyteg/LikwidWrapper.hpp"
#include "hyteg/communication/Syncing.hpp"
#include "hyteg/edgedofspace/EdgeDoFMacroCell.hpp"
#include "hyteg/operators/Operator.hpp"
#include "hyteg/p1functionspace/P1Function.hpp"
#include "hyteg/primitivestorage/PrimitiveStorage.hpp"
#include "hyteg/solvers/Smoothables.hpp"
#include "hyteg/sparseassembly/SparseMatrixProxy.hpp"
#define FUNC_PREFIX
namespace hyteg {
namespace operatorgeneration {
/// Mass operator scaled by a scalar coefficient k (k_mass).
///
/// Geometry map: IdentityMap
///
/// Weak formulation
///
/// u: trial function (space: Lagrange, degree: 1)
/// v: test function (space: Lagrange, degree: 1)
/// k: coefficient (space: Lagrange, degree: 1)
///
/// ∫ k uv
///
/// The class only declares the interface; the member function definitions
/// live in separate source files (one file per generated kernel).
class P1ElementwiseKMass : public Operator< P1Function< real_t >, P1Function< real_t > >,
public OperatorWithInverseDiagonal< P1Function< real_t > >
{
public:
/// Creates the operator.
///
/// \param storage  primitive storage the operator works on
/// \param minLevel lower level bound passed to the operator
/// \param maxLevel upper level bound passed to the operator
/// \param _k       coefficient function k of the weak form
///                 (NOTE(review): presumably copied into the member `k`;
///                 the constructor definition is not part of this header)
P1ElementwiseKMass( const std::shared_ptr< PrimitiveStorage >& storage,
size_t minLevel,
size_t maxLevel,
const P1Function< real_t >& _k );
/// Applies the operator to `src` with the result going to `dst`.
///
/// \param src        input function
/// \param dst        output function
/// \param level      level on which the operator is applied
/// \param flag       DoF type selector
/// \param updateType update mode for `dst`, defaults to Replace
void apply( const P1Function< real_t >& src,
const P1Function< real_t >& dst,
uint_t level,
DoFType flag,
UpdateType updateType = Replace ) const;
/// Assembles the operator into the sparse matrix proxy `mat`.
///
/// \param mat   matrix proxy receiving the entries
/// \param src   index (idx_t) function for the source space
/// \param dst   index (idx_t) function for the destination space
/// \param level level on which the matrix is assembled
/// \param flag  DoF type selector
void toMatrix( const std::shared_ptr< SparseMatrixProxy >& mat,
const P1Function< idx_t >& src,
const P1Function< idx_t >& dst,
uint_t level,
DoFType flag ) const;
/// Computes the inverse diagonal values of the operator.
/// NOTE(review): presumably fills `invDiag_`; confirm against the definition.
void computeInverseDiagonalOperatorValues();
/// Returns the function holding the inverse diagonal values.
std::shared_ptr< P1Function< real_t > > getInverseDiagonalValues() const;
protected:
private:
// The kernels below take raw data pointers for the involved functions, the
// coordinates of the macro vertices (one real_t per vertex and component)
// and the number of micro edges per macro edge, both as integer and as
// floating point value.
/// Kernel type: apply
/// - quadrature rule: Dunavant 3 | points: 4, degree: 3
/// - operations per element:
/// adds muls divs pows abs assignments function_calls unknown_ops
/// ------ ------ ------ ------ ----- ------------- ---------------- -------------
/// 49 49 0 0 0 0 0 0
void apply_macro_2D( real_t* RESTRICT _data_dst,
real_t* RESTRICT _data_k,
real_t* RESTRICT _data_src,
real_t macro_vertex_coord_id_0comp0,
real_t macro_vertex_coord_id_0comp1,
real_t macro_vertex_coord_id_1comp0,
real_t macro_vertex_coord_id_1comp1,
real_t macro_vertex_coord_id_2comp0,
real_t macro_vertex_coord_id_2comp1,
int64_t micro_edges_per_macro_edge,
real_t micro_edges_per_macro_edge_float ) const;
/// Kernel type: apply
/// - quadrature rule: Hammer-Marlowe-Stroud 3 | points: 5, degree: 3
/// - operations per element:
/// adds muls divs pows abs assignments function_calls unknown_ops
/// ------ ------ ------ ------ ----- ------------- ---------------- -------------
/// 96 91 0 0 0 0 0 0
void apply_macro_3D( real_t* RESTRICT _data_dst,
real_t* RESTRICT _data_k,
real_t* RESTRICT _data_src,
real_t macro_vertex_coord_id_0comp0,
real_t macro_vertex_coord_id_0comp1,
real_t macro_vertex_coord_id_0comp2,
real_t macro_vertex_coord_id_1comp0,
real_t macro_vertex_coord_id_1comp1,
real_t macro_vertex_coord_id_1comp2,
real_t macro_vertex_coord_id_2comp0,
real_t macro_vertex_coord_id_2comp1,
real_t macro_vertex_coord_id_2comp2,
real_t macro_vertex_coord_id_3comp0,
real_t macro_vertex_coord_id_3comp1,
real_t macro_vertex_coord_id_3comp2,
int64_t micro_edges_per_macro_edge,
real_t micro_edges_per_macro_edge_float ) const;
/// Kernel type: toMatrix
/// - quadrature rule: Dunavant 3 | points: 4, degree: 3
/// - operations per element:
/// adds muls divs pows abs assignments function_calls unknown_ops
/// ------ ------ ------ ------ ----- ------------- ---------------- -------------
/// 40 40 0 0 0 0 0 3
void toMatrix_macro_2D( idx_t* RESTRICT _data_dst,
real_t* RESTRICT _data_k,
idx_t* RESTRICT _data_src,
real_t macro_vertex_coord_id_0comp0,
real_t macro_vertex_coord_id_0comp1,
real_t macro_vertex_coord_id_1comp0,
real_t macro_vertex_coord_id_1comp1,
real_t macro_vertex_coord_id_2comp0,
real_t macro_vertex_coord_id_2comp1,
std::shared_ptr< SparseMatrixProxy > mat,
int64_t micro_edges_per_macro_edge,
real_t micro_edges_per_macro_edge_float ) const;
/// Kernel type: toMatrix
/// - quadrature rule: Hammer-Marlowe-Stroud 3 | points: 5, degree: 3
/// - operations per element:
/// adds muls divs pows abs assignments function_calls unknown_ops
/// ------ ------ ------ ------ ----- ------------- ---------------- -------------
/// 80 75 0 0 0 0 0 3
void toMatrix_macro_3D( idx_t* RESTRICT _data_dst,
real_t* RESTRICT _data_k,
idx_t* RESTRICT _data_src,
real_t macro_vertex_coord_id_0comp0,
real_t macro_vertex_coord_id_0comp1,
real_t macro_vertex_coord_id_0comp2,
real_t macro_vertex_coord_id_1comp0,
real_t macro_vertex_coord_id_1comp1,
real_t macro_vertex_coord_id_1comp2,
real_t macro_vertex_coord_id_2comp0,
real_t macro_vertex_coord_id_2comp1,
real_t macro_vertex_coord_id_2comp2,
real_t macro_vertex_coord_id_3comp0,
real_t macro_vertex_coord_id_3comp1,
real_t macro_vertex_coord_id_3comp2,
std::shared_ptr< SparseMatrixProxy > mat,
int64_t micro_edges_per_macro_edge,
real_t micro_edges_per_macro_edge_float ) const;
/// Kernel type: computeInverseDiagonalOperatorValues
/// - quadrature rule: Dunavant 3 | points: 4, degree: 3
/// - operations per element:
/// adds muls divs pows abs assignments function_calls unknown_ops
/// ------ ------ ------ ------ ----- ------------- ---------------- -------------
/// 31 28 0 0 0 0 0 0
void computeInverseDiagonalOperatorValues_macro_2D( real_t* RESTRICT _data_invDiag_,
real_t* RESTRICT _data_k,
real_t macro_vertex_coord_id_0comp0,
real_t macro_vertex_coord_id_0comp1,
real_t macro_vertex_coord_id_1comp0,
real_t macro_vertex_coord_id_1comp1,
real_t macro_vertex_coord_id_2comp0,
real_t macro_vertex_coord_id_2comp1,
int64_t micro_edges_per_macro_edge,
real_t micro_edges_per_macro_edge_float ) const;
/// Kernel type: computeInverseDiagonalOperatorValues
/// - quadrature rule: Hammer-Marlowe-Stroud 3 | points: 5, degree: 3
/// - operations per element:
/// adds muls divs pows abs assignments function_calls unknown_ops
/// ------ ------ ------ ------ ----- ------------- ---------------- -------------
/// 54 45 0 0 0 0 0 0
void computeInverseDiagonalOperatorValues_macro_3D( real_t* RESTRICT _data_invDiag_,
real_t* RESTRICT _data_k,
real_t macro_vertex_coord_id_0comp0,
real_t macro_vertex_coord_id_0comp1,
real_t macro_vertex_coord_id_0comp2,
real_t macro_vertex_coord_id_1comp0,
real_t macro_vertex_coord_id_1comp1,
real_t macro_vertex_coord_id_1comp2,
real_t macro_vertex_coord_id_2comp0,
real_t macro_vertex_coord_id_2comp1,
real_t macro_vertex_coord_id_2comp2,
real_t macro_vertex_coord_id_3comp0,
real_t macro_vertex_coord_id_3comp1,
real_t macro_vertex_coord_id_3comp2,
int64_t micro_edges_per_macro_edge,
real_t micro_edges_per_macro_edge_float ) const;
/// Function holding the inverse diagonal values (see getInverseDiagonalValues()).
std::shared_ptr< P1Function< real_t > > invDiag_;
/// Coefficient function k of the weak form, held by value.
P1Function< real_t > k;
};
} // namespace operatorgeneration
} // namespace hyteg
This diff is collapsed.
/*
* Copyright (c) 2017-2024 Nils Kohl, Daniel Bauer, Fabian Böhm.
*
* This file is part of HyTeG
* (see https://i10git.cs.fau.de/hyteg/hyteg).
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
/*
* The entire file was generated with the HyTeG form generator.
*
* Avoid modifying this file. If buggy, consider fixing the generator itself.
*/
#pragma once
#include "core/DataTypes.h"
#include "hyteg/LikwidWrapper.hpp"
#include "hyteg/communication/Syncing.hpp"
#include "hyteg/edgedofspace/EdgeDoFMacroCell.hpp"
#include "hyteg/geometry/IcosahedralShellMap.hpp"
#include "hyteg/operators/Operator.hpp"
#include "hyteg/p1functionspace/P1Function.hpp"
#include "hyteg/primitivestorage/PrimitiveStorage.hpp"
#include "hyteg/solvers/Smoothables.hpp"
#include "hyteg/sparseassembly/SparseMatrixProxy.hpp"
#define FUNC_PREFIX
namespace hyteg {
namespace operatorgeneration {
/// Mass operator scaled by a scalar coefficient k (k_mass).
///
/// Geometry map: IcosahedralShellMap
///
/// Weak formulation
///
/// u: trial function (space: Lagrange, degree: 1)
/// v: test function (space: Lagrange, degree: 1)
/// k: coefficient (space: Lagrange, degree: 1)
///
/// ∫ k uv
///
/// Only 3D kernels are declared for this geometry map. The class only declares
/// the interface; the member function definitions live in separate source files.
class P1ElementwiseKMassIcosahedralShellMap : public Operator< P1Function< real_t >, P1Function< real_t > >,
public OperatorWithInverseDiagonal< P1Function< real_t > >
{
public:
/// Creates the operator.
///
/// \param storage  primitive storage the operator works on
/// \param minLevel lower level bound passed to the operator
/// \param maxLevel upper level bound passed to the operator
/// \param _k       coefficient function k of the weak form
///                 (NOTE(review): presumably copied into the member `k`;
///                 the constructor definition is not part of this header)
P1ElementwiseKMassIcosahedralShellMap( const std::shared_ptr< PrimitiveStorage >& storage,
size_t minLevel,
size_t maxLevel,
const P1Function< real_t >& _k );
/// Applies the operator to `src` with the result going to `dst`.
///
/// \param src        input function
/// \param dst        output function
/// \param level      level on which the operator is applied
/// \param flag       DoF type selector
/// \param updateType update mode for `dst`, defaults to Replace
void apply( const P1Function< real_t >& src,
const P1Function< real_t >& dst,
uint_t level,
DoFType flag,
UpdateType updateType = Replace ) const;
/// Assembles the operator into the sparse matrix proxy `mat`.
///
/// \param mat   matrix proxy receiving the entries
/// \param src   index (idx_t) function for the source space
/// \param dst   index (idx_t) function for the destination space
/// \param level level on which the matrix is assembled
/// \param flag  DoF type selector
void toMatrix( const std::shared_ptr< SparseMatrixProxy >& mat,
const P1Function< idx_t >& src,
const P1Function< idx_t >& dst,
uint_t level,
DoFType flag ) const;
/// Computes the inverse diagonal values of the operator.
/// NOTE(review): presumably fills `invDiag_`; confirm against the definition.
void computeInverseDiagonalOperatorValues();
/// Returns the function holding the inverse diagonal values.
std::shared_ptr< P1Function< real_t > > getInverseDiagonalValues() const;
protected:
private:
// The kernels below take raw data pointers for the involved functions, the
// coordinates of the four macro vertices (three components each) and the
// number of micro edges per macro edge, both as integer and as floating
// point value. The remaining real_t arguments (forVertex_*, rayVertex_*,
// refVertex_*, thrVertex_*, radRayVertex, radRefVertex) are presumably the
// parameters of the IcosahedralShellMap for the current macro cell
// (NOTE(review): confirm against the calling code).
/// Kernel type: apply
/// - quadrature rule: Jaśkowiec-Sukumar 04 | points: 11, degree: 4
/// - operations per element:
/// adds muls divs pows abs assignments function_calls unknown_ops
/// ------ ------ ------ ------ ----- ------------- ---------------- -------------
/// 945 1177 47 11 11 0 0 1
void apply_macro_3D( real_t* RESTRICT _data_dst,
real_t* RESTRICT _data_k,
real_t* RESTRICT _data_src,
real_t forVertex_0,
real_t forVertex_1,
real_t forVertex_2,
real_t macro_vertex_coord_id_0comp0,
real_t macro_vertex_coord_id_0comp1,
real_t macro_vertex_coord_id_0comp2,
real_t macro_vertex_coord_id_1comp0,
real_t macro_vertex_coord_id_1comp1,
real_t macro_vertex_coord_id_1comp2,
real_t macro_vertex_coord_id_2comp0,
real_t macro_vertex_coord_id_2comp1,
real_t macro_vertex_coord_id_2comp2,
real_t macro_vertex_coord_id_3comp0,
real_t macro_vertex_coord_id_3comp1,
real_t macro_vertex_coord_id_3comp2,
int64_t micro_edges_per_macro_edge,
real_t micro_edges_per_macro_edge_float,
real_t radRayVertex,
real_t radRefVertex,
real_t rayVertex_0,
real_t rayVertex_1,
real_t rayVertex_2,
real_t refVertex_0,
real_t refVertex_1,
real_t refVertex_2,
real_t thrVertex_0,
real_t thrVertex_1,
real_t thrVertex_2 ) const;
/// Kernel type: toMatrix
/// - quadrature rule: Jaśkowiec-Sukumar 04 | points: 11, degree: 4
/// - operations per element:
/// adds muls divs pows abs assignments function_calls unknown_ops
/// ------ ------ ------ ------ ----- ------------- ---------------- -------------
/// 929 1161 47 11 11 0 0 4
void toMatrix_macro_3D( idx_t* RESTRICT _data_dst,
real_t* RESTRICT _data_k,
idx_t* RESTRICT _data_src,
real_t forVertex_0,
real_t forVertex_1,
real_t forVertex_2,
real_t macro_vertex_coord_id_0comp0,
real_t macro_vertex_coord_id_0comp1,
real_t macro_vertex_coord_id_0comp2,
real_t macro_vertex_coord_id_1comp0,
real_t macro_vertex_coord_id_1comp1,
real_t macro_vertex_coord_id_1comp2,
real_t macro_vertex_coord_id_2comp0,
real_t macro_vertex_coord_id_2comp1,
real_t macro_vertex_coord_id_2comp2,
real_t macro_vertex_coord_id_3comp0,
real_t macro_vertex_coord_id_3comp1,
real_t macro_vertex_coord_id_3comp2,
std::shared_ptr< SparseMatrixProxy > mat,
int64_t micro_edges_per_macro_edge,
real_t micro_edges_per_macro_edge_float,
real_t radRayVertex,
real_t radRefVertex,
real_t rayVertex_0,
real_t rayVertex_1,
real_t rayVertex_2,
real_t refVertex_0,
real_t refVertex_1,
real_t refVertex_2,
real_t thrVertex_0,
real_t thrVertex_1,
real_t thrVertex_2 ) const;
/// Kernel type: computeInverseDiagonalOperatorValues
/// - quadrature rule: Jaśkowiec-Sukumar 04 | points: 11, degree: 4
/// - operations per element:
/// adds muls divs pows abs assignments function_calls unknown_ops
/// ------ ------ ------ ------ ----- ------------- ---------------- -------------
/// 867 1095 47 11 11 0 0 1
void computeInverseDiagonalOperatorValues_macro_3D( real_t* RESTRICT _data_invDiag_,
real_t* RESTRICT _data_k,
real_t forVertex_0,
real_t forVertex_1,
real_t forVertex_2,
real_t macro_vertex_coord_id_0comp0,
real_t macro_vertex_coord_id_0comp1,
real_t macro_vertex_coord_id_0comp2,
real_t macro_vertex_coord_id_1comp0,
real_t macro_vertex_coord_id_1comp1,
real_t macro_vertex_coord_id_1comp2,
real_t macro_vertex_coord_id_2comp0,
real_t macro_vertex_coord_id_2comp1,
real_t macro_vertex_coord_id_2comp2,
real_t macro_vertex_coord_id_3comp0,
real_t macro_vertex_coord_id_3comp1,
real_t macro_vertex_coord_id_3comp2,
int64_t micro_edges_per_macro_edge,
real_t micro_edges_per_macro_edge_float,
real_t radRayVertex,
real_t radRefVertex,
real_t rayVertex_0,
real_t rayVertex_1,
real_t rayVertex_2,
real_t refVertex_0,
real_t refVertex_1,
real_t refVertex_2,
real_t thrVertex_0,
real_t thrVertex_1,
real_t thrVertex_2 ) const;
/// Function holding the inverse diagonal values (see getInverseDiagonalValues()).
std::shared_ptr< P1Function< real_t > > invDiag_;
/// Coefficient function k of the weak form, held by value.
P1Function< real_t > k;
};
} // namespace operatorgeneration
} // namespace hyteg
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
/*
* Copyright (c) 2017-2024 Nils Kohl, Daniel Bauer, Fabian Böhm.
*
* This file is part of HyTeG
* (see https://i10git.cs.fau.de/hyteg/hyteg).
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
/*
* The entire file was generated with the HyTeG form generator.
*
* Avoid modifying this file. If buggy, consider fixing the generator itself.
*/
#include "../P1ElementwiseKMass.hpp"
#define FUNC_PREFIX
namespace hyteg {
namespace operatorgeneration {
void P1ElementwiseKMass::computeInverseDiagonalOperatorValues_macro_2D( real_t * RESTRICT _data_invDiag_, real_t * RESTRICT _data_k, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
{
{
const real_t _data_q_w [] = {-0.28125, 0.26041666666666669, 0.26041666666666669, 0.26041666666666669};
const real_t _data_q_p_0 [] = {0.33333333333333331, 0.20000000000000001, 0.59999999999999998, 0.20000000000000001};
const real_t _data_q_p_1 [] = {0.33333333333333331, 0.59999999999999998, 0.20000000000000001, 0.20000000000000001};
const real_t tmp_coords_jac_0_GRAY = 1.0 / (micro_edges_per_macro_edge_float)*1.0;
const real_t p_affine_const_0_0_GRAY = macro_vertex_coord_id_0comp0;
const real_t p_affine_const_0_1_GRAY = macro_vertex_coord_id_0comp1;
const real_t p_affine_const_1_0_GRAY = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0);
const real_t p_affine_const_1_1_GRAY = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1);
const real_t p_affine_const_2_0_GRAY = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0);
const real_t p_affine_const_2_1_GRAY = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1);
const real_t jac_affine_0_0_GRAY = -p_affine_const_0_0_GRAY + p_affine_const_1_0_GRAY;
const real_t jac_affine_0_1_GRAY = -p_affine_const_0_0_GRAY + p_affine_const_2_0_GRAY;
const real_t jac_affine_1_0_GRAY = -p_affine_const_0_1_GRAY + p_affine_const_1_1_GRAY;
const real_t jac_affine_1_1_GRAY = -p_affine_const_0_1_GRAY + p_affine_const_2_1_GRAY;
const real_t abs_det_jac_affine_GRAY = abs(jac_affine_0_0_GRAY*jac_affine_1_1_GRAY - jac_affine_0_1_GRAY*jac_affine_1_0_GRAY);
{
/* FaceType.GRAY */
const real_t _data_phi_psi_jac_affine_det_0_0_GRAY [] = {((real_t)(abs_det_jac_affine_GRAY*0.11111111111111117)), ((real_t)(abs_det_jac_affine_GRAY*0.11111111111111113)), ((real_t)(abs_det_jac_affine_GRAY*0.11111111111111113)), ((real_t)(abs_det_jac_affine_GRAY*0.1111111111111111)), ((real_t)(abs_det_jac_affine_GRAY*0.1111111111111111)), ((real_t)(abs_det_jac_affine_GRAY*0.1111111111111111)), ((real_t)(abs_det_jac_affine_GRAY*0.040000000000000029)), ((real_t)(abs_det_jac_affine_GRAY*0.040000000000000015)), ((real_t)(abs_det_jac_affine_GRAY*0.12000000000000004)), ((real_t)(abs_det_jac_affine_GRAY*0.040000000000000008)), ((real_t)(abs_det_jac_affine_GRAY*0.12)), ((real_t)(abs_det_jac_affine_GRAY*0.35999999999999999)), ((real_t)(abs_det_jac_affine_GRAY*0.040000000000000008)), ((real_t)(abs_det_jac_affine_GRAY*0.12)), ((real_t)(abs_det_jac_affine_GRAY*0.040000000000000008)), ((real_t)(abs_det_jac_affine_GRAY*0.35999999999999999)), ((real_t)(abs_det_jac_affine_GRAY*0.12)), ((real_t)(abs_det_jac_affine_GRAY*0.040000000000000008)), ((real_t)(abs_det_jac_affine_GRAY*0.3600000000000001)), ((real_t)(abs_det_jac_affine_GRAY*0.12000000000000002)), ((real_t)(abs_det_jac_affine_GRAY*0.12000000000000002)), ((real_t)(abs_det_jac_affine_GRAY*0.040000000000000008)), ((real_t)(abs_det_jac_affine_GRAY*0.040000000000000008)), ((real_t)(abs_det_jac_affine_GRAY*0.040000000000000008))};
for (int64_t ctr_1 = 0; ctr_1 < micro_edges_per_macro_edge; ctr_1 += 1)
{
for (int64_t ctr_0 = 0; ctr_0 < (int64_t)((-ctr_1 + micro_edges_per_macro_edge) / (4)) * (4); ctr_0 += 4)
{
const __m256d k_dof_0 = _mm256_loadu_pd(& _data_k[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))]);
const __m256d k_dof_1 = _mm256_loadu_pd(& _data_k[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]);
const __m256d k_dof_2 = _mm256_loadu_pd(& _data_k[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]);
__m256d q_acc_0_0 = _mm256_set_pd(0.0,0.0,0.0,0.0);
__m256d q_acc_1_1 = _mm256_set_pd(0.0,0.0,0.0,0.0);
__m256d q_acc_2_2 = _mm256_set_pd(0.0,0.0,0.0,0.0);
for (int64_t q = 0; q < 4; q += 1)
{
const __m256d tmp_qloop_0 = _mm256_mul_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(k_dof_0,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]))),_mm256_set_pd(1.0,1.0,1.0,1.0))),_mm256_mul_pd(k_dof_1,_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q]))),_mm256_mul_pd(k_dof_2,_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]))),_mm256_set_pd(_data_q_w[q],_data_q_w[q],_data_q_w[q],_data_q_w[q]));
const __m256d q_tmp_0_0 = _mm256_mul_pd(tmp_qloop_0,_mm256_set_pd(_data_phi_psi_jac_affine_det_0_0_GRAY[6*q],_data_phi_psi_jac_affine_det_0_0_GRAY[6*q],_data_phi_psi_jac_affine_det_0_0_GRAY[6*q],_data_phi_psi_jac_affine_det_0_0_GRAY[6*q]));
const __m256d q_tmp_1_1 = _mm256_mul_pd(tmp_qloop_0,_mm256_set_pd(_data_phi_psi_jac_affine_det_0_0_GRAY[6*q + 3],_data_phi_psi_jac_affine_det_0_0_GRAY[6*q + 3],_data_phi_psi_jac_affine_det_0_0_GRAY[6*q + 3],_data_phi_psi_jac_affine_det_0_0_GRAY[6*q + 3]));
const __m256d q_tmp_2_2 = _mm256_mul_pd(tmp_qloop_0,_mm256_set_pd(_data_phi_psi_jac_affine_det_0_0_GRAY[6*q + 5],_data_phi_psi_jac_affine_det_0_0_GRAY[6*q + 5],_data_phi_psi_jac_affine_det_0_0_GRAY[6*q + 5],_data_phi_psi_jac_affine_det_0_0_GRAY[6*q + 5]));
q_acc_0_0 = _mm256_add_pd(q_acc_0_0,q_tmp_0_0);
q_acc_1_1 = _mm256_add_pd(q_acc_1_1,q_tmp_1_1);
q_acc_2_2 = _mm256_add_pd(q_acc_2_2,q_tmp_2_2);
}
const __m256d elMatDiag_0 = q_acc_0_0;
const __m256d elMatDiag_1 = q_acc_1_1;
const __m256d elMatDiag_2 = q_acc_2_2;
_mm256_storeu_pd(&_data_invDiag_[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))],_mm256_add_pd(elMatDiag_0,_mm256_loadu_pd(& _data_invDiag_[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))])));
_mm256_storeu_pd(&_data_invDiag_[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1],_mm256_add_pd(elMatDiag_1,_mm256_loadu_pd(& _data_invDiag_[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1])));
_mm256_storeu_pd(&_data_invDiag_[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))],_mm256_add_pd(elMatDiag_2,_mm256_loadu_pd(& _data_invDiag_[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))])));
}
for (int64_t ctr_0 = (int64_t)((-ctr_1 + micro_edges_per_macro_edge) / (4)) * (4); ctr_0 < -ctr_1 + micro_edges_per_macro_edge; ctr_0 += 1)
{
const real_t k_dof_0 = _data_k[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))];
const real_t k_dof_1 = _data_k[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
const real_t k_dof_2 = _data_k[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
real_t q_acc_0_0 = 0.0;
real_t q_acc_1_1 = 0.0;
real_t q_acc_2_2 = 0.0;
for (int64_t q = 0; q < 4; q += 1)
{
const real_t tmp_qloop_0 = (k_dof_0*(1.0 - _data_q_p_0[q] - _data_q_p_1[q]) + k_dof_1*_data_q_p_0[q] + k_dof_2*_data_q_p_1[q])*_data_q_w[q];
const real_t q_tmp_0_0 = tmp_qloop_0*_data_phi_psi_jac_affine_det_0_0_GRAY[6*q];
const real_t q_tmp_1_1 = tmp_qloop_0*_data_phi_psi_jac_affine_det_0_0_GRAY[6*q + 3];
const real_t q_tmp_2_2 = tmp_qloop_0*_data_phi_psi_jac_affine_det_0_0_GRAY[6*q + 5];
q_acc_0_0 = q_acc_0_0 + q_tmp_0_0;
q_acc_1_1 = q_acc_1_1 + q_tmp_1_1;
q_acc_2_2 = q_acc_2_2 + q_tmp_2_2;
}
const real_t elMatDiag_0 = q_acc_0_0;
const real_t elMatDiag_1 = q_acc_1_1;
const real_t elMatDiag_2 = q_acc_2_2;
_data_invDiag_[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))] = elMatDiag_0 + _data_invDiag_[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))];
_data_invDiag_[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1] = elMatDiag_1 + _data_invDiag_[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
_data_invDiag_[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))] = elMatDiag_2 + _data_invDiag_[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
}
}
}
const real_t tmp_coords_jac_0_BLUE = 1.0 / (micro_edges_per_macro_edge_float)*1.0;
const real_t tmp_coords_jac_1_BLUE = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0);
const real_t tmp_coords_jac_2_BLUE = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1);
const real_t tmp_coords_jac_3_BLUE = tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0);
const real_t tmp_coords_jac_4_BLUE = tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1);
const real_t p_affine_const_0_0_BLUE = tmp_coords_jac_1_BLUE;
const real_t p_affine_const_0_1_BLUE = tmp_coords_jac_2_BLUE;
const real_t p_affine_const_1_0_BLUE = macro_vertex_coord_id_0comp0 + tmp_coords_jac_3_BLUE;
const real_t p_affine_const_1_1_BLUE = macro_vertex_coord_id_0comp1 + tmp_coords_jac_4_BLUE;
const real_t p_affine_const_2_0_BLUE = tmp_coords_jac_1_BLUE + tmp_coords_jac_3_BLUE;
const real_t p_affine_const_2_1_BLUE = tmp_coords_jac_2_BLUE + tmp_coords_jac_4_BLUE;
const real_t jac_affine_0_0_BLUE = -p_affine_const_0_0_BLUE + p_affine_const_1_0_BLUE;
const real_t jac_affine_0_1_BLUE = -p_affine_const_0_0_BLUE + p_affine_const_2_0_BLUE;
const real_t jac_affine_1_0_BLUE = -p_affine_const_0_1_BLUE + p_affine_const_1_1_BLUE;
const real_t jac_affine_1_1_BLUE = -p_affine_const_0_1_BLUE + p_affine_const_2_1_BLUE;
const real_t abs_det_jac_affine_BLUE = abs(jac_affine_0_0_BLUE*jac_affine_1_1_BLUE - jac_affine_0_1_BLUE*jac_affine_1_0_BLUE);
{
/* FaceType.BLUE */
const real_t _data_phi_psi_jac_affine_det_0_0_BLUE [] = {((real_t)(abs_det_jac_affine_BLUE*0.11111111111111117)), ((real_t)(abs_det_jac_affine_BLUE*0.11111111111111113)), ((real_t)(abs_det_jac_affine_BLUE*0.11111111111111113)), ((real_t)(abs_det_jac_affine_BLUE*0.1111111111111111)), ((real_t)(abs_det_jac_affine_BLUE*0.1111111111111111)), ((real_t)(abs_det_jac_affine_BLUE*0.1111111111111111)), ((real_t)(abs_det_jac_affine_BLUE*0.040000000000000029)), ((real_t)(abs_det_jac_affine_BLUE*0.040000000000000015)), ((real_t)(abs_det_jac_affine_BLUE*0.12000000000000004)), ((real_t)(abs_det_jac_affine_BLUE*0.040000000000000008)), ((real_t)(abs_det_jac_affine_BLUE*0.12)), ((real_t)(abs_det_jac_affine_BLUE*0.35999999999999999)), ((real_t)(abs_det_jac_affine_BLUE*0.040000000000000008)), ((real_t)(abs_det_jac_affine_BLUE*0.12)), ((real_t)(abs_det_jac_affine_BLUE*0.040000000000000008)), ((real_t)(abs_det_jac_affine_BLUE*0.35999999999999999)), ((real_t)(abs_det_jac_affine_BLUE*0.12)), ((real_t)(abs_det_jac_affine_BLUE*0.040000000000000008)), ((real_t)(abs_det_jac_affine_BLUE*0.3600000000000001)), ((real_t)(abs_det_jac_affine_BLUE*0.12000000000000002)), ((real_t)(abs_det_jac_affine_BLUE*0.12000000000000002)), ((real_t)(abs_det_jac_affine_BLUE*0.040000000000000008)), ((real_t)(abs_det_jac_affine_BLUE*0.040000000000000008)), ((real_t)(abs_det_jac_affine_BLUE*0.040000000000000008))};
for (int64_t ctr_1 = 0; ctr_1 < micro_edges_per_macro_edge; ctr_1 += 1)
{
for (int64_t ctr_0 = 0; ctr_0 < (int64_t)((-ctr_1 + micro_edges_per_macro_edge - 1) / (4)) * (4); ctr_0 += 4)
{
const __m256d k_dof_0 = _mm256_loadu_pd(& _data_k[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]);
const __m256d k_dof_1 = _mm256_loadu_pd(& _data_k[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]);
const __m256d k_dof_2 = _mm256_loadu_pd(& _data_k[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1]);
__m256d q_acc_0_0 = _mm256_set_pd(0.0,0.0,0.0,0.0);
__m256d q_acc_1_1 = _mm256_set_pd(0.0,0.0,0.0,0.0);
__m256d q_acc_2_2 = _mm256_set_pd(0.0,0.0,0.0,0.0);
for (int64_t q = 0; q < 4; q += 1)
{
const __m256d tmp_qloop_0 = _mm256_mul_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(k_dof_0,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]))),_mm256_set_pd(1.0,1.0,1.0,1.0))),_mm256_mul_pd(k_dof_1,_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q]))),_mm256_mul_pd(k_dof_2,_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]))),_mm256_set_pd(_data_q_w[q],_data_q_w[q],_data_q_w[q],_data_q_w[q]));
const __m256d q_tmp_0_0 = _mm256_mul_pd(tmp_qloop_0,_mm256_set_pd(_data_phi_psi_jac_affine_det_0_0_BLUE[6*q],_data_phi_psi_jac_affine_det_0_0_BLUE[6*q],_data_phi_psi_jac_affine_det_0_0_BLUE[6*q],_data_phi_psi_jac_affine_det_0_0_BLUE[6*q]));
const __m256d q_tmp_1_1 = _mm256_mul_pd(tmp_qloop_0,_mm256_set_pd(_data_phi_psi_jac_affine_det_0_0_BLUE[6*q + 3],_data_phi_psi_jac_affine_det_0_0_BLUE[6*q + 3],_data_phi_psi_jac_affine_det_0_0_BLUE[6*q + 3],_data_phi_psi_jac_affine_det_0_0_BLUE[6*q + 3]));
const __m256d q_tmp_2_2 = _mm256_mul_pd(tmp_qloop_0,_mm256_set_pd(_data_phi_psi_jac_affine_det_0_0_BLUE[6*q + 5],_data_phi_psi_jac_affine_det_0_0_BLUE[6*q + 5],_data_phi_psi_jac_affine_det_0_0_BLUE[6*q + 5],_data_phi_psi_jac_affine_det_0_0_BLUE[6*q + 5]));
q_acc_0_0 = _mm256_add_pd(q_acc_0_0,q_tmp_0_0);
q_acc_1_1 = _mm256_add_pd(q_acc_1_1,q_tmp_1_1);
q_acc_2_2 = _mm256_add_pd(q_acc_2_2,q_tmp_2_2);
}
const __m256d elMatDiag_0 = q_acc_0_0;
const __m256d elMatDiag_1 = q_acc_1_1;
const __m256d elMatDiag_2 = q_acc_2_2;
_mm256_storeu_pd(&_data_invDiag_[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1],_mm256_add_pd(elMatDiag_0,_mm256_loadu_pd(& _data_invDiag_[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1])));
_mm256_storeu_pd(&_data_invDiag_[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))],_mm256_add_pd(elMatDiag_1,_mm256_loadu_pd(& _data_invDiag_[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))])));
_mm256_storeu_pd(&_data_invDiag_[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1],_mm256_add_pd(elMatDiag_2,_mm256_loadu_pd(& _data_invDiag_[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1])));
}
for (int64_t ctr_0 = (int64_t)((-ctr_1 + micro_edges_per_macro_edge - 1) / (4)) * (4); ctr_0 < -ctr_1 + micro_edges_per_macro_edge - 1; ctr_0 += 1)
{
const real_t k_dof_0 = _data_k[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
const real_t k_dof_1 = _data_k[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
const real_t k_dof_2 = _data_k[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1];
real_t q_acc_0_0 = 0.0;
real_t q_acc_1_1 = 0.0;
real_t q_acc_2_2 = 0.0;
for (int64_t q = 0; q < 4; q += 1)
{
const real_t tmp_qloop_0 = (k_dof_0*(1.0 - _data_q_p_0[q] - _data_q_p_1[q]) + k_dof_1*_data_q_p_0[q] + k_dof_2*_data_q_p_1[q])*_data_q_w[q];
const real_t q_tmp_0_0 = tmp_qloop_0*_data_phi_psi_jac_affine_det_0_0_BLUE[6*q];
const real_t q_tmp_1_1 = tmp_qloop_0*_data_phi_psi_jac_affine_det_0_0_BLUE[6*q + 3];
const real_t q_tmp_2_2 = tmp_qloop_0*_data_phi_psi_jac_affine_det_0_0_BLUE[6*q + 5];
q_acc_0_0 = q_acc_0_0 + q_tmp_0_0;
q_acc_1_1 = q_acc_1_1 + q_tmp_1_1;
q_acc_2_2 = q_acc_2_2 + q_tmp_2_2;
}
const real_t elMatDiag_0 = q_acc_0_0;
const real_t elMatDiag_1 = q_acc_1_1;
const real_t elMatDiag_2 = q_acc_2_2;
_data_invDiag_[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1] = elMatDiag_0 + _data_invDiag_[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
_data_invDiag_[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))] = elMatDiag_1 + _data_invDiag_[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
_data_invDiag_[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1] = elMatDiag_2 + _data_invDiag_[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1];
}
}
}
}
}
} // namespace operatorgeneration
} // namespace hyteg
/*
* Copyright (c) 2017-2024 Nils Kohl, Daniel Bauer, Fabian Böhm.
*
* This file is part of HyTeG
* (see https://i10git.cs.fau.de/hyteg/hyteg).
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
/*
* The entire file was generated with the HyTeG form generator.
*
* Avoid modifying this file. If buggy, consider fixing the generator itself.
*/
#include "../P1ElementwiseKMass.hpp"

#include <cmath>
#define FUNC_PREFIX
namespace hyteg {
namespace operatorgeneration {
/*
 * Element-wise, matrix-free application of the operator on one 2D macro face:
 * for every micro-element a symmetric 3x3 element matrix is integrated and its
 * product with the local source values is added into the destination array.
 *
 * The element matrix entry (i, j) is
 *    sum_q  w_q * k(x_q) * phi_i(x_q) * phi_j(x_q) * |det J|
 * where k(x_q) is the linear interpolation of the three local k values at the
 * quadrature point and phi_* are the linear shape functions
 * (1 - x - y, x, y) on the reference triangle.
 *
 * Data layout (as implied by the index arithmetic below): vertex values are
 * stored row by row, row r starting at r*(N + 2) - r*(r + 1)/2 with
 * N = micro_edges_per_macro_edge, i.e. consecutive row starts are N + 1 - r apart.
 *
 * Parameters:
 *   _data_dst   destination values; read and incremented in place.
 *   _data_k     nodal values of the scaling coefficient k.
 *   _data_src   source values the operator is applied to.
 *   macro_vertex_coord_id_<v>comp<c>
 *               component c (0 or 1) of macro vertex v (0, 1 or 2).
 *   micro_edges_per_macro_edge
 *               N; bounds both loops over the micro-elements.
 *   micro_edges_per_macro_edge_float
 *               divisor used to scale macro edges down to micro edges
 *               (presumably N as a floating-point value - confirm with caller).
 *
 * NOTE: this file is generated; a lasting fix belongs in the generator.
 */
void P1ElementwiseKMass::apply_macro_2D( real_t * RESTRICT _data_dst, real_t * RESTRICT _data_k, real_t * RESTRICT _data_src, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
{
   // 4-point quadrature rule on the reference triangle (the weights sum to 0.5).
   constexpr int64_t num_q_points = 4;
   const real_t _data_q_w []   = {-0.28125, 0.26041666666666669, 0.26041666666666669, 0.26041666666666669};
   const real_t _data_q_p_0 [] = {0.33333333333333331, 0.20000000000000001, 0.59999999999999998, 0.20000000000000001};
   const real_t _data_q_p_1 [] = {0.33333333333333331, 0.59999999999999998, 0.20000000000000001, 0.20000000000000001};

   // Tabulated products phi_i * phi_j at the quadrature points. Six entries per
   // point, ordered (0,0), (0,1), (0,2), (1,1), (1,2), (2,2). The table is the
   // same for both face types; only the determinant scaling differs.
   constexpr int64_t entries_per_q_point = 6;
   constexpr int64_t num_tabulated       = num_q_points * entries_per_q_point;
   static constexpr double phi_psi_products[num_tabulated] = {
       0.11111111111111117,  0.11111111111111113,  0.11111111111111113,  0.1111111111111111,   0.1111111111111111,   0.1111111111111111,
       0.040000000000000029, 0.040000000000000015, 0.12000000000000004,  0.040000000000000008, 0.12,                 0.35999999999999999,
       0.040000000000000008, 0.12,                 0.040000000000000008, 0.35999999999999999,  0.12,                 0.040000000000000008,
       0.3600000000000001,   0.12000000000000002,  0.12000000000000002,  0.040000000000000008, 0.040000000000000008, 0.040000000000000008};

   // Distance between the leading terms of two consecutive row offsets.
   const int64_t row_stride = micro_edges_per_macro_edge + 2;

   /* ---------------------------- FaceType.GRAY ---------------------------- */

   // Vertices of the GRAY reference micro-element (macro face scaled by 1/N).
   const real_t tmp_coords_jac_0_GRAY = 1.0 / (micro_edges_per_macro_edge_float)*1.0;
   const real_t p_affine_const_0_0_GRAY = macro_vertex_coord_id_0comp0;
   const real_t p_affine_const_0_1_GRAY = macro_vertex_coord_id_0comp1;
   const real_t p_affine_const_1_0_GRAY = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0);
   const real_t p_affine_const_1_1_GRAY = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1);
   const real_t p_affine_const_2_0_GRAY = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0);
   const real_t p_affine_const_2_1_GRAY = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1);
   const real_t jac_affine_0_0_GRAY = -p_affine_const_0_0_GRAY + p_affine_const_1_0_GRAY;
   const real_t jac_affine_0_1_GRAY = -p_affine_const_0_0_GRAY + p_affine_const_2_0_GRAY;
   const real_t jac_affine_1_0_GRAY = -p_affine_const_0_1_GRAY + p_affine_const_1_1_GRAY;
   const real_t jac_affine_1_1_GRAY = -p_affine_const_0_1_GRAY + p_affine_const_2_1_GRAY;
   // std::abs guarantees the floating-point overload; an unqualified abs() may
   // resolve to int abs(int) and truncate the determinant.
   const real_t abs_det_jac_affine_GRAY = std::abs(jac_affine_0_0_GRAY*jac_affine_1_1_GRAY - jac_affine_0_1_GRAY*jac_affine_1_0_GRAY);

   real_t _data_phi_psi_jac_affine_det_0_0_GRAY[num_tabulated];
   for (int64_t i = 0; i < num_tabulated; i += 1)
   {
      _data_phi_psi_jac_affine_det_0_0_GRAY[i] = static_cast< real_t >(abs_det_jac_affine_GRAY*phi_psi_products[i]);
   }

   for (int64_t ctr_1 = 0; ctr_1 < micro_edges_per_macro_edge; ctr_1 += 1)
   {
      const int64_t row_offset      = ctr_1*row_stride - ((ctr_1*(ctr_1 + 1)) / (2));
      const int64_t next_row_offset = (ctr_1 + 1)*row_stride - (((ctr_1 + 1)*(ctr_1 + 2)) / (2));

      for (int64_t ctr_0 = 0; ctr_0 < -ctr_1 + micro_edges_per_macro_edge; ctr_0 += 1)
      {
         // GRAY element: two neighbouring vertices of this row plus the vertex
         // at the same position in the next row.
         const int64_t idx_0 = ctr_0 + row_offset;
         const int64_t idx_1 = ctr_0 + row_offset + 1;
         const int64_t idx_2 = ctr_0 + next_row_offset;

         const real_t src_dof_0 = _data_src[idx_0];
         const real_t src_dof_1 = _data_src[idx_1];
         const real_t src_dof_2 = _data_src[idx_2];
         const real_t k_dof_0 = _data_k[idx_0];
         const real_t k_dof_1 = _data_k[idx_1];
         const real_t k_dof_2 = _data_k[idx_2];

         // Upper triangle of the symmetric element matrix.
         real_t q_acc_0_0 = 0.0;
         real_t q_acc_0_1 = 0.0;
         real_t q_acc_0_2 = 0.0;
         real_t q_acc_1_1 = 0.0;
         real_t q_acc_1_2 = 0.0;
         real_t q_acc_2_2 = 0.0;
         for (int64_t q = 0; q < num_q_points; q += 1)
         {
            // Interpolated k at the quadrature point, times the quadrature weight.
            const real_t tmp_qloop_0 = (k_dof_0*(1.0 - _data_q_p_0[q] - _data_q_p_1[q]) + k_dof_1*_data_q_p_0[q] + k_dof_2*_data_q_p_1[q])*_data_q_w[q];
            const real_t q_tmp_0_0 = tmp_qloop_0*_data_phi_psi_jac_affine_det_0_0_GRAY[6*q];
            const real_t q_tmp_0_1 = tmp_qloop_0*_data_phi_psi_jac_affine_det_0_0_GRAY[6*q + 1];
            const real_t q_tmp_0_2 = tmp_qloop_0*_data_phi_psi_jac_affine_det_0_0_GRAY[6*q + 2];
            const real_t q_tmp_1_1 = tmp_qloop_0*_data_phi_psi_jac_affine_det_0_0_GRAY[6*q + 3];
            const real_t q_tmp_1_2 = tmp_qloop_0*_data_phi_psi_jac_affine_det_0_0_GRAY[6*q + 4];
            const real_t q_tmp_2_2 = tmp_qloop_0*_data_phi_psi_jac_affine_det_0_0_GRAY[6*q + 5];
            q_acc_0_0 = q_acc_0_0 + q_tmp_0_0;
            q_acc_0_1 = q_acc_0_1 + q_tmp_0_1;
            q_acc_0_2 = q_acc_0_2 + q_tmp_0_2;
            q_acc_1_1 = q_acc_1_1 + q_tmp_1_1;
            q_acc_1_2 = q_acc_1_2 + q_tmp_1_2;
            q_acc_2_2 = q_acc_2_2 + q_tmp_2_2;
         }

         // Local mat-vec with the symmetric element matrix, scattered additively.
         const real_t elMatVec_0 = q_acc_0_0*src_dof_0 + q_acc_0_1*src_dof_1 + q_acc_0_2*src_dof_2;
         const real_t elMatVec_1 = q_acc_0_1*src_dof_0 + q_acc_1_1*src_dof_1 + q_acc_1_2*src_dof_2;
         const real_t elMatVec_2 = q_acc_0_2*src_dof_0 + q_acc_1_2*src_dof_1 + q_acc_2_2*src_dof_2;
         _data_dst[idx_0] = elMatVec_0 + _data_dst[idx_0];
         _data_dst[idx_1] = elMatVec_1 + _data_dst[idx_1];
         _data_dst[idx_2] = elMatVec_2 + _data_dst[idx_2];
      }
   }

   /* ---------------------------- FaceType.BLUE ---------------------------- */

   // Vertices of the BLUE reference micro-element.
   const real_t tmp_coords_jac_0_BLUE = 1.0 / (micro_edges_per_macro_edge_float)*1.0;
   const real_t tmp_coords_jac_1_BLUE = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0);
   const real_t tmp_coords_jac_2_BLUE = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1);
   const real_t tmp_coords_jac_3_BLUE = tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0);
   const real_t tmp_coords_jac_4_BLUE = tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1);
   const real_t p_affine_const_0_0_BLUE = tmp_coords_jac_1_BLUE;
   const real_t p_affine_const_0_1_BLUE = tmp_coords_jac_2_BLUE;
   const real_t p_affine_const_1_0_BLUE = macro_vertex_coord_id_0comp0 + tmp_coords_jac_3_BLUE;
   const real_t p_affine_const_1_1_BLUE = macro_vertex_coord_id_0comp1 + tmp_coords_jac_4_BLUE;
   const real_t p_affine_const_2_0_BLUE = tmp_coords_jac_1_BLUE + tmp_coords_jac_3_BLUE;
   const real_t p_affine_const_2_1_BLUE = tmp_coords_jac_2_BLUE + tmp_coords_jac_4_BLUE;
   const real_t jac_affine_0_0_BLUE = -p_affine_const_0_0_BLUE + p_affine_const_1_0_BLUE;
   const real_t jac_affine_0_1_BLUE = -p_affine_const_0_0_BLUE + p_affine_const_2_0_BLUE;
   const real_t jac_affine_1_0_BLUE = -p_affine_const_0_1_BLUE + p_affine_const_1_1_BLUE;
   const real_t jac_affine_1_1_BLUE = -p_affine_const_0_1_BLUE + p_affine_const_2_1_BLUE;
   // See the GRAY branch: std::abs keeps the determinant in floating point.
   const real_t abs_det_jac_affine_BLUE = std::abs(jac_affine_0_0_BLUE*jac_affine_1_1_BLUE - jac_affine_0_1_BLUE*jac_affine_1_0_BLUE);

   real_t _data_phi_psi_jac_affine_det_0_0_BLUE[num_tabulated];
   for (int64_t i = 0; i < num_tabulated; i += 1)
   {
      _data_phi_psi_jac_affine_det_0_0_BLUE[i] = static_cast< real_t >(abs_det_jac_affine_BLUE*phi_psi_products[i]);
   }

   for (int64_t ctr_1 = 0; ctr_1 < micro_edges_per_macro_edge; ctr_1 += 1)
   {
      const int64_t row_offset      = ctr_1*row_stride - ((ctr_1*(ctr_1 + 1)) / (2));
      const int64_t next_row_offset = (ctr_1 + 1)*row_stride - (((ctr_1 + 1)*(ctr_1 + 2)) / (2));

      // One BLUE element fewer than GRAY elements in every row.
      for (int64_t ctr_0 = 0; ctr_0 < -ctr_1 + micro_edges_per_macro_edge - 1; ctr_0 += 1)
      {
         // BLUE element: the right vertex of this row plus two neighbouring
         // vertices of the next row.
         const int64_t idx_0 = ctr_0 + row_offset + 1;
         const int64_t idx_1 = ctr_0 + next_row_offset;
         const int64_t idx_2 = ctr_0 + next_row_offset + 1;

         const real_t src_dof_0 = _data_src[idx_0];
         const real_t src_dof_1 = _data_src[idx_1];
         const real_t src_dof_2 = _data_src[idx_2];
         const real_t k_dof_0 = _data_k[idx_0];
         const real_t k_dof_1 = _data_k[idx_1];
         const real_t k_dof_2 = _data_k[idx_2];

         // Upper triangle of the symmetric element matrix.
         real_t q_acc_0_0 = 0.0;
         real_t q_acc_0_1 = 0.0;
         real_t q_acc_0_2 = 0.0;
         real_t q_acc_1_1 = 0.0;
         real_t q_acc_1_2 = 0.0;
         real_t q_acc_2_2 = 0.0;
         for (int64_t q = 0; q < num_q_points; q += 1)
         {
            // Interpolated k at the quadrature point, times the quadrature weight.
            const real_t tmp_qloop_0 = (k_dof_0*(1.0 - _data_q_p_0[q] - _data_q_p_1[q]) + k_dof_1*_data_q_p_0[q] + k_dof_2*_data_q_p_1[q])*_data_q_w[q];
            const real_t q_tmp_0_0 = tmp_qloop_0*_data_phi_psi_jac_affine_det_0_0_BLUE[6*q];
            const real_t q_tmp_0_1 = tmp_qloop_0*_data_phi_psi_jac_affine_det_0_0_BLUE[6*q + 1];
            const real_t q_tmp_0_2 = tmp_qloop_0*_data_phi_psi_jac_affine_det_0_0_BLUE[6*q + 2];
            const real_t q_tmp_1_1 = tmp_qloop_0*_data_phi_psi_jac_affine_det_0_0_BLUE[6*q + 3];
            const real_t q_tmp_1_2 = tmp_qloop_0*_data_phi_psi_jac_affine_det_0_0_BLUE[6*q + 4];
            const real_t q_tmp_2_2 = tmp_qloop_0*_data_phi_psi_jac_affine_det_0_0_BLUE[6*q + 5];
            q_acc_0_0 = q_acc_0_0 + q_tmp_0_0;
            q_acc_0_1 = q_acc_0_1 + q_tmp_0_1;
            q_acc_0_2 = q_acc_0_2 + q_tmp_0_2;
            q_acc_1_1 = q_acc_1_1 + q_tmp_1_1;
            q_acc_1_2 = q_acc_1_2 + q_tmp_1_2;
            q_acc_2_2 = q_acc_2_2 + q_tmp_2_2;
         }

         // Local mat-vec with the symmetric element matrix, scattered additively.
         const real_t elMatVec_0 = q_acc_0_0*src_dof_0 + q_acc_0_1*src_dof_1 + q_acc_0_2*src_dof_2;
         const real_t elMatVec_1 = q_acc_0_1*src_dof_0 + q_acc_1_1*src_dof_1 + q_acc_1_2*src_dof_2;
         const real_t elMatVec_2 = q_acc_0_2*src_dof_0 + q_acc_1_2*src_dof_1 + q_acc_2_2*src_dof_2;
         _data_dst[idx_0] = elMatVec_0 + _data_dst[idx_0];
         _data_dst[idx_1] = elMatVec_1 + _data_dst[idx_1];
         _data_dst[idx_2] = elMatVec_2 + _data_dst[idx_2];
      }
   }
}
} // namespace operatorgeneration
} // namespace hyteg
This diff is collapsed.
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment