diff --git a/generate_all_hyteg_forms.py b/generate_all_hyteg_forms.py
index e2731976571de4544c7147174f04d6116ad3ba56..313c5a185c90b50c7f510317793eb1b4e4c0eba8 100644
--- a/generate_all_hyteg_forms.py
+++ b/generate_all_hyteg_forms.py
@@ -24,7 +24,7 @@ import sympy as sp
 from sympy.core.cache import clear_cache
 import re
 
-from hog.blending import GeometryMap, IdentityMap, ExternalMap
+from hog.blending import GeometryMap, IdentityMap, ExternalMap, AnnulusMap
 from hog.element_geometry import TriangleElement, TetrahedronElement, ElementGeometry
 from hog.function_space import FunctionSpace, LagrangianFunctionSpace, N1E1Space
 from hog.forms import (
@@ -38,9 +38,10 @@ from hog.forms import (
     divergence,
     full_stokes,
     divdiv,
+    supg_diffusion,
 )
 from hog.forms_vectorial import mass_n1e1, curl_curl
-from hog.quadrature import Quadrature
+from hog.quadrature import Quadrature, select_quadrule
 from hog.exception import HOGException
 from hog.logger import TimedLogger
 from hog.symbolizer import Symbolizer
@@ -419,6 +420,20 @@ form_infos = [
         description="Implements a linear form of type: (k(x), psi) where psi a test function and k = k(x) a vectorial, external function.",
         integrate_rows=[],
     ),
+    FormInfo(
+        "supg_diffusion",
+        trial_degree=2,
+        test_degree=2,
+        quad_schemes={2: 4, 3: 4},
+        blending=ExternalMap(),
+    ),
+    FormInfo(
+        "supg_diffusion",
+        trial_degree=2,
+        test_degree=2,
+        quad_schemes={2: 4, 3: 4},
+        blending=AnnulusMap(),
+    )
 ]
 
 for d in [1, 2]:
@@ -658,6 +673,8 @@ def form_func(
             transpose=False,
             blending=blending,
         ).integrate(quad, symbolizer)
+    elif name.startswith("supg_d"):
+        raise HOGException(f"SUPG Diffusion is not supported for form generation")
     else:
         raise HOGException(f"Cannot call form function with name {name}.")
 
@@ -821,7 +838,8 @@ def main():
                             f"- Generating code for class {form_info.class_name(row,col)}, {geometry.dimensions}D"
                         ):
                             quad = Quadrature(
-                                form_info.quad_schemes[geometry.dimensions],
+                                select_quadrule(form_info.quad_schemes[geometry.dimensions], geometry),
+                                # form_info.quad_schemes[geometry.dimensions],
                                 geometry,
                                 inline_values=form_info.inline_quad,
                             )
diff --git a/generate_all_operators.py b/generate_all_operators.py
index 4159e66c6e5bcfc83d0cda7c365b93a855109b9c..a3a4bf0a71117e8ea8d7c01d0d3afb1f5351f74d 100644
--- a/generate_all_operators.py
+++ b/generate_all_operators.py
@@ -42,6 +42,7 @@ from hog.forms import (
     full_stokes,
     nonlinear_diffusion,
     nonlinear_diffusion_newton_galerkin,
+    supg_diffusion,
 )
 from hog.forms_vectorial import curl_curl, curl_curl_plus_mass, mass_n1e1
 from hog.function_space import (
@@ -353,7 +354,7 @@ def main():
             args.quad_degree if args.quad_rule is None else args.quad_rule,
         )
     }
-
+    
     enabled_geometries: Set[TriangleElement | TetrahedronElement] = set()
     if 2 in args.dimensions:
         enabled_geometries.add(TriangleElement())
@@ -587,6 +588,10 @@ def all_operators(
     ops.append(OperatorInfo(mapping="P1Vector", name="Diffusion", trial_space=P1Vector, test_space=P1Vector,
                             form=diffusion, type_descriptor=type_descriptor, geometries=list(geometries), opts=opts, blending=blending))
 
+    ops.append(OperatorInfo(mapping="P2", name="SUPGDiffusion", trial_space=P2, test_space=P2, 
+                            form=partial(supg_diffusion, velocity_function_space=P2, diffusivityXdelta_function_space=P2), 
+                            type_descriptor=type_descriptor, geometries=list(geometries), opts=opts, blending=blending))
+    
     # fmt: on
 
     p2vec_epsilon = partial(
diff --git a/hog/blending.py b/hog/blending.py
index fede0c1f40e5243e78fa2caeb5a7d045a3426f49..7a8f884e0924f6882fdf65593e416c38fe0c1bf8 100644
--- a/hog/blending.py
+++ b/hog/blending.py
@@ -49,6 +49,10 @@ class GeometryMap:
     def jacobian(self, x: sp.Matrix) -> sp.Matrix:
         """Evaluates the Jacobian of the geometry map at the passed point."""
         raise HOGException("jacobian() not implemented for this map.")
+    
+    def hessian(self, x: sp.Matrix) -> List[sp.Matrix]:
+        """Evaluates the hessian of the geometry map at the passed point."""
+        raise HOGException("hessian() not implemented for this map.")
 
     def coupling_includes(self) -> List[str]:
         """Returns a list of files that better be included into the C++ files when this map is used."""
@@ -182,6 +186,55 @@ class AnnulusMap(GeometryMap):
 
         return jac
 
+    def hessian(self, x: sp.Matrix) -> List[sp.Matrix]:
+        """Evaluates the Jacobian of the geometry map at the passed point."""
+
+        if sp.shape(x) != (2, 1):
+            raise HOGException("Invalid input shape for AnnulusMap.")
+
+        radRefVertex = self.radRefVertex
+        radRayVertex = self.radRayVertex
+        refVertex = self.refVertex
+        rayVertex = self.rayVertex
+        thrVertex = self.thrVertex
+
+        xAnnulus, yAnnulus = sp.symbols("x y")
+
+        dist = radRefVertex - radRayVertex
+        areaT = (refVertex[0] - rayVertex[0]) * (thrVertex[1] - rayVertex[1]) - (
+            refVertex[1] - rayVertex[1]
+        ) * (thrVertex[0] - rayVertex[0])
+        areaX = (xAnnulus - rayVertex[0]) * (thrVertex[1] - rayVertex[1]) - (
+            yAnnulus - rayVertex[1]
+        ) * (thrVertex[0] - rayVertex[0])
+        bary = areaX / areaT
+        oldRad = sp.sqrt(xAnnulus * xAnnulus + yAnnulus * yAnnulus)
+        newRad = radRayVertex + bary * dist
+
+        invNorm = 1.0 / oldRad
+        invNorm3 = invNorm * invNorm * invNorm
+        tmp0 = invNorm * dist / areaT
+        tmp1 = xAnnulus * tmp0
+        tmp2 = yAnnulus * tmp0
+        tmp3 = thrVertex[1] - rayVertex[1]
+        tmp4 = thrVertex[0] - rayVertex[0]
+        tmp5 = xAnnulus * invNorm3 * newRad
+        tmp6 = yAnnulus * invNorm3 * newRad
+
+        jac = sp.Matrix(
+            [
+                [yAnnulus * tmp6 + tmp1 * tmp3, -xAnnulus * tmp6 - tmp1 * tmp4],
+                [-yAnnulus * tmp5 + tmp2 * tmp3, xAnnulus * tmp5 - tmp2 * tmp4],
+            ]
+        ).T
+
+        hess = [
+            sp.diff(jac, xAnnulus).subs([(xAnnulus, x[0]), (yAnnulus, x[1])]),
+            sp.diff(jac, yAnnulus).subs([(xAnnulus, x[0]), (yAnnulus, x[1])]),
+        ]
+
+        return hess
+
     def coupling_includes(self) -> List[str]:
         return ["hyteg/geometry/AnnulusMap.hpp"]
 
@@ -373,6 +426,108 @@ class IcosahedralShellMap(GeometryMap):
 
         return jac
 
+    def hessian(self, x_: sp.Matrix) -> List[sp.Matrix]:
+        """Evaluates the Jacobian of the geometry map at the passed point."""
+
+        if sp.shape(x_) != (3, 1):
+            raise HOGException("Invalid input shape for IcosahedralShellMap.")
+
+        xAnnulus, yAnnulus, zAnnulus = sp.symbols("x y z")
+
+        x = [xAnnulus, yAnnulus, zAnnulus]
+
+        radRefVertex = self.radRefVertex
+        radRayVertex = self.radRayVertex
+        refVertex = self.refVertex
+        rayVertex = self.rayVertex
+        thrVertex = self.thrVertex
+        forVertex = self.forVertex
+
+        tmp0 = x[0] * x[0]
+        tmp1 = rayVertex[2] - refVertex[2]
+        tmp2 = rayVertex[0] - thrVertex[0]
+        tmp3 = rayVertex[1] - forVertex[1]
+        tmp4 = tmp2 * tmp3
+        tmp5 = rayVertex[1] - refVertex[1]
+        tmp6 = rayVertex[0] - forVertex[0]
+        tmp7 = rayVertex[2] - thrVertex[2]
+        tmp8 = tmp6 * tmp7
+        tmp9 = rayVertex[0] - refVertex[0]
+        tmp10 = rayVertex[1] - thrVertex[1]
+        tmp11 = rayVertex[2] - forVertex[2]
+        tmp12 = tmp10 * tmp11
+        tmp13 = tmp11 * tmp2
+        tmp14 = tmp10 * tmp6
+        tmp15 = tmp3 * tmp7
+        tmp16 = (
+            -tmp1 * tmp14
+            + tmp1 * tmp4
+            + tmp12 * tmp9
+            - tmp13 * tmp5
+            - tmp15 * tmp9
+            + tmp5 * tmp8
+        )
+        tmp17 = radRayVertex - radRefVertex
+        tmp18 = rayVertex[2] - x[2]
+        tmp19 = rayVertex[1] - x[1]
+        tmp20 = rayVertex[0] - x[0]
+        tmp21 = radRayVertex * tmp16 - tmp17 * (
+            tmp12 * tmp20
+            - tmp13 * tmp19
+            - tmp14 * tmp18
+            - tmp15 * tmp20
+            + tmp18 * tmp4
+            + tmp19 * tmp8
+        )
+        tmp22 = x[1] * x[1]
+        tmp23 = x[2] * x[2]
+        tmp24 = tmp0 + tmp22 + tmp23
+        tmp25 = tmp17 * (tmp12 - tmp15)
+        tmp26 = 1.0 / (tmp16 * tmp24 ** (sp.Rational(3, 2)))
+        tmp27 = tmp13 - tmp8
+        tmp28 = tmp17 * tmp24
+        tmp29 = tmp21 * x[1] + tmp27 * tmp28
+        tmp30 = tmp26 * x[0]
+        tmp31 = -tmp14 + tmp4
+        tmp32 = -tmp21 * x[2] + tmp28 * tmp31
+        tmp33 = -tmp21 * x[0] + tmp24 * tmp25
+        tmp34 = tmp26 * x[1]
+        tmp35 = tmp26 * x[2]
+
+        jac = sp.Matrix(
+            [
+                [
+                    tmp26 * (-tmp0 * tmp21 + tmp24 * (tmp21 + tmp25 * x[0])),
+                    -tmp29 * tmp30,
+                    tmp30 * tmp32,
+                ],
+                [
+                    tmp33 * tmp34,
+                    tmp26 * (-tmp21 * tmp22 + tmp24 * (-tmp17 * tmp27 * x[1] + tmp21)),
+                    tmp32 * tmp34,
+                ],
+                [
+                    tmp33 * tmp35,
+                    -tmp29 * tmp35,
+                    tmp26 * (-tmp21 * tmp23 + tmp24 * (tmp17 * tmp31 * x[2] + tmp21)),
+                ],
+            ]
+        ).T
+
+        hess = [
+            sp.diff(jac, xAnnulus).subs(
+                [(xAnnulus, x_[0]), (yAnnulus, x_[1]), (zAnnulus, x_[2])]
+            ),
+            sp.diff(jac, yAnnulus).subs(
+                [(xAnnulus, x_[0]), (yAnnulus, x_[1]), (zAnnulus, x_[2])]
+            ),
+            sp.diff(jac, zAnnulus).subs(
+                [(xAnnulus, x_[0]), (yAnnulus, x_[1]), (zAnnulus, x_[2])]
+            ),
+        ]
+
+        return hess
+
     def coupling_includes(self) -> List[str]:
         return ["hyteg/geometry/IcosahedralShellMap.hpp"]
 
diff --git a/hog/code_generation.py b/hog/code_generation.py
index c260e3d8efff2998bb2cc6e31d589a184c0aa1fb..e20e3197b26b988b002ed119d87ff6e4c9a0fd09 100644
--- a/hog/code_generation.py
+++ b/hog/code_generation.py
@@ -29,6 +29,7 @@ from hog.fem_helpers import (
     jac_ref_to_affine,
     trafo_ref_to_affine,
     jac_blending_evaluate,
+    hess_blending_evaluate,
     abs_det_jac_blending_eval_symbols,
     jac_blending_inv_eval_symbols,
 )
@@ -159,9 +160,9 @@ def replace_multi_assignments(
         # Actually filling the dict.
         for ma in multi_assignments:
             replacement_symbol = replacement_symbols[ma.output_arg()]
-            multi_assignments_replacement_symbols[ma.unique_identifier] = (
-                replacement_symbol
-            )
+            multi_assignments_replacement_symbols[
+                ma.unique_identifier
+            ] = replacement_symbol
 
     if multi_assignments_replacement_symbols:
         with TimedLogger(
@@ -288,7 +289,6 @@ def blending_jacobi_matrix_assignments(
     blending: GeometryMap,
     quad_info: Quadrature,
 ) -> List[Assignment]:
-
     assignments = []
 
     free_symbols = element_matrix.free_symbols | {
@@ -357,6 +357,58 @@ def blending_jacobi_matrix_assignments(
     return assignments
 
 
+def hessian_matrix_assignments(
+    element_matrix: sp.Matrix,
+    quad_stmts: List[sp.codegen.ast.CodegenAST],
+    geometry: ElementGeometry,
+    symbolizer: Symbolizer,
+    affine_points: Optional[List[sp.Matrix]],
+    blending: GeometryMap,
+    quad_info: Quadrature,
+) -> List[Assignment]:
+    assignments = []
+
+    free_symbols = element_matrix.free_symbols | {
+        free_symbol
+        for stmt in quad_stmts
+        for free_symbol in stmt.undefined_symbols
+        if isinstance(stmt, Node)
+    }
+
+    for i_q_pt, point in enumerate(quad_info._point_symbols):
+        hess_blend_symbol = symbolizer.hessian_blending_map(
+            geometry.dimensions, q_pt=f"_q_{i_q_pt}"
+        )
+
+        hess_blending_in_expr = set.union(
+            *[
+                set(hess_blend_symbol[i]).intersection(free_symbols)
+                for i in range(geometry.dimensions)
+            ]
+        )
+
+        # Just an early exit. Not strictly required, but might accelerate this process in some cases.
+        if hess_blending_in_expr:
+            if isinstance(blending, ExternalMap):
+                HOGException("Not implemented or cannot be?")
+
+            hess_blend_expr = hess_blending_evaluate(symbolizer, geometry, blending)
+            # Collecting all expressions to parse for step 3.
+            spat_coord_subs = {}
+            for idx, symbol in enumerate(
+                symbolizer.ref_coords_as_list(geometry.dimensions)
+            ):
+                spat_coord_subs[symbol] = point[idx]
+
+            for d in range(geometry.dimensions):
+                hess_blend_expr_sub = hess_blend_expr[d].subs(spat_coord_subs)
+                for s_ij, e_ij in zip(hess_blend_symbol[d], hess_blend_expr_sub):
+                    if s_ij in hess_blending_in_expr:
+                        assignments.append(SympyAssignment(s_ij, e_ij))
+
+    return assignments
+
+
 def code_block_from_element_matrix(
     element_matrix: sp.Matrix,
     quad: Quadrature,
diff --git a/hog/external_functions.py b/hog/external_functions.py
index 6319ba2871d6b983f1611d45b6af2d4bb7fc392b..c22943568e72627f142cdb35cc116b95c8c5c8c6 100644
--- a/hog/external_functions.py
+++ b/hog/external_functions.py
@@ -76,6 +76,32 @@ geometryMap_->evalDF( mappedPt, DPsi );
 *out_3 = DPsi( 1, 1 );"""
 
 
+class BlendingDFInvDFTriangle(MultiAssignment):
+    def function_name(self):
+        return "Blending_DFInvDF_Triangle"
+
+    @classmethod
+    def num_input_args(cls):
+        return 2
+
+    @classmethod
+    def num_output_args(cls):
+        return 2 * 2 * 2
+
+    def implementation(self):
+        return """Point3D  mappedPt( {in_0, in_1, 0} );
+Matrixr< 2, 4 > DInvDPsi;
+geometryMap_->evalDFinvDF( mappedPt, DInvDPsi );
+*out_0 = DInvDPsi( 0, 0 );
+*out_1 = DInvDPsi( 0, 1 );
+*out_2 = DInvDPsi( 0, 2 );
+*out_3 = DInvDPsi( 0, 3 );
+*out_4 = DInvDPsi( 1, 0 );
+*out_5 = DInvDPsi( 1, 1 );
+*out_6 = DInvDPsi( 1, 2 );
+*out_7 = DInvDPsi( 1, 3 );"""
+
+
 class BlendingDFTetrahedron(MultiAssignment):
     def function_name(self):
         return "Blending_DF_Tetrahedron"
diff --git a/hog/fem_helpers.py b/hog/fem_helpers.py
index 7696e360d4961ae231d5dc8576e6c0e39e38ca86..13c5dff89aad186e7034e229812d13a5678e1f5b 100644
--- a/hog/fem_helpers.py
+++ b/hog/fem_helpers.py
@@ -41,6 +41,7 @@ from hog.external_functions import (
     BlendingFTetrahedron,
     BlendingDFTetrahedron,
     BlendingDFTriangle,
+    BlendingDFInvDFTriangle,
     ScalarVariableCoefficient2D,
     ScalarVariableCoefficient3D,
     VectorVariableCoefficient3D,
@@ -65,6 +66,8 @@ class ElementMatrixData:
     test_shape: sp.Expr
     trial_shape_grad: sp.MatrixBase
     test_shape_grad: sp.MatrixBase
+    trial_shape_hessian: sp.MatrixBase
+    test_shape_hessian: sp.MatrixBase
     row: int
     col: int
 
@@ -73,17 +76,27 @@ def element_matrix_iterator(
     trial: FunctionSpace, test: FunctionSpace, geometry: ElementGeometry
 ) -> Iterator[ElementMatrixData]:
     """Call this to create a generator to conveniently fill the element matrix."""
-    for row, (psi, grad_psi) in enumerate(
-        zip(test.shape(geometry), test.grad_shape(geometry))
+    for row, (psi, grad_psi, hessian_psi) in enumerate(
+        zip(
+            test.shape(geometry),
+            test.grad_shape(geometry),
+            test.hessian_shape(geometry),
+        )
     ):
-        for col, (phi, grad_phi) in enumerate(
-            zip(trial.shape(geometry), trial.grad_shape(geometry))
+        for col, (phi, grad_phi, hessian_phi) in enumerate(
+            zip(
+                trial.shape(geometry),
+                trial.grad_shape(geometry),
+                trial.hessian_shape(geometry),
+            )
         ):
             yield ElementMatrixData(
                 trial_shape=phi,
                 trial_shape_grad=grad_phi,
+                trial_shape_hessian=hessian_phi,
                 test_shape=psi,
                 test_shape_grad=grad_psi,
+                test_shape_hessian=hessian_psi,
                 row=row,
                 col=col,
             )
@@ -254,6 +267,14 @@ def jac_blending_evaluate(
     jac = blending.jacobian(trafo_ref_to_affine(geometry, symbolizer, affine_points))
     return jac
 
+def hess_blending_evaluate(
+    symbolizer: Symbolizer, geometry: ElementGeometry, blending: GeometryMap
+) -> List[sp.Matrix]:
+    affine_points = symbolizer.affine_vertices_as_vectors(
+        geometry.dimensions, geometry.num_vertices
+    )
+    hess = blending.hessian(trafo_ref_to_affine(geometry, symbolizer, affine_points))
+    return hess
 
 def abs_det_jac_blending_eval_symbols(
     geometry: ElementGeometry, symbolizer: Symbolizer, q_pt: str = ""
@@ -269,6 +290,44 @@ def jac_blending_inv_eval_symbols(
     return inv(jac_blending)
 
 
+def hessian_ref_to_affine(
+    geometry: ElementGeometry, hessian_ref: sp.Matrix, Jinv: sp.Matrix
+) -> sp.Matrix:
+    hessian_affine = Jinv.T * hessian_ref * Jinv
+    return hessian_affine
+
+
+def hessian_affine_to_blending(
+    geometry: ElementGeometry,
+    hessian_affine: sp.Matrix,
+    hessian_blending_map: List[sp.Matrix],
+    Jinv: sp.Matrix,
+    shape_grad_affine: sp.Matrix,
+) -> sp.Matrix:
+    """
+    This stack answer was for nonlinear FE mapping (Q2 elements) but just using the same derivation for our blending nonlinear mapping
+    https://scicomp.stackexchange.com/q/36780
+    """
+
+    jacinvjac_blending = []
+    # jacinvjac_blending = sp.MutableDenseNDimArray(hessian_blending_map) * 0.0
+
+    for i in range(geometry.dimensions):
+        jacinvjac_blending.append(-Jinv * hessian_blending_map[i] * Jinv)
+
+    hessian_blending = Jinv * hessian_affine * Jinv.T
+
+    d = geometry.dimensions
+    aux_matrix = sp.zeros(d, d)
+
+    for i in range(geometry.dimensions):
+        aux_matrix[:, i] = jacinvjac_blending[i] * shape_grad_affine
+
+    hessian_blending += Jinv * aux_matrix
+
+    return hessian_blending
+
+
 def scalar_space_dependent_coefficient(
     name: str,
     geometry: ElementGeometry,
diff --git a/hog/forms.py b/hog/forms.py
index d9ba81b8ea82d4e71f9a0138e429ce7600302355..8f316d6f6dfa1b085a763c5c7a805713775d8082 100644
--- a/hog/forms.py
+++ b/hog/forms.py
@@ -20,12 +20,14 @@ import sympy as sp
 from typing import List, Optional, Tuple
 
 from hog.ast import Operations, count_operations
-from hog.element_geometry import ElementGeometry
+from hog.element_geometry import ElementGeometry, TriangleElement, TetrahedronElement
 from hog.exception import HOGException
 from hog.fem_helpers import (
     trafo_ref_to_affine,
     jac_ref_to_affine,
     jac_affine_to_physical,
+    hessian_ref_to_affine,
+    hessian_affine_to_blending,
     create_empty_element_matrix,
     element_matrix_iterator,
     scalar_space_dependent_coefficient,
@@ -1293,6 +1295,7 @@ where
         docstring=docstring,
     )
 
+
 def shear_heating(
     trial: FunctionSpace,
     test: FunctionSpace,
@@ -1530,6 +1533,8 @@ The resulting matrix must be multiplied with a vector of ones to be used as the
         docstring=docstring,
     )
 
+
+
 def divdiv(
     trial: FunctionSpace,
     test: FunctionSpace,
@@ -1635,6 +1640,185 @@ Weak formulation
     )
 
 
+def supg_diffusion(
+    trial: FunctionSpace,
+    test: FunctionSpace,
+    geometry: ElementGeometry,
+    symbolizer: Symbolizer,
+    velocity_function_space: FunctionSpace,
+    diffusivityXdelta_function_space: FunctionSpace,
+    blending: GeometryMap = IdentityMap(),
+) -> Form:
+    docstring = f"""
+Form for SUPGDiffusion operator used for SUPG stabilisation
+
+Geometry map: {blending}
+
+Weak formulation
+
+    T: trial function (space: {trial})
+    s: test function  (space: {test})
+    u: velocity function (space: {velocity_function_space})
+   k𝛿: FE function representing k·𝛿 (space: {diffusivityXdelta_function_space})
+    
+    For OpGen,
+
+    ∫ k(ΔT) · 𝛿(u · ∇s)
+
+    -------------------
+
+    For ExternalMap (only for testing, currently not supported),
+
+    ∫ (ΔT) s
+"""
+
+    if trial != test:
+        raise HOGException(
+            "Trial space must be equal to test space to assemble SUPG diffusion matrix."
+        )
+
+    with TimedLogger("assembling second derivative matrix", level=logging.DEBUG):
+        tabulation = Tabulation(symbolizer)
+
+        jac_affine = symbolizer.jac_ref_to_affine(geometry.dimensions)
+        jac_affine_inv = symbolizer.jac_ref_to_affine_inv(geometry.dimensions)
+        jac_affine_det = symbolizer.abs_det_jac_ref_to_affine()
+
+        if isinstance(blending, ExternalMap):
+            HOGException("ExternalMap is not supported")
+        else:
+            affine_coords = trafo_ref_to_affine(geometry, symbolizer)
+            jac_blending = symbolizer.jac_affine_to_blending(geometry.dimensions)
+            jac_blending_inv = symbolizer.jac_affine_to_blending_inv(
+                geometry.dimensions
+            )
+            jac_blending_det = symbolizer.abs_det_jac_affine_to_blending()
+            if not isinstance(blending, IdentityMap):
+                # hessian_blending_map = blending.hessian(affine_coords)
+                hessian_blending_map = symbolizer.hessian_blending_map(geometry.dimensions)
+
+        # jac_blending_det = abs(det(jac_blending))
+        # with TimedLogger("inverting blending Jacobian", level=logging.DEBUG):
+        #     jac_blending_inv = inv(jac_blending)
+
+        mat = create_empty_element_matrix(trial, test, geometry)
+        it = element_matrix_iterator(trial, test, geometry)
+
+        if velocity_function_space != None and diffusivityXdelta_function_space != None:
+            u_eval_symbols = tabulation.register_phi_evals(
+                velocity_function_space.shape(geometry)
+            )
+
+            ux, _ = fem_function_on_element(
+                velocity_function_space,
+                geometry,
+                symbolizer,
+                domain="reference",
+                function_id="ux",
+                basis_eval=u_eval_symbols,
+            )
+
+            uy, _ = fem_function_on_element(
+                velocity_function_space,
+                geometry,
+                symbolizer,
+                domain="reference",
+                function_id="uy",
+                basis_eval=u_eval_symbols,
+            )
+
+            if isinstance(geometry, TetrahedronElement):
+                uz, _ = fem_function_on_element(
+                    velocity_function_space,
+                    geometry,
+                    symbolizer,
+                    domain="reference",
+                    function_id="uz",
+                    basis_eval=u_eval_symbols,
+                )
+                u = sp.Matrix([[ux], [uy], [uz]])
+            else:
+                u = sp.Matrix([[ux], [uy]])
+
+            kdelta_eval_symbols = tabulation.register_phi_evals(
+                diffusivityXdelta_function_space.shape(geometry)
+            )
+
+            kdelta, _ = fem_function_on_element(
+                diffusivityXdelta_function_space,
+                geometry,
+                symbolizer,
+                domain="reference",
+                function_id="kdelta",
+                basis_eval=kdelta_eval_symbols,
+            )
+
+        for data in it:
+            psi = data.test_shape
+            grad_phi = data.trial_shape_grad
+            grad_psi = data.test_shape_grad
+            hessian_phi = data.trial_shape_hessian
+            hessian_affine = hessian_ref_to_affine(
+                geometry, hessian_phi, jac_affine_inv
+            )
+
+            hessian_affine_symbols = tabulation.register_factor(
+                "hessian_affine",
+                hessian_affine,
+            )
+
+            jac_affine_inv_T_grad_phi_symbols = tabulation.register_factor(
+                "jac_affine_inv_T_grad_phi",
+                jac_affine_inv.T * grad_phi,
+            )
+
+            jac_affine_inv_T_grad_psi_symbols = tabulation.register_factor(
+                "jac_affine_inv_T_grad_psi",
+                jac_affine_inv.T * grad_psi,
+            )
+
+            # jac_blending_inv_T_jac_affine_inv_T_grad_psi_symbols = tabulation.register_factor(
+            #     "jac_affine_inv_T_grad_psi",
+            #     jac_blending_inv.T * jac_affine_inv_T_grad_psi_symbols,
+            # )
+
+            if isinstance(blending, IdentityMap):
+                laplacian = sum(
+                    [hessian_affine_symbols[i, i] for i in range(geometry.dimensions)]
+                )
+                form = (
+                    laplacian
+                    * dot(u, jac_affine_inv_T_grad_psi_symbols)
+                    * kdelta
+                    * jac_affine_det
+                )
+            else:
+                hessian_blending = hessian_affine_to_blending(
+                    geometry,
+                    hessian_affine,
+                    hessian_blending_map,
+                    jac_blending_inv.T,
+                    jac_affine_inv_T_grad_phi_symbols,
+                )
+
+                laplacian = sum(
+                    [hessian_blending[i, i] for i in range(geometry.dimensions)]
+                )
+
+                form = (
+                    laplacian
+                    * dot(u, jac_blending_inv.T * jac_affine_inv.T * grad_psi)
+                    * kdelta
+                    * jac_affine_det
+                    * jac_blending_det
+                )
+                # HOGException("Only for testing with Blending map")
+
+            mat[data.row, data.col] = form
+
+    return Form(mat, tabulation, symmetric=False, docstring=docstring)
+
+
 def zero_form(
     trial: FunctionSpace, test: FunctionSpace, geometry: ElementGeometry
 ) -> sp.Matrix:
diff --git a/hog/function_space.py b/hog/function_space.py
index 67dfe74c8c29e70f34941ada124f535276c1c8ac..7d0823a103fb946f7fda56c8dd33657a405e3cb3 100644
--- a/hog/function_space.py
+++ b/hog/function_space.py
@@ -20,7 +20,7 @@ import sympy as sp
 
 from hog.element_geometry import ElementGeometry, TriangleElement, TetrahedronElement
 from hog.exception import HOGException
-from hog.math_helpers import grad
+from hog.math_helpers import grad, hessian
 from hog.symbolizer import Symbolizer
 
 
@@ -93,6 +93,28 @@ class FunctionSpace(Protocol):
         raise HOGException(
             f"Gradient of shape function not available for domain type {domain}"
         )
+    
+    def hessian_shape(
+        self,
+        geometry: ElementGeometry,
+        domain: str = "reference",
+        dof_map: Optional[List[int]] = None,
+    ) -> List[sp.MatrixBase]:
+        """Returns a list containing the hessians of the shape functions on the element.
+
+        :param dof_map: this list can be used to specify (remap) the DoF ordering of the element
+        """
+        if domain in ["ref", "reference"]:
+            symbols = self.symbolizer.ref_coords_as_list(geometry.dimensions)
+            basis_functions_hessians = [
+                hessian(f, symbols)
+                for f in self.shape(geometry, domain=domain, dof_map=dof_map)
+            ]
+            return basis_functions_hessians
+
+        raise HOGException(
+            f"Hessian of shape function not available for domain type {domain}"
+        )
 
     def num_dofs(self, geometry: ElementGeometry) -> int:
         """The number of DoFs per element."""
diff --git a/hog/math_helpers.py b/hog/math_helpers.py
index eca3b78004681a44941794b6d887939d14de7e71..a0fdeaefecb7dca8f1c64e9c5e5416ee633c07ce 100644
--- a/hog/math_helpers.py
+++ b/hog/math_helpers.py
@@ -55,6 +55,12 @@ def grad(f: Union[sp.Expr, sp.MatrixBase], symbols: List[sp.Symbol]) -> sp.Matri
     raise HOGException("Invalid data type in grad().")
 
 
+def hessian(f: sp.Expr, symbols: List[sp.Symbol]) -> sp.MatrixBase:
+    """Returns the hessian of the passed sympy expression with respect to the passed symbols."""
+    df = grad(f, symbols)
+    return sp.Matrix([[sp.simplify(sp.diff(g, s)) for s in symbols] for g in df])
+
+
 def curl(u: sp.Matrix, symbols: List[sp.Symbol]) -> sp.Expr:
     """Returns the curl of the passed sympy matrix with respect to the passed symbols."""
     if u.shape != (3, 1) or len(symbols) != 3:
diff --git a/hog/operator_generation/operators.py b/hog/operator_generation/operators.py
index 80234461cd663e1af45aa2cccfc83b081973b681..34b28661fe21af58c4e2168cab54a6680308f009 100644
--- a/hog/operator_generation/operators.py
+++ b/hog/operator_generation/operators.py
@@ -908,6 +908,18 @@ class HyTeGElementwiseOperator:
                     )
                 )
 
+                blending_assignments += (
+                    hog.code_generation.hessian_matrix_assignments(
+                        mat,
+                        integration_info.tables + quad_loop,
+                        geometry,
+                        self.symbolizer,
+                        affine_points=element_vertex_coordinates_symbols,
+                        blending=integration_info.blending,
+                        quad_info=integration_info.quad,
+                    )
+                )
+
                 with TimedLogger("cse on blending operation", logging.DEBUG):
                     cse_impl = self._optimizer.cse_impl()
                     blending_assignments = hog.cse.cse(
diff --git a/hog/quadrature/quad_loop.py b/hog/quadrature/quad_loop.py
index e550ace591d4ffb435327dd6c9a247a46a613f22..5c8444997041774c9b7971e9ad2f5d5cd5118af4 100644
--- a/hog/quadrature/quad_loop.py
+++ b/hog/quadrature/quad_loop.py
@@ -32,6 +32,7 @@ from hog.symbolizer import Symbolizer
 from hog.sympy_extensions import fast_subs
 from hog.fem_helpers import (
     jac_blending_evaluate,
+    hess_blending_evaluate,
     abs_det_jac_blending_eval_symbols,
     jac_blending_inv_eval_symbols,
 )
@@ -113,6 +114,14 @@ class QuadLoop:
                 self.quadrature.geometry, self.symbolizer, jac_evaluated
             )
 
+            hess = hess_blending_evaluate(
+                self.symbolizer, self.quadrature.geometry, self.blending
+            )
+            hess_evaluated = [fast_subs(hess[idx], coord_subs_dict) for idx in range(len(hess))]
+            quadrature_assignments += self.blending_hessian_quad_loop_assignments(
+                self.quadrature.geometry, self.symbolizer, hess_evaluated
+            )
+
         for row in range(self.mat_integrand.rows):
             for col in range(self.mat_integrand.cols):
                 tmp_symbol = sp.Symbol(f"q_tmp_{row}_{col}")
@@ -220,3 +229,23 @@ class QuadLoop:
         ]
 
         return quadrature_assignments
+    
+    def blending_hessian_quad_loop_assignments(
+        self,
+        geometry: ElementGeometry,
+        symbolizer: Symbolizer,
+        hessian_blending_evaluated: List[sp.Matrix],
+    ) -> List[ast.SympyAssignment]:
+        quadrature_assignments = []
+
+        hess_symbols = symbolizer.hessian_blending_map(geometry.dimensions)
+
+        dim = geometry.dimensions
+        quadrature_assignments += [
+            ast.SympyAssignment(hess_symbols[k][i, j], hessian_blending_evaluated[k][i, j], is_const=False)
+            for i in range(dim)
+            for j in range(dim)
+            for k in range(dim)
+        ]
+
+        return quadrature_assignments
diff --git a/hog/symbolizer.py b/hog/symbolizer.py
index 064e0323ad7ac75bc44dd9babaa5e17bc3045a0c..721e82000c6707a5fe5ff7069c891fad25b427b2 100644
--- a/hog/symbolizer.py
+++ b/hog/symbolizer.py
@@ -47,6 +47,7 @@ class Symbolizer:
         symbol_jac_blending="jac_blending",
         symbol_jac_blending_inv="jac_blending_inv",
         symbol_abs_det_jac_blending="abs_det_jac_blending",
+        symbol_hessian_blending="hessian_blending",
     ):
         """Creates a Symbolizer instance.
 
@@ -79,6 +80,7 @@ class Symbolizer:
         self._symbol_jac_blending = symbol_jac_blending
         self._symbol_jac_blending_inv = symbol_jac_blending_inv
         self._symbol_abs_det_jac_blending = symbol_abs_det_jac_blending
+        self._symbol_hessian_blending = symbol_hessian_blending
 
     def ref_coords_as_list(self, dimensions: int) -> List[sp.Symbol]:
         """Returns a list of symbols that correspond to the coordinates on the reference element."""
@@ -214,6 +216,19 @@ class Symbolizer:
 
     def abs_det_jac_affine_to_blending(self, q_pt: str = "") -> sp.Symbol:
         return sp.Symbol(f"{self._symbol_abs_det_jac_blending}{q_pt}")
+    
+    def hessian_blending_map(self, dimensions: int, q_pt: str = "") -> List[sp.Matrix]:
+        return [
+            sp.Matrix(
+                [
+                    [
+                        sp.Symbol(f"{self._symbol_hessian_blending}_{k}_{i}_{j}{q_pt}")
+                        for j in range(dimensions)
+                    ]
+                    for i in range(dimensions)
+                ]
+            ) for k in range(dimensions)
+        ]
 
     def blending_parameter_symbols(self, num_symbols: int) -> List[sp.Symbol]:
         return sp.symbols(
@@ -225,6 +240,7 @@ class Symbolizer:
             list(self.jac_affine_to_blending(dimensions).free_symbols)
             + list(self.jac_affine_to_blending_inv(dimensions).free_symbols)
             + list(self.abs_det_jac_affine_to_blending().free_symbols)
+            + list(sp.Array(self.hessian_blending_map(dimensions)).free_symbols)
         )
 
     def float_loop_ctr_array(self, dimensions: int) -> List[sp.Symbol]:
diff --git a/hyteg_integration_tests/src/CMakeLists.txt b/hyteg_integration_tests/src/CMakeLists.txt
index 66b018d19bdc7822f080abbcdc7935a1aa4703e7..d1c23b1d28ffd1483fb72bd7b46dcdb6527e8820 100644
--- a/hyteg_integration_tests/src/CMakeLists.txt
+++ b/hyteg_integration_tests/src/CMakeLists.txt
@@ -61,6 +61,10 @@ add_operator_test(FILE Epsilon.cpp DEF           FORM=forms::p2_epsilonvar_2_1_a
 add_operator_test(FILE EpsilonVector.cpp        DEF           FORM Epsilon                                             ABBR VecVIQT  GEN_ARGS -s P2Vector               -o MOVECONSTANTS QUADLOOPS TABULATE  --quad-degree 2 2 --dimensions 2 LIBS constant_stencil_operator)
 add_operator_test(FILE EpsilonVectorAnnulus.cpp DEF           FORM Epsilon                                             ABBR VecbVIQT GEN_ARGS -s P2Vector -b AnnulusMap -o MOVECONSTANTS QUADLOOPS TABULATE  --quad-degree 2 2 --dimensions 2 LIBS constant_stencil_operator)
 
+add_operator_test(FILE SUPGDiffusionTest.cpp DEF           FORM SUPGDiffusion ABBR supgdiffIMT GEN_ARGS -s P2 -b IdentityMap -o MOVECONSTANTS --quad-rule hillion_07 yu_3 --dimensions 2)
+add_operator_test(FILE SUPGDiffusionTest.cpp DEF           FORM SUPGDiffusion ABBR supgdiffIQMT GEN_ARGS -s P2 -b IdentityMap -o MOVECONSTANTS QUADLOOPS TABULATE --quad-rule hillion_07 yu_3 --dimensions 2)
+add_operator_test(FILE SUPGDiffusionAnnulusTest.cpp DEF           FORM SUPGDiffusion ABBR supgdiffAQMT GEN_ARGS -s P2 -b AnnulusMap -o MOVECONSTANTS QUADLOOPS TABULATE --quad-rule hillion_07 yu_3)
+
 # tests with blending
 
 add_operator_test(FILE DiffusionP1Annulus.cpp DEF TEST_ASSEMBLE FORM Diffusion ABBR b2     GEN_ARGS -s P1 -b AnnulusMap --quad-rule hillion_07 yu_3)
@@ -69,4 +73,3 @@ add_operator_test(FILE DiffusionP1IcosahedralShell.cpp          FORM Diffusion A
 
 add_operator_test(FILE FullStokes.cpp DEF TEST_DIAG FORM=forms::p2_full_stokesvar_0_0_blending_q3 FORM FullStokes_0_0 ABBR 00b3VIQT GEN_ARGS -s P2 -b IcosahedralShellMap -o MOVECONSTANTS QUADLOOPS TABULATE VECTORIZE --quad-rule yu_3 yu_3)
 add_operator_test(FILE FullStokes.cpp DEF           FORM=forms::p2_full_stokesvar_2_1_blending_q3 FORM FullStokes_2_1 ABBR 21b3VIQT GEN_ARGS -s P2 -b IcosahedralShellMap -o MOVECONSTANTS QUADLOOPS TABULATE VECTORIZE --quad-rule yu_3 yu_3)
-
diff --git a/hyteg_integration_tests/src/SUPGDiffusionAnnulusTest.cpp b/hyteg_integration_tests/src/SUPGDiffusionAnnulusTest.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..cb0e3b306faa5ddc32e71535f80e7ec106d3df98
--- /dev/null
+++ b/hyteg_integration_tests/src/SUPGDiffusionAnnulusTest.cpp
@@ -0,0 +1,109 @@
+/*
+ * HyTeG Operator Generator
+ * Copyright (C) 2017-2024  Nils Kohl, Fabian Böhm, Daniel Bauer
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <https://www.gnu.org/licenses/>.
+ */
+
+#include "core/math/Constants.h"
+#include "core/DataTypes.h"
+
+#include "hyteg/p2functionspace/P2Function.hpp"
+
+#include "SUPGDiffusion/TestOpSUPGDiffusion.hpp"
+#include "OperatorGenerationTest.hpp"
+
+using namespace hyteg;
+using walberla::real_t;
+
+int main( int argc, char* argv[] )
+{
+   walberla::MPIManager::instance()->initializeMPI( &argc, &argv );
+   walberla::MPIManager::instance()->useWorldComm();
+
+   const real_t rMin = 0.5;
+   const real_t rMax = 1.0;
+
+   const uint_t nTan = 5U;
+   const uint_t nRad = 2U;
+
+   auto meshInfo = MeshInfo::meshAnnulus( rMin, rMax, MeshInfo::CRISS, nTan, nRad );
+
+    StorageSetup storageSetupRectangle(
+       "Annulus", meshInfo, GeometryMap::Type::ANNULUS );
+
+    auto storage = storageSetupRectangle.createStorage();
+
+    const uint_t level = 2U;
+
+   P2Function< real_t >       T( "T", storage, level, level );
+   P2Function< real_t >       f( "f", storage, level, level );
+   P2Function< real_t >       sh( "sh", storage, level, level );
+   P2Function< real_t >       kd( "kd", storage, level, level );
+
+   P2VectorFunction< real_t > u("u", storage, level, level);
+
+   std::function<real_t(const Point3D&)> uX = [](const Point3D& x)
+   {
+    return x[0];
+   };
+
+   std::function<real_t(const Point3D&)> uY = [](const Point3D& x)
+   {
+    return x[1];
+   };
+   
+   std::function<real_t(const Point3D&)> uZ = [](const Point3D& x)
+   {
+    return x[2];
+   };
+
+   std::function<real_t(const Point3D&)> kdFunc = [](const Point3D& x)
+   {
+      real_t r = x.norm();
+    return r * r;
+   };
+
+   std::function<real_t(const Point3D&)> shFunc = [](const Point3D& x)
+   {
+      real_t r = x.norm();
+    return r * r;
+   };
+
+   std::function<real_t(const Point3D&)> TFunc = [](const Point3D& x)
+   {
+      real_t r = x.norm();
+    return r * r;
+   };
+
+   T.interpolate(TFunc, level, All);
+   u.interpolate({uX, uY}, level, All);
+   kd.interpolate(kdFunc, level, All);
+   sh.interpolate(shFunc, level, All);
+
+   operatorgeneration::TestOpSUPGDiffusion testOperator(storage, level, level, kd, u.component(0U), u.component(1U));
+
+   testOperator.apply(T, f, level, All);
+
+   real_t integral = sh.dotGlobal(f, level, All);
+   real_t expected = 2*walberla::math::pi*((4.0/3.0)*std::pow(rMax, 6) - 4.0/3.0*std::pow(rMin, 6));
+
+   real_t error = std::abs(integral - expected);
+
+   real_t threshold = 1e-3;
+
+   WALBERLA_CHECK_LESS(error, threshold);
+
+   return EXIT_SUCCESS;
+}
diff --git a/hyteg_integration_tests/src/SUPGDiffusionTest.cpp b/hyteg_integration_tests/src/SUPGDiffusionTest.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..8319c79f4e3616465953bb44915aad02647f9fb8
--- /dev/null
+++ b/hyteg_integration_tests/src/SUPGDiffusionTest.cpp
@@ -0,0 +1,106 @@
+/*
+ * HyTeG Operator Generator
+ * Copyright (C) 2017-2024  Nils Kohl, Fabian Böhm, Daniel Bauer
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <https://www.gnu.org/licenses/>.
+ */
+
+#include "core/DataTypes.h"
+
+#include "hyteg/p2functionspace/P2Function.hpp"
+
+#include "SUPGDiffusion/TestOpSUPGDiffusion.hpp"
+#include "OperatorGenerationTest.hpp"
+
+using namespace hyteg;
+using walberla::real_t;
+
+int main( int argc, char* argv[] )
+{
+   walberla::MPIManager::instance()->initializeMPI( &argc, &argv );
+   walberla::MPIManager::instance()->useWorldComm();
+
+   Point2D lowerLeft(0.0, 0.0);
+   Point2D upperRight(1.0, 1.0);
+
+   uint_t nx = 10U, ny = 10U, nz = 10U;
+   auto meshInfo = MeshInfo::meshRectangle(lowerLeft, upperRight, MeshInfo::CRISS, nx, ny);
+
+    StorageSetup storageSetupRectangle(
+       "Rectangle", MeshInfo::meshRectangle(lowerLeft, upperRight, MeshInfo::CRISS, nx, ny), GeometryMap::Type::IDENTITY );
+
+    auto storage = storageSetupRectangle.createStorage();
+
+    const uint_t level = 2U;
+
+   P2Function< real_t >       T( "T", storage, level, level );
+   P2Function< real_t >       f( "f", storage, level, level );
+   P2Function< real_t >       sh( "sh", storage, level, level );
+   P2Function< real_t >       kd( "kd", storage, level, level );
+
+   P2VectorFunction< real_t > u("u", storage, level, level);
+
+   std::function<real_t(const Point3D&)> uX = [](const Point3D& x)
+   {
+    return x[0];
+   };
+
+   std::function<real_t(const Point3D&)> uY = [](const Point3D& x)
+   {
+    return x[1];
+   };
+   
+   std::function<real_t(const Point3D&)> uZ = [](const Point3D& x)
+   {
+    return x[2];
+   };
+
+   std::function<real_t(const Point3D&)> kdFunc = [](const Point3D& x)
+   {
+      real_t r = x.norm();
+    return r * r;
+   };
+
+   std::function<real_t(const Point3D&)> shFunc = [](const Point3D& x)
+   {
+      real_t r = x.norm();
+    return r * r;
+   };
+
+   std::function<real_t(const Point3D&)> TFunc = [](const Point3D& x)
+   {
+      real_t r = x.norm();
+    return r * r;
+   };
+
+   T.interpolate(TFunc, level, All);
+   u.interpolate({uX, uY}, level, All);
+   kd.interpolate(kdFunc, level, All);
+   sh.interpolate(shFunc, level, All);
+
+   operatorgeneration::TestOpSUPGDiffusion testOperator(storage, level, level, kd, u.component(0U), u.component(1U));
+
+   testOperator.apply(T, f, level, All);
+
+   real_t integral = sh.dotGlobal(f, level, All);
+   real_t expected = 224.0/45.0;
+
+   real_t error = std::abs(integral - expected);
+
+   real_t threshold = 1e-7;
+
+   WALBERLA_CHECK_LESS(error, threshold);
+
+   return EXIT_SUCCESS;
+}