diff --git a/generate_all_operators.py b/generate_all_operators.py
index a3a4bf0a71117e8ea8d7c01d0d3afb1f5351f74d..33edc497fbe9a2f9e94e8f370ba1215c503410da 100644
--- a/generate_all_operators.py
+++ b/generate_all_operators.py
@@ -39,6 +39,9 @@ from hog.forms import (
     div_k_grad,
     shear_heating,
     epsilon,
+    epsilon_rotation,
+    divergence_rotation,
+    pspg,
     full_stokes,
     nonlinear_diffusion,
     nonlinear_diffusion_newton_galerkin,
@@ -613,6 +616,69 @@ def all_operators(
         )
     )
 
+    p2vec_epsilon_rotation = partial(
+        epsilon_rotation,
+        variable_viscosity=True,
+        coefficient_function_space=P2,
+    )
+    ops.append(
+        OperatorInfo(
+            mapping=f"P2Vector",
+            name=f"EpsilonRotation",
+            trial_space=P2Vector,
+            test_space=P2Vector,
+            form=p2vec_epsilon_rotation,
+            type_descriptor=type_descriptor,
+            geometries=list(geometries),
+            opts=opts,
+            blending=blending,
+        )
+    )
+
+    ops.append(
+        OperatorInfo(
+            mapping=f"P2VectorToP1",
+            name=f"DivergenceRotation",
+            trial_space=P1,
+            test_space=P2Vector,
+            form=divergence_rotation,
+            type_descriptor=type_descriptor,
+            geometries=list(geometries),
+            opts=opts,
+            blending=blending,
+        )
+    )
+
+    ops.append(
+        OperatorInfo(
+            mapping=f"P1ToP2Vector",
+            name=f"GradientRotation",
+            trial_space=P2Vector,
+            test_space=P1,
+            form=partial(divergence_rotation, transpose = True),
+            type_descriptor=type_descriptor,
+            geometries=list(geometries),
+            opts=opts,
+            blending=blending,
+        )
+    )
+
+    ops.append(
+        OperatorInfo(
+            mapping=f"P1",
+            name=f"PSPG",
+            trial_space=P1,
+            test_space=P1,
+            form=pspg,
+            type_descriptor=type_descriptor,
+            geometries=list(geometries),
+            opts=opts,
+            blending=blending,
+        )
+    )
+
+
+
     for c in [0, 1, 2]:
         # fmt: off
         if c == 2:
diff --git a/hog/forms.py b/hog/forms.py
index 8f316d6f6dfa1b085a763c5c7a805713775d8082..142073feb65af88bc302d9e70d39b538c84e6809 100644
--- a/hog/forms.py
+++ b/hog/forms.py
@@ -41,6 +41,97 @@ from hog.quadrature import Quadrature, Tabulation
 from hog.symbolizer import Symbolizer
 from hog.logger import TimedLogger, get_logger
 from hog.blending import GeometryMap, ExternalMap, IdentityMap
+from enum import Enum
+
+
+class RotationType(Enum):
+    PRE_MULTIPLY = 1
+    POST_MULTIPLY = 2
+    PRE_AND_POST_MULTIPLY = 3
+
+
+def calculate_rotation_matrix(
+    u_fspace: FunctionSpace,
+    geometry: ElementGeometry,
+    n_dof_symbols: List[List[sp.Matrix]],
+) -> sp.MatrixBase:
+    rotmat = create_empty_element_matrix(u_fspace, u_fspace, geometry)
+
+    for data in element_matrix_iterator(
+        u_fspace.component_function_space, u_fspace.component_function_space, geometry
+    ):
+        if data.row != data.col:
+            continue
+
+        nx_ = n_dof_symbols[0][data.row]
+        ny_ = n_dof_symbols[1][data.row]
+
+        if geometry.dimensions == 2:
+            n_ = sp.Matrix([[nx_], [ny_]])
+            nnorm = sp.sqrt(nx_ * nx_ + ny_ * ny_)
+            rotmat_ = sp.Matrix([[-ny_, nx_], [nx_, ny_]])
+        elif geometry.dimensions == 3:
+            nz_ = n_dof_symbols[2][data.row]
+            n_ = sp.Matrix([[nx_], [ny_], [nz_]])
+            nnorm = sp.sqrt(nx_ * nx_ + ny_ * ny_ + nz_ * nz_)
+
+            ex = sp.Matrix([[1.0], [0.0], [0.0]])
+            ey = sp.Matrix([[0.0], [1.0], [0.0]])
+            ez = sp.Matrix([[0.0], [0.0], [1.0]])
+
+            ncross = [n_.cross(ex), n_.cross(ey), n_.cross(ez)]
+            ncrossnorm = [nc_.norm() for nc_ in ncross]
+
+            t1all = [
+                n_.cross(ex).normalized(),
+                n_.cross(ey).normalized(),
+                n_.cross(ez).normalized(),
+            ]
+
+            getrotmat = lambda t1: (t1.row_join(n_.cross(t1)).row_join(n_)).T
+
+            machine_eps = 1e-10
+            rotmat_ = sp.eye(geometry.dimensions)
+            for iRot in range(geometry.dimensions):
+                for jRot in range(geometry.dimensions):
+                    rotmat_[iRot, jRot] = sp.Piecewise(
+                        (
+                            getrotmat(t1all[0])[iRot, jRot],
+                            sp.And(
+                                ncrossnorm[0] + machine_eps > ncrossnorm[1],
+                                ncrossnorm[0] + machine_eps > ncrossnorm[2],
+                            ),
+                        ),
+                        (
+                            getrotmat(t1all[1])[iRot, jRot],
+                            sp.And(
+                                ncrossnorm[1] + machine_eps > ncrossnorm[0],
+                                ncrossnorm[1] + machine_eps > ncrossnorm[2],
+                            ),
+                        ),
+                        (getrotmat(t1all[2])[iRot, jRot], True),
+                    )
+
+            # print("rotmat_ = ", rotmat_.subs([(nx_, 0.0), (ny_, 0.0), (nz_, 1.0)]))
+            # print("rotmat_ = ", rotmat_[0, 0])
+            # exit()
+        else:
+            HOGException(
+                "We have not reached your dimensions, supreme being or little one"
+            )
+
+        rotmatid_ = sp.eye(geometry.dimensions)
+
+        ndofs = u_fspace.component_function_space.num_dofs(geometry)
+
+        for iRot in range(geometry.dimensions):
+            for jRot in range(geometry.dimensions):
+                rotmat[data.row + iRot * ndofs, data.col + jRot * ndofs] = sp.Piecewise(
+                    (rotmat_[iRot, jRot], nnorm > 0.5),
+                    (rotmatid_[iRot, jRot], True),
+                )
+
+    return rotmat
 
 
 @dataclass
@@ -49,6 +140,8 @@ class Form:
     tabulation: Tabulation
     symmetric: bool
     docstring: str = ""
+    rotmat: sp.MatrixBase = sp.Matrix([[0]])
+    rot_type: RotationType = RotationType.PRE_AND_POST_MULTIPLY
 
     def integrate(self, quad: Quadrature, symbolizer: Symbolizer) -> sp.Matrix:
         """Integrates the form using the passed quadrature directly, i.e. without tabulations or loops."""
@@ -527,7 +620,6 @@ def epsilon(
     variable_viscosity: bool = True,
     coefficient_function_space: Optional[FunctionSpace] = None,
 ) -> Form:
-
     if trial.is_vectorial ^ test.is_vectorial:
         raise HOGException(
             "Either both (trial and test) spaces or none should be vectorial."
@@ -720,6 +812,229 @@ where
     )
 
 
+def epsilon_rotation(
+    trial: FunctionSpace,
+    test: FunctionSpace,
+    geometry: ElementGeometry,
+    symbolizer: Symbolizer,
+    blending: GeometryMap = IdentityMap(),
+    component_trial: int = 0,
+    component_test: int = 0,
+    variable_viscosity: bool = True,
+    coefficient_function_space: Optional[FunctionSpace] = None,
+) -> Form:
+    if trial.is_vectorial ^ test.is_vectorial:
+        raise HOGException(
+            "Either both (trial and test) spaces or none should be vectorial."
+        )
+
+    vectorial_spaces = trial.is_vectorial
+    docstring_components = (
+        ""
+        if vectorial_spaces
+        else f"\nComponent trial: {component_trial}\nComponent test:  {component_test}"
+    )
+
+    docstring = f"""
+"Epsilon" operator.
+{docstring_components}
+Geometry map:    {blending}
+
+Weak formulation
+
+    u: trial function (vectorial space: {trial})
+    v: test function  (vectorial space: {test})
+    μ: coefficient    (scalar space:    {coefficient_function_space})
+
+    ∫ 2 μ ε(u) : ε(v)
+    
+where
+    
+    ε(w) := (1/2) (∇w + (∇w)ᵀ)
+"""
+    if not variable_viscosity:
+        raise HOGException("Constant viscosity currently not supported.")
+        # TODO fix issue with undeclared p_affines
+
+    if (
+        not vectorial_spaces
+        and geometry.dimensions < 3
+        and (component_trial > 1 or component_test > 1)
+    ):
+        return create_empty_element_matrix(trial, test, geometry)
+
+    # if geometry.dimensions > 2:
+    #     raise HOGException("Currently, only testing for 2D")
+
+    with TimedLogger("assembling epsilon matrix", level=logging.DEBUG):
+        tabulation = Tabulation(symbolizer)
+
+        jac_affine = symbolizer.jac_ref_to_affine(geometry.dimensions)
+        jac_affine_inv = symbolizer.jac_ref_to_affine_inv(geometry.dimensions)
+        jac_affine_det = symbolizer.abs_det_jac_ref_to_affine()
+
+        if isinstance(blending, ExternalMap):
+            jac_blending = jac_affine_to_physical(geometry, symbolizer)
+        else:
+            affine_coords = trafo_ref_to_affine(geometry, symbolizer)
+            # jac_blending = blending.jacobian(affine_coords)
+            jac_blending = symbolizer.jac_affine_to_blending(geometry.dimensions)
+
+        # with TimedLogger("inverting blending Jacobian", level=logging.DEBUG):
+        jac_blending_inv = symbolizer.jac_affine_to_blending_inv(geometry.dimensions)
+        jac_blending_det = symbolizer.abs_det_jac_affine_to_blending()
+
+        ref_symbols_list = symbolizer.ref_coords_as_list(geometry.dimensions)
+
+        phi_eval_symbols = tabulation.register_phi_evals(
+            coefficient_function_space.shape(geometry)
+        )
+
+        mu: sp.Expr = 1
+        if variable_viscosity:
+            if coefficient_function_space:
+                mu, _ = fem_function_on_element(
+                    coefficient_function_space,
+                    geometry,
+                    symbolizer,
+                    domain="reference",
+                    function_id="mu",
+                    basis_eval=phi_eval_symbols,
+                )
+            else:
+                mu = scalar_space_dependent_coefficient(
+                    "mu", geometry, symbolizer, blending=blending
+                )
+
+        nx, nx_dof_symbols = fem_function_on_element(
+            coefficient_function_space,
+            geometry,
+            symbolizer,
+            domain="reference",
+            function_id="nx",
+            basis_eval=phi_eval_symbols,
+        )
+
+        ny, ny_dof_symbols = fem_function_on_element(
+            coefficient_function_space,
+            geometry,
+            symbolizer,
+            domain="reference",
+            function_id="ny",
+            basis_eval=phi_eval_symbols,
+        )
+
+        n_dof_symbols = [nx_dof_symbols, ny_dof_symbols]
+
+        if geometry.dimensions == 3:
+            nz, nz_dof_symbols = fem_function_on_element(
+                coefficient_function_space,
+                geometry,
+                symbolizer,
+                domain="reference",
+                function_id="nz",
+                basis_eval=phi_eval_symbols,
+            )
+            n_dof_symbols = [nx_dof_symbols, ny_dof_symbols, nz_dof_symbols]
+
+        mat = create_empty_element_matrix(trial, test, geometry)
+        it = element_matrix_iterator(trial, test, geometry)
+
+        rotmat = calculate_rotation_matrix(trial, geometry, n_dof_symbols)
+
+        for data in it:
+            phi = data.trial_shape
+            psi = data.test_shape
+            grad_phi_vec = data.trial_shape_grad
+            grad_psi_vec = data.test_shape_grad
+
+            # setup of the form expression with tabulation
+            if blending != IdentityMap():
+                # chain rule, premultiply with transposed inverse jacobians of the affine trafo
+                # the results are tensors of order 2
+                # + tabulate affine transformed gradients (can only do this due to incoming, micro-element dependent blending jacobian)
+                jac_affine_inv_T_grad_phi_symbols = sp.Matrix(
+                    tabulation.register_factor(
+                        "jac_affine_inv_T_grad_phi",
+                        jac_affine_inv.T * grad_phi_vec,
+                    )
+                )
+                jac_affine_inv_T_grad_psi_symbols = sp.Matrix(
+                    tabulation.register_factor(
+                        "jac_affine_inv_T_grad_psi",
+                        jac_affine_inv.T * grad_psi_vec,
+                    )
+                )
+
+                # transform gradients according to blending map
+                jac_blending_T_jac_affine_inv_T_grad_phi = (
+                    jac_blending_inv.T * jac_affine_inv_T_grad_phi_symbols
+                )
+                jac_blending_T_jac_affine_inv_T_grad_psi = (
+                    jac_blending_inv.T * jac_affine_inv_T_grad_psi_symbols
+                )
+
+                # extract the symmetric part
+                sym_grad_phi = 0.5 * (
+                    jac_blending_T_jac_affine_inv_T_grad_phi
+                    + jac_blending_T_jac_affine_inv_T_grad_phi.T
+                )
+                sym_grad_psi = 0.5 * (
+                    jac_blending_T_jac_affine_inv_T_grad_psi
+                    + jac_blending_T_jac_affine_inv_T_grad_psi.T
+                )
+
+                # double contract everything + determinants
+                form = (
+                    double_contraction(2 * mu[0, 0] * sym_grad_phi, sym_grad_psi)
+                    * jac_affine_det
+                    * jac_blending_det
+                )
+
+            else:
+                # chain rule, premultiply with transposed inverse jacobians of affine trafo
+                # the results are tensors of order 2
+                jac_affine_inv_T_grad_phi = jac_affine_inv.T * grad_phi_vec
+                jac_affine_inv_T_grad_psi = jac_affine_inv.T * grad_psi_vec
+
+                # now let's extract the symmetric part
+                sym_grad_phi = 0.5 * (
+                    jac_affine_inv_T_grad_phi + jac_affine_inv_T_grad_phi.T
+                )
+                sym_grad_psi = 0.5 * (
+                    jac_affine_inv_T_grad_psi + jac_affine_inv_T_grad_psi.T
+                )
+
+                # double contract everything + determinants, tabulate the whole contraction
+                # TODO maybe shorten naming, although its nice to have everything in the name
+                contract_2_jac_affine_inv_sym_grad_phi_jac_affine_inv_sym_grad_psi_det_symbol = (
+                    tabulation.register_factor(
+                        "contract_2_jac_affine_inv_sym_grad_phi_jac_affine_inv_sym_grad_psi_det_symbol",
+                        double_contraction(2 * sym_grad_phi, sym_grad_psi)
+                        * jac_affine_det,
+                    )
+                )[
+                    0
+                ]
+                form = (
+                    mu
+                    * contract_2_jac_affine_inv_sym_grad_phi_jac_affine_inv_sym_grad_psi_det_symbol
+                )
+
+            mat[data.row, data.col] = form
+
+    epsForm = Form(
+        mat,
+        tabulation,
+        symmetric=vectorial_spaces or (component_trial == component_test),
+        docstring=docstring,
+        rotmat=rotmat,
+        rot_type=RotationType.PRE_AND_POST_MULTIPLY,
+    )
+
+    return epsForm
+
+
 def k_mass(
     trial: FunctionSpace,
     test: FunctionSpace,
@@ -1052,6 +1367,138 @@ Weak formulation
     return Form(mat, Tabulation(symbolizer), symmetric=False, docstring=docstring)
 
 
+def divergence_rotation(
+    trial: FunctionSpace,
+    test: FunctionSpace,
+    geometry: ElementGeometry,
+    symbolizer: Symbolizer,
+    blending: GeometryMap = IdentityMap(),
+    transpose: bool = False,
+) -> Form:
+    if transpose:
+        docstring = f"""
+Gradient.
+
+Geometry map: {blending}
+
+Weak formulation
+
+    u: trial function (scalar space:    {trial})
+    v: test function  (vectorial space: {test})
+
+    ∫ - ( ∇ · v ) u
+"""
+    else:
+        docstring = f"""
+Divergence.
+
+Geometry map: {blending}
+
+Weak formulation
+
+    u: trial function (vectorial space: {trial})
+    v: test function  (scalar space:    {test})
+
+    ∫ - ( ∇ · u ) v
+"""
+
+    with TimedLogger(
+        f"assembling divergence {'transpose' if transpose else ''} matrix",
+        level=logging.DEBUG,
+    ):
+        jac_affine = symbolizer.jac_ref_to_affine(geometry.dimensions)
+        jac_affine_inv = symbolizer.jac_ref_to_affine_inv(geometry.dimensions)
+        jac_affine_det = symbolizer.abs_det_jac_ref_to_affine()
+
+        if isinstance(blending, ExternalMap):
+            jac_blending = jac_affine_to_physical(geometry, symbolizer)
+        else:
+            affine_coords = trafo_ref_to_affine(geometry, symbolizer)
+            jac_blending = blending.jacobian(affine_coords)
+
+        jac_blending_det = abs(det(jac_blending))
+        with TimedLogger("inverting blending Jacobian", level=logging.DEBUG):
+            jac_blending_inv = inv(jac_blending)
+
+        mat = create_empty_element_matrix(trial, test, geometry)
+
+        it = element_matrix_iterator(trial, test, geometry)
+
+        tabulation = Tabulation(symbolizer)
+
+        if transpose:
+            normals_function_space = test
+            normals_component_function_space = test.component_function_space
+        else:
+            normals_function_space = trial
+            normals_component_function_space = trial.component_function_space
+
+        phi_eval_symbols = tabulation.register_phi_evals(
+            normals_component_function_space.shape(geometry)
+        )
+
+        nx, nx_dof_symbols = fem_function_on_element(
+            normals_component_function_space,
+            geometry,
+            symbolizer,
+            domain="reference",
+            function_id="nx",
+            basis_eval=phi_eval_symbols,
+        )
+
+        ny, ny_dof_symbols = fem_function_on_element(
+            normals_component_function_space,
+            geometry,
+            symbolizer,
+            domain="reference",
+            function_id="ny",
+            basis_eval=phi_eval_symbols,
+        )
+
+        n_dof_symbols = [nx_dof_symbols, ny_dof_symbols]
+
+        if geometry.dimensions == 3:
+            nz, nz_dof_symbols = fem_function_on_element(
+                normals_component_function_space,
+                geometry,
+                symbolizer,
+                domain="reference",
+                function_id="nz",
+                basis_eval=phi_eval_symbols,
+            )
+            n_dof_symbols = [nx_dof_symbols, ny_dof_symbols, nz_dof_symbols]
+
+        rotmat = calculate_rotation_matrix(
+            normals_function_space, geometry, n_dof_symbols
+        )
+
+        for data in it:
+            if transpose:
+                phi = data.trial_shape
+                grad_phi = data.test_shape_grad
+            else:
+                phi = data.test_shape
+                grad_phi = data.trial_shape_grad
+
+            form = (
+                -(jac_blending_inv.T * jac_affine_inv.T * grad_phi).trace()
+                * phi
+                * jac_affine_det
+                * jac_blending_det
+            )
+
+            mat[data.row, data.col] = form
+
+    return Form(
+        mat,
+        Tabulation(symbolizer),
+        symmetric=False,
+        docstring=docstring,
+        rotmat=rotmat,
+        rot_type=RotationType.PRE_MULTIPLY if transpose else RotationType.POST_MULTIPLY,
+    )
+
+
 def gradient(
     trial: FunctionSpace,
     test: FunctionSpace,
@@ -1388,7 +1835,9 @@ The resulting matrix must be multiplied with a vector of ones to be used as the
                 basis_eval=phi_eval_symbols,
             )
         else:
-            raise HOGException("scalar_space_dependent_coefficient currently not supported in opgen.")
+            raise HOGException(
+                "scalar_space_dependent_coefficient currently not supported in opgen."
+            )
             # mu = scalar_space_dependent_coefficient(
             #     "mu", geometry, symbolizer, blending=blending
             # )
@@ -1433,7 +1882,6 @@ The resulting matrix must be multiplied with a vector of ones to be used as the
                 dof_symbols=dof_symbols_uy,
             )
 
-
             # if geometry.dimensions > 2:
             uz, dof_symbols_uz = fem_function_on_element(
                 velocity_function_space,
@@ -1488,21 +1936,17 @@ The resulting matrix must be multiplied with a vector of ones to be used as the
         for data in it:
             phi = data.trial_shape
             psi = data.test_shape
-            
+
             if blending != IdentityMap():
                 affine_factor = (
                     tabulation.register_factor(
                         "affine_factor_symbol",
                         sp.Matrix([phi * psi * jac_affine_det]),
                     )
-                )[
-                    0
-                ]
+                )[0]
                 form = (
                     mu[0]
-                    * (
-                        double_contraction(tau, grad_u)[0]
-                    )
+                    * (double_contraction(tau, grad_u)[0])
                     * jac_blending_det
                     * affine_factor
                 )
@@ -1510,19 +1954,10 @@ The resulting matrix must be multiplied with a vector of ones to be used as the
                 shear_heating_det_symbol = (
                     tabulation.register_factor(
                         "shear_heating_det_symbol",
-                        (
-                            double_contraction(tau, grad_u)
-                        )
-                        * phi
-                        * psi 
-                        * jac_affine_det,
+                        (double_contraction(tau, grad_u)) * phi * psi * jac_affine_det,
                     )
-                )[
-                    0
-                ]
-                form = (
-                    mu[0] * shear_heating_det_symbol
-                )
+                )[0]
+                form = mu[0] * shear_heating_det_symbol
 
             mat[data.row, data.col] = form
 
@@ -1534,7 +1969,6 @@ The resulting matrix must be multiplied with a vector of ones to be used as the
     )
 
 
-
 def divdiv(
     trial: FunctionSpace,
     test: FunctionSpace,
@@ -1695,7 +2129,9 @@ Weak formulation
             jac_blending_det = symbolizer.abs_det_jac_affine_to_blending()
             if not isinstance(blending, IdentityMap):
                 # hessian_blending_map = blending.hessian(affine_coords)
-                hessian_blending_map = symbolizer.hessian_blending_map(geometry.dimensions)
+                hessian_blending_map = symbolizer.hessian_blending_map(
+                    geometry.dimensions
+                )
 
         # jac_blending_det = abs(det(jac_blending))
         # with TimedLogger("inverting blending Jacobian", level=logging.DEBUG):
diff --git a/hog/operator_generation/operators.py b/hog/operator_generation/operators.py
index 34b28661fe21af58c4e2168cab54a6680308f009..1a8cc544d9220aa4659b6a3ac3984f96b7302195 100644
--- a/hog/operator_generation/operators.py
+++ b/hog/operator_generation/operators.py
@@ -70,7 +70,7 @@ from pystencils.typing.transformations import add_types
 from pystencils.backends.cbackend import CustomCodeNode
 from pystencils.typing.typed_sympy import FieldPointerSymbol
 
-from hog.forms import Form
+from hog.forms import Form, RotationType
 from hog.ast import Operations, count_operations
 from hog.blending import GeometryMap
 import hog.code_generation
@@ -256,6 +256,16 @@ class HyTeGElementwiseOperator:
                                 mat[row, col], self.symbolizer, blending
                             )
 
+        if not form.rotmat.is_zero_matrix:
+            if form.rot_type == RotationType.PRE_AND_POST_MULTIPLY:
+                mat = form.rotmat * mat * form.rotmat.T
+            elif form.rot_type == RotationType.PRE_MULTIPLY:
+                mat = form.rotmat * mat
+            elif form.rot_type == RotationType.POST_MULTIPLY:
+                mat = mat * form.rotmat.T
+            else:
+                HOGException("Not implemented")
+
         self.element_matrices[dim] = IntegrationInfo(
             geometry=geometry,
             integration_domain=integration_domain,