diff --git a/generate_all_operators.py b/generate_all_operators.py
index a3a4bf0a71117e8ea8d7c01d0d3afb1f5351f74d..2f81aeefce7ebb8c46278109dcb12d4b29bb5e3b 100644
--- a/generate_all_operators.py
+++ b/generate_all_operators.py
@@ -53,10 +53,10 @@ from hog.function_space import (
 )
 from hog.logger import get_logger, TimedLogger
 from hog.operator_generation.kernel_types import (
-    Apply,
-    Assemble,
-    AssembleDiagonal,
-    KernelType,
+    ApplyWrapper,
+    AssembleWrapper,
+    AssembleDiagonalWrapper,
+    KernelWrapperType,
 )
 from hog.operator_generation.loop_strategies import (
     LoopStrategy,
@@ -354,7 +354,7 @@ def main():
             args.quad_degree if args.quad_rule is None else args.quad_rule,
         )
     }
-    
+
     enabled_geometries: Set[TriangleElement | TetrahedronElement] = set()
     if 2 in args.dimensions:
         enabled_geometries.add(TriangleElement())
@@ -488,7 +488,7 @@ class OperatorInfo:
         sp.Matrix,
     ]
     type_descriptor: HOGType
-    kernel_types: List[KernelType] = None  # type: ignore[assignment] # will definitely be initialized in __post_init__
+    kernel_types: List[KernelWrapperType] = None  # type: ignore[assignment] # will definitely be initialized in __post_init__
     geometries: Sequence[ElementGeometry] = field(
         default_factory=lambda: [TriangleElement(), TetrahedronElement()]
     )
@@ -507,7 +507,7 @@ class OperatorInfo:
         if self.kernel_types is None:
             dims = [g.dimensions for g in self.geometries]
             self.kernel_types = [
-                Apply(
+                ApplyWrapper(
                     self.test_space,
                     self.trial_space,
                     type_descriptor=self.type_descriptor,
@@ -518,7 +518,7 @@ class OperatorInfo:
             all_opts = set().union(*[o for (o, _, _) in self.opts])
             if not ({Opts.VECTORIZE, Opts.VECTORIZE512}.intersection(all_opts)):
                 self.kernel_types.append(
-                    Assemble(
+                    AssembleWrapper(
                         self.test_space,
                         self.trial_space,
                         type_descriptor=self.type_descriptor,
@@ -528,8 +528,10 @@ class OperatorInfo:
 
             if self.test_space == self.trial_space:
                 self.kernel_types.append(
-                    AssembleDiagonal(
-                        self.test_space, type_descriptor=self.type_descriptor, dims=dims
+                    AssembleDiagonalWrapper(
+                        self.test_space,
+                        type_descriptor=self.type_descriptor,
+                        dims=dims,
                     )
                 )
 
@@ -591,7 +593,7 @@ def all_operators(
     ops.append(OperatorInfo(mapping="P2", name="SUPGDiffusion", trial_space=P2, test_space=P2, 
                             form=partial(supg_diffusion, velocity_function_space=P2, diffusivityXdelta_function_space=P2), 
                             type_descriptor=type_descriptor, geometries=list(geometries), opts=opts, blending=blending))
-    
+
     # fmt: on
 
     p2vec_epsilon = partial(
@@ -703,7 +705,7 @@ def generate_elementwise_op(
         name,
         symbolizer,
         opts=optimizations,
-        kernel_types=op_info.kernel_types,
+        kernel_wrapper_types=op_info.kernel_types,
         type_descriptor=type_descriptor,
     )
 
@@ -725,19 +727,20 @@ def generate_elementwise_op(
             blending=blending,  # type: ignore[call-arg] # kw-args are not supported by Callable
         )
 
-        operator.set_element_matrix(
+        operator.add_integral(
+            name="".join(name.split()),
             dim=geometry.dimensions,
             geometry=geometry,
             integration_domain=MacroIntegrationDomain.VOLUME,
             quad=quad,
             blending=blending,
             form=form,
+            loop_strategy=loop_strategy,
         )
 
     dir_path = os.path.join(args.output, op_info.name.split("_")[0])
     operator.generate_class_code(
         dir_path,
-        loop_strategy=loop_strategy,
         clang_format_binary=args.clang_format_binary,
     )
 
diff --git a/hog/operator_generation/kernel_types.py b/hog/operator_generation/kernel_types.py
index 778077cd48eff87879dc4bccebc868293eefa71d..9050623c9d65c5a87c88ce944bf9e01fbc615d87 100644
--- a/hog/operator_generation/kernel_types.py
+++ b/hog/operator_generation/kernel_types.py
@@ -42,16 +42,54 @@ from hog.operator_generation.function_space_impls import FunctionSpaceImpl
 from hog.operator_generation.indexing import FaceType, CellType
 from hog.operator_generation.pystencils_extensions import create_generic_fields
 from hog.operator_generation.types import HOGPrecision, HOGType, hyteg_type
+from hog.operator_generation.loop_strategies import LoopStrategy, SAWTOOTH
 
 
 class KernelType(ABC):
-    name: str
-    src_fields: List[FunctionSpaceImpl]
-    dst_fields: List[FunctionSpaceImpl]
-    _template: Template
-
-    """Type to specify the type of kernel.
-    E.g. a GEMv kernel defines multiple source fields and scalar parameters."""
+    """
+    A HyTeG operator may implement multiple types of methods that perform some kind of operation on a function that
+    is inferred from the bilinear form. For instance, a (matrix-free) matrix-vector multiplication or the assembly
+    of its diagonal.
+
+    Certain operations such as setting boundary data to zero or communication have to be executed before the actual
+    kernel can be executed.
+
+    E.g.:
+
+    ```
+    void apply() {
+        communication()       // "pre-processing"
+        kernel()              // actual kernel
+        post_communication()  // "post-processing"
+    }
+    ```
+
+    For some applications, it might be necessary or at least comfortable to execute multiple kernels inside such a
+    method. For instance, if boundary conditions are applied:
+
+    ```
+    // form is sum of volume and boundary integral:
+    //     ∫ ... dΩ + ∫ ... dS
+    void assemble() {
+        communication()       // "pre-processing"
+        kernel()              // volume kernel (∫ ... dΩ)
+        kernel_boundary()     // boundary kernel (∫ ... dS | additive execution)
+        post_communication()  // "post-processing"
+    }
+    ```
+
+    This class (KernelType) describes the "action" of the kernel (matvec, assembly, ...).
+    Another class (KernelWrapperType) then describes what kind of pre- and post-processing is required.
+
+    ```
+    void assemble() {         // from KernelTypeWrapper
+        communication()       // from KernelTypeWrapper
+        kernel()              // from KernelType + IntegrationInfo 1
+        kernel_boundary()     // from KernelType + IntegrationInfo 2
+        post_communication()  // from KernelTypeWrapper
+    }
+    ```
+    """
 
     @abstractmethod
     def kernel_operation(
@@ -90,27 +128,243 @@ class KernelType(ABC):
         """
         ...
 
+
+class Apply(KernelType):
+    def __init__(self):
+        self.result_prefix = "elMatVec_"
+
+    def kernel_operation(
+        self,
+        src_vecs: List[sp.MatrixBase],
+        dst_vecs: List[sp.MatrixBase],
+        mat: sp.MatrixBase,
+        rows: int,
+    ) -> List[SympyAssignment]:
+        kernel_ops = mat * src_vecs[0]
+
+        tmp_symbols = sp.numbered_symbols(self.result_prefix)
+
+        kernel_op_assignments = [
+            SympyAssignment(tmp, kernel_op)
+            for tmp, kernel_op in zip(tmp_symbols, kernel_ops)
+        ]
+
+        return kernel_op_assignments
+
+    def kernel_post_operation(
+        self,
+        geometry: ElementGeometry,
+        element_index: Tuple[int, int, int],
+        element_type: Union[FaceType, CellType],
+        src_vecs_accesses: List[List[Field.Access]],
+        dst_vecs_accesses: List[List[Field.Access]],
+    ) -> List[ps.astnodes.Node]:
+        tmp_symbols = sp.numbered_symbols(self.result_prefix)
+
+        # Add and store result to destination.
+        store_dst_vecs = [
+            SympyAssignment(a, a + s) for a, s in zip(dst_vecs_accesses[0], tmp_symbols)
+        ]
+        return store_dst_vecs
+
+
+class AssembleDiagonal(KernelType):
+    def __init__(self):
+        self.result_prefix = "elMatDiag_"
+
+    def kernel_operation(
+        self,
+        src_vecs: List[sp.MatrixBase],
+        dst_vecs: List[sp.MatrixBase],
+        mat: sp.MatrixBase,
+        rows: int,
+    ) -> List[SympyAssignment]:
+        kernel_ops = mat.diagonal()
+
+        tmp_symbols = sp.numbered_symbols(self.result_prefix)
+
+        kernel_op_assignments = [
+            SympyAssignment(tmp, kernel_op)
+            for tmp, kernel_op in zip(tmp_symbols, kernel_ops)
+        ]
+
+        return kernel_op_assignments
+
+    def kernel_post_operation(
+        self,
+        geometry: ElementGeometry,
+        element_index: Tuple[int, int, int],
+        element_type: Union[FaceType, CellType],
+        src_vecs_accesses: List[List[Field.Access]],
+        dst_vecs_accesses: List[List[Field.Access]],
+    ) -> List[ps.astnodes.Node]:
+        tmp_symbols = sp.numbered_symbols(self.result_prefix)
+
+        # Add and store result to destination.
+        store_dst_vecs = [
+            SympyAssignment(a, a + s) for a, s in zip(dst_vecs_accesses[0], tmp_symbols)
+        ]
+        return store_dst_vecs
+
+
+class Assemble(KernelType):
+    def __init__(
+        self,
+        src_space: FunctionSpace,
+        dst_space: FunctionSpace,
+    ):
+        self.result_prefix = "elMat_"
+        idx_t = HOGType("idx_t", np.int64)
+
+        self.src: FunctionSpaceImpl = FunctionSpaceImpl.create_impl(
+            src_space, "src", idx_t
+        )
+        self.dst: FunctionSpaceImpl = FunctionSpaceImpl.create_impl(
+            dst_space, "dst", idx_t
+        )
+
+    def kernel_operation(
+        self,
+        src_vecs: List[sp.MatrixBase],
+        dst_vecs: List[sp.MatrixBase],
+        mat: sp.MatrixBase,
+        rows: int,
+    ) -> List[SympyAssignment]:
+        return [
+            SympyAssignment(sp.Symbol(f"{self.result_prefix}{r}_{c}"), mat[r, c])
+            for r in range(mat.shape[0])
+            for c in range(mat.shape[1])
+        ]
+
+    def kernel_post_operation(
+        self,
+        geometry: ElementGeometry,
+        element_index: Tuple[int, int, int],
+        element_type: Union[FaceType, CellType],
+        src_vecs_accesses: List[List[Field.Access]],
+        dst_vecs_accesses: List[List[Field.Access]],
+    ) -> List[ps.astnodes.Node]:
+        src, dst = dst_vecs_accesses
+        el_mat = sp.Matrix(
+            [
+                [sp.Symbol(f"{self.result_prefix}{r}_{c}") for c in range(len(src))]
+                for r in range(len(dst))
+            ]
+        )
+
+        # apply basis/dof transformations
+        transform_src_code, transform_src_mat = self.src.dof_transformation(
+            geometry, element_index, element_type
+        )
+        transform_dst_code, transform_dst_mat = self.dst.dof_transformation(
+            geometry, element_index, element_type
+        )
+        transformed_el_mat = transform_dst_mat.T * el_mat * transform_src_mat
+
+        nr = len(dst)
+        nc = len(src)
+        mat_size = nr * nc
+
+        rowIdx, colIdx = create_generic_fields(["rowIdx", "colIdx"], dtype=np.uint64)
+        # NOTE: The type is 'hyteg_type().pystencils_type', i.e. `real_t`
+        #       (not `self._type_descriptor.pystencils_type`)
+        #       because this function is implemented manually in HyTeG with
+        #       this signature. Passing `np.float64` is not ideal (if `real_t !=
+        #       double`) but it makes sure that casts are inserted if necessary
+        #       (though some might be superfluous).
+        mat = create_generic_fields(
+            ["mat"], dtype=hyteg_type(HOGPrecision.REAL_T).pystencils_type
+        )[0]
+        rowIdxSymb = FieldPointerSymbol(rowIdx.name, rowIdx.dtype, False)
+        colIdxSymb = FieldPointerSymbol(colIdx.name, colIdx.dtype, False)
+        matSymb = FieldPointerSymbol(mat.name, mat.dtype, False)
+
+        body: List[ps.astnodes.Node] = [
+            CustomCodeNode(
+                f"std::vector< uint_t > {rowIdxSymb.name}( {nr} );\n"
+                f"std::vector< uint_t > {colIdxSymb.name}( {nc} );\n"
+                f"std::vector< {mat.dtype} > {matSymb.name}( {mat_size} );",
+                [],
+                [rowIdxSymb, colIdxSymb, matSymb],
+            ),
+            ps.astnodes.EmptyLine(),
+        ]
+        body += [
+            SympyAssignment(
+                rowIdx.absolute_access((k,), (0,)), CastFunc(dst_access, np.uint64)
+            )
+            for k, dst_access in enumerate(dst)
+        ]
+        body += [
+            SympyAssignment(
+                colIdx.absolute_access((k,), (0,)), CastFunc(src_access, np.uint64)
+            )
+            for k, src_access in enumerate(src)
+        ]
+
+        body += [
+            ps.astnodes.EmptyLine(),
+            ps.astnodes.SourceCodeComment("Apply basis transformation"),
+            *sorted({transform_src_code, transform_dst_code}, key=lambda n: n._code),
+            ps.astnodes.EmptyLine(),
+        ]
+
+        body += [
+            SympyAssignment(
+                mat.absolute_access((r * nc + c,), (0,)),
+                CastFunc(transformed_el_mat[r, c], mat.dtype),
+            )
+            for r in range(nr)
+            for c in range(nc)
+        ]
+
+        body += [
+            ps.astnodes.EmptyLine(),
+            CustomCodeNode(
+                f"mat->addValues( {rowIdxSymb.name}, {colIdxSymb.name}, {matSymb.name} );",
+                [
+                    TypedSymbol("mat", "std::shared_ptr< SparseMatrixProxy >"),
+                    rowIdxSymb,
+                    colIdxSymb,
+                    matSymb,
+                ],
+                [],
+            ),
+        ]
+
+        return body
+
+
+class KernelWrapperType(ABC):
+    name: str
+    src_fields: List[FunctionSpaceImpl]
+    dst_fields: List[FunctionSpaceImpl]
+    _template: Template
+    """
+    See documentation of class KernelType. 
+    """
+
+    @property
     @abstractmethod
-    def includes(self) -> Set[str]:
-        ...
+    def kernel_type(self) -> KernelType: ...
 
     @abstractmethod
-    def base_classes(self) -> List[str]:
-        ...
+    def includes(self) -> Set[str]: ...
 
     @abstractmethod
-    def wrapper_methods(self) -> List[CppMethod]:
-        ...
+    def base_classes(self) -> List[str]: ...
 
     @abstractmethod
-    def member_variables(self) -> List[CppMemberVariable]:
-        ...
+    def wrapper_methods(self) -> List[CppMethod]: ...
+
+    @abstractmethod
+    def member_variables(self) -> List[CppMemberVariable]: ...
 
     def substitute(self, subs: Mapping[str, object]) -> None:
         self._template = Template(self._template.safe_substitute(subs))
 
 
-class Apply(KernelType):
+class ApplyWrapper(KernelWrapperType):
     def __init__(
         self,
         src_space: FunctionSpace,
@@ -224,39 +478,9 @@ class Apply(KernelType):
             f'this->stopTiming( "{self.name}" );'
         )
 
-    def kernel_operation(
-        self,
-        src_vecs: List[sp.MatrixBase],
-        dst_vecs: List[sp.MatrixBase],
-        mat: sp.MatrixBase,
-        rows: int,
-    ) -> List[SympyAssignment]:
-        kernel_ops = mat * src_vecs[0]
-
-        tmp_symbols = sp.numbered_symbols(self.result_prefix)
-
-        kernel_op_assignments = [
-            SympyAssignment(tmp, kernel_op)
-            for tmp, kernel_op in zip(tmp_symbols, kernel_ops)
-        ]
-
-        return kernel_op_assignments
-
-    def kernel_post_operation(
-        self,
-        geometry: ElementGeometry,
-        element_index: Tuple[int, int, int],
-        element_type: Union[FaceType, CellType],
-        src_vecs_accesses: List[List[Field.Access]],
-        dst_vecs_accesses: List[List[Field.Access]],
-    ) -> List[ps.astnodes.Node]:
-        tmp_symbols = sp.numbered_symbols(self.result_prefix)
-
-        # Add and store result to destination.
-        store_dst_vecs = [
-            SympyAssignment(a, a + s) for a, s in zip(dst_vecs_accesses[0], tmp_symbols)
-        ]
-        return store_dst_vecs
+    @property
+    def kernel_type(self) -> KernelType:
+        return Apply()
 
     def includes(self) -> Set[str]:
         return (
@@ -302,7 +526,7 @@ class Apply(KernelType):
         return []
 
 
-class GEMV(KernelType):
+class GEMVWrapper(KernelWrapperType):
     def __init__(
         self,
         type_descriptor: HOGType,
@@ -325,7 +549,7 @@ class GEMV(KernelType):
         raise HOGException("Not implemented")
 
 
-class AssembleDiagonal(KernelType):
+class AssembleDiagonalWrapper(KernelWrapperType):
     def __init__(
         self,
         fe_space: FunctionSpace,
@@ -340,7 +564,6 @@ class AssembleDiagonal(KernelType):
         self.src_fields = []
         self.dst_fields = [self.dst]
         self.dims = dims
-        self.result_prefix = "elMatDiag_"
 
         def macro_loop(dim: int) -> str:
             Macro = {2: "Face", 3: "Cell"}[dim]
@@ -413,39 +636,9 @@ class AssembleDiagonal(KernelType):
             f'this->stopTiming( "{self.name}" );'
         )
 
-    def kernel_operation(
-        self,
-        src_vecs: List[sp.MatrixBase],
-        dst_vecs: List[sp.MatrixBase],
-        mat: sp.MatrixBase,
-        rows: int,
-    ) -> List[SympyAssignment]:
-        kernel_ops = mat.diagonal()
-
-        tmp_symbols = sp.numbered_symbols(self.result_prefix)
-
-        kernel_op_assignments = [
-            SympyAssignment(tmp, kernel_op)
-            for tmp, kernel_op in zip(tmp_symbols, kernel_ops)
-        ]
-
-        return kernel_op_assignments
-
-    def kernel_post_operation(
-        self,
-        geometry: ElementGeometry,
-        element_index: Tuple[int, int, int],
-        element_type: Union[FaceType, CellType],
-        src_vecs_accesses: List[List[Field.Access]],
-        dst_vecs_accesses: List[List[Field.Access]],
-    ) -> List[ps.astnodes.Node]:
-        tmp_symbols = sp.numbered_symbols(self.result_prefix)
-
-        # Add and store result to destination.
-        store_dst_vecs = [
-            SympyAssignment(a, a + s) for a, s in zip(dst_vecs_accesses[0], tmp_symbols)
-        ]
-        return store_dst_vecs
+    @property
+    def kernel_type(self) -> KernelType:
+        return AssembleDiagonal()
 
     def includes(self) -> Set[str]:
         return {"hyteg/solvers/Smoothables.hpp"} | self.dst.includes()
@@ -483,7 +676,7 @@ class AssembleDiagonal(KernelType):
         ]
 
 
-class Assemble(KernelType):
+class AssembleWrapper(KernelWrapperType):
     def __init__(
         self,
         src_space: FunctionSpace,
@@ -508,7 +701,6 @@ class Assemble(KernelType):
 
         self.type_descriptor = type_descriptor
         self.dims = dims
-        self.result_prefix = "elMat_"
 
         def macro_loop(dim: int) -> str:
             Macro = {2: "Face", 3: "Cell"}[dim]
@@ -563,116 +755,9 @@ class Assemble(KernelType):
             f'this->stopTiming( "{self.name}" );'
         )
 
-    def kernel_operation(
-        self,
-        src_vecs: List[sp.MatrixBase],
-        dst_vecs: List[sp.MatrixBase],
-        mat: sp.MatrixBase,
-        rows: int,
-    ) -> List[SympyAssignment]:
-        return [
-            SympyAssignment(sp.Symbol(f"{self.result_prefix}{r}_{c}"), mat[r, c])
-            for r in range(mat.shape[0])
-            for c in range(mat.shape[1])
-        ]
-
-    def kernel_post_operation(
-        self,
-        geometry: ElementGeometry,
-        element_index: Tuple[int, int, int],
-        element_type: Union[FaceType, CellType],
-        src_vecs_accesses: List[List[Field.Access]],
-        dst_vecs_accesses: List[List[Field.Access]],
-    ) -> List[ps.astnodes.Node]:
-        src, dst = dst_vecs_accesses
-        el_mat = sp.Matrix(
-            [
-                [sp.Symbol(f"{self.result_prefix}{r}_{c}") for c in range(len(src))]
-                for r in range(len(dst))
-            ]
-        )
-
-        # apply basis/dof transformations
-        transform_src_code, transform_src_mat = self.src.dof_transformation(
-            geometry, element_index, element_type
-        )
-        transform_dst_code, transform_dst_mat = self.dst.dof_transformation(
-            geometry, element_index, element_type
-        )
-        transformed_el_mat = transform_dst_mat.T * el_mat * transform_src_mat
-
-        nr = len(dst)
-        nc = len(src)
-        mat_size = nr * nc
-
-        rowIdx, colIdx = create_generic_fields(["rowIdx", "colIdx"], dtype=np.uint64)
-        # NOTE: The type is 'hyteg_type().pystencils_type', i.e. `real_t`
-        #       (not `self._type_descriptor.pystencils_type`)
-        #       because this function is implemented manually in HyTeG with
-        #       this signature. Passing `np.float64` is not ideal (if `real_t !=
-        #       double`) but it makes sure that casts are inserted if necessary
-        #       (though some might be superfluous).
-        mat = create_generic_fields(
-            ["mat"], dtype=hyteg_type(HOGPrecision.REAL_T).pystencils_type
-        )[0]
-        rowIdxSymb = FieldPointerSymbol(rowIdx.name, rowIdx.dtype, False)
-        colIdxSymb = FieldPointerSymbol(colIdx.name, colIdx.dtype, False)
-        matSymb = FieldPointerSymbol(mat.name, mat.dtype, False)
-
-        body: List[ps.astnodes.Node] = [
-            CustomCodeNode(
-                f"std::vector< uint_t > {rowIdxSymb.name}( {nr} );\n"
-                f"std::vector< uint_t > {colIdxSymb.name}( {nc} );\n"
-                f"std::vector< {mat.dtype} > {matSymb.name}( {mat_size} );",
-                [],
-                [rowIdxSymb, colIdxSymb, matSymb],
-            ),
-            ps.astnodes.EmptyLine(),
-        ]
-        body += [
-            SympyAssignment(
-                rowIdx.absolute_access((k,), (0,)), CastFunc(dst_access, np.uint64)
-            )
-            for k, dst_access in enumerate(dst)
-        ]
-        body += [
-            SympyAssignment(
-                colIdx.absolute_access((k,), (0,)), CastFunc(src_access, np.uint64)
-            )
-            for k, src_access in enumerate(src)
-        ]
-
-        body += [
-            ps.astnodes.EmptyLine(),
-            ps.astnodes.SourceCodeComment("Apply basis transformation"),
-            *sorted({transform_src_code, transform_dst_code}, key=lambda n: n._code),
-            ps.astnodes.EmptyLine(),
-        ]
-
-        body += [
-            SympyAssignment(
-                mat.absolute_access((r * nc + c,), (0,)),
-                CastFunc(transformed_el_mat[r, c], mat.dtype),
-            )
-            for r in range(nr)
-            for c in range(nc)
-        ]
-
-        body += [
-            ps.astnodes.EmptyLine(),
-            CustomCodeNode(
-                f"mat->addValues( {rowIdxSymb.name}, {colIdxSymb.name}, {matSymb.name} );",
-                [
-                    TypedSymbol("mat", "std::shared_ptr< SparseMatrixProxy >"),
-                    rowIdxSymb,
-                    colIdxSymb,
-                    matSymb,
-                ],
-                [],
-            ),
-        ]
-
-        return body
+    @property
+    def kernel_type(self) -> KernelType:
+        return Assemble(self.src.fe_space, self.dst.fe_space)
 
     def includes(self) -> Set[str]:
         return (
diff --git a/hog/operator_generation/operators.py b/hog/operator_generation/operators.py
index 34b28661fe21af58c4e2168cab54a6680308f009..2eb16fc268b28e80f33601fe340d2b5fc87eba76 100644
--- a/hog/operator_generation/operators.py
+++ b/hog/operator_generation/operators.py
@@ -83,8 +83,8 @@ from hog.operator_generation.indexing import (
     element_vertex_coordinates,
     IndexingInfo,
 )
-from hog.operator_generation.kernel_types import Assemble, KernelType
-from hog.operator_generation.loop_strategies import LoopStrategy, SAWTOOTH
+from hog.operator_generation.kernel_types import KernelWrapperType, KernelType, Assemble
+from hog.operator_generation.loop_strategies import LoopStrategy, SAWTOOTH, CUBES
 from hog.operator_generation.optimizer import Optimizer, Opts
 from hog.quadrature import QuadLoop, Quadrature
 from hog.symbolizer import Symbolizer
@@ -100,6 +100,8 @@ class MacroIntegrationDomain(Enum):
 
 @dataclass
 class IntegrationInfo:
+    """Data associated with one integral term and the corresponding loop pattern."""
+
     geometry: ElementGeometry  # geometry of the element, e.g. tetrahedron
     integration_domain: (
         MacroIntegrationDomain  # entity of geometry to integrate over, e.g. facet
@@ -111,22 +113,27 @@ class IntegrationInfo:
     quad_loop: Optional[QuadLoop]
     mat: sp.MatrixBase
 
+    loop_strategy: LoopStrategy
+
+    name: str = "unknown_integral"
     docstring: str = ""
 
     def _str_(self):
-        return f"Integration Info: {self.geometry}, {self.integration_domain}, mat shape {self.mat.shape}, quad degree {self.quad.degree}, blending {self.blending}"
+        return f"Integration Info: {self.name}, {self.geometry}, {self.integration_domain}, mat shape {self.mat.shape}, quad degree {self.quad.degree}, blending {self.blending}"
 
     def _repr_(self):
         return str(self)
 
 
 @dataclass
-class Kernel:
-    kernel_type: KernelType
-    kernel_function: KernelFunction
-    platform_dependent_funcs: Dict[str, KernelFunction]
-    operation_count: str
-    integration_info: IntegrationInfo
+class OperatorMethod:
+    """Collection of kernels and metadata required for each method of an operator."""
+
+    kernel_wrapper_type: KernelWrapperType
+    kernel_functions: List[KernelFunction]
+    platform_dependent_funcs: List[Dict[str, KernelFunction]]
+    operation_counts: List[str]
+    integration_infos: List[IntegrationInfo]
 
 
 class CppClassFiles(Enum):
@@ -163,16 +170,16 @@ class HyTeGElementwiseOperator:
         self,
         name: str,
         symbolizer: Symbolizer,
-        kernel_types: List[KernelType],
+        kernel_wrapper_types: List[KernelWrapperType],
         opts: Set[Opts],
         type_descriptor: HOGType,
     ):
         self.name = name
         self.symbolizer = symbolizer
-        self.kernel_types = kernel_types  # type of kernel: e.g. GEMV
+        self.kernel_wrapper_types = kernel_wrapper_types  # type of kernel: e.g. GEMV
 
-        # Holds one element matrix and quad scheme for each ElementGeometry.
-        self.element_matrices: Dict[int, IntegrationInfo] = {}
+        # Each IntegrationInfo object represents one integral of the weak formulation.
+        self.integration_infos: Dict[int, List[IntegrationInfo]] = {}
 
         self._optimizer = Optimizer(opts)
         self._optimizer_no_vec = Optimizer(opts - {Opts.VECTORIZE, Opts.VECTORIZE512})
@@ -183,30 +190,40 @@ class HyTeGElementwiseOperator:
         # coefficients
         self.coeffs: Dict[str, FunctionSpaceImpl] = {}
         # implementations for each kernel, generated at a later stage
-        self.kernels: List[Kernel] = []
+        self.operator_methods: List[OperatorMethod] = []
 
-    def set_element_matrix(
+    def add_integral(
         self,
+        name: str,
         dim: int,
         geometry: ElementGeometry,
         integration_domain: MacroIntegrationDomain,
         quad: Quadrature,
         blending: GeometryMap,
         form: Form,
+        loop_strategy: LoopStrategy,
     ) -> None:
         """
-        Use this method to add element matrices to the operator.
+        Use this method to add integrals to the operator.
 
+        :param name: some name for this integral (no spaces please)
         :param dim: the dimensionality of the domain, i.e. the volume - it may be that this does not match the geometry
                     of the element, e.g., when facet integrals are required, note that only one routine per dim is
                     created
         :param geometry: geometry that shall be integrated over
         :param integration_domain: where to integrate - see MacroIntegrationDomain
-        :param mat: the local element matrix
         :param quad: the employed quadrature scheme - should match what has been used to integrate the weak form
         :param blending: the same geometry map that has been passed to the form
+        :param form: the integrand
+        :param loop_strategy: loop pattern over the refined macro-volume - must somehow be compatible with the
+                              integration domain
         """
 
+        if "".join(name.split()) != name:
+            raise HOGException(
+                "Please give the integral an identifier without white space."
+            )
+
         if dim not in [2, 3]:
             raise HOGException("Only supporting 2D and 3D. Dim should be in [2, 3]")
 
@@ -215,11 +232,10 @@ class HyTeGElementwiseOperator:
         if dim != geometry.dimensions:
             raise HOGException("Only volume integrals supported as of now.")
 
-        if dim in self.element_matrices:
-            raise HOGException(
-                "You are trying to overwrite an already specified integration routine by calling this method twice with "
-                "the same dim argument."
-            )
+        if integration_domain == MacroIntegrationDomain.VOLUME and not (
+            isinstance(loop_strategy, SAWTOOTH) or isinstance(loop_strategy, CUBES)
+        ):
+            raise HOGException("Invalid loop strategy for volume integrals.")
 
         tables = []
         quad_loop = None
@@ -256,15 +272,22 @@ class HyTeGElementwiseOperator:
                                 mat[row, col], self.symbolizer, blending
                             )
 
-        self.element_matrices[dim] = IntegrationInfo(
-            geometry=geometry,
-            integration_domain=integration_domain,
-            quad=quad,
-            blending=blending,
-            tables=tables,
-            quad_loop=quad_loop,
-            mat=mat,
-            docstring=form.docstring,
+        if dim not in self.integration_infos:
+            self.integration_infos[dim] = []
+
+        self.integration_infos[dim].append(
+            IntegrationInfo(
+                name=name,
+                geometry=geometry,
+                integration_domain=integration_domain,
+                quad=quad,
+                blending=blending,
+                tables=tables,
+                quad_loop=quad_loop,
+                mat=mat,
+                docstring=form.docstring,
+                loop_strategy=loop_strategy,
+            )
         )
 
     def coefficients(self) -> List[FunctionSpaceImpl]:
@@ -282,7 +305,6 @@ class HyTeGElementwiseOperator:
     def generate_class_code(
         self,
         dir_path: str,
-        loop_strategy: LoopStrategy = SAWTOOTH(),
         class_files: CppClassFiles = CppClassFiles.HEADER_AND_IMPL,
         clang_format_binary: Optional[str] = None,
     ) -> None:
@@ -297,29 +319,30 @@ class HyTeGElementwiseOperator:
         """
 
         # Asking the optimizer if optimizations are valid.
-        self._optimizer.check_opts_validity(loop_strategy)
+        self._optimizer.check_opts_validity()
         # Generate each kernel type (apply, gemv, ...).
-        self.generate_kernels(loop_strategy)
+        self.generate_kernels()
 
         # Setting up the final C++ class.
         operator_cpp_class = CppClass(
             name=self.name,
             base_classes=sorted(
-                {base for kt in self.kernel_types for base in kt.base_classes()}
+                {base for kt in self.kernel_wrapper_types for base in kt.base_classes()}
             ),
         )
 
         # Adding form docstring to C++ class
         form_docstrings = set()
-        for d, io in self.element_matrices.items():
-            form_docstrings.add(io.docstring)
+        for d, ios in self.integration_infos.items():
+            for io in ios:
+                form_docstrings.add(io.docstring)
         for form_docstring in form_docstrings:
             form_docstring_with_slashes = "/// ".join(form_docstring.splitlines(True))
             operator_cpp_class.add(CppComment(form_docstring_with_slashes, where="all"))
 
-        for kernel_type in self.kernel_types:
+        for kernel_wrapper_type in self.kernel_wrapper_types:
             # Setting up communication.
-            kernel_type.substitute(
+            kernel_wrapper_type.substitute(
                 {
                     "comm_fe_functions_2D": "\n".join(
                         coeff.pre_communication(2) for coeff in self.coefficients()
@@ -331,48 +354,79 @@ class HyTeGElementwiseOperator:
             )
 
             # Wrapper methods ("hand-crafted")
-            for kernel_wrapper_cpp_method in kernel_type.wrapper_methods():
+            for kernel_wrapper_cpp_method in kernel_wrapper_type.wrapper_methods():
                 operator_cpp_class.add(kernel_wrapper_cpp_method)
 
             # Member variables
-            for member in kernel_type.member_variables():
+            for member in kernel_wrapper_type.member_variables():
                 operator_cpp_class.add(member)
 
         # Add all kernels to the class.
-        for kernel in self.kernels:
-            kernel_op_count = "\n".join(
-                [
-                    f"Kernel type: {kernel.kernel_type.name}",
-                    f"- quadrature rule: {kernel.integration_info.quad}",
-                    f"- operations per element:",
-                    kernel.operation_count,
-                ]
-            )
+        for operator_method in self.operator_methods:
 
-            if class_files == CppClassFiles.HEADER_IMPL_AND_VARIANTS:
-                operator_cpp_class.add(
-                    CppMethodWithVariants(
-                        {
-                            platform: CppMethod.from_kernel_function(
-                                kernel,
-                                is_const=True,
-                                visibility="private",
-                                docstring=indent(kernel_op_count, "/// "),
-                            )
-                            for platform, kernel in kernel.platform_dependent_funcs.items()
-                        }
-                    )
+            num_integrals = len(operator_method.integration_infos)
+
+            if num_integrals != len(
+                operator_method.kernel_functions
+            ) or num_integrals != len(operator_method.operation_counts):
+                raise HOGException(
+                    "There should be as many IntegrationInfo (aka integrals) as KernelFunctions (aka kernels)."
                 )
-            else:
-                operator_cpp_class.add(
-                    CppMethod.from_kernel_function(
-                        kernel.kernel_function,
-                        is_const=True,
-                        visibility="private",
-                        docstring=indent(kernel_op_count, "/// "),
-                    )
+
+            for (
+                integration_info,
+                sub_kernel,
+                operation_count,
+                platform_dependent_funcs,
+            ) in zip(
+                operator_method.integration_infos,
+                operator_method.kernel_functions,
+                operator_method.operation_counts,
+                operator_method.platform_dependent_funcs,
+            ):
+                kernel_docstring = "\n".join(
+                    [
+                        f"\nIntegral: {integration_info.name}",
+                        f"- volume element:  {integration_info.geometry}",
+                        f"- kernel type:     {operator_method.kernel_wrapper_type.name}",
+                        f"- loop strategy:   {integration_info.loop_strategy}",
+                        f"- quadrature rule: {integration_info.quad}",
+                        f"- blending map:    {integration_info.blending}",
+                        f"- operations per element:",
+                        operation_count,
+                    ]
                 )
 
+                if class_files == CppClassFiles.HEADER_IMPL_AND_VARIANTS:
+                    operator_cpp_class.add(
+                        CppMethodWithVariants(
+                            {
+                                platform: CppMethod.from_kernel_function(
+                                    plat_dep_kernel,
+                                    is_const=True,
+                                    visibility="private",
+                                    docstring=indent(
+                                        kernel_docstring,
+                                        "/// ",
+                                    ),
+                                )
+                                for platform, plat_dep_kernel in platform_dependent_funcs.items()
+                            }
+                        )
+                    )
+                else:
+                    operator_cpp_class.add(
+                        CppMethod.from_kernel_function(
+                            sub_kernel,
+                            is_const=True,
+                            visibility="private",
+                            docstring=indent(
+                                kernel_docstring,
+                                "/// ",
+                            ),
+                        )
+                    )
+
         # Finally we know what fields we need and can build the constructors, member variables, and includes.
 
         # Constructors ...
@@ -423,10 +477,25 @@ class HyTeGElementwiseOperator:
         func_space_includes = set().union(
             *[coeff.includes() for coeff in self.coefficients()]
         )
-        kernel_includes = set().union(*[kt.includes() for kt in self.kernel_types])
+        kernel_includes = set().union(
+            *[kt.includes() for kt in self.kernel_wrapper_types]
+        )
         blending_includes = set()
-        for dim, integration_info in self.element_matrices.items():
-            for inc in integration_info.blending.coupling_includes():
+        for dim, integration_infos in self.integration_infos.items():
+
+            if not all(
+                [
+                    integration_infos[0].blending.coupling_includes()
+                    == io.blending.coupling_includes()
+                    for io in integration_infos
+                ]
+            ):
+                raise HOGException(
+                    "Seems that there are different blending functions in one bilinear form (likely in two different "
+                    "integrals). This is not supported yet. :("
+                )
+
+            for inc in integration_infos[0].blending.coupling_includes():
                 blending_includes.add(inc)
         all_includes = (
             self.INCLUDES | func_space_includes | kernel_includes | blending_includes
@@ -665,8 +734,9 @@ class HyTeGElementwiseOperator:
         self,
         dim: int,
         integration_info: IntegrationInfo,
-        loop_strategy: LoopStrategy,
         kernel_type: KernelType,
+        src_fields: List[FunctionSpaceImpl],
+        dst_fields: List[FunctionSpaceImpl],
     ) -> Tuple[ps.astnodes.Block, str]:
         """
         This method generates an AST that represents the passed kernel type.
@@ -675,7 +745,6 @@ class HyTeGElementwiseOperator:
 
         :param integration_info: IntegrationInfo object holding the symbolic element matrix, quadrature rule,
                                  element geometry, etc.
-        :param loop_strategy:    defines the iteration pattern
         :param kernel_type:      specifies the kernel to execute - this could be e.g., a matrix-vector
                                  multiplication
         :returns: tuple (pre_loop_stmts, loop, operations_table)
@@ -708,14 +777,14 @@ class HyTeGElementwiseOperator:
             self.symbolizer.dof_symbols_as_vector(
                 src_field.fe_space.num_dofs(geometry), src_field.name
             )
-            for src_field in kernel_type.src_fields
+            for src_field in src_fields
         ]
 
         dst_vecs_symbols = [
             self.symbolizer.dof_symbols_as_vector(
                 dst_field.fe_space.num_dofs(geometry), dst_field.name
             )
-            for dst_field in kernel_type.dst_fields
+            for dst_field in dst_fields
         ]
 
         # Do the kernel operation.
@@ -785,7 +854,7 @@ class HyTeGElementwiseOperator:
         element_index = [
             LoopOverCoordinate.get_loop_counter_symbol(i) for i in range(dim)
         ]
-        loop = loop_strategy.create_loop(
+        loop = integration_info.loop_strategy.create_loop(
             dim, element_index, indexing_info.micro_edges_per_macro_edge
         )
 
@@ -802,7 +871,7 @@ class HyTeGElementwiseOperator:
                     element_type,
                     indexing_info,
                 )
-                for src_field in kernel_type.src_fields
+                for src_field in src_fields
             ]
             dst_vecs_accesses = [
                 dst_field.local_dofs(
@@ -811,7 +880,7 @@ class HyTeGElementwiseOperator:
                     element_type,
                     indexing_info,
                 )
-                for dst_field in kernel_type.dst_fields
+                for dst_field in dst_fields
             ]
 
             # Load source DoFs.
@@ -908,16 +977,14 @@ class HyTeGElementwiseOperator:
                     )
                 )
 
-                blending_assignments += (
-                    hog.code_generation.hessian_matrix_assignments(
-                        mat,
-                        integration_info.tables + quad_loop,
-                        geometry,
-                        self.symbolizer,
-                        affine_points=element_vertex_coordinates_symbols,
-                        blending=integration_info.blending,
-                        quad_info=integration_info.quad,
-                    )
+                blending_assignments += hog.code_generation.hessian_matrix_assignments(
+                    mat,
+                    integration_info.tables + quad_loop,
+                    geometry,
+                    self.symbolizer,
+                    affine_points=element_vertex_coordinates_symbols,
+                    blending=integration_info.blending,
+                    quad_info=integration_info.quad,
                 )
 
                 with TimedLogger("cse on blending operation", logging.DEBUG):
@@ -972,7 +1039,7 @@ class HyTeGElementwiseOperator:
             body = add_types(body, kernel_config)
 
             # add the created loop body of statements to the loop according to the loop strategy
-            loop_strategy.add_body_to_loop(loop, body, element_type)
+            integration_info.loop_strategy.add_body_to_loop(loop, body, element_type)
 
             # This actually replaces the abstract field access instances with
             # array accesses that contain the index computations.
@@ -1020,7 +1087,9 @@ class HyTeGElementwiseOperator:
             "renaming loop bodies and preloop stmts for element types", logging.DEBUG
         ):
             for element_type, preloop in preloop_stmts.items():
-                loop = loop_strategy.add_preloop_for_loop(loop, preloop, element_type)
+                loop = integration_info.loop_strategy.add_preloop_for_loop(
+                    loop, preloop, element_type
+                )
 
         # Add quadrature points and weights array declarations, but only those
         # which are actually needed.
@@ -1033,7 +1102,7 @@ class HyTeGElementwiseOperator:
 
         return (Block(body), ops.to_table())
 
-    def generate_kernels(self, loop_strategy: LoopStrategy) -> None:
+    def generate_kernels(self) -> None:
         """
         TODO: Split this up in a meaningful way.
               Currently does a lot of stuff and modifies the kernel_types.
@@ -1051,58 +1120,82 @@ class HyTeGElementwiseOperator:
         This information is gathered here, and written to the kernel type object,
         """
 
-        for kernel_type in self.kernel_types:
-            for dim, integration_info in self.element_matrices.items():
-                geometry = integration_info.geometry
+        for kernel_wrapper_type in self.kernel_wrapper_types:
+            for dim, integration_infos in self.integration_infos.items():
+
+                kernel_functions = []
+                kernel_op_counts = []
+                platform_dep_kernels = []
+
                 macro_type = {2: "face", 3: "cell"}
 
-                # generate AST of kernel loop
-                with TimedLogger(
-                    f"Generating kernel: {kernel_type.name} in {dim}D", logging.INFO
-                ):
-                    (
-                        function_body,
-                        kernel_op_count,
-                    ) = self._generate_kernel(
-                        dim, integration_info, loop_strategy, kernel_type
+                geometry = integration_infos[0].geometry
+                if not all([geometry == io.geometry for io in integration_infos]):
+                    raise HOGException(
+                        "All element geometries should be the same. Regardless of whether you integrate over their "
+                        "boundary or volume. Dev note: this information seems to be redundant and we should only have "
+                        "it in one place."
                     )
 
-                kernel_function = KernelFunction(
-                    function_body,
-                    Target.CPU,
-                    Backend.C,
-                    make_python_function,
-                    ghost_layers=None,
-                    function_name=f"{kernel_type.name}_macro_{geometry.dimensions}D",
-                    assignments=None,
-                )
+                for integration_info in integration_infos:
+
+                    # generate AST of kernel loop
+                    with TimedLogger(
+                        f"Generating kernel {integration_info.name} (wrapper: {kernel_wrapper_type.name} in {dim}D",
+                        logging.INFO,
+                    ):
+
+                        (
+                            function_body,
+                            kernel_op_count,
+                        ) = self._generate_kernel(
+                            dim,
+                            integration_info,
+                            kernel_wrapper_type.kernel_type,
+                            kernel_wrapper_type.src_fields,
+                            kernel_wrapper_type.dst_fields,
+                        )
 
-                # optimizer applies optimizations
-                with TimedLogger(
-                    f"Optimizing kernel: {kernel_type.name} in {dim}D", logging.INFO
-                ):
-                    optimizer = (
-                        self._optimizer
-                        if not isinstance(kernel_type, Assemble)
-                        else self._optimizer_no_vec
-                    )
-                    platform_dep_kernels = optimizer.apply_to_kernel(
-                        kernel_function, dim, loop_strategy
-                    )
+                        kernel_function = KernelFunction(
+                            function_body,
+                            Target.CPU,
+                            Backend.C,
+                            make_python_function,
+                            ghost_layers=None,
+                            function_name=f"{kernel_wrapper_type.name}_{integration_info.name}_macro_{geometry.dimensions}D",
+                            assignments=None,
+                        )
 
-                kernel_parameters = kernel_function.get_parameters()
+                        kernel_functions.append(kernel_function)
+                        kernel_op_counts.append(kernel_op_count)
+
+                    # optimizer applies optimizations
+                    with TimedLogger(
+                        f"Optimizing kernel: {kernel_function.function_name} in {dim}D",
+                        logging.INFO,
+                    ):
+                        optimizer = (
+                            self._optimizer
+                            if not isinstance(kernel_wrapper_type.kernel_type, Assemble)
+                            else self._optimizer_no_vec
+                        )
+                        platform_dep_kernels.append(
+                            optimizer.apply_to_kernel(
+                                kernel_function, dim, integration_info.loop_strategy
+                            )
+                        )
 
-                # setup kernel string and op count table
+                # Setup kernel wrapper string and op count table
                 #
-                # in the following, we insert the sub strings of the final kernel string:
+                # In the following, we insert the sub strings of the final kernel string:
                 # coefficients (retrieving pointers), setup of scalar parameters, kernel function call
                 # This is done as follows:
                 # - the kernel type has the skeleton string in which the sub string must be substituted
-                # - the function space impl knows from which function type a src/dst/coefficient field is and can return the
-                #   corresponding sub string
+                # - the function space impl knows from which function type a src/dst/coefficient field is and can return
+                #   the corresponding sub string
 
                 # Retrieve coefficient pointers
-                kernel_type.substitute(
+                kernel_wrapper_type.substitute(
                     {
                         f"pointer_retrieval_{dim}D": "\n".join(
                             coeff.pointer_retrieval(dim)
@@ -1126,32 +1219,56 @@ class HyTeGElementwiseOperator:
                     ]
                 )
 
-                scalar_parameter_setup += (
-                    integration_info.blending.parameter_coupling_code()
-                )
+                blending_parameter_coupling_code = integration_infos[
+                    0
+                ].blending.parameter_coupling_code()
+                if not all(
+                    [
+                        blending_parameter_coupling_code
+                        == io.blending.parameter_coupling_code()
+                        for io in integration_infos
+                    ]
+                ):
+                    raise HOGException(
+                        "It seems you specified different blending maps for one bilinear form. "
+                        "This may be desired, but it is certainly not supported :)."
+                    )
+
+                scalar_parameter_setup += blending_parameter_coupling_code
 
-                kernel_type.substitute(
+                kernel_wrapper_type.substitute(
                     {f"scalar_parameter_setup_{dim}D": scalar_parameter_setup}
                 )
 
-                # Kernel function call.
-                kernel_function_call_parameter_string = ",\n".join(
-                    [str(prm) for prm in kernel_parameters]
-                )
-                kernel_type.substitute(
+                # Kernel function call(s).
+                kernel_function_call_strings = []
+                for kernel_function in kernel_functions:
+                    kernel_parameters = kernel_function.get_parameters()
+
+                    kernel_function_call_parameter_string = ",\n".join(
+                        [str(prm) for prm in kernel_parameters]
+                    )
+
+                    kernel_function_call_strings.append(
+                        f"""
+                                    {kernel_function.function_name}(\n
+                                    {kernel_function_call_parameter_string});"""
+                    )
+
+                kernel_wrapper_type.substitute(
                     {
-                        f"kernel_function_call_{dim}D": f"""
-                                {kernel_function.function_name}(\n
-                                {kernel_function_call_parameter_string});"""
+                        f"kernel_function_call_{dim}D": "\n".join(
+                            kernel_function_call_strings
+                        )
                     }
                 )
 
                 # Collect all information
-                kernel = Kernel(
-                    kernel_type,
-                    kernel_function,
+                kernel = OperatorMethod(
+                    kernel_wrapper_type,
+                    kernel_functions,
                     platform_dep_kernels,
-                    kernel_op_count,
-                    integration_info,
+                    kernel_op_counts,
+                    integration_infos,
                 )
-                self.kernels.append(kernel)
+                self.operator_methods.append(kernel)
diff --git a/hog/operator_generation/optimizer.py b/hog/operator_generation/optimizer.py
index a64fc0ae499c63f805a051e30e5ab599c444960d..6d433c4b027553fd681954c9411f47fdc450c75d 100644
--- a/hog/operator_generation/optimizer.py
+++ b/hog/operator_generation/optimizer.py
@@ -108,8 +108,8 @@ class Optimizer:
     def __getitem__(self, opt):
         return opt in self._opts
 
-    def check_opts_validity(self, loop_strategy: LoopStrategy) -> None:
-        """Checks if the desired optimizations are valid for the given loop strategy."""
+    def check_opts_validity(self) -> None:
+        """Checks if the desired optimizations are valid."""
 
         if Opts.VECTORIZE512 in self._opts and not Opts.VECTORIZE in self._opts:
             raise HOGException("Optimization VECTORIZE512 requires VECTORIZE.")
@@ -191,6 +191,7 @@ class Optimizer:
 
             with TimedLogger("simplifying conditionals", logging.DEBUG):
                 simplify_conditionals(loop, loop_counter_simplification=True)
+
         if self[Opts.MOVECONSTANTS]:
             with TimedLogger("moving constants out of loop", logging.DEBUG):
                 # This has to be done twice because sometimes constants are not moved completely to the surrounding block but
diff --git a/hog_tests/operator_generation/test_opgen_smoke.py b/hog_tests/operator_generation/test_opgen_smoke.py
new file mode 100644
index 0000000000000000000000000000000000000000..b328de9311ecd64a9ad32fb435e2b0d599212314
--- /dev/null
+++ b/hog_tests/operator_generation/test_opgen_smoke.py
@@ -0,0 +1,114 @@
+# HyTeG Operator Generator
+# Copyright (C) 2024  HyTeG Team
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <https://www.gnu.org/licenses/>.
+
+from sympy.core.cache import clear_cache
+
+from hog.blending import IdentityMap
+from hog.operator_generation.loop_strategies import CUBES
+from hog.operator_generation.optimizer import Opts
+from hog.element_geometry import TriangleElement
+from hog.function_space import LagrangianFunctionSpace
+from hog.operator_generation.operators import (
+    HyTeGElementwiseOperator,
+    MacroIntegrationDomain,
+)
+from hog.symbolizer import Symbolizer
+from hog.quadrature import Quadrature, select_quadrule
+from hog.forms import div_k_grad, mass
+from hog.operator_generation.kernel_types import ApplyWrapper
+from hog.operator_generation.types import hyteg_type
+from hog.blending import AnnulusMap
+
+
+def test_opgen_smoke():
+    """
+    Just a simple smoke test to check that an operator can be generated.
+
+    If something is really broken, this will make the CI fail early.
+
+    We are generating a matvec method here for
+
+        ∫ k ∇u · ∇v dx + ∫ uv dx
+
+    with either integral being evaluated in their own kernel.
+
+    That may not be reasonable but tests some features.
+    """
+    clear_cache()
+
+    symbolizer = Symbolizer()
+    volume_geometry = TriangleElement()
+
+    name = f"P2DivKGradBlendingPlusMass"
+
+    trial = LagrangianFunctionSpace(2, symbolizer)
+    test = LagrangianFunctionSpace(2, symbolizer)
+    coeff = LagrangianFunctionSpace(2, symbolizer)
+    quad = Quadrature(select_quadrule(2, volume_geometry), volume_geometry)
+
+    divkgrad = div_k_grad(trial, test, volume_geometry, symbolizer, AnnulusMap(), coeff)
+    m = mass(trial, test, volume_geometry, symbolizer, AnnulusMap())
+
+    type_descriptor = hyteg_type()
+
+    kernel_types = [
+        ApplyWrapper(
+            test,
+            trial,
+            type_descriptor=type_descriptor,
+            dims=[2],
+        )
+    ]
+
+    opts = {Opts.MOVECONSTANTS, Opts.VECTORIZE, Opts.TABULATE, Opts.QUADLOOPS}
+
+    operator = HyTeGElementwiseOperator(
+        name,
+        symbolizer=symbolizer,
+        kernel_wrapper_types=kernel_types,
+        opts=opts,
+        type_descriptor=type_descriptor,
+    )
+
+    operator.add_integral(
+        name="div_k_grad",
+        dim=volume_geometry.dimensions,
+        geometry=volume_geometry,
+        integration_domain=MacroIntegrationDomain.VOLUME,
+        quad=quad,
+        blending=AnnulusMap(),
+        form=divkgrad,
+        loop_strategy=CUBES(),
+    )
+
+    operator.add_integral(
+        name="mass",
+        dim=volume_geometry.dimensions,
+        geometry=volume_geometry,
+        integration_domain=MacroIntegrationDomain.VOLUME,
+        quad=quad,
+        blending=AnnulusMap(),
+        form=m,
+        loop_strategy=CUBES(),
+    )
+
+    operator.generate_class_code(
+        ".",
+    )
+
+
+if __name__ == "__main__":
+    test_opgen_smoke()