diff --git a/generate_all_operators.py b/generate_all_operators.py
index e1904eb6da34f1a4ae0baf57a8adf1d8e0cd0911..54b3260201f0855bc472019cf2a3e0185c057d68 100644
--- a/generate_all_operators.py
+++ b/generate_all_operators.py
@@ -72,7 +72,8 @@ from hog.operator_generation.kernel_types import (
     ApplyWrapper,
     AssembleWrapper,
     AssembleDiagonalWrapper,
-    KernelWrapperType, AssembleLocalMatrices, AssembleLocalMatricesWrapper,
+    KernelWrapperType, AssembleLocalMatricesWrapper,
+
 )
 from hog.operator_generation.loop_strategies import (
     LoopStrategy,
@@ -572,14 +573,14 @@ class OperatorInfo:
                     )
                 )
 
-            #self.kernel_types.append(
-            #    AssembleLocalMatricesWrapper(
-            #        self.trial_space,
-            #        self.test_space,
-            #        type_descriptor=self.type_descriptor,
-            #        dims=dims,
-            #    )
-            #)
+            self.kernel_types.append(
+                AssembleLocalMatricesWrapper(
+                    self.trial_space,
+                    self.test_space,
+                    type_descriptor=self.type_descriptor,
+                    dims=dims,
+                )
+            )
 
 
 def all_operators(
@@ -603,7 +604,7 @@ def all_operators(
     # fmt: off
     # TODO switch to manual specification of opts for now/developement, later use default factory
     ops.append(OperatorInfo("P1", "PSPG", TrialSpace(P1), TestSpace(P1),
-                            form=partial(div_k_grad, coefficient_function_space=P1),
+                            form=partial(pspg, coefficient_function_space=P1),
                             type_descriptor=type_descriptor, geometries=list(geometries), opts=opts, blending=blending))
 
     ops.append(OperatorInfo("N1E1", "CurlCurl", TrialSpace(N1E1), TestSpace(N1E1), form=curl_curl,
diff --git a/hog/operator_generation/kernel_types.py b/hog/operator_generation/kernel_types.py
index 4bd8ec5a2c83fad8312af74b4645488c6c96d750..665090f56c34436218ac32e92b848cd5b95b707e 100644
--- a/hog/operator_generation/kernel_types.py
+++ b/hog/operator_generation/kernel_types.py
@@ -559,11 +559,16 @@ class ApplyWrapper(KernelWrapperType):
                     f"    $scalar_parameter_setup_{dim}D\n"
                     f"\n"
                     f'    this->timingTree_->start( "kernel" );\n'
-                    f'    if (localElementMatricesPrecomputed_){{\n'
-                    f"    $Apply_{dim}D\n"
-                    f"    }} else {{\n"
-                    f"    $ApplyLocalMatrices_{dim}D\n"
-                    f"    }}\n"
+                    f'    if ( elementsWithPrecomputedMatrix_[{macro}]) {{\n'
+                    f'      if (localElementMatricesPrecomputed_){{\n'
+                    f"          $ApplyLocalMatrices_{dim}D\n"
+                    f"      }}\n"
+                    f"      else {{ \n"
+                    f'          WALBERLA_ABORT("Local matrix not precomputed on this element")\n'
+                    f'      }}\n'
+                    f'    }} else {{\n'
+                    f'      $Apply_{dim}D\n'
+                    f'    }}\n'
                     f'    this->timingTree_->stop( "kernel" );\n'
                     f"}}\n"
                     f"\n"
@@ -994,7 +999,9 @@ class AssembleLocalMatricesWrapper(KernelWrapperType):
                     f"\n"
                     f"    auto localMatrices = localElementMatrices{dim}D_.at({macro}.getID()).at(level);\n"          
                     f'    this->timingTree_->start( "kernel" );\n'
-                    f"    $AssembleLocalMatrices_{dim}D\n"
+                    f'    if ( elementsWithPrecomputedMatrix_[{macro}]) {{\n'
+                    f"       $AssembleLocalMatrices_{dim}D\n"
+                    f'    }}\n'
                     f'    this->timingTree_->stop( "kernel" );\n'
                     f"}}"
                 )
@@ -1063,6 +1070,13 @@ class AssembleLocalMatricesWrapper(KernelWrapperType):
                     type=f"bool",
                 ),
                 visibility="public",
+            ),
+            CppMemberVariable(
+                CppVariable(
+                    name=f"elementsWithPrecomputedMatrix_",
+                    type=f"std::map<PrimitiveID, bool>",
+                ),
+                visibility="public",
             )
         ]
         return members
diff --git a/hyteg_integration_tests/src/DiffusionP1.cpp b/hyteg_integration_tests/src/DiffusionP1.cpp
index ba0c748d9b021c079dd744a502ad2f652c66d96c..e114511a8da6737f89291b5e4d0f16ad494d311f 100644
--- a/hyteg_integration_tests/src/DiffusionP1.cpp
+++ b/hyteg_integration_tests/src/DiffusionP1.cpp
@@ -55,6 +55,12 @@ int main( int argc, char* argv[] )
       compareAssembledMatrix< P1ElementwiseLaplaceOperator, operatorgeneration::TestOpDiffusion >(
           level, storageSetup, storageSetup.description() + " Assembly", 360 );
 #endif
+
+
+#ifdef TEST_LOCAL_PRECOMPUTATION
+       testOperators< P1Function< real_t >, P1ElementwiseLaplaceOperator, operatorgeneration::TestOpDiffusion >(
+          level, storageSetup, 225, 9.0e6, false, true );
+#endif
    }
 
    return EXIT_SUCCESS;
diff --git a/hyteg_integration_tests/src/OperatorGenerationTest.hpp b/hyteg_integration_tests/src/OperatorGenerationTest.hpp
index 8719f6d5ef62fbed630bedb3fc89c39deb71f1d7..d061805856c3389c4c9944b206f37d9ca62e6a24 100644
--- a/hyteg_integration_tests/src/OperatorGenerationTest.hpp
+++ b/hyteg_integration_tests/src/OperatorGenerationTest.hpp
@@ -128,7 +128,8 @@ void compareApply( const uint_t        level,
                    const StorageSetup& storageSetup,
                    const std::string&  message,
                    const real_t        thresholdOverMachineEps,
-                   const bool          writeVTK = false )
+                   const bool          writeVTK = false,
+                   const bool         testPrecomputation = false )
 {
    OperatorFactory< RefOpType > makeRefOp = []( std::shared_ptr< PrimitiveStorage > storage, uint_t minLevel, uint_t maxLevel ) {
       return RefOpType( storage, minLevel, maxLevel );
@@ -146,7 +147,8 @@ void compareApply( OperatorFactory< RefOpType > refOpFactory,
                    const StorageSetup&          storageSetup,
                    const std::string&           message,
                    const real_t                 thresholdOverMachineEps,
-                   const bool                   writeVTK = false )
+                   const bool                   writeVTK = false,
+                   const bool         testPrecomputation = false)
 {
    static_assert( std::is_same_v< typename RefOpType::srcType::valueType, typename RefOpType::dstType::valueType >,
                   "Reference operator has a different ValueType for source and destination." );
@@ -200,6 +202,20 @@ void compareApply( OperatorFactory< RefOpType > refOpFactory,
    opRef.apply( refSrc, refDst, level, All, Replace );
 
    OpType op = testOpFactory( storage, level, level );
+   if(testPrecomputation) {
+     if ( storage_->hasGlobalCells() )
+     {
+       for ( auto& it : storage_->getCells() )
+       {
+            op.elementsWithPrecomputedMatrix_[*it.first] = true;
+       }
+     } else {
+       for ( auto& it : storage_->getFaces() )
+       {
+          op.elementsWithPrecomputedMatrix_[*it.first] = true;
+       }
+     }
+   }
    op.apply( src, dst, level, All, Replace );
 
    // compare
@@ -480,11 +496,12 @@ void testOperators( const uint_t        level,
                     const StorageSetup& storageSetup,
                     const real_t        thresholdOverMachineEpsApply,
                     const real_t        thresholdOverMachineEpsInvDiag,
-                    const bool          writeVTK = false )
+                    const bool          writeVTK = false,
+                    const bool         testPrecomputation = false)
 {
    WALBERLA_LOG_INFO_ON_ROOT( "\n" << storageSetup.description() << " Apply\n" )
    ( compareApply< Reference, Operators >(
-         level, storageSetup, typeid( Operators ).name(), thresholdOverMachineEpsApply, writeVTK ),
+         level, storageSetup, typeid( Operators ).name(), thresholdOverMachineEpsApply, writeVTK, testPrecomputation ),
      ... );
 
    WALBERLA_LOG_INFO_ON_ROOT( "\n" << storageSetup.description() << " Inverse Diagonal\n" )