diff --git a/pystencils_walberla/templates/SweepInnerOuter.tmpl.cpp b/pystencils_walberla/templates/SweepInnerOuter.tmpl.cpp
index 0bb5110fa8ba4b739457ba929121f67ac459dc6c..3cf5069cd0980acdf2bc77dc1521d3cfe0d19c47 100644
--- a/pystencils_walberla/templates/SweepInnerOuter.tmpl.cpp
+++ b/pystencils_walberla/templates/SweepInnerOuter.tmpl.cpp
@@ -53,6 +53,24 @@ void {{class_name}}::operator() ( IBlock * block{%if target is equalto 'gpu'%} ,
 }
 
 
+void {{class_name}}::runOnCellInterval( const shared_ptr<StructuredBlockStorage> & blocks,
+                                        const CellInterval & globalCellInterval,
+                                        cell_idx_t ghostLayers,
+                                        IBlock * block{%if target is equalto 'gpu'%} , cudaStream_t stream{% endif %} )
+{
+    CellInterval ci = globalCellInterval;
+    CellInterval blockBB = blocks->getBlockCellBB( *block);
+    blockBB.expand( ghostLayers );
+    ci.intersect( blockBB );
+    blocks->transformGlobalToBlockLocalCellInterval( ci, *block );
+    if( ci.empty() )
+        return;
+
+    {{kernel|generate_block_data_to_field_extraction|indent(4)}}
+    {{kernel|generate_call(stream='stream', cell_interval='ci')|indent(4)}}
+    {{kernel|generate_swaps|indent(4)}}
+}
+
 
 void {{class_name}}::inner( IBlock * block{%if target is equalto 'gpu'%} , cudaStream_t stream{% endif %} )
 {
diff --git a/pystencils_walberla/templates/SweepInnerOuter.tmpl.h b/pystencils_walberla/templates/SweepInnerOuter.tmpl.h
index 141df478a9d3a1fc89ce58eead323479e54814be..733083a56e6d10008f605c37ed9eb96eddabf97e 100644
--- a/pystencils_walberla/templates/SweepInnerOuter.tmpl.h
+++ b/pystencils_walberla/templates/SweepInnerOuter.tmpl.h
@@ -17,6 +17,7 @@
 //! \\author pystencils
 //======================================================================================================================
 
+#pragma once
 #include "core/DataTypes.h"
 
 {% if target is equalto 'cpu' -%}
@@ -28,7 +29,7 @@
 #include "field/SwapableCompare.h"
 #include "domain_decomposition/BlockDataID.h"
 #include "domain_decomposition/IBlock.h"
-
+#include "domain_decomposition/StructuredBlockStorage.h"
 #include <set>
 
 #ifdef __GNUC__
@@ -60,6 +61,28 @@ public:
 
     void operator() ( IBlock * block{%if target is equalto 'gpu'%} , cudaStream_t stream = 0{% endif %} );
 
+    void runOnCellInterval(const shared_ptr<StructuredBlockStorage> & blocks,
+                           const CellInterval & globalCellInterval, cell_idx_t ghostLayers, IBlock * block
+                           {%if target is equalto 'gpu'%} , cudaStream_t stream = 0{% endif %});
+
+
+
+    static std::function<void (IBlock*)> getSweep(const shared_ptr<{{class_name}}> & kernel) {
+        return [kernel](IBlock * b) { (*kernel)(b); };
+    }
+
+    static std::function<void (IBlock*{%if target is equalto 'gpu'%} , cudaStream_t {% endif %})>
+            getSweepOnCellInterval(const shared_ptr<{{class_name}}> & kernel,
+                                   const shared_ptr<StructuredBlockStorage> & blocks,
+                                   const CellInterval & globalCellInterval,
+                                   cell_idx_t ghostLayers=1 )
+    {
+        return [kernel, blocks, globalCellInterval, ghostLayers] (IBlock * b{%if target is equalto 'gpu'%} , cudaStream_t stream = 0{% endif %}) {
+            kernel->runOnCellInterval(blocks, globalCellInterval, ghostLayers, b{%if target is equalto 'gpu'%},stream {% endif %});
+        };
+    }
+
+
     void inner( IBlock * block{%if target is equalto 'gpu'%} , cudaStream_t stream = 0{% endif %} );
     void outer( IBlock * block{%if target is equalto 'gpu'%} , cudaStream_t stream = 0{% endif %} );