diff --git a/src/pystencils_autodiff/CMakeLists.tmpl.txt b/src/pystencils_autodiff/CMakeLists.tmpl.txt
index 3765fab75bc575e1789d564d0ba06a5bb3ac97ae..062647cae19db52c6fc83163c6870edd18a817f6 100644
--- a/src/pystencils_autodiff/CMakeLists.tmpl.txt
+++ b/src/pystencils_autodiff/CMakeLists.tmpl.txt
@@ -4,3 +4,7 @@ waLBerla_add_executable ( NAME {{ cmake_target_name }}
                                 {{ f }} 
                                 {%- endfor %}
                           DEPENDS {{depends | join(' ')}})
+
+{% if 'walberla_openvdb' in depends %}
+target_link_libraries( {{ cmake_target_name }} tbb boundary openvdb Half IexMath Iex IlmThread Imath)
+{% endif %}
diff --git a/src/pystencils_autodiff/framework_integration/astnodes.py b/src/pystencils_autodiff/framework_integration/astnodes.py
index d83b6bd2984d141536e696d04267e948da8d8be4..f724079dc11c6975c20aab97cfc57ce2490a3ed6 100644
--- a/src/pystencils_autodiff/framework_integration/astnodes.py
+++ b/src/pystencils_autodiff/framework_integration/astnodes.py
@@ -229,6 +229,7 @@ class JinjaCppFile(Node):
         render_dict.update({"globals": sorted({
             self.printer(g) for g in pystencils.backends.cbackend.get_global_declarations(self)
         }, key=str)})
+        # self.TEMPLATE.environment = self.ENVIROMENT
 
         return self.TEMPLATE.render(render_dict)
 
diff --git a/src/pystencils_autodiff/graph_datahandling.py b/src/pystencils_autodiff/graph_datahandling.py
index 2a5b7cd8f3c684e0dd7d70e9699d328ca91b2ac8..c7dc917faf51fbf7e4ae71293c61058e5bd9d6a0 100644
--- a/src/pystencils_autodiff/graph_datahandling.py
+++ b/src/pystencils_autodiff/graph_datahandling.py
@@ -88,6 +88,16 @@ class KernelCall:
         return "Call " + str(self.kernel.ast.function_name)
 
 
+class FieldOutput:
+    def __init__(self, fields, output_path, flag_field):
+        self.fields = fields
+        self.output_path = output_path
+        self.flag_field = flag_field
+
+    def __str__(self):
+        return "Writing fields " + str(self.fields)
+
+
 class TimeloopRun:
     def __init__(self, timeloop, time_steps):
         self.timeloop = timeloop
@@ -258,6 +268,12 @@ class GraphDataHandling(pystencils.datahandling.SerialDataHandling):
             if isinstance(t, TimeloopRun):
                 self.merge_swaps_with_kernel_calls(t.timeloop._single_step_asts)
 
+    def save_fields(self, fields, output_path, flag_field=None):
+        if isinstance(fields, str):
+            fields = [fields]
+        fields = [self.fields[f] if isinstance(f, str) else f for f in fields]
+        self.call_queue.append(FieldOutput(fields, output_path, flag_field))
+
     # TODO
     # def reduce_float_sequence(self, sequence, operation, all_reduce=False) -> np.array:
         # return np.array(sequence)
diff --git a/src/pystencils_autodiff/simulation.py b/src/pystencils_autodiff/simulation.py
index e48a371f59c372618d2b160acbcdb3955bc9ac5f..9b176ef29364b47c9698457df0c1b5a0a84af654 100644
--- a/src/pystencils_autodiff/simulation.py
+++ b/src/pystencils_autodiff/simulation.py
@@ -18,9 +18,10 @@ import pystencils_walberla.codegen
 from pystencils.astnodes import Block, EmptyLine
 from pystencils.cpu.cpujit import get_headers
 from pystencils_autodiff.walberla import (
-    AllocateAllFields, CMakeLists, Communication, DefineKernelObjects, DefinitionsHeader, FieldCopy,
-    ForLoop, InitBoundaryHandling, LbCommunicationSetup, ResolveUndefinedSymbols, SwapFields,
-    SweepCreation, SweepOverAllBlocks, UniformBlockforestFromConfig, WalberlaMain, WalberlaModule)
+    AllocateAllFields, CMakeLists, Communication, DefineKernelObjects, DefinitionsHeader,
+    FieldCopy, InitBoundaryHandling, LbCommunicationSetup, ResolveUndefinedSymbols, SwapFields,
+    SweepCreation, SweepOverAllBlocks, TimeLoopNode, UniformBlockforestFromConfig, WalberlaMain,
+    WalberlaModule, WriteVdb)
 
 WALBERLA_MODULES = ["blockforest",
                     "boundary",
@@ -64,7 +65,8 @@ class Simulation():
                  lb_rule=None,
                  refinement_scaling=None,
                  boundary_handling_target='gpu',
-                 cmake_target_name='autogen_app'):
+                 cmake_target_name='autogen_app',
+                 write_out_stuff=True):
         self._data_handling = graph_data_handling
         self._lb_rule = lb_rule
         self._refinement_scaling = refinement_scaling
@@ -82,6 +84,9 @@ class Simulation():
         self._data_handling.merge_swaps_with_kernel_calls()
         self._packinfo_class = 'PackInfo'
         self.cmake_target_name = cmake_target_name
+        self._write_out_stuff = write_out_stuff
+        self._fluid_uid = None
+        self._debug = None
 
     def _create_helper_files(self) -> Dict[str, str]:
 
@@ -132,18 +137,23 @@ class Simulation():
 
         call_nodes = filter(lambda x: x, [self._graph_to_sweep(c) for c in self._data_handling.call_queue])
 
+        init_boundary_handling = (InitBoundaryHandling(self._block_forest.blocks,
+                                                       flag_field_id,
+                                                       pdf_field_id,
+                                                       self.boundary_conditions,
+                                                       self._boundary_kernels,
+                                                       self._field_allocations)
+                                  if self._boundary_handling
+                                  else EmptyLine())
+        if hasattr(init_boundary_handling, 'fluid'):
+            self._fluid_uid = init_boundary_handling.fluid
+
         module = WalberlaModule(WalberlaMain(Block([
             self._block_forest,
             ResolveUndefinedSymbols(
                 Block([
                     field_allocations,
-                    InitBoundaryHandling(self._block_forest.blocks,
-                                         flag_field_id,
-                                         pdf_field_id,
-                                         self.boundary_conditions,
-                                         self._boundary_kernels,
-                                         self._field_allocations)
-                    if self._boundary_handling else EmptyLine(),
+                    init_boundary_handling,
                     LbCommunicationSetup(self._lb_model_name,
                                          pdf_field_id,
                                          self._packinfo_class,
@@ -156,6 +166,9 @@ class Simulation():
             )
         ])))
 
+        if self._debug:
+            from pystencils_autodiff.framework_integration.printer import DebugFrameworkPrinter
+            module.printer = DebugFrameworkPrinter()
         self._codegen_context.write_file("main.cpp", str(module))
         return module
 
@@ -177,6 +190,7 @@ class Simulation():
                     walberla_dependencies.append(module)
 
         dependencies = walberla_dependencies + extra_dependencis
+
         try:
             self._codegen_context.write_file("CMakeLists.txt",
                                              str(CMakeLists(self.cmake_target_name,
@@ -201,7 +215,8 @@ class Simulation():
         return self._boundary_handling._boundary_object_to_boundary_info.keys()
 
     def _graph_to_sweep(self, c):
-        from pystencils_autodiff.graph_datahandling import KernelCall, TimeloopRun, DataTransferKind, DataTransfer
+        from pystencils_autodiff.graph_datahandling import (
+            KernelCall, TimeloopRun, DataTransferKind, DataTransfer, FieldOutput)
 
         if isinstance(c, KernelCall):
 
@@ -223,7 +238,7 @@ class Simulation():
 
         elif isinstance(c, TimeloopRun):
             sweeps = [self._graph_to_sweep(s) for s in c.timeloop._single_step_asts]
-            rtn = ForLoop(0, c.time_steps, sweeps)
+            rtn = TimeLoopNode(0, c.time_steps, sweeps)
 
         elif isinstance(c, DataTransfer):
             if c.kind == DataTransferKind.HOST_SWAP:
@@ -246,7 +261,10 @@ class Simulation():
                 rtn = Communication(self._boundary_handling_target == 'gpu')
             else:
                 rtn = None
+        elif isinstance(c, FieldOutput):
+            rtn = WriteVdb(self._block_forest, c.output_path, c.fields, c.flag_field, self._fluid_uid)
         else:
             rtn = None
 
         return rtn
+        return rtn
diff --git a/src/pystencils_autodiff/walberla.py b/src/pystencils_autodiff/walberla.py
index e882ce8b1dc8fdb9ee31e9ba85e3d78dc05a152d..73fbe0bf5671a88cd532108580a5cfd10f1a5bd2 100644
--- a/src/pystencils_autodiff/walberla.py
+++ b/src/pystencils_autodiff/walberla.py
@@ -15,6 +15,7 @@ import jinja2
 import numpy as np
 import sympy as sp
 from stringcase import camelcase, pascalcase
+from sympy.core.cache import cacheit
 
 import pystencils
 from pystencils.astnodes import SympyAssignment
@@ -30,6 +31,43 @@ def _make_field_type(field, on_gpu):
     return make_field_type(pystencils.data_types.get_base_type(field.dtype), f_size, on_gpu)
 
 
+class BlockDataID(TypedSymbol):
+    """
+    Local cell interval in global index coordinates
+    """
+    def __new__(cls, *args, **kwds):
+        obj = BlockDataID.__xnew_cached_(cls, *args, **kwds)
+        return obj
+
+    def __new_stage2__(cls, field, on_gpu, *args, **kwargs):
+        name = field.name + ('_data_gpu' if on_gpu else '_data')
+        obj = super(BlockDataID, cls).__xnew__(cls,
+                                               name,
+                                               'BlockDataID',
+                                               *args,
+                                               **kwargs)
+        obj.field = field
+        obj.on_gpu = on_gpu
+        return obj
+
+    __xnew__ = staticmethod(__new_stage2__)
+    __xnew_cached_ = staticmethod(cacheit(__new_stage2__))
+
+    def _hashable_content(self):
+        return super()._hashable_content(), self.on_gpu
+
+    def __getnewargs__(self):
+        return self.name, self.on_gpu
+
+    @property
+    def walberla_field_type(self):
+        return _make_field_type(self.field, self.on_gpu)
+
+    @property
+    def walberla_field_type(self):
+        return _make_field_type(self.field, self.on_gpu)
+
+
 class FieldType(JinjaCppFile):
 
     TEMPLATE = jinja2.Template("{{ field_type }}")
@@ -222,7 +260,7 @@ BlockDataID {{ field_name }}_data = field::addToStorage<{{ field_type }}>( {{ bl
 """)  # noqa
 
     def __init__(self, block_forest, field, on_gpu=False, usePitchedMem=True, num_ghost_layers=1, ):
-        self._symbol = TypedSymbol(field.name + ('_data_gpu' if on_gpu else '_data'), 'BlockDataID')
+        self._symbol = BlockDataID(field, on_gpu)
         ast_dict = {
             'block_forest': block_forest,
             'field_name': field.name,
@@ -570,6 +608,13 @@ class CppObjectConstruction(JinjaCppFile, pystencils.astnodes.SympyAssignment):
     def symbol(self):
         return self.lhs
 
+    def __sympy__(self):
+        return self.symbol
+
+    @property
+    def symbols_defined(self):
+        return {self.symbol}
+
 
 class ResolveUndefinedSymbols(JinjaCppFile):
 
@@ -640,8 +685,12 @@ class DefineKernelObjects(JinjaCppFile):
 
     def __init__(self, block):
         self.sweeps = block.atoms(SweepOverAllBlocks)
-        self.kernels = [SympyAssignment(k.ast_dict.functor.symbol, k.ast_dict.functor,
-                                        is_const=False, use_auto=True) for k in self.sweeps]
+        self.kernels = [k.ast_dict.functor if isinstance(k.ast_dict.functor, SympyAssignment)
+                        else SympyAssignment(k.ast_dict.functor.symbol, k.ast_dict.functor,
+                                             is_const=False, use_auto=True)
+
+
+                        for k in self.sweeps]
         ast_dict = {'block': block,
                     'kernels': self.kernels,
                     }
@@ -867,7 +916,7 @@ class SweepOverAllBlocks(JinjaCppFile):
         super().__init__(ast_dict)
 
     @property
-    def symbols_undefined(self):
+    def undefined_symbols(self):
         return {self.ast_dict.functor.symbol}
 
     @property
@@ -884,13 +933,11 @@ static inline auto sweep(walberla::shared_ptr<BlockStorage_T> blocks, Functor_T
 class FieldCopy(JinjaCppFile):
     TEMPLATE = jinja2.Template("""cuda::fieldCpy< {{src_type }}, {{dst_type }} > ({{block_forest }}, {{src_id }}, {{dst_id }}); """)  # noqa
 
-    def __init__(self, block_forest, src_id, src_field, src_gpu, dst_id, dst_field, dst_gpu):
-        src_type = _make_field_type(src_field, src_gpu)
-        dst_type = _make_field_type(dst_field, dst_gpu)
+    def __init__(self, block_forest, src_id, dst_id,):
         ast_dict = {'src_id': src_id,
                     'dst_id': dst_id,
-                    'src_type': src_type,
-                    'dst_type': dst_type,
+                    'src_type': src_id.walberla_field_type,
+                    'dst_type': dst_id.walberla_field_type,
                     'block_forest': block_forest}
         super().__init__(ast_dict)
 
@@ -933,22 +980,52 @@ class Communication(JinjaCppFile):
 class VdbWriter(CppObjectConstruction):
     def __init__(self, block_forest):
         symbol = TypedSymbol('vdb', 'walberla_openvdb::OpenVdbFieldWriter')
-        super().__init__(symbol, block_forest)
+        super().__init__(symbol, [block_forest.blocks])
 
     headers = ['"walberla_openvdb/OpenVdbFieldWriter.h"']
 
+    @property
+    def dtype(self):
+        return self.symbol.dtype
+
+    @property
+    def free_symbols(self):
+        return {}
+
 
 class WriteVdb(SweepOverAllBlocks):
     TEMPLATE = jinja2.Template(
-        """{{vdb_writer}}.beginFile({{file_prefix}}{% if time %}, optional<float>({{time}}){% endif %});
-{% for f in fields %}
-{{ vdb_writer }}.addField<{{ f.dtype }}, {{ f.vdb_type }}>( {{f.id}}, {{f.name}} );
+        """{{vdb_writer}}.beginFile("{{output_path}}"{% if time %}, optional<float>({{time}}){% endif %});
+{% for f in fields  %}
+{%- if flag_field_id -%}
+{{ vdb_writer }}.addFieldFiltered<{{ f.walberla_field_type }}, VdbGrid_T<float>, FlagField_T>( {{f}}, "{{f}}", {{ flag_field_id }}, {{ fluid_uid }});
+{%- else -%}
+{{ vdb_writer }}.addField<{{ f.walberla_field_type }}, VdbGrid_T<float>>( {{f}}, "{{f}}");
+{%- endif -%}
 {% endfor %}
-{{vdb_writer}}.writeFile();""")
+{{vdb_writer}}.writeFile();""")  # noqa
 
-    def __init__(self, fields_to_write, block_forest):
+    def __init__(self, block_forest, output_path, fields_to_write, flag_field_id, fluid_uid, time=None):
         functor = VdbWriter(block_forest)
-        ast_dict = {'functor': functor,
+        ast_dict = {'fields': [BlockDataID(f, on_gpu=False) for f in fields_to_write],
+                    'output_path': output_path,
                     'vdb_writer': functor.symbol,
-                    'block_forest': block_forest}
-        super().__init__(ast_dict)
+                    'flag_field_id': flag_field_id,
+                    'fluid_uid': fluid_uid,
+                    'functor': functor,
+                    'block_forest': block_forest,
+                    'time': time}
+        JinjaCppFile.__init__(self, ast_dict)
+
+    required_global_declarations = ["""template < typename T >
+using VdbGrid_T = openvdb::Grid < typename openvdb::tree::Tree4 < T, 5, 4, 3 >::Type >;"""]
+
+    @property
+    def undefined_symbols(self):
+        return {self.ast_dict.vdb_writer}
+
+
+class TimeLoopNode(ForLoop):
+    @property
+    def time(self):
+        return self.loop_symbol
diff --git a/tests/test_walberla.py b/tests/test_walberla.py
index dc8ad0ca93c864db3eebc3bcce18fcb7e5c94dda..7b7c264b7701103ac32d35ee54a49b066141535f 100644
--- a/tests/test_walberla.py
+++ b/tests/test_walberla.py
@@ -8,7 +8,7 @@
 """
 import os
 import sys
-from os.path import dirname, join
+from os.path import dirname, expanduser, join
 
 import numpy as np
 
@@ -102,12 +102,14 @@ def test_global_idx():
         dh = GraphDataHandling((20, 30, 40))
         my_array = dh.add_array('my_array')
 
-        ast = pystencils.create_kernel([pystencils.Assignment(my_array.center, aabb_min_x)]).compile()
+        ast = pystencils.create_kernel([pystencils.Assignment(my_array.center, aabb_min_x + pystencils.x_)]).compile()
         dh.run_kernel(ast, simulate_only=True)
+        dh.save_fields('my_array', expanduser('~/foo'))
         # ast = pystencils.create_kernel([pystencils.Assignment(my_array.center, sum(current_global_idx))]).compile()
         # dh.run_kernel(ast, simulate_only=True)
 
         sim = Simulation(dh, ctx, cmake_target_name='foo')
+        sim._debug = False
         sim.write_files()
 
         dir = '/localhome/seitz_local/projects/walberla/apps/foo/'