diff --git a/src/pystencils_autodiff/backends/astnodes.py b/src/pystencils_autodiff/backends/astnodes.py
index 51e378afa3ab55cd6ae5c21353a1e8473463e8d2..343dd1036e671447386d6eac660bde584080da49 100644
--- a/src/pystencils_autodiff/backends/astnodes.py
+++ b/src/pystencils_autodiff/backends/astnodes.py
@@ -15,6 +15,7 @@ from os.path import dirname, exists, join
 
 from pystencils.astnodes import FieldPointerSymbol, FieldShapeSymbol, FieldStrideSymbol
 from pystencils.cpu.cpujit import get_cache_config, get_compiler_config
+from pystencils.gpucuda.cudajit import get_cubic_interpolation_include_paths
 from pystencils.include import get_pycuda_include_path, get_pystencils_include_path
 from pystencils_autodiff._file_io import read_template_from_file, write_file
 from pystencils_autodiff.backends.python_bindings import (
@@ -100,6 +101,10 @@ class TorchModule(JinjaCppFile):
 
         super().__init__(ast_dict)
 
+    @property
+    def kernel_wrappers(self):
+        return self.ast_dict['kernel_wrappers']
+
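+    # A minimal usage sketch (module and kernel names are illustrative,
+    # not taken from this diff):
+    #
+    #   module = TorchModule('my_module', [kernel_ast])
+    #   extension = module.compile()   # JIT-builds and loads the torch extension
+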
     @classmethod
     def generate_wrapper_function(cls, kernel_ast):
         return WrapperFunction(cls.DESTRUCTURING_CLASS(generate_kernel_call(kernel_ast)),
@@ -129,7 +134,8 @@ class TorchModule(JinjaCppFile):
                                extra_cuda_cflags=['-std=c++14', '-ccbin', get_compiler_config()['command']],
                                build_directory=build_dir,
                                extra_include_paths=[get_pycuda_include_path(),
-                                                    get_pystencils_include_path()])
+                                                    get_pystencils_include_path(),
+                                                    *get_cubic_interpolation_include_paths()])
         return torch_extension
 
 
diff --git a/src/pystencils_autodiff/framework_integration/printer.py b/src/pystencils_autodiff/framework_integration/printer.py
index 173104e1e6ddf3090faabb071c72ae6e07f68d8a..afa694eb20186127fe884f098a562f66f54e6b94 100644
--- a/src/pystencils_autodiff/framework_integration/printer.py
+++ b/src/pystencils_autodiff/framework_integration/printer.py
@@ -90,3 +90,12 @@ class FrameworkIntegrationPrinter(pystencils.backends.cbackend.CBackend):
             "\n" + self._indent + \
             ("\n" + self._indent).join(self._print(node.body).splitlines()) + \
             "\n}"
+
+    def _print_Timeloop(self, node):
+        children_string = '\n    '.join(self._print(c) for c in node.children)
+        return f"""for ({node.loop_symbol.dtype} {node.loop_symbol} = {node.loop_start}; {node.loop_symbol} <= {node.loop_end}; {node.loop_symbol} += {node.loop_increment}) {{
+    {children_string}
+}}"""  # noqa
+
+    def _print_SwapBuffer(self, node):
+        return f"""std::swap({node.first_array}, {node.second_array});"""
diff --git a/src/pystencils_autodiff/graph_datahandling.py b/src/pystencils_autodiff/graph_datahandling.py
new file mode 100644
index 0000000000000000000000000000000000000000..2125ac229ccda3b3989714638b9e9bdfbbc9c9da
--- /dev/null
+++ b/src/pystencils_autodiff/graph_datahandling.py
@@ -0,0 +1,209 @@
+# -*- coding: utf-8 -*-
+#
+# Copyright © 2019 Stephan Seitz <stephan.seitz@fau.de>
+#
+# Distributed under terms of the GPLv3 license.
+
+"""
+
+"""
+from enum import Enum
+
+import numpy as np
+
+import pystencils.datahandling
+import pystencils.kernel_wrapper
+import pystencils.timeloop
+from pystencils.field import FieldType
+
+
+class DataTransferKind(str, Enum):
+    UNKNOWN = None
+    HOST_ALLOC = 'HOST_ALLOC'
+    DEVICE_ALLOC = 'DEVICE_ALLOC'
+    HOST_TO_DEVICE = 'HOST_TO_DEVICE'
+    DEVICE_TO_HOST = 'DEVICE_TO_HOST'
+    HOST_COMMUNICATION = 'HOST_COMMUNICATION'
+    DEVICE_COMMUNICATION = 'DEVICE_COMMUNICATION'
+    HOST_SWAP = 'HOST_SWAP'
+    DEVICE_SWAP = 'DEVICE_SWAP'
+    HOST_GATHER = 'HOST_GATHER'
+    DEVICE_GATHER = 'DEVICE_GATHER'
+
+    def is_alloc(self):
+        return self in [self.HOST_ALLOC, self.DEVICE_ALLOC]
+
+    def is_transfer(self):
+        return self in [self.HOST_TO_DEVICE, self.DEVICE_TO_HOST, self.HOST_SWAP, self.DEVICE_SWAP]
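+
+    # For example: DataTransferKind.HOST_SWAP.is_transfer() is True,
+    # while DataTransferKind.HOST_ALLOC.is_transfer() is False.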
+
+
+class DataTransfer:
+    def __init__(self, field: pystencils.Field, kind: DataTransferKind):
+        self.field = field
+        self.kind = kind
+
+    def __str__(self):
+        return f'DataTransferKind: {self.kind} with {self.field}'
+
+
+class Swap(DataTransfer):
+    def __init__(self, source, destination, gpu):
+        self.kind = DataTransferKind.DEVICE_SWAP if gpu else DataTransferKind.HOST_SWAP
+        self.field = source
+        self.destination = destination
+
+    def __str__(self):
+        return f'Swap: {self.field} with {self.destination}'
+
+
+class Communication(DataTransfer):
+    def __init__(self, field, stencil, gpu):
+        self.kind = DataTransferKind.DEVICE_COMMUNICATION if gpu else DataTransferKind.HOST_COMMUNICATION
+        self.field = field
+        self.stencil = stencil
+
+
+class KernelCall:
+    def __init__(self, kernel: pystencils.kernel_wrapper.KernelWrapper, kwargs):
+        self.kernel = kernel
+        self.kwargs = kwargs
+
+    def __str__(self):
+        return "Call " + str(self.kernel.ast.function_name)
+
+
+class TimeloopRun:
+    def __init__(self, timeloop, time_steps):
+        self.timeloop = timeloop
+        self.time_steps = time_steps
+
+    def __str__(self):
+        return ('Timeloop:'
+                + '\nPre:\n'
+                + '\n   '.join(str(f) for f in self.timeloop._pre_run_functions)
+                + f'\n{self.time_steps} time steps:\n'
+                + '\n   '.join(str(f) for f in self.timeloop._single_step_asts)
+                + '\nPost:\n'
+                + '\n   '.join(str(f) for f in self.timeloop._post_run_functions))
+
+    @property
+    def asts(self):
+        return self.timeloop._single_step_asts
+
+
+class GraphDataHandling(pystencils.datahandling.SerialDataHandling):
+    """Docstring for GraphDataHandling. """
+
+    class TimeLoop(pystencils.timeloop.TimeLoop):
+        def __init__(self, parent, *args, **kwargs):
+            self.parent = parent
+            self._single_step_asts = []
+            super().__init__(*args, **kwargs)
+
+        def add_pre_run_function(self, f):
+            self._pre_run_functions.append(f)
+
+        def add_post_run_function(self, f):
+            self._post_run_functions.append(f)
+
+        def add_single_step_function(self, f):
+            self._single_step_functions.append(f)
+
+        def add_call(self, functor, argument_list):
+            # Normalize the inputs before recording: unwrap kernel functors and
+            # allow a single argument dict to be passed directly.
+            if hasattr(functor, 'kernel'):
+                functor = functor.kernel
+            if not isinstance(argument_list, list):
+                argument_list = [argument_list]
+
+            for argument_dict in argument_list:
+                self._single_step_asts.append(functor.ast if hasattr(functor, 'ast') else (functor, argument_dict))
+
+        def run(self, time_steps=1):
+            self.parent.call_queue.append(TimeloopRun(self, time_steps))
+            super().run(time_steps)
+
+    def __init__(self, *args, **kwargs):
+
+        self.call_queue = []
+        super().__init__(*args, **kwargs)
+
+    def add_array(self, name, values_per_cell=1, dtype=np.float64, latex_name=None, ghost_layers=None, layout=None,
+                  cpu=True, gpu=None, alignment=False, field_type=FieldType.GENERIC):
+
+        super().add_array(name,
+                          values_per_cell,
+                          dtype,
+                          latex_name,
+                          ghost_layers,
+                          layout,
+                          cpu,
+                          gpu,
+                          alignment,
+                          field_type)
+        if cpu:
+            self.call_queue.append(DataTransfer(self._fields[name], DataTransferKind.HOST_ALLOC))
+        if gpu:
+            self.call_queue.append(DataTransfer(self._fields[name], DataTransferKind.DEVICE_ALLOC))
+
+    def add_custom_data(self, name, cpu_creation_function,
+                        gpu_creation_function=None, cpu_to_gpu_transfer_func=None, gpu_to_cpu_transfer_func=None):
+
+        self.call_queue.append('custom data (not representable in the call queue)')
+        super().add_custom_data(name, cpu_creation_function,
+                                gpu_creation_function, cpu_to_gpu_transfer_func, gpu_to_cpu_transfer_func)
+
+    def gather_array(self, name, slice_obj=None, ghost_layers=False, **kwargs):
+
+        self.call_queue.append('gather_array')
+        super().gather_array(name, slice_obj, ghost_layers, **kwargs)
+
+    def swap(self, name1, name2, gpu=None):
+
+        self.call_queue.append(Swap(self._fields[name1], self._fields[name2], gpu))
+        super().swap(name1, name2, gpu)
+
+    def run_kernel(self, kernel_function, **kwargs):
+        self.call_queue.append(KernelCall(kernel_function, kwargs))
+        super().run_kernel(kernel_function, **kwargs)
+
+    def to_cpu(self, name):
+        super().to_cpu(name)
+        self.call_queue.append(DataTransfer(self._fields[name], DataTransferKind.DEVICE_TO_HOST))
+
+    def to_gpu(self, name):
+        super().to_gpu(name)
+        if name in self._custom_data_transfer_functions:
+            self.call_queue.append('Custom Transfer Function')
+        else:
+            self.call_queue.append(DataTransfer(self._fields[name], DataTransferKind.HOST_TO_DEVICE))
+
+    def synchronization_function(self, names, stencil=None, target=None, **_):
+        gpu = target == 'gpu'
+        for name in names:
+            self.call_queue.append(Communication(self._fields[name], stencil, gpu))
+        super().synchronization_function(names, stencil=stencil, target=target, **_)
+
+    def __str__(self):
+        return '\n'.join(str(c) for c in self.call_queue)
+
+    def create_timeloop(self, *args, **kwargs):
+        return self.TimeLoop(self, *args, **kwargs)
+
+    def fill(self, array_name: str, val, value_idx=None,
+             slice_obj=None, ghost_layers=False, inner_ghost_layers=False) -> None:
+        self.call_queue.append('Fill ' + array_name)
+        super().fill(array_name, val, value_idx, slice_obj, ghost_layers, inner_ghost_layers)
+
+    # TODO
+    # def reduce_float_sequence(self, sequence, operation, all_reduce=False) -> np.array:
+        # return np.array(sequence)
+
+    # def reduce_int_sequence(self, sequence, operation, all_reduce=False) -> np.array:
+        # return np.array(sequence)
+
+    # def create_vtk_writer(self, file_name, data_names, ghost_layers=False):
+        # pass
+
+    # def create_vtk_writer_for_flag_array(self, file_name, data_name, masks_to_name, ghost_layers=False):
+        # pass
diff --git a/src/pystencils_autodiff/timeloop_astnodes.py b/src/pystencils_autodiff/timeloop_astnodes.py
new file mode 100644
index 0000000000000000000000000000000000000000..81518e0da3170facdabfb0fc29e59628f80edc57
--- /dev/null
+++ b/src/pystencils_autodiff/timeloop_astnodes.py
@@ -0,0 +1,146 @@
+# -*- coding: utf-8 -*-
+#
+# Copyright © 2020 Stephan Seitz <stephan.seitz@fau.de>
+#
+# Distributed under terms of the GPLv3 license.
+
+"""
+
+"""
+
+import jinja2
+import sympy as sp
+
+from pystencils.astnodes import Node
+from pystencils_autodiff.framework_integration.astnodes import FunctionCall, JinjaCppFile
+
+
+class CustomNode(Node):
+    """Own custom base class for all AST nodes."""
+
+    def __init__(self, children):
+        self.children = children
+
+    @property
+    def args(self):
+        """Returns all arguments/children of this node."""
+        return self.children
+
+    @property
+    def symbols_defined(self):
+        return set()
+
+    @property
+    def undefined_symbols(self):
+        undefined = []
+
+        for c in self.args:
+            if hasattr(c, 'undefined_symbols'):
+                undefined.extend(c.undefined_symbols)
+            else:
+                try:
+                    c = sp.sympify(c)
+                    undefined.extend(c.free_symbols)
+                except Exception:
+                    pass
+
+        return set(undefined) - self.symbols_defined
+
+
+class Namespace(CustomNode):
+    """Base class for all AST nodes."""
+
+    def __init__(self, name, children):
+        self.name = name
+        self.children = children
+
+    @property
+    def undefined_symbols(self):
+        undefined = []
+
+        for c in self.children:
+            if hasattr(c, 'undefined_symbols'):
+                undefined.extend(c.undefined_symbols)
+
+        return set(undefined)
+
+
+class Timeloop(CustomNode):
+
+    def __init__(self, loop_symbol, loop_start, loop_end, loop_increment, children: FunctionCall, kernelAstArr=None):
+        self.children = children
+        self.loop_symbol = loop_symbol
+        self.loop_start = loop_start
+        self.loop_end = loop_end
+        self.loop_increment = loop_increment
+        self.headers = ["<iostream>"]
+
+        # Avoid a mutable default argument: each instance gets its own list.
+        self.kernelAstArr = kernelAstArr if kernelAstArr is not None else []
+
+    @property
+    def args(self):
+        """Returns all arguments/children of this node."""
+        return (*self.children,
+                self.loop_symbol,
+                self.loop_start,
+                self.loop_end,
+                self.loop_increment
+                )
+
+    def symbolic_loop_args(self) -> set:
+        """Return all loop bounds that are still unresolved symbols."""
+        loop_args = [self.loop_start,
+                     self.loop_end,
+                     self.loop_increment]
+        symbolic_args = set()
+
+        for arg in loop_args:
+            if isinstance(arg, sp.Symbol):
+                symbolic_args.add(arg)
+
+        return symbolic_args
+
+    @property
+    def symbols_defined(self):
+        return {self.loop_symbol}
+
+    def atoms(self, arg_type):
+        """Returns a set of all descendants recursively, which are an instance of the given type."""
+        result = set()
+        for arg in self.args:
+            if isinstance(arg, arg_type):
+                result.add(arg)
+            if hasattr(arg, 'atoms'):
+                result.update(arg.atoms(arg_type))
+        return result
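+
+    # A minimal construction sketch (assumes a pystencils TypedSymbol so the
+    # printer can emit the loop counter's dtype; names are illustrative):
+    #
+    #   t = pystencils.TypedSymbol('t', pystencils.data_types.create_type('int64'))
+    #   loop = Timeloop(t, 0, 10, 1, children=[kernel_call_node])
+    #   loop.symbolic_loop_args()   # -> set(), all bounds are concrete here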
+
+
+class CppNamespace(JinjaCppFile):
+
+    TEMPLATE = jinja2.Template("""
+namespace {{ name }} {
+{% for c in children %}
+   {{ c | indent(3) }}
+{% endfor %}
+}""")
+
+    def __init__(self, name, children):
+
+        ast_dict = {
+            'name': name,
+            'children': children
+        }
+
+        super().__init__(ast_dict)
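+
+    # Rendering sketch: CppNamespace('detail', [child]) produces roughly
+    #
+    #   namespace detail {
+    #      <child>
+    #   }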
+
+
+class SwapBuffer(CustomNode):
+    def __init__(self, first_array, second_array):
+        self.first_array = first_array
+        self.second_array = second_array
+        self.headers = ["<utility>"]
+
+    @property
+    def args(self):
+        return (self.first_array, self.second_array)
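+
+    # Printed by FrameworkIntegrationPrinter._print_SwapBuffer as, e.g.:
+    #
+    #   std::swap(array_a, array_b);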
+
diff --git a/tests/test_graph_datahandling.py b/tests/test_graph_datahandling.py
new file mode 100644
index 0000000000000000000000000000000000000000..5b22070b8231b9ecca3e31b106ae93de690b3e48
--- /dev/null
+++ b/tests/test_graph_datahandling.py
@@ -0,0 +1,74 @@
+# -*- coding: utf-8 -*-
+#
+# Copyright © 2019 Stephan Seitz <stephan.seitz@fau.de>
+#
+# Distributed under terms of the GPLv3 license.
+
+"""
+
+"""
+
+import pytest
+
+pytest.importorskip('lbmpy')
+
+from lbmpy.boundaries import UBB, NoSlip  # noqa: E402
+from lbmpy.lbstep import LatticeBoltzmannStep  # noqa: E402
+
+from pystencils.slicing import slice_from_direction  # noqa: E402
+from pystencils_autodiff.graph_datahandling import GraphDataHandling  # noqa: E402
+
+
+def create_lid_driven_cavity(domain_size=None, lid_velocity=0.005, lbm_kernel=None,
+                             data_handling=None, **kwargs):
+    """Creates a lid driven cavity scenario.
+
+    Args:
+        domain_size: tuple specifying the number of cells in each dimension
+        lid_velocity: x velocity of lid in lattice coordinates.
+        lbm_kernel: a LBM function, which would otherwise automatically created
+        kwargs: other parameters are passed on to the method, see :mod:`lbmpy.creationfunctions`
+        parallel: True for distributed memory parallelization with walberla
+        data_handling: see documentation of :func:`create_fully_periodic_flow`
+    Returns:
+        instance of :class:`Scenario`
+    """
+    assert domain_size is not None or data_handling is not None
+    if data_handling is None:
+        optimization = kwargs.get('optimization', None)
+        target = optimization.get('target', None) if optimization else None
+        data_handling = GraphDataHandling(domain_size,
+                                          periodicity=False,
+                                          default_ghost_layers=1,
+                                          default_target=target)
+    step = LatticeBoltzmannStep(data_handling=data_handling,
+                                lbm_kernel=lbm_kernel,
+                                name="ldc",
+                                timeloop_creation_function=data_handling.create_timeloop,
+                                **kwargs)
+
+    my_ubb = UBB(velocity=[lid_velocity, 0, 0][:step.method.dim])
+    step.boundary_handling.set_boundary(my_ubb, slice_from_direction('N', step.dim))
+    for direction in ('W', 'E', 'S') if step.dim == 2 else ('W', 'E', 'S', 'T', 'B'):
+        step.boundary_handling.set_boundary(NoSlip(), slice_from_direction(direction, step.dim))
+
+    return step
+
+
+def ldc_setup(**kwargs):
+    ldc = create_lid_driven_cavity(relaxation_rate=1.7, **kwargs)
+    ldc.run(50)
+    return ldc
+
+
+def test_graph_datahandling():
+
+    print("--- LDC 2D test ---")
+
+    opt_params = {'target': 'gpu', 'gpu_indexing_params': {'block_size': (8, 4, 2)}}
+    lbm_step: LatticeBoltzmannStep = ldc_setup(domain_size=(10, 15), optimization=opt_params)
+    print(lbm_step._data_handling)
+
+    print(lbm_step._data_handling.call_queue)
+    assert lbm_step._data_handling.call_queue, 'operations should have been recorded in the call queue'
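+
+    # Entries in the recorded queue look like the following (illustrative;
+    # actual field and kernel names depend on the lbmpy setup):
+    #   DataTransferKind: DataTransferKind.HOST_ALLOC with <field>
+    #   Call <kernel_name>
+    #   Swap: <field> with <field>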