diff --git a/pystencils/autodiff/__init__.py b/pystencils/autodiff/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..e137411f88c4e1a944d37b2f5747853fb8337043
--- /dev/null
+++ b/pystencils/autodiff/__init__.py
@@ -0,0 +1,14 @@
+from pystencils.autodiff._autodiff_astpair import AutoDiffAstPair
+from pystencils.autodiff.adjoint_field import (ADJOINT_FIELD_LATEX_HIGHLIGHT,
+                                               AdjointField)
+from pystencils.autodiff.autodiff import (AutoDiffOp,
+                                          create_backward_assignments,
+                                          get_jacobian_of_assignments)
+
+__all__ = [
+    'AutoDiffAstPair',
+    'ADJOINT_FIELD_LATEX_HIGHLIGHT',
+    'AdjointField',
+    'AutoDiffOp',
+    'create_backward_assignments',
+    'get_jacobian_of_assignments']
diff --git a/pystencils/autodiff/_autodiff_astpair.py b/pystencils/autodiff/_autodiff_astpair.py
new file mode 100644
index 0000000000000000000000000000000000000000..2ab5843bcb4338fe8d9073cf1189aaab0d48775b
--- /dev/null
+++ b/pystencils/autodiff/_autodiff_astpair.py
@@ -0,0 +1,34 @@
+# -*- coding: utf-8 -*-
+#
+# Copyright © 2019 Stephan Seitz <stephan.seitz@fau.de>
+#
+# Distributed under terms of the GPLv3 license.
+
+"""
+
+"""
+import pystencils
+
+
class AutoDiffAstPair:
    """A pair of ASTs of forward and backward kernel.
    Just needed, if compilation from AssignmentCollection is not sufficient and you want to manipulate the ASTs"""

    def __init__(self, forward_ast, backward_ast, compilation_target='cpu'):
        self.forward_ast = forward_ast
        self.backward_ast = backward_ast
        self._target = compilation_target
        # The forward kernel is always needed, so compile it eagerly.
        self._forward_kernel = pystencils.make_python_function(self.forward_ast, target=self._target)
        # The backward kernel is compiled lazily on first use.
        self._backward_kernel = None

    def __call__(self, *args, **kwargs):
        """Calling the pair runs the forward kernel."""
        return self.forward(*args, **kwargs)

    def forward(self, *args, **kwargs):
        """Run the (eagerly compiled) forward kernel."""
        return self._forward_kernel(*args, **kwargs)

    def backward(self, *args, **kwargs):
        """Run the backward kernel, compiling it on first invocation."""
        if self._backward_kernel is None:
            self._backward_kernel = pystencils.make_python_function(self.backward_ast, target=self._target)
        return self._backward_kernel(*args, **kwargs)
diff --git a/pystencils/autodiff/adjoint_field.py b/pystencils/autodiff/adjoint_field.py
new file mode 100644
index 0000000000000000000000000000000000000000..3d1d2b07ebca1d17f090253a97db93fd2a87ba66
--- /dev/null
+++ b/pystencils/autodiff/adjoint_field.py
@@ -0,0 +1,31 @@
+import pystencils
+from pystencils.astnodes import FieldShapeSymbol, FieldStrideSymbol
+
+"""
+Determines how adjoint fields will be denoted in LaTeX output in terms of the forward field representation %s
+
+Default: r"\\hat{%s}"
+"""
+ADJOINT_FIELD_LATEX_HIGHLIGHT = r"\hat{%s}"
+
+
class AdjointField(pystencils.Field):
    """Field representing adjoint variables to a Field representing the forward variables"""

    def __init__(self, forward_field, name_prefix='diff'):
        """Create the adjoint of *forward_field*, named ``name_prefix + forward_field.name``.

        NOTE(review): relies on pystencils.Field internals (``_dtype``,
        ``_layout``) — confirm against the pystencils version in use.
        """
        new_name = name_prefix + forward_field.name
        super(AdjointField, self).__init__(new_name, forward_field.field_type, forward_field._dtype,
                                           forward_field._layout, forward_field.shape, forward_field.strides)
        self.corresponding_forward_field = forward_field
        self.name_prefix = name_prefix

        # Eliminate references to forward fields that might not be present in backward kernels
        # (FieldShapeSymbol takes a list of field names, FieldStrideSymbol a single name)
        self.shape = tuple(FieldShapeSymbol([self.name], s.coordinate) if
                           isinstance(s, FieldShapeSymbol) else s for s in self.shape)
        self.strides = tuple(FieldStrideSymbol(self.name, s.coordinate) if
                             isinstance(s, FieldStrideSymbol) else s for s in self.strides)

        # Render the adjoint field as \hat{<forward name>} in LaTeX output.
        if forward_field.latex_name:
            self.latex_name = ADJOINT_FIELD_LATEX_HIGHLIGHT % forward_field.latex_name
        else:
            self.latex_name = ADJOINT_FIELD_LATEX_HIGHLIGHT % forward_field.name
diff --git a/pystencils/autodiff/autodiff.py b/pystencils/autodiff/autodiff.py
new file mode 100644
index 0000000000000000000000000000000000000000..11e39ca6342c818a532310cd225f53d08abda82e
--- /dev/null
+++ b/pystencils/autodiff/autodiff.py
@@ -0,0 +1,539 @@
+import collections
+from typing import List
+
+import numpy as np
+import sympy as sp
+
+import pystencils as ps
+import pystencils.autodiff
+from pystencils.autodiff.backends import AVAILABLE_BACKENDS
+
+"""
+Mode of backward differentiation
+(see https://autodiff-workshop.github.io/slides/Hueckelheim_nips_autodiff_CNN_PDE.pdf)
+"""
+AVAILABLE_DIFFMODES = ['transposed', 'transposed-forward']
+
+
def get_jacobian_of_assignments(assignments, diff_variables):
    """Compute the Jacobian of an iterable of assignments wrt. *diff_variables*.

    Args:
        assignments: iterable of assignment-like objects (each exposing an
            ``rhs`` attribute), or an object with a ``main_assignments``
            attribute (e.g. a pystencils AssignmentCollection).
        diff_variables: symbols to differentiate with respect to.

    Returns:
        sp.Matrix: Jacobian with one row per assignment and one column per
        entry of *diff_variables*.
    """
    main = getattr(assignments, 'main_assignments', assignments)
    rhs_vector = sp.Matrix([assignment.rhs for assignment in main])
    return rhs_vector.jacobian(diff_variables)
+
+
+class AutoDiffOp(object):
+
+    def __init__(self,
+                 forward_assignments: List[ps.Assignment],
+                 op_name: str = "AutoDiffOp",
+                 time_constant_fields: List[ps.Field] = [],
+                 constant_fields: List[ps.Field] = [],
+                 diff_fields_prefix='diff',  # TODO: remove!
+                 do_common_subexpression_elimination=True,
+                 diff_mode='transposed',  # TODO: find good default
+                 backward_assignments=None,
+                 **kwargs):
+        diff_mode = diff_mode.lower()
+        assert diff_mode in AVAILABLE_DIFFMODES, "Please select a valid differentiation mode: %s" % AVAILABLE_DIFFMODES
+        self._additional_symbols = []
+
+        if 'target' in kwargs:
+            assert kwargs['target'].lower() in ['cpu', 'gpu'], \
+                "AutoDiffOp always supports both cpu and gpu"
+            del kwargs['target']
+
+        if isinstance(forward_assignments, list):
+            main_assignments = [a for a in forward_assignments if isinstance(a.lhs, pystencils.Field.Access)]
+            subexpressions = [a for a in forward_assignments if not isinstance(a.lhs, pystencils.Field.Access)]
+            forward_assignments = pystencils.AssignmentCollection(main_assignments, subexpressions)
+
+        self._forward_assignments = forward_assignments
+        self._constant_fields = constant_fields
+        self._time_constant_fields = time_constant_fields
+        self._kwargs = kwargs
+        self.op_name = op_name
+        self._forward_kernel_cpu = None
+        self._backward_kernel_cpu = None
+        self._forward_kernel_gpu = None
+        self._backward_kernel_gpu = None
+        self._do_common_subexpression_elimination = do_common_subexpression_elimination
+        if backward_assignments:
+            self._forward_assignments = forward_assignments
+            self._forward_read_accesses = None
+            self._forward_write_accesses = None
+            self._forward_input_fields = list(forward_assignments.free_fields)
+            self._forward_output_fields = list(forward_assignments.bound_fields)
+            self._backward_assignments = backward_assignments
+            self._backward_field_map = None
+            self._backward_input_fields = list(backward_assignments.free_fields)
+            self._backward_output_fields = list(backward_assignments.bound_fields)
+        else:
+            if diff_mode == 'transposed':
+                self._create_backward_assignments(diff_fields_prefix)
+            elif diff_mode == 'transposed-forward':
+                self._create_backward_assignments_tf_mad(diff_fields_prefix)
+            else:
+                raise NotImplementedError()
+
    def __hash__(self):
        # TODO make more precise
        # Hash is derived from the textual repr of forward + backward
        # assignments.  NOTE(review): repr of sympy expressions is only
        # guaranteed stable within one process — confirm this suffices.
        return hash(str(self.forward_assignments)) + hash(str(self.backward_assignments))
+
    def _create_backward_assignments(self, diff_fields_prefix):
        """
        Performs automatic differentiation in the traditional adjoint/tangent way.
        Forward writes become backward reads and vice-versa.
        This can lead to problems when parallel reads lead to parallel writes,
        and therefore to race conditions. Therefore, there's is also
        `_create_backward_assignments_tf_mad` that can circumvent that
        problem in the case of stencils that have only one output.

        Args:
            diff_fields_prefix: name prefix for the generated diff fields.
        """

        forward_assignments = self._forward_assignments

        # Normalize: inline subexpressions, reduce to the list of main
        # assignments, then re-wrap as an AssignmentCollection so that
        # `free_symbols` is available.
        if hasattr(forward_assignments, 'new_without_subexpressions'):
            forward_assignments = forward_assignments.new_without_subexpressions()
        if hasattr(forward_assignments, 'main_assignments'):
            forward_assignments = forward_assignments.main_assignments

        if not hasattr(forward_assignments, 'free_symbols'):
            forward_assignments = ps.AssignmentCollection(
                forward_assignments, [])

        read_field_accesses = [
            a for a in forward_assignments.free_symbols if isinstance(a, ps.Field.Access)]
        write_field_accesses = [a.lhs for a in forward_assignments]

        assert all(isinstance(w, ps.Field.Access)
                   for w in write_field_accesses), \
            "Please assure that you only assign to fields in your main_assignments!"

        read_fields = set(s.field for s in read_field_accesses)
        write_fields = set(s.field for s in write_field_accesses)

        # for every field create a corresponding diff field
        diff_read_fields = {f: f.new_field_with_different_name(diff_fields_prefix + f.name)
                            for f in read_fields}
        diff_write_fields = {f: f.new_field_with_different_name(diff_fields_prefix + f.name)
                             for f in write_fields}

        # Translate field accesses from standard to diff fields
        diff_read_field_accesses = [diff_read_fields[fa.field][fa.offsets](*fa.index)
                                    for fa in read_field_accesses]
        diff_write_field_accesses = [diff_write_fields[fa.field][fa.offsets](*fa.index)
                                     for fa in write_field_accesses]

        # Transposed-mode AD: every adjoint read access receives
        # (dF/d read_access)^T * adjoint(write accesses).
        backward_assignments = []
        for lhs, read_access in zip(diff_read_field_accesses, read_field_accesses):
            # don't differentiate for constant fields
            if read_access.field in self._constant_fields:
                continue

            rhs = sp.Matrix(sp.Matrix([e.rhs for e in forward_assignments])).diff(
                read_access).transpose() * sp.Matrix(diff_write_field_accesses)
            assert rhs.shape == (1, 1)
            rhs = rhs[0, 0]

            # if field is constant over we time we can accumulate in assignment
            if read_access.field in self._time_constant_fields:
                backward_assignments.append(ps.Assignment(lhs, lhs + rhs))
            else:
                backward_assignments.append(ps.Assignment(lhs, rhs))

        # CSE is best-effort only.  NOTE(review): the broad except silently
        # hides real errors — consider at least logging the exception.
        try:
            if self._do_common_subexpression_elimination:
                backward_assignments = ps.simp.sympy_cse_on_assignment_list(
                    backward_assignments)
        except Exception:
            pass

        main_assignments = [a for a in backward_assignments if isinstance(a.lhs, pystencils.Field.Access)]
        subexpressions = [a for a in backward_assignments if not isinstance(a.lhs, pystencils.Field.Access)]
        backward_assignments = pystencils.AssignmentCollection(main_assignments, subexpressions)
        assert backward_assignments.has_exclusive_writes, "Backward assignments don't have exclusive writes." + \
            " You should consider using 'transposed-forward' mode for resolving those conflicts"

        self._forward_assignments = forward_assignments
        self._forward_read_accesses = read_field_accesses
        self._forward_write_accesses = write_field_accesses
        self._forward_input_fields = list(read_fields)
        self._forward_output_fields = list(write_fields)
        self._backward_assignments = backward_assignments
        # Forward fields map to their adjoints; forward outputs become
        # backward inputs and vice versa.
        self._backward_field_map = {**diff_read_fields, **diff_write_fields}
        self._backward_input_fields = [
            self._backward_field_map[f] for f in self._forward_output_fields]
        self._backward_output_fields = [
            self._backward_field_map[f] for f in self._forward_input_fields]
+
+    def _create_backward_assignments_tf_mad(self, diff_fields_prefix):
+        """
+        Performs the automatic backward differentiation in a more fancy way
+        with write accesses like in the forward pass (only flipped).
+        It is called "transposed-mode forward-mode algorithmic differentiation" (TF-MAD).
+
+        See this presentation https://autodiff-workshop.github.io/slides/Hueckelheim_nips_autodiff_CNN_PDE.pdf or that
+        paper https://www.tandfonline.com/doi/full/10.1080/10556788.2018.1435654?scroll=top&needAccess=true
+        for more information
+
+        """
+
+        forward_assignments = self._forward_assignments
+        if hasattr(forward_assignments, 'new_without_subexpressions'):
+            forward_assignments = forward_assignments.new_without_subexpressions()
+        if hasattr(forward_assignments, 'main_assignments'):
+            forward_assignments = forward_assignments.main_assignments
+
+        if not hasattr(forward_assignments, 'free_symbols'):
+            forward_assignments = ps.AssignmentCollection(
+                forward_assignments, [])
+
+        read_field_accesses = [
+            a for a in forward_assignments.free_symbols if isinstance(a, ps.Field.Access)]
+        write_field_accesses = [a.lhs for a in forward_assignments]
+        read_fields = set(s.field for s in read_field_accesses)
+        write_fields = set(s.field for s in write_field_accesses)
+
+        self._forward_assignments = forward_assignments
+        self._forward_read_accesses = read_field_accesses
+        self._forward_write_accesses = write_field_accesses
+        self._forward_input_fields = list(read_fields)
+        self._forward_output_fields = list(write_fields)
+
+        read_field_accesses = [
+            a for a in forward_assignments.free_symbols if isinstance(a, ps.Field.Access)]
+        write_field_accesses = [a.lhs for a in forward_assignments]
+
+        # for every field create a corresponding diff field
+        diff_read_fields = {f: pystencils.autodiff.AdjointField(f, diff_fields_prefix)
+                            for f in read_fields}
+        diff_write_fields = {f: pystencils.autodiff.AdjointField(f, diff_fields_prefix)
+                             for f in write_fields}
+
+        assert all(isinstance(w, ps.Field.Access)
+                   for w in write_field_accesses), \
+            "Please check if your assignments are a AssignmentCollection or main_assignments only"
+
+        backward_assignment_dict = collections.OrderedDict()
+        # for each output of forward operation
+        for _, forward_assignment in enumerate(forward_assignments.main_assignments):
+            # we have only one assignment
+            diff_write_field = diff_write_fields[forward_assignment.lhs.field]
+            diff_write_index = forward_assignment.lhs.index
+
+            # TODO: simplify implementation. use matrix notation like in 'transposed' mode
+            for forward_read_field in self._forward_input_fields:
+                if forward_read_field in self._constant_fields:
+                    continue
+                diff_read_field = diff_read_fields[forward_read_field]
+
+                if diff_read_field.index_dimensions == 0:
+
+                    diff_read_field_sum = 0
+                    for ra in read_field_accesses:
+                        if ra.field != forward_read_field:
+                            continue  # ignore constant fields in differentiation
+
+                        # TF-MAD requires flipped stencils
+                        inverted_offset = tuple(-v - w for v,
+                                                w in zip(ra.offsets, forward_assignment.lhs.offsets))
+                        diff_read_field_sum += sp.diff(forward_assignment.rhs, ra) * \
+                            diff_write_field[inverted_offset](
+                                *diff_write_index)
+                    if forward_read_field in self._time_constant_fields:
+                        # Accumulate in case of time_constant_fields
+                        assignment = ps.Assignment(
+                            diff_read_field.center(), diff_read_field.center() + diff_read_field_sum)
+                    else:
+                        # If time dependent, we just need to assign the sum for the current time step
+                        assignment = ps.Assignment(
+                            diff_read_field.center(), diff_read_field_sum)
+
+                    # We can have contributions from multiple forward assignments
+                    if assignment.lhs in backward_assignment_dict:
+                        backward_assignment_dict[assignment.lhs].append(assignment.rhs)
+                    else:
+                        backward_assignment_dict[assignment.lhs] = [assignment.rhs]
+
+                elif diff_read_field.index_dimensions == 1:
+
+                    diff_read_field_sum = [0] * diff_read_field.index_shape[0]
+                    for ra in read_field_accesses:
+                        if ra.field != forward_read_field:
+                            continue  # ignore constant fields in differentiation
+
+                        # TF-MAD requires flipped stencils
+                        inverted_offset = tuple(-v - w for v,
+                                                w in zip(ra.offsets, write_field_accesses[0].offsets))
+                        diff_read_field_sum[ra.index[0]] += sp.diff(forward_assignment.rhs, ra) * \
+                            diff_write_field[inverted_offset]
+
+                    for index in range(diff_read_field.index_shape[0]):
+                        if forward_read_field in self._time_constant_fields:
+                            # Accumulate in case of time_constant_fields
+                            assignment = ps.Assignment(
+                                diff_read_field.center_vector[index],
+                                diff_read_field.center_vector[index] + diff_read_field_sum[index])
+                        else:
+                            # If time dependent, we just need to assign the sum for the current time step
+                            assignment = ps.Assignment(
+                                diff_read_field.center_vector[index], diff_read_field_sum[index])
+
+                    if assignment.lhs in backward_assignment_dict:
+                        backward_assignment_dict[assignment.lhs].append(assignment.rhs)
+                    else:
+                        backward_assignment_dict[assignment.lhs] = [assignment.rhs]
+                else:
+                    raise NotImplementedError()
+
+        backward_assignments = [ps.Assignment(
+            k, sp.Add(*v)) for k, v in backward_assignment_dict.items()]
+
+        try:
+
+            if self._do_common_subexpression_elimination:
+                backward_assignments = ps.simp.sympy_cse_on_assignment_list(
+                    backward_assignments)
+        except Exception:
+            pass
+            # print("Common subexpression elimination failed")
+            # print(err)
+        main_assignments = [a for a in backward_assignments if isinstance(a.lhs, pystencils.Field.Access)]
+        subexpressions = [a for a in backward_assignments if not isinstance(a.lhs, pystencils.Field.Access)]
+        backward_assignments = pystencils.AssignmentCollection(main_assignments, subexpressions)
+
+        assert backward_assignments.has_exclusive_writes, "Backward assignments don't have exclusive writes!"
+
+        self._backward_assignments = backward_assignments
+        self._backward_field_map = {**diff_read_fields, **diff_write_fields}
+        self._backward_input_fields = [
+            self._backward_field_map[f] for f in self._forward_output_fields]
+        self._backward_output_fields = [
+            self._backward_field_map[f] for f in self._forward_input_fields]
+
    @property
    def forward_assignments(self):
        """AssignmentCollection of the forward kernel."""
        return self._forward_assignments

    @property
    def backward_assignments(self):
        """AssignmentCollection of the backward (adjoint) kernel."""
        return self._backward_assignments

    def get_forward_kernel(self, is_gpu):
        """Return the compiled forward kernel for the requested target."""
        if is_gpu:
            return self.forward_kernel_gpu
        else:
            return self.forward_kernel_cpu

    def get_backward_kernel(self, is_gpu):
        """Return the compiled backward kernel for the requested target."""
        if is_gpu:
            return self.backward_kernel_gpu
        else:
            return self.backward_kernel_cpu

    def jacobian(self):
        """Calculates the Jacobian of the forward_assignments with respect to forward read accesses"""
        return get_jacobian_of_assignments(self._forward_assignments, self._forward_read_accesses)
+
+    @property
+    def forward_write_accesses(self):
+        return self._forward_write_accesses
+
+    @property
+    def forward_read_accesses(self):
+        return self._forward_read_accesses
+
+    @property
+    def backward_write_accesses(self):
+        return [a.rhs for a in self.backward_assignments if isinstance(a, ps.Field.Access)]
+
+    @property
+    def backward_read_accesses(self):
+        return [a for a in self.backward_assignments.free_symbols if isinstance(a, ps.Field.Access)]
+
+    def forward_kernel_cpu(self):
+        if not self._forward_kernel_cpu:
+            self._forward_kernel_cpu = ps.create_kernel(
+                self._forward_assignments, **self._kwargs).compile()
+        return self._forward_kernel_cpu
+
    @property
    def forward_kernel_gpu(self):
        """Lazily compiled GPU forward kernel."""
        if not self._forward_kernel_gpu:
            self._forward_kernel_gpu = ps.create_kernel(
                self._forward_assignments, target='gpu', **self._kwargs).compile()
        return self._forward_kernel_gpu

    @property
    def backward_kernel_cpu(self):
        """Lazily compiled CPU backward kernel."""
        if not self._backward_kernel_cpu:
            self._backward_kernel_cpu = ps.create_kernel(
                self._backward_assignments, target='cpu', **self._kwargs).compile()
        return self._backward_kernel_cpu

    @property
    def backward_kernel_gpu(self):
        """Lazily compiled GPU backward kernel."""
        if not self._backward_kernel_gpu:
            self._backward_kernel_gpu = ps.create_kernel(
                self._backward_assignments, target='gpu', **self._kwargs).compile()
        return self._backward_kernel_gpu

    @property
    def backward_fields_map(self):
        """Mapping from forward fields to their adjoint (diff) fields."""
        return self._backward_field_map

    @property
    def backward_input_fields(self):
        """Adjoint fields read by the backward kernel (adjoints of forward outputs)."""
        return self._backward_input_fields

    @property
    def backward_output_fields(self):
        """Adjoint fields written by the backward kernel (adjoints of forward inputs)."""
        return self._backward_output_fields

    @property
    def backward_fields(self):
        """All adjoint fields, outputs first, then inputs."""
        return self._backward_output_fields + self._backward_input_fields

    @property
    def forward_fields(self):
        """All forward fields, outputs first, then inputs."""
        return self._forward_output_fields + self._forward_input_fields

    @property
    def forward_input_fields(self):
        """Fields read by the forward kernel."""
        return self._forward_input_fields

    @property
    def forward_output_fields(self):
        """Fields written by the forward kernel."""
        return self._forward_output_fields

    def create_forward_kernel(self, *args, **kwargs):
        """Compile a fresh forward kernel; *args/**kwargs go to ps.create_kernel."""
        return ps.create_kernel(
            self._forward_assignments, *args, **kwargs).compile()

    def create_backward_kernel(self, *args, **kwargs):
        """Compile a fresh backward kernel; *args/**kwargs go to ps.create_kernel."""
        return ps.create_kernel(
            self._backward_assignments, *args, **kwargs).compile()

    @property
    def constant_fields(self):
        """Fields excluded from differentiation."""
        return self._constant_fields

    @property
    def time_constant_fields(self):
        """Fields whose adjoints are accumulated over time steps."""
        return self._time_constant_fields
+
+    def create_tensorflow_op(self, inputfield_tensor_dict, forward_loop=None, backward_loop=None, backend='tensorflow'):
+        """
+        Creates custom differentiable Tensorflow Op from assignments.
+        Will return either a single output tensor or a OrderedDict[field_name -> tf.Tensor] in case of multiple outputs
+        """
+        backend = backend.lower()
+        assert backend in AVAILABLE_BACKENDS, "\"%s\" is not a. Available backends: %s" % (
+            backend, AVAILABLE_BACKENDS)
+
+        additional_fields = [f for f in inputfield_tensor_dict.keys(
+        ) if f not in self._forward_input_fields]
+
+        for f in additional_fields:
+            if f and isinstance(f, ps.Field):
+                f_adjoint = ps.autodiff.AdjointField(f)
+                self._forward_input_fields.append(f)
+                self._backward_output_fields.append(f_adjoint)
+                self._backward_field_map[f] = f_adjoint
+
+        if not forward_loop:
+
+            def forward_function(**kwargs):
+                for field in self.forward_input_fields:
+                    kwargs[field.name] = pystencils.autodiff._layout_fixer.fix_layout(
+                        kwargs[field.name], field, backend)
+                # TODO: check dangerous function `as_strided`
+                for s in self._additional_symbols:
+                    kwargs[s.name] = getattr(self, s.name)
+                rtn = {f.name: np.lib.stride_tricks.as_strided(np.zeros(f.shape, dtype=f.dtype.numpy_dtype),
+                                                               f.shape,
+                                                               [f.dtype.numpy_dtype.itemsize * a for a in f.strides])
+                       for f in self.forward_output_fields}
+
+                kwargs.update(rtn)
+                self.forward_kernel_cpu(**kwargs)
+                return rtn
+
+            forward_loop = forward_function
+
+        if not backward_loop:
+
+            def backward_function(**kwargs):
+                for field in self.backward_input_fields + self.forward_input_fields:
+                    kwargs[field.name] = pystencils.autodiff._layout_fixer.fix_layout(
+                        kwargs[field.name], field, backend)
+                for s in self._additional_symbols:
+                    kwargs[s.name] = getattr(self, s.name)
+
+                rtn = {f.name: np.lib.stride_tricks.as_strided(np.zeros(f.shape, dtype=f.dtype.numpy_dtype),
+                                                               f.shape,
+                                                               [f.dtype.numpy_dtype.itemsize * a for a in f.strides])
+                       for f in self.backward_output_fields}
+
+                kwargs.update(rtn)
+
+                self.backward_kernel_cpu(**kwargs)
+                return rtn
+
+            backward_loop = backward_function
+
+        if backend == 'tensorflow':
+            import pystencils.autodiff.backends._tensorflow
+            op = pystencils.autodiff.backends._tensorflow.tensorflowop_from_autodiffop(
+                self, inputfield_tensor_dict, forward_loop, backward_loop)
+        elif backend == 'torch':
+            import pystencils.autodiff.backends._pytorch
+            op = pystencils.autodiff.backends._pytorch.create_autograd_function(
+                self, inputfield_tensor_dict, forward_loop, backward_loop)
+        elif backend == 'torch_native':
+            import pystencils.autodiff.backends._torch_native
+            op = pystencils.autodiff.backends._torch_native.create_autograd_function(
+                self, inputfield_tensor_dict, None, None)
+        else:
+            raise NotImplementedError()
+
+        if backend == 'tensorflow':
+            if len(op) == 1:
+                return op[0]
+            else:
+                rtn = collections.OrderedDict()
+                for field, tensor in zip(self.forward_output_fields, op):
+                    if backend == 'tensorflow' and field.has_fixed_shape:
+                        tensor.set_shape(field.shape)
+                    rtn[field.name] = tensor
+                return rtn
+        else:
+            return op
+
+
def create_backward_assignments(forward_assignments,
                                diff_fields_prefix="diff",
                                time_constant_fields=None,
                                constant_fields=None,
                                diff_mode=AVAILABLE_DIFFMODES[0],
                                do_common_sub_expression_elimination=True):
    """Derive backward (adjoint) assignments from *forward_assignments*.

    Convenience wrapper around AutoDiffOp.

    Args:
        forward_assignments: list of assignments or AssignmentCollection.
        diff_fields_prefix: name prefix for generated adjoint fields.
        time_constant_fields: fields constant over time (adjoints accumulate).
        constant_fields: fields excluded from differentiation.
        diff_mode: one of AVAILABLE_DIFFMODES.
        do_common_sub_expression_elimination: run CSE on the result.

    Returns:
        The backward AssignmentCollection of the constructed AutoDiffOp.
    """
    # Bug fix: the former mutable default arguments ([]) were shared between calls.
    if time_constant_fields is None:
        time_constant_fields = []
    if constant_fields is None:
        constant_fields = []
    auto_diff = AutoDiffOp(forward_assignments,
                           diff_fields_prefix=diff_fields_prefix,
                           time_constant_fields=time_constant_fields,
                           constant_fields=constant_fields,
                           diff_mode=diff_mode,
                           do_common_subexpression_elimination=do_common_sub_expression_elimination)
    return auto_diff.backward_assignments
diff --git a/pystencils/autodiff/backends.py b/pystencils/autodiff/backends.py
new file mode 100644
index 0000000000000000000000000000000000000000..bf69d91fe1b78a273571748a1cdf0dec0594536b
--- /dev/null
+++ b/pystencils/autodiff/backends.py
@@ -0,0 +1,25 @@
+# -*- coding: utf-8 -*-
+#
+# Copyright © 2019 Stephan Seitz <stephan.seitz@fau.de>
+#
+# Distributed under terms of the GPLv3 license.
+
+"""
+Backends for generating auto-differentiable operations with pystencils.autodiff
+
+This module will be populated by `pystencils_tensorflow`, `pystencils_torch` if available
+"""
+
AVAILABLE_BACKENDS = []  # backend names, expected to be registered by the imports below

# Importing these optional packages is expected to register their backends.
# NOTE(review): if neither package is installed, AVAILABLE_BACKENDS stays
# empty and AutoDiffOp.create_tensorflow_op rejects every backend — confirm
# this is the intended failure mode.
try:
    import pystencils_tensorflow  # NOQA
except Exception:
    pass

try:
    import pystencils_torch  # NOQA
except Exception:
    pass

# NOTE(review): __all__ names modules that may have failed to import above.
__all__ = ['pystencils_tensorflow', 'pystencils_torch']
diff --git a/pystencils_tests/autodiff/test_autodiff.py b/pystencils_tests/autodiff/test_autodiff.py
new file mode 100644
index 0000000000000000000000000000000000000000..0c5e3246d6ed71cece3d90e897e953076d55985e
--- /dev/null
+++ b/pystencils_tests/autodiff/test_autodiff.py
@@ -0,0 +1,51 @@
+import sympy as sp
+
+import pystencils as ps
+import pystencils.autodiff
+
+
def test_simple_2d_check_assignment_collection():
    # use simply example
    # NOTE(review): the unpacking order (z, x, y) differs from the declaration
    # string "z, y, x", so `x` is bound to the field named 'y' and vice versa.
    # The expected Jacobian repr below is consistent with this swap — confirm
    # it is intentional.
    z, x, y = ps.fields("z, y, x: [2d]")

    forward_assignments = ps.AssignmentCollection([ps.Assignment(
        z[0, 0], x[0, 0]*sp.log(x[0, 0]*y[0, 0]))], [])

    # Jacobian of the single forward assignment wrt. its two read accesses
    jac = pystencils.autodiff.get_jacobian_of_assignments(
        forward_assignments, [x[0, 0], y[0, 0]])

    assert jac.shape == (len(forward_assignments.bound_symbols),
                         len(forward_assignments.free_symbols))
    print(repr(jac))
    assert repr(jac) == 'Matrix([[log(x_C*y_C) + 1, y_C/x_C]])'

    # Smoke test: backward assignments can be derived, and derived once more
    # from the backward assignments themselves (second-order adjoints).
    pystencils.autodiff.create_backward_assignments(
        forward_assignments)
    pystencils.autodiff.create_backward_assignments(
        pystencils.autodiff.create_backward_assignments(forward_assignments))
+
+
def test_simple_2d_check_raw_assignments():
    # use simply example
    # NOTE(review): as in the collection-based test above, the unpacking order
    # (z, x, y) differs from the declaration string "z, y, x"; the expected
    # repr is consistent with that swap.
    z, x, y = ps.fields("z, y, x: [2d]")

    # Raw list of assignments instead of an AssignmentCollection
    forward_assignments = \
        [ps.Assignment(z[0, 0], x[0, 0]*sp.log(x[0, 0]*y[0, 0]))]

    jac = pystencils.autodiff.get_jacobian_of_assignments(
        forward_assignments, [x[0, 0], y[0, 0]])

    assert jac.shape == (1, 2)
    assert repr(jac) == 'Matrix([[log(x_C*y_C) + 1, y_C/x_C]])'

    # Smoke test: backward assignments can be derived from a raw list too
    pystencils.autodiff.create_backward_assignments(
        forward_assignments)
+
+
def main():
    """Run all tests of this module when executed as a script."""
    test_simple_2d_check_assignment_collection()
    test_simple_2d_check_raw_assignments()
+
+
+if __name__ == '__main__':
+    main()
diff --git a/pystencils_tests/autodiff/test_tfmad.py b/pystencils_tests/autodiff/test_tfmad.py
new file mode 100644
index 0000000000000000000000000000000000000000..d754e7cee3f3e1fc820695d66c1970eeb15ad103
--- /dev/null
+++ b/pystencils_tests/autodiff/test_tfmad.py
@@ -0,0 +1,246 @@
+import pystencils as ps
+import sympy
+import numpy as np
+import argparse
+import sympy as sp
+import pystencils.autodiff
+
+
def test_tfmad_stencil():
    """Derive backward assignments for a simple single-field 2D stencil."""
    f, out = ps.fields("f, out: double[2D]")

    # Continuous PDE term: df/dx0 - df/dx1, discretized with 2nd order FD.
    continuous = ps.fd.Diff(f, 0) - ps.fd.Diff(f, 1)
    discretized = ps.fd.Discretization2ndOrder(dx=1)(continuous)

    forward = ps.AssignmentCollection(
        [ps.Assignment(out.center(), discretized)], [])
    print('Forward')
    print(forward)

    print('Backward')
    print(ps.autodiff.create_backward_assignments(
        forward, diff_mode='transposed-forward'))
+
+
def test_tfmad_two_stencils():
    """Derive backward assignments for a stencil reading two input fields."""
    a, b, out = ps.fields("a, b, out: double[2D]")

    # Continuous term combining first derivatives of both inputs.
    continuous = (ps.fd.Diff(a, 0) - ps.fd.Diff(a, 1)
                  - ps.fd.Diff(b, 0) + ps.fd.Diff(b, 1))
    discretized = ps.fd.Discretization2ndOrder(dx=1)(continuous)

    forward = ps.AssignmentCollection(
        [ps.Assignment(out.center(), discretized)], [])
    print('Forward')
    print(forward)

    print('Backward')
    auto_diff = pystencils.autodiff.AutoDiffOp(
        forward, diff_mode='transposed-forward')
    print(auto_diff.backward_assignments)
    # NOTE: the banner says "output fields" but forward_input_fields is
    # printed, as in the original — kept to preserve behavior.
    print('Forward output fields (to check order)')
    print(auto_diff.forward_input_fields)
+
+
+
+
def check_tfmad_vector_input_data(args):
    """Build an advection-diffusion forward kernel with a vector-valued
    velocity input, differentiate it, and print both assignment sets.

    `args` is an argparse Namespace providing dtype, domain_shape, dx, dt
    and diffusion_coefficient.
    """
    dtype = args.dtype
    shape = args.domain_shape
    dim = len(shape)

    # Backing arrays: scalar concentration and a dim-component velocity.
    concentration_arr = np.zeros(shape)
    velocity_arr = np.zeros(shape + (dim,))

    scalar_type = "float" if dtype == np.float32 else "double"
    c, v, c_next = ps.fields(
        "c, v(2), c_next: %s[%i,%i]" % (scalar_type, shape[0], shape[1]),
        c=concentration_arr, v=velocity_arr, c_next=concentration_arr)

    # Advection-diffusion PDE; a single term with an implicit "= 0".
    diffusion_constant = sp.Symbol("D")
    adv_diff_pde = (ps.fd.transient(c)
                    - ps.fd.diffusion(c, diffusion_constant)
                    + ps.fd.advection(c, v))

    discretized = ps.fd.Discretization2ndOrder(args.dx, args.dt)(adv_diff_pde)
    discretized = discretized.subs(diffusion_constant,
                                   args.diffusion_coefficient)
    forward_assignments = ps.AssignmentCollection(
        [ps.Assignment(c_next.center(), discretized)], [])

    autodiff = pystencils.autodiff.AutoDiffOp(
        forward_assignments, diff_mode='transposed-forward')

    print('Forward assignments:')
    print(autodiff.forward_assignments)
    print('Backward assignments:')
    print(autodiff.backward_assignments)
+
+
def test_tfmad_vector_input_data():
    """Build default CLI arguments and run the advection-diffusion check.

    parse_args must be called with an explicit empty argv: a bare
    parse_args() reads sys.argv, so under pytest it would consume the test
    runner's own command line and abort on unknown arguments.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument(
        '--domain_shape', default=(100, 30), nargs=2, type=int, help="")
    parser.add_argument(
        '--dx', default=1, type=float, help="")
    parser.add_argument(
        '--dt', default=0.01, type=float, help="")
    parser.add_argument(
        '--diffusion_coefficient', default=1, type=float, help="")
    parser.add_argument(
        '--num_total_time_steps', default=100, type=int)
    parser.add_argument(
        '--num_time_steps_for_op', default=1, type=int)
    parser.add_argument(
        '--learning_rate', default=1e-2, type=float)
    parser.add_argument(
        '--dtype', default=np.float64, type=np.dtype)
    parser.add_argument(
        '--num_optimization_steps', default=2000, type=int)

    # Empty argv: use the declared defaults, ignore the real command line.
    args = parser.parse_args([])
    check_tfmad_vector_input_data(args)
+
+
+# def test_tfmad_gradient_check():
+    # a, b, out = ps.fields("a, b, out: double[21,13]")
+
+    # cont = ps.fd.Diff(a, 0) - ps.fd.Diff(a, 1) - \
+        # ps.fd.Diff(b, 0) + ps.fd.Diff(b, 1)
+    # discretize = ps.fd.Discretization2ndOrder(dx=1)
+    # discretization = discretize(cont)
+
+    # assignment = ps.Assignment(out.center(), discretization)
+    # assignment_collection = ps.AssignmentCollection([assignment], [])
+    # print('Forward')
+    # print(assignment_collection)
+
+    # print('Backward')
+    # auto_diff = pystencils.autodiff.AutoDiffOp(
+        # assignment_collection, diff_mode='transposed-forward')
+    # backward = auto_diff.backward_assignments
+    # print(backward)
+    # print('Forward output fields (to check order)')
+    # print(auto_diff.forward_input_fields)
+
+    # a_tensor = tf.Variable(np.zeros(a.shape, a.dtype.numpy_dtype))
+    # b_tensor = tf.Variable(np.zeros(a.shape, a.dtype.numpy_dtype))
+    # out_tensor = auto_diff.create_tensorflow_op({a: a_tensor, b: b_tensor})
+
+    # with tf.Session() as sess:
+        # sess.run(tf.global_variables_initializer())
+
+        # gradient_error = pystencils._tensorflow_utils.compute_gradient_error_without_border(
+            # [a_tensor, b_tensor], [a.shape, b.shape], out_tensor, out.shape, num_border_pixels=2, ndim=2)
+        # print('error: %s' % gradient_error.max_error)
+
+        # assert gradient_error < 1e-4
+
+
+# def test_tfmad_gradient_check_torch():
+    # a, b, out = ps.fields("a, b, out: float[21,13]")
+
+    # cont = ps.fd.Diff(a, 0) - ps.fd.Diff(a, 1) - \
+        # ps.fd.Diff(b, 0) + ps.fd.Diff(b, 1)
+    # discretize = ps.fd.Discretization2ndOrder(dx=1)
+    # discretization = discretize(cont)
+
+    # assignment = ps.Assignment(out.center(), discretization)
+    # assignment_collection = ps.AssignmentCollection([assignment], [])
+    # print('Forward')
+    # print(assignment_collection)
+
+    # print('Backward')
+    # auto_diff = pystencils.autodiff.AutoDiffOp(
+        # assignment_collection, diff_mode='transposed-forward')
+    # backward = auto_diff.backward_assignments
+    # print(backward)
+    # print('Forward output fields (to check order)')
+    # print(auto_diff.forward_input_fields)
+
+    # a_tensor = torch.zeros(
+        # *a.shape, dtype=torch.float64, requires_grad=True)
+    # b_tensor = torch.zeros(
+        # *b.shape, dtype=torch.float64, requires_grad=True)
+    # function = auto_diff.create_tensorflow_op(
+        # {a: a_tensor, b: b_tensor}, backend='torch')
+
+    # torch.autograd.gradcheck(function.apply, [a_tensor, b_tensor])
+
+
def get_curl(input_field: ps.Field, curl_field: ps.Field):
    """Return a ps.AssignmentCollection describing the calculation of
    the curl given a 2d or 3d vector field [z,y,x](f) or [y,x](f)

    Note that the curl of a 2d vector field is defined in ℝ3!
    Only the non-zero z-component is returned

    Arguments:
        input_field {ps.Field} -- a field with index_dimensions <= 1;
            a scalar field is interpreted as the z-component of a vector field
        curl_field {ps.Field} -- output field, must have index_dimensions == 1

    Raises:
        NotImplementedError -- vector-valued (2- or 3-component) inputs are
            not implemented; only the scalar z-component case is supported

    Returns:
        ps.AssignmentCollection -- AssignmentCollection describing the calculation of the curl
    """
    assert input_field.index_dimensions <= 1, "Must be a vector or a scalar field"
    assert curl_field.index_dimensions == 1, "Must be a vector field"
    # 2nd-order central differences with unit grid spacing.
    discretize = ps.fd.Discretization2ndOrder(dx=1)

    if input_field.index_dimensions == 0:
        # Scalar f is the z-component (0, 0, f); spatial axis 0 is y and
        # axis 1 is x in this [y,x] layout.
        dy = ps.fd.Diff(input_field, 0)
        dx = ps.fd.Diff(input_field, 1)
        # NOTE(review): the analytic curl of (0, 0, f) is (df/dy, -df/dx, 0),
        # but the second component is assigned +df/dx here — confirm whether
        # the sign is intentional (e.g. a convention of the callers).
        f_x = ps.Assignment(curl_field.center(0), discretize(dy))
        f_y = ps.Assignment(curl_field.center(1), discretize(dx))
        return ps.AssignmentCollection([f_x, f_y], [])

    else:

        # Genuine vector inputs are recognized but not yet implemented.
        if input_field.index_shape[0] == 2:
            raise NotImplementedError()

        elif input_field.index_shape[0] == 3:
            raise NotImplementedError()
        else:
            raise NotImplementedError()
+
+
def test_tfmad_two_outputs():
    """Differentiate the scalar-input curl operator, which writes two
    output components, and print both assignment sets."""
    scalar_shape = (20, 30)

    # Scalar input field and a 2-component output field of matching extent.
    curl_input = ps.Field.create_fixed_size(
        field_name='curl_input', shape=scalar_shape, index_dimensions=0)
    curl_output = ps.Field.create_fixed_size(
        field_name='curl', shape=scalar_shape + (2,), index_dimensions=1)

    curl_op = pystencils.autodiff.AutoDiffOp(
        get_curl(curl_input, curl_output), diff_mode="transposed-forward")

    print('Forward')
    print(curl_op.forward_assignments)

    print('Backward')
    print(curl_op.backward_assignments)
+
+
def main():
    """Run every enabled test in this module when executed as a script."""
    for test_function in (test_tfmad_stencil,
                          test_tfmad_two_stencils,
                          test_tfmad_vector_input_data,
                          test_tfmad_two_outputs):
        test_function()


if __name__ == '__main__':
    main()