diff --git a/pystencils/__init__.py b/pystencils/__init__.py
index a7e21703b48c7398da8be6f609f0f3123f7822d1..cdfa7f3cc9b19cd044e17136c406f4cecffa7755 100644
--- a/pystencils/__init__.py
+++ b/pystencils/__init__.py
@@ -1,17 +1,19 @@
 """Module to generate stencil kernels in C or CUDA using sympy expressions and call them as Python functions"""
 from . import sympy_gmpy_bug_workaround  # NOQA
-from . import fd
 from . import stencil as stencil
-from .assignment import Assignment, assignment_from_stencil
-from .data_types import TypedSymbol
-from .datahandling import create_data_handling
-from .display_utils import show_code, to_dot
 from .field import Field, FieldType, fields
-from .kernel_decorator import kernel
-from .kernelcreation import create_indexed_kernel, create_kernel, create_staggered_kernel
-from .simp import AssignmentCollection
+from .data_types import TypedSymbol, matrix_symbols, typed_symbols
 from .slicing import make_slice
+from .kernelcreation import create_kernel, create_indexed_kernel, create_staggered_kernel
+from .display_utils import show_code, to_dot
+from .simp import AssignmentCollection
+from .assignment import Assignment, assignment_from_stencil
 from .sympyextensions import SymbolCreator
+from .datahandling import create_data_handling
+from .kernel_decorator import kernel
+from . import fd
+from .spatial_coordinates import (x_, x_staggered, x_staggered_vector, x_vector,
+                                  y_, y_staggered, z_, z_staggered)
 
 __all__ = ['Field', 'FieldType', 'fields',
            'TypedSymbol',
@@ -25,4 +27,8 @@ __all__ = ['Field', 'FieldType', 'fields',
            'create_data_handling',
            'kernel',
            'fd',
+           'x_', 'y_', 'z_',
+           'x_staggered', 'y_staggered', 'z_staggered',
+           'x_vector', 'x_staggered_vector',
+           'typed_symbols', 'matrix_symbols',
            'stencil']
diff --git a/pystencils/astnodes.py b/pystencils/astnodes.py
index 7e43c1a8a9a3fe7efd092840226e46f529648df3..a8c34cff6ac3fb873f1e50e0ab508056e0aab9b1 100644
--- a/pystencils/astnodes.py
+++ b/pystencils/astnodes.py
@@ -33,7 +33,7 @@ class Node:
         raise NotImplementedError()
 
     def subs(self, subs_dict) -> None:
-        """Inplace! substitute, similar to sympy's but modifies the AST inplace."""
+        """Inplace! Substitute, similar to sympy's but modifies the AST inplace."""
         for a in self.args:
             a.subs(subs_dict)
 
@@ -102,7 +102,8 @@ class Conditional(Node):
         result = self.true_block.undefined_symbols
         if self.false_block:
             result.update(self.false_block.undefined_symbols)
-        result.update(self.condition_expr.atoms(sp.Symbol))
+        if hasattr(self.condition_expr, 'atoms'):
+            result.update(self.condition_expr.atoms(sp.Symbol))
         return result
 
     def __str__(self):
@@ -227,7 +228,8 @@ class KernelFunction(Node):
             return ()
 
         argument_symbols = self._body.undefined_symbols - self.global_variables
-        parameters = [self.Parameter(symbol, get_fields(symbol)) for symbol in argument_symbols]
+        parameters = [self.Parameter(symbol, get_fields(symbol))
+                      for symbol in argument_symbols]
         if hasattr(self, 'indexing'):
             parameters += [self.Parameter(s, []) for s in self.indexing.symbolic_parameters()]
         parameters.sort(key=lambda p: p.symbol.name)
diff --git a/pystencils/backends/cuda_backend.py b/pystencils/backends/cuda_backend.py
index 3c1a3cd8bf9629c0b003cc8b6c38693c4426e282..915d5601b5fe7bc05b9833c21da62915b22552e5 100644
--- a/pystencils/backends/cuda_backend.py
+++ b/pystencils/backends/cuda_backend.py
@@ -1,7 +1,12 @@
 from os.path import dirname, join
 
+import numpy as np
+import sympy
+from sympy.core.compatibility import string_types
+
 from pystencils.astnodes import Node
 from pystencils.backends.cbackend import CBackend, CustomSympyPrinter, generate_c
+from pystencils.data_types import cast_func, create_type, get_type_of_expression
 from pystencils.fast_approximation import fast_division, fast_inv_sqrt, fast_sqrt
 
 with open(join(dirname(__file__), 'cuda_known_functions.txt')) as f:
@@ -43,11 +48,19 @@ class CudaBackend(CBackend):
         return code
 
     def _print_TextureDeclaration(self, node):
-        code = "texture<%s, cudaTextureType%iD, cudaReadModeElementType> %s;" % (
-            str(node.texture.field.dtype),
-            node.texture.field.spatial_dimensions,
-            node.texture
-        )
+
+        if node.texture.field.dtype.numpy_dtype.itemsize > 4:
+            code = "texture<fp_tex_%s, cudaTextureType%iD, cudaReadModeElementType> %s;" % (
+                str(node.texture.field.dtype),
+                node.texture.field.spatial_dimensions,
+                node.texture
+            )
+        else:
+            code = "texture<%s, cudaTextureType%iD, cudaReadModeElementType> %s;" % (
+                str(node.texture.field.dtype),
+                node.texture.field.spatial_dimensions,
+                node.texture
+            )
         return code
 
     def _print_SkipIteration(self, _):
@@ -62,17 +75,23 @@ class CudaSympyPrinter(CustomSympyPrinter):
         self.known_functions.update(CUDA_KNOWN_FUNCTIONS)
 
     def _print_TextureAccess(self, node):
+        dtype = node.texture.field.dtype.numpy_dtype
 
         if node.texture.cubic_bspline_interpolation:
             template = "cubicTex%iDSimple<%s>(%s, %s)"
         else:
-            template = "tex%iD<%s>(%s, %s)"
+            if dtype.itemsize > 4:
+                # Use PyCuda hack!
+                # https://github.com/inducer/pycuda/blob/master/pycuda/cuda/pycuda-helpers.hpp
+                template = "fp_tex%iD(%s, %s)"
+            else:
+                template = "tex%iD(%s, %s)"
 
         code = template % (
             node.texture.field.spatial_dimensions,
-            str(node.texture.field.dtype),
             str(node.texture),
-            ', '.join(self._print(o) for o in node.offsets)
+            # + 0.5 comes from Nvidia's staggered indexing
+            ', '.join(self._print(o + 0.5) for o in reversed(node.offsets))
         )
         return code
 
@@ -84,3 +103,78 @@ class CudaSympyPrinter(CustomSympyPrinter):
         elif isinstance(expr, fast_inv_sqrt):
             return "__frsqrt_rn(%s)" % tuple(self._print(a) for a in expr.args)
         return super()._print_Function(expr)
+
+    def _print_Pow(self, expr):
+        if np.issubdtype(get_type_of_expression(expr.args[0],
+                                                default_float_type=self._float_type).numpy_dtype, np.integer):
+            expr._args[0] = cast_func(expr.args[0], create_type(self._float_type))
+
+        if expr.exp.is_integer and expr.exp.is_number and 0 < expr.exp < 8:
+            return "(" + self._print(sympy.Mul(*[expr.base] * expr.exp, evaluate=False)) + ")"
+        elif expr.exp.is_integer and expr.exp.is_number and - 8 < expr.exp < 0:
+            return "1 / ({})".format(self._print(sympy.Mul(*[expr.base] * (-expr.exp), evaluate=False)))
+        else:
+
+            try:
+                suffix = self._get_func_suffix(get_type_of_expression(
+                    expr.args[0], default_float_type=self._float_type).sympy_dtype)
+            except Exception:
+                suffix = ""
+            if expr.exp == -1:
+                return '1.0%s/%s' % (suffix.upper(), self.parenthesize(expr.base, sympy.PREC))
+            elif expr.exp == 0.5:
+                return '%ssqrt%s(%s)' % (self._ns, suffix, self._print(expr.base))
+            elif expr.exp == sympy.S.One / 3 and self.standard != 'C89':
+                return '%scbrt%s(%s)' % (self._ns, suffix, self._print(expr.base))
+            else:
+                return '%spow%s(%s, %s)' % (self._ns, suffix, self._print(expr.base),
+                                            self._print(expr.exp))
+
+    def _print_math_func(self, expr, nest=False, known=None):
+        if np.issubdtype(get_type_of_expression(expr.args[0],
+                                                default_float_type=self._float_type).numpy_dtype, np.integer):
+            expr._args = (cast_func(expr.args[0], self._float_type), *expr.args[1:])
+
+        if len(expr.args) == 2 and \
+                (get_type_of_expression(expr.args[0], self._float_type) !=  # NOQA
+                 get_type_of_expression(expr.args[1], default_float_type=self._float_type)):
+            expr._args = (expr.args[0], cast_func(expr.args[1], get_type_of_expression(
+                expr.args[0], default_float_type=self._float_type)))
+
+        if known is None:
+            known = self.known_functions[expr.__class__.__name__]
+        if not isinstance(known, string_types):
+            for cb, name in known:
+                if cb(*expr.args):
+                    known = name
+                    break
+            else:
+                raise ValueError("No matching printer")
+        suffix = self._get_func_suffix(
+            get_type_of_expression(expr.args[0], default_float_type=self._float_type
+                                   ).sympy_dtype) if self._ns + known in self._prec_funcs else ''
+
+        if nest:
+            args = self._print(expr.args[0])
+            if len(expr.args) > 1:
+                paren_pile = ''
+                for curr_arg in expr.args[1:-1]:
+                    paren_pile += ')'
+                    args += ', {ns}{name}{suffix}({next}'.format(
+                        ns=self._ns,
+                        name=known,
+                        suffix=suffix,
+                        next=self._print(curr_arg)
+                    )
+                args += ', %s%s' % (
+                    self._print(expr.func(expr.args[-1])),
+                    paren_pile
+                )
+        else:
+            args = ', '.join(map(lambda arg: self._print(arg), expr.args))
+        return '{ns}{name}{suffix}({args})'.format(
+            ns=self._ns,
+            name=known,
+            suffix=suffix,
+            args=args
+        )
diff --git a/pystencils/backends/cuda_known_functions.txt b/pystencils/backends/cuda_known_functions.txt
index 42cf554ada57d9055bddc659d53c4eaff664275a..224f4a49d65322446a6b17c696351f9371435887 100644
--- a/pystencils/backends/cuda_known_functions.txt
+++ b/pystencils/backends/cuda_known_functions.txt
@@ -45,6 +45,7 @@ tex1D
 tex2D
 tex3D
 
+sqrtf
 rsqrtf
 cbrtf
 rcbrtf
diff --git a/pystencils/cpu/kernelcreation.py b/pystencils/cpu/kernelcreation.py
index fc6765e589e341b40ebbc778c7c26839531ae826..7859c537fe3fbe50a2e94bb2c2c67622f1c1f852 100644
--- a/pystencils/cpu/kernelcreation.py
+++ b/pystencils/cpu/kernelcreation.py
@@ -10,8 +10,8 @@ from pystencils.data_types import BasicType, StructType, TypedSymbol, create_typ
 from pystencils.field import Field, FieldType
 from pystencils.transformations import (
     add_types, filtered_tree_iteration, get_base_buffer_index, get_optimal_loop_ordering,
-    make_loop_over_domain, move_constants_before_loop, parse_base_pointer_info,
-    resolve_buffer_accesses, resolve_field_accesses, split_inner_loop)
+    implement_interpolations, make_loop_over_domain, move_constants_before_loop,
+    parse_base_pointer_info, resolve_buffer_accesses, resolve_field_accesses, split_inner_loop)
 
 AssignmentOrAstNodeList = List[Union[Assignment, ast.Node]]
 
@@ -67,6 +67,7 @@ def create_kernel(assignments: AssignmentOrAstNodeList, function_name: str = "ke
                                                         ghost_layers=ghost_layers, loop_order=loop_order)
     ast_node = KernelFunction(loop_node, 'cpu', 'c', compile_function=make_python_function,
                               ghost_layers=ghost_layer_info, function_name=function_name)
+    implement_interpolations(body)
 
     if split_groups:
         typed_split_groups = [[type_symbol(s) for s in split_group] for split_group in split_groups]
@@ -139,6 +140,8 @@ def create_indexed_kernel(assignments: AssignmentOrAstNodeList, index_fields, fu
     loop_body = Block([])
     loop_node = LoopOverCoordinate(loop_body, coordinate_to_loop_over=0, start=0, stop=index_fields[0].shape[0])
 
+    implement_interpolations(loop_node)
+
     for assignment in assignments:
         loop_body.append(assignment)
 
diff --git a/pystencils/data_types.py b/pystencils/data_types.py
index 6bae2da8de109085421dab6a5896f96e7bf9c3e2..ef2ef558db830752264d08d7113cb6c3ee3fe99c 100644
--- a/pystencils/data_types.py
+++ b/pystencils/data_types.py
@@ -1,10 +1,14 @@
 import ctypes
+import uuid
+from typing import Tuple
 
 import numpy as np
 import sympy as sp
+import sympy.codegen.ast
 from sympy.core.cache import cacheit
 from sympy.logic.boolalg import Boolean
 
+import pystencils
 from pystencils.cache import memorycache
 from pystencils.utils import all_equal
 
@@ -15,6 +19,30 @@ except ImportError as e:
     _ir_importerror = e
 
 
+def typed_symbols(names, dtype, *args):
+    symbols = sp.symbols(names, *args)
+    if isinstance(symbols, Tuple):
+        return tuple(TypedSymbol(str(s), dtype) for s in symbols)
+    else:
+        return TypedSymbol(str(symbols), dtype)
+
+
+def matrix_symbols(names, dtype, rows, cols):
+    if isinstance(names, str):
+        names = names.replace(' ', '').split(',')
+
+    matrices = []
+    for n in names:
+        symbols = typed_symbols("%s:%i" % (n, rows * cols), dtype)
+        matrices.append(sp.Matrix(rows, cols, lambda i, j: symbols[i * cols + j]))
+
+    return tuple(matrices)
+
+
+def get_dummy_symbol(dtype=None):
+    return pystencils.typed_symbols('dummy%s' % uuid.uuid4().hex, dtype or create_type('bool'))
+
+
 # noinspection PyPep8Naming
 class address_of(sp.Function):
     is_Atom = True
@@ -329,12 +357,17 @@ def peel_off_type(dtype, type_to_peel_off):
     return dtype
 
 
-def collate_types(types):
+def collate_types(types, forbid_collation_to_float=False):
     """
     Takes a sequence of types and returns their "common type" e.g. (float, double, float) -> double
     Uses the collation rules from numpy.
     """
 
+    if forbid_collation_to_float:
+        types = [t for t in types if not (hasattr(t, 'is_float') and t.is_float())]
+        if not types:
+            return [create_type('int32')]
+
     # Pointer arithmetic case i.e. pointer + integer is allowed
     if any(type(t) is PointerType for t in types):
         pointer_type = None
@@ -370,17 +403,19 @@ def collate_types(types):
 
 
 @memorycache(maxsize=2048)
-def get_type_of_expression(expr):
+def get_type_of_expression(expr, default_float_type="double", default_int_type='int'):
     from pystencils.astnodes import ResolvedFieldAccess
     from pystencils.cpu.vectorization import vec_all, vec_any
 
     expr = sp.sympify(expr)
     if isinstance(expr, sp.Integer):
-        return create_type("int")
+        return create_type(default_int_type)
     elif isinstance(expr, sp.Rational) or isinstance(expr, sp.Float):
-        return create_type("double")
+        return create_type(default_float_type)
     elif isinstance(expr, ResolvedFieldAccess):
         return expr.field.dtype
+    elif isinstance(expr, pystencils.field.Field.AbstractAccess):
+        return expr.field.dtype
     elif isinstance(expr, TypedSymbol):
         return expr.dtype
     elif isinstance(expr, sp.Symbol):
@@ -401,16 +436,23 @@ def get_type_of_expression(expr):
     elif isinstance(expr, sp.boolalg.Boolean) or isinstance(expr, sp.boolalg.BooleanFunction):
         # if any arg is of vector type return a vector boolean, else return a normal scalar boolean
         result = create_type("bool")
-        vec_args = [get_type_of_expression(a) for a in expr.args if isinstance(get_type_of_expression(a), VectorType)]
+        vec_args = [get_type_of_expression(a)
+                    for a in expr.args if isinstance(get_type_of_expression(a), VectorType)]
         if vec_args:
             result = VectorType(result, width=vec_args[0].width)
         return result
     elif isinstance(expr, sp.Pow):
         return get_type_of_expression(expr.args[0])
     elif isinstance(expr, sp.Expr):
-        types = tuple(get_type_of_expression(a) for a in expr.args)
-        return collate_types(types)
-
+        expr: sp.Expr
+        if expr.args:
+            types = tuple(get_type_of_expression(a) for a in expr.args)
+            return collate_types(types)
+        else:
+            if expr.is_integer:
+                return create_type(default_int_type)
+            else:
+                return create_type(default_float_type)
     raise NotImplementedError("Could not determine type for", expr, type(expr))
 
 
@@ -463,6 +505,10 @@ class BasicType(Type):
     def numpy_dtype(self):
         return self._dtype
 
+    @property
+    def sympy_dtype(self):
+        return getattr(sympy.codegen.ast, str(self.numpy_dtype))
+
     @property
     def item_size(self):
         return 1
diff --git a/pystencils/field.py b/pystencils/field.py
index b6437a971398c70c0bcf392129269b87c912f0b8..be0f2b3801fd280156abce7fda7f3ecb20bf3885 100644
--- a/pystencils/field.py
+++ b/pystencils/field.py
@@ -1,4 +1,6 @@
+import functools
 import hashlib
+import operator
 import pickle
 import re
 from enum import Enum
@@ -9,6 +11,7 @@ import numpy as np
 import sympy as sp
 from sympy.core.cache import cacheit
 
+import pystencils
 from pystencils.alignedarray import aligned_empty
 from pystencils.data_types import StructType, TypedSymbol, create_type
 from pystencils.kernelparameters import FieldShapeSymbol, FieldStrideSymbol
@@ -38,7 +41,6 @@ def fields(description=None, index_dimensions=0, layout=None, **kwargs):
             >>> assert s.index_dimensions == 0 and s.dtype.numpy_dtype == arr_s.dtype
             >>> assert v.index_shape == (2,)
 
-
         Format string can be left out, field names are taken from keyword arguments.
             >>> fields(f1=arr_s, f2=arr_s)
             [f1, f2]
@@ -292,6 +294,10 @@ class Field(AbstractField):
         self.shape = shape
         self.strides = strides
         self.latex_name = None  # type: Optional[str]
+        self.coordinate_origin = sp.Matrix(tuple(
+            0 for _ in range(self.spatial_dimensions)
+        ))  # type: tuple[float,sp.Symbol]
+        self.coordinate_transform = sp.eye(self.spatial_dimensions)
 
     def new_field_with_different_name(self, new_name):
         if self.has_fixed_shape:
@@ -312,6 +318,9 @@ class Field(AbstractField):
     def ndim(self) -> int:
         return len(self.shape)
 
+    def values_per_cell(self) -> int:
+        return functools.reduce(operator.mul, self.index_shape, 1)
+
     @property
     def layout(self):
         return self._layout
@@ -409,6 +418,34 @@ class Field(AbstractField):
             return False
         return self.hashable_contents() == other.hashable_contents()
 
+    @property
+    def physical_coordinates(self):
+        return self.coordinate_transform @ (self.coordinate_origin + pystencils.x_vector(self.spatial_dimensions))
+
+    @property
+    def physical_coordinates_staggered(self):
+        return self.coordinate_transform @ \
+            (self.coordinate_origin + pystencils.x_staggered_vector(self.spatial_dimensions))
+
+    def index_to_physical(self, index_coordinates, staggered=False):
+        if staggered:
+            index_coordinates = sp.Matrix([i + 0.5 for i in index_coordinates])
+        return self.coordinate_transform @ (self.coordinate_origin + index_coordinates)
+
+    def physical_to_index(self, physical_coordinates, staggered=False):
+        rtn = self.coordinate_transform.inv() @ physical_coordinates - self.coordinate_origin
+        if staggered:
+            rtn = sp.Matrix([i - 0.5 for i in rtn])
+
+        return rtn
+
+    def index_to_staggered_physical_coordinates(self, symbol_vector):
+        symbol_vector += sp.Matrix([0.5] * self.spatial_dimensions)
+        return self.create_physical_coordinates(symbol_vector)
+
+    def set_coordinate_origin_to_field_center(self):
+        self.coordinate_origin = -sp.Matrix([i / 2 for i in self.spatial_shape])
+
     # noinspection PyAttributeOutsideInit,PyUnresolvedReferences
     class Access(TypedSymbol, AbstractField.AbstractAccess):
         """Class representing a relative access into a `Field`.
@@ -429,11 +466,12 @@ class Field(AbstractField):
             >>> central_y_component.at_index(0)  # change component
             v_C^0
         """
+
         def __new__(cls, name, *args, **kwargs):
             obj = Field.Access.__xnew_cached_(cls, name, *args, **kwargs)
             return obj
 
-        def __new_stage2__(self, field, offsets=(0, 0, 0), idx=None, is_absolute_access=False):
+        def __new_stage2__(self, field, offsets=(0, 0, 0), idx=None, is_absolute_access=False, dtype=None):
             field_name = field.name
             offsets_and_index = (*offsets, *idx) if idx is not None else offsets
             constant_offsets = not any([isinstance(o, sp.Basic) and not o.is_Integer for o in offsets_and_index])
@@ -484,7 +522,7 @@ class Field(AbstractField):
             return obj
 
         def __getnewargs__(self):
-            return self.field, self.offsets, self.index, self.is_absolute_access
+            return self.field, self.offsets, self.index, self.is_absolute_access, self.dtype
 
         # noinspection SpellCheckingInspection
         __xnew__ = staticmethod(__new_stage2__)
@@ -503,7 +541,7 @@ class Field(AbstractField):
             if len(idx) != self.field.index_dimensions:
                 raise ValueError("Wrong number of indices: "
                                  "Got %d, expected %d" % (len(idx), self.field.index_dimensions))
-            return Field.Access(self.field, self._offsets, idx)
+            return Field.Access(self.field, self._offsets, idx, dtype=self.dtype)
 
         def __getitem__(self, *idx):
             return self.__call__(*idx)
@@ -562,7 +600,7 @@ class Field(AbstractField):
             """
             offset_list = list(self.offsets)
             offset_list[coord_id] += offset
-            return Field.Access(self.field, tuple(offset_list), self.index)
+            return Field.Access(self.field, tuple(offset_list), self.index, dtype=self.dtype)
 
         def get_shifted(self, *shift) -> 'Field.Access':
             """Returns a new Access with changed spatial coordinates
@@ -572,7 +610,10 @@ class Field(AbstractField):
                 >>> f[0,0].get_shifted(1, 1)
                 f_NE
             """
-            return Field.Access(self.field, tuple(a + b for a, b in zip(shift, self.offsets)), self.index)
+            return Field.Access(self.field,
+                                tuple(a + b for a, b in zip(shift, self.offsets)),
+                                self.index,
+                                dtype=self.dtype)
 
         def at_index(self, *idx_tuple) -> 'Field.Access':
             """Returns new Access with changed index.
@@ -582,7 +623,7 @@ class Field(AbstractField):
                 >>> f(0).at_index(8)
                 f_C^8
             """
-            return Field.Access(self.field, self.offsets, idx_tuple)
+            return Field.Access(self.field, self.offsets, idx_tuple, dtype=self.dtype)
 
         @property
         def is_absolute_access(self) -> bool:
diff --git a/pystencils/gpucuda/cudajit.py b/pystencils/gpucuda/cudajit.py
index 92738e1e7df9c35dca87ff5f3de84ceeb5bcad8a..ee78b4663c4f40ebf3de0f96ebd405046a75de3c 100644
--- a/pystencils/gpucuda/cudajit.py
+++ b/pystencils/gpucuda/cudajit.py
@@ -1,9 +1,13 @@
+from os.path import dirname, isdir, join
+
 import numpy as np
 
 from pystencils.backends.cbackend import generate_c, get_headers
 from pystencils.data_types import StructType
 from pystencils.field import FieldType
+from pystencils.gpucuda.texture_utils import ndarray_to_tex
 from pystencils.include import get_pystencils_include_path
+from pystencils.interpolation_astnodes import TextureAccess
 from pystencils.kernelparameters import FieldPointerSymbol
 
 USE_FAST_MATH = True
@@ -29,17 +33,30 @@ def make_python_function(kernel_function_node, argument_dict=None, custom_backen
     if argument_dict is None:
         argument_dict = {}
 
-    header_list = ['<stdint.h>'] + list(get_headers(kernel_function_node))
+    header_list = ['<cstdint>', '<pycuda-helper-modified.hpp>'] + list(get_headers(kernel_function_node))
     includes = "\n".join(["#include %s" % (include_file,) for include_file in header_list])
 
     code = includes + "\n"
     code += "#define FUNC_PREFIX __global__\n"
     code += "#define RESTRICT __restrict__\n\n"
     code += str(generate_c(kernel_function_node, dialect='cuda', custom_backend=custom_backend))
-    options = ["-w", "-std=c++11", "-Wno-deprecated-gpu-targets"]
+    textures = set(d.texture for d in kernel_function_node.atoms(TextureAccess))
+
+    nvcc_options = ["-w", "-std=c++11", "-Wno-deprecated-gpu-targets"]
     if USE_FAST_MATH:
-        options.append("-use_fast_math")
-    mod = SourceModule(code, options=options, include_dirs=[get_pystencils_include_path()])
+        nvcc_options.append("-use_fast_math")
+
+    if any(t.cubic_bspline_interpolation for t in textures):
+        assert isdir(join(dirname(__file__), "CubicInterpolationCUDA", "code")), \
+            "Submodule CubicInterpolationCUDA does not exist"
+        nvcc_options += ["-I" + join(dirname(__file__), "CubicInterpolationCUDA", "code")]
+        nvcc_options += ["-I" + join(dirname(__file__), "CubicInterpolationCUDA", "code", "internal")]
+
+        needed_dims = set(t.field.spatial_dimensions for t in textures if t.cubic_bspline_interpolation)
+        for i in needed_dims:
+            code = 'extern "C++" {\n#include "cubicTex%iD.cu"\n}\n' % i + code
+
+    mod = SourceModule(code, options=nvcc_options, include_dirs=[get_pystencils_include_path()])
     func = mod.get_function(kernel_function_node.function_name)
 
     parameters = kernel_function_node.get_parameters()
@@ -63,6 +80,12 @@ def make_python_function(kernel_function_node, argument_dict=None, custom_backen
             block_and_thread_numbers['block'] = tuple(int(i) for i in block_and_thread_numbers['block'])
             block_and_thread_numbers['grid'] = tuple(int(i) for i in block_and_thread_numbers['grid'])
 
+            # TODO: use texture objects:
+            # https://devblogs.nvidia.com/cuda-pro-tip-kepler-texture-objects-improve-performance-and-flexibility/
+            for tex in textures:
+                tex_ref = mod.get_texref(str(tex))
+                ndarray_to_tex(tex_ref, full_arguments[tex.field.name], tex.address_mode,
+                               tex.filter_mode, tex.use_normalized_coordinates, tex.read_as_integer)
             args = _build_numpy_argument_list(parameters, full_arguments)
             cache[key] = (args, block_and_thread_numbers)
             cache_values.append(kwargs)  # keep objects alive such that ids remain unique
diff --git a/pystencils/gpucuda/kernelcreation.py b/pystencils/gpucuda/kernelcreation.py
index ff82107000b00595dbcca6699ed42b172c324353..ed916d54b8d4434e0e38beb68dd8ecb933aff567 100644
--- a/pystencils/gpucuda/kernelcreation.py
+++ b/pystencils/gpucuda/kernelcreation.py
@@ -4,12 +4,18 @@ from pystencils.field import Field, FieldType
 from pystencils.gpucuda.cudajit import make_python_function
 from pystencils.gpucuda.indexing import BlockIndexing
 from pystencils.transformations import (
-    add_types, get_base_buffer_index, get_common_shape, parse_base_pointer_info,
-    resolve_buffer_accesses, resolve_field_accesses, unify_shape_symbols)
-
-
-def create_cuda_kernel(assignments, function_name="kernel", type_info=None, indexing_creator=BlockIndexing,
-                       iteration_slice=None, ghost_layers=None, skip_independence_check=False):
+    add_types, get_base_buffer_index, get_common_shape, implement_interpolations,
+    parse_base_pointer_info, resolve_buffer_accesses, resolve_field_accesses, unify_shape_symbols)
+
+
+def create_cuda_kernel(assignments,
+                       function_name="kernel",
+                       type_info=None,
+                       indexing_creator=BlockIndexing,
+                       iteration_slice=None,
+                       ghost_layers=None,
+                       skip_independence_check=False,
+                       use_textures_for_interpolation=True):
     fields_read, fields_written, assignments = add_types(assignments, type_info, not skip_independence_check)
     all_fields = fields_read.union(fields_written)
     read_only_fields = set([f.name for f in fields_read - fields_written])
@@ -57,6 +63,8 @@ def create_cuda_kernel(assignments, function_name="kernel", type_info=None, inde
     ast = KernelFunction(block, 'gpu', 'gpucuda', make_python_function, ghost_layers, function_name)
     ast.global_variables.update(indexing.index_variables)
 
+    implement_interpolations(ast, implement_by_texture_accesses=use_textures_for_interpolation)
+
     base_pointer_spec = [['spatialInner0']]
     base_pointer_info = {f.name: parse_base_pointer_info(base_pointer_spec, [2, 1, 0],
                                                          f.spatial_dimensions, f.index_dimensions)
@@ -86,8 +94,13 @@ def create_cuda_kernel(assignments, function_name="kernel", type_info=None, inde
     return ast
 
 
-def created_indexed_cuda_kernel(assignments, index_fields, function_name="kernel", type_info=None,
-                                coordinate_names=('x', 'y', 'z'), indexing_creator=BlockIndexing):
+def created_indexed_cuda_kernel(assignments,
+                                index_fields,
+                                function_name="kernel",
+                                type_info=None,
+                                coordinate_names=('x', 'y', 'z'),
+                                indexing_creator=BlockIndexing,
+                                use_textures_for_interpolation=True):
     fields_read, fields_written, assignments = add_types(assignments, type_info, check_independence_condition=False)
     all_fields = fields_read.union(fields_written)
     read_only_fields = set([f.name for f in fields_read - fields_written])
@@ -125,6 +138,8 @@ def created_indexed_cuda_kernel(assignments, index_fields, function_name="kernel
     ast = KernelFunction(function_body, 'gpu', 'gpucuda', make_python_function, None, function_name)
     ast.global_variables.update(indexing.index_variables)
 
+    implement_interpolations(ast, implement_by_texture_accesses=use_textures_for_interpolation)
+
     coord_mapping = indexing.coordinates
     base_pointer_spec = [['spatialInner0']]
     base_pointer_info = {f.name: parse_base_pointer_info(base_pointer_spec, [2, 1, 0],
diff --git a/pystencils/gpucuda/texture_utils.py b/pystencils/gpucuda/texture_utils.py
new file mode 100644
index 0000000000000000000000000000000000000000..d29d862929530b27f840da445467a2bae44b1bfb
--- /dev/null
+++ b/pystencils/gpucuda/texture_utils.py
@@ -0,0 +1,126 @@
+# -*- coding: utf-8 -*-
+#
+# Copyright © 2019 Stephan Seitz <stephan.seitz@fau.de>
+#
+# Distributed under terms of the GPLv3 license.
+
+"""
+
+"""
+
+from os.path import dirname, isdir, join
+
+import numpy as np
+
+try:
+    import pycuda.driver as cuda
+    from pycuda import gpuarray
+except Exception:
+    pass
+
+
+def pow_two_divider(n):
+    if n == 0:
+        return 0
+    divider = 1
+    while (n & divider) == 0:
+        divider <<= 1
+    return divider
+
+
+def ndarray_to_tex(tex_ref,
+                   ndarray,
+                   address_mode=None,
+                   filter_mode=None,
+                   use_normalized_coordinates=False,
+                   read_as_integer=False):
+
+    if address_mode is None:
+        address_mode = cuda.address_mode.BORDER
+    if filter_mode is None:
+        filter_mode = cuda.filter_mode.LINEAR
+
+    if isinstance(ndarray, np.ndarray):
+        cu_array = cuda.np_to_array(ndarray, 'C')
+    elif isinstance(ndarray, gpuarray.GPUArray):
+        cu_array = cuda.gpuarray_to_array(ndarray, 'C')
+    else:
+        raise TypeError(
+            'ndarray must be numpy.ndarray or pycuda.gpuarray.GPUArray')
+
+    cuda.TextureReference.set_array(tex_ref, cu_array)
+
+    tex_ref.set_address_mode(0, address_mode)
+    if ndarray.ndim >= 2:
+        tex_ref.set_address_mode(1, address_mode)
+    if ndarray.ndim >= 3:
+        tex_ref.set_address_mode(2, address_mode)
+    tex_ref.set_filter_mode(filter_mode)
+
+    if not use_normalized_coordinates:
+        tex_ref.set_flags(tex_ref.get_flags() & ~cuda.TRSF_NORMALIZED_COORDINATES)
+
+    if not read_as_integer:
+        tex_ref.set_flags(tex_ref.get_flags() & ~cuda.TRSF_READ_AS_INTEGER)
+
+
+def prefilter_for_cubic_bspline(gpuarray):
+    import pycuda.autoinit  # NOQA
+    from pycuda.compiler import SourceModule
+
+    ndim = gpuarray.ndim
+    assert ndim == 2 or ndim == 3, "Only 2d or 3d supported"
+    assert isdir(join(dirname(__file__), "CubicInterpolationCUDA", "code")), \
+        "Submodule CubicInterpolationCUDA does not exist"
+    nvcc_options = ["-w", "-std=c++11", "-Wno-deprecated-gpu-targets"]
+    nvcc_options += ["-I" + join(dirname(__file__), "CubicInterpolationCUDA", "code")]
+    nvcc_options += ["-I" + join(dirname(__file__), "CubicInterpolationCUDA", "code", "internal")]
+
+    file_name = join(dirname(__file__), "CubicInterpolationCUDA", "code", "cubicPrefilter%iD.cu" % ndim)
+    with open(file_name) as file:
+        code = file.read()
+
+    mod = SourceModule(code, options=nvcc_options)
+
+    if ndim == 2:
+        height, width = gpuarray.shape
+        block = min(pow_two_divider(height), 64)
+        grid = height // block
+        func = mod.get_function('SamplesToCoefficients2DXf')
+        func(gpuarray, np.uint32(gpuarray.strides[-2]), *(np.uint32(r)
+                                                          for r in reversed(gpuarray.shape)),
+             block=(block, 1, 1),
+             grid=(grid, 1, 1))
+
+        block = min(pow_two_divider(width), 64)
+        grid = width // block
+        func = mod.get_function('SamplesToCoefficients2DYf')
+        func(gpuarray, np.uint32(gpuarray.strides[-2]), *(np.uint32(r)
+                                                          for r in reversed(gpuarray.shape)),
+             block=(block, 1, 1),
+             grid=(grid, 1, 1))
+    elif ndim == 3:
+        depth, height, width = gpuarray.shape
+        dimX = min(min(pow_two_divider(width), pow_two_divider(height)), 64)
+        dimY = min(min(pow_two_divider(depth), pow_two_divider(height)), 512 / dimX)
+        block = (dimX, dimY, 1)
+
+        dimGridX = (height // block[0], depth // block[1], 1)
+        dimGridY = (width // block[0], depth // block[1], 1)
+        dimGridZ = (width // block[0], height // block[1], 1)
+
+        func = mod.get_function("SamplesToCoefficients3DXf")
+        func(gpuarray, np.uint32(gpuarray.strides[-2]), *(np.uint32(r)
+                                                          for r in reversed(gpuarray.shape)),
+             block=block,
+             grid=dimGridX)
+        func = mod.get_function("SamplesToCoefficients3DYf")
+        func(gpuarray, np.uint32(gpuarray.strides[-2]), *(np.uint32(r)
+                                                          for r in reversed(gpuarray.shape)),
+             block=block,
+             grid=dimGridY)
+        func = mod.get_function("SamplesToCoefficients3DZf")
+        func(gpuarray, np.uint32(gpuarray.strides[-2]), *(np.uint32(r)
+                                                          for r in reversed(gpuarray.shape)),
+             block=block,
+             grid=dimGridZ)
diff --git a/pystencils/include/pycuda-helper-modified.hpp b/pystencils/include/pycuda-helper-modified.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..53b3797016b73779c1a6beba09f91d8ac9fe8dfc
--- /dev/null
+++ b/pystencils/include/pycuda-helper-modified.hpp
@@ -0,0 +1,249 @@
+#include <pycuda-complex.hpp>
+#include <surface_functions.h>
+#ifndef _AFJKDASLFSADHF_HEADER_SEEN_PYCUDA_HELPERS_HPP
+#define _AFJKDASLFSADHF_HEADER_SEEN_PYCUDA_HELPERS_HPP
+
+extern "C++" {
+  // "double-precision" textures ------------------------------------------------
+  /* Thanks to Nathan Bell <nbell@nvidia.com> for help in figuring this out. */
+
+  typedef float fp_tex_float;
+  typedef int2 fp_tex_double;
+  typedef uint2 fp_tex_cfloat;
+  typedef int4 fp_tex_cdouble;
+
+   template <enum cudaTextureReadMode read_mode>
+  __device__ pycuda::complex<float> fp_tex1Dfetch(texture<fp_tex_cfloat, 1, read_mode> tex,  float i)
+  {
+    fp_tex_cfloat v = tex1Dfetch(tex, i);
+    pycuda::complex<float> out;
+    return pycuda::complex<float>(__int_as_float(v.x), __int_as_float(v.y));
+  }
+
+  template <enum cudaTextureReadMode read_mode>
+  __device__ pycuda::complex<double> fp_tex1Dfetch(texture<fp_tex_cdouble, 1, read_mode> tex,  float i)
+  {
+    fp_tex_cdouble v = tex1Dfetch(tex, i);
+    return pycuda::complex<double>(__hiloint2double(v.y, v.x), __hiloint2double(v.w, v.z));
+  }
+
+  template <enum cudaTextureReadMode read_mode>
+  __device__ double fp_tex1Dfetch(texture<fp_tex_double, 1, read_mode> tex,  float i)
+  {
+    fp_tex_double v = tex1Dfetch(tex, i);
+    return __hiloint2double(v.y, v.x);
+  }
+
+// 2D functionality
+
+  template <enum cudaTextureReadMode read_mode>
+  __device__ double fp_tex2D(texture<fp_tex_double, 2, read_mode> tex, float i,  float j)
+  {
+    fp_tex_double v = tex2D(tex, i, j);
+    return __hiloint2double(v.y, v.x);
+  }
+
+  template <enum cudaTextureReadMode read_mode>
+  __device__ pycuda::complex<float> fp_tex2D(texture<fp_tex_cfloat, 2, read_mode> tex,  float i,  float j)
+  {
+    fp_tex_cfloat v = tex2D(tex, i, j);
+    return pycuda::complex<float>(__int_as_float(v.x), __int_as_float(v.y));
+  }
+
+  template <enum cudaTextureReadMode read_mode>
+  __device__ pycuda::complex<double> fp_tex2D(texture<fp_tex_cdouble, 2, read_mode> tex,  float i,  float j)
+  {
+    fp_tex_cdouble v = tex2D(tex, i, j);
+    return pycuda::complex<double>(__hiloint2double(v.y, v.x), __hiloint2double(v.w, v.z));
+  }
+  // 2D Layered extension
+
+  template <enum cudaTextureReadMode read_mode>
+  __device__ double fp_tex2DLayered(texture<fp_tex_double, cudaTextureType2DLayered, read_mode> tex, float i,  float j, int layer)
+  {
+    fp_tex_double v = tex2DLayered(tex, i, j, layer);
+    return __hiloint2double(v.y, v.x);
+  }
+
+  template <enum cudaTextureReadMode read_mode>
+  __device__ pycuda::complex<float> fp_tex2DLayered(texture<fp_tex_cfloat, cudaTextureType2DLayered, read_mode> tex, float i,  float j, int layer)
+  {
+    fp_tex_cfloat v = tex2DLayered(tex, i, j, layer);
+    return pycuda::complex<float>(__int_as_float(v.x), __int_as_float(v.y));
+  }
+
+  template <enum cudaTextureReadMode read_mode>
+  __device__ pycuda::complex<double> fp_tex2DLayered(texture<fp_tex_cdouble, cudaTextureType2DLayered, read_mode> tex, float i,  float j, int layer)
+  {
+    fp_tex_cdouble v = tex2DLayered(tex, i, j, layer);
+    return pycuda::complex<double>(__hiloint2double(v.y, v.x), __hiloint2double(v.w, v.z));
+  }
+
+  // 3D functionality
+
+  template <enum cudaTextureReadMode read_mode>
+  __device__ double fp_tex3D(texture<fp_tex_double, 3, read_mode> tex,  float i,  float j,  float k)
+  {
+    fp_tex_double v = tex3D(tex, i, j, k);
+    return __hiloint2double(v.y, v.x);
+  }
+
+  template <enum cudaTextureReadMode read_mode>
+  __device__ pycuda::complex<float> fp_tex3D(texture<fp_tex_cfloat, 3, read_mode> tex,  float i,  float j,  float k)
+  {
+    fp_tex_cfloat v = tex3D(tex, i, j, k);
+    return pycuda::complex<float>(__int_as_float(v.x), __int_as_float(v.y));
+  }
+
+  template <enum cudaTextureReadMode read_mode>
+  __device__ pycuda::complex<double> fp_tex3D(texture<fp_tex_cdouble, 3, read_mode> tex,  float i,  float j,  float k)
+  {
+    fp_tex_cdouble v = tex3D(tex, i, j, k);
+    return pycuda::complex<double>(__hiloint2double(v.y, v.x), __hiloint2double(v.w, v.z));
+  }
+
+  // FP_Surfaces with complex supprt
+
+  __device__ void fp_surf2DLayeredwrite(double var,surface<void, cudaSurfaceType2DLayered> surf,  float i,  float j, int layer, enum cudaSurfaceBoundaryMode mode)
+  {
+    fp_tex_double auxvar;
+    auxvar.x =  __double2loint(var);
+    auxvar.y =  __double2hiint(var);
+    surf2DLayeredwrite(auxvar, surf, i*sizeof(fp_tex_double), j, layer, mode);
+  }
+
+  __device__ void fp_surf2DLayeredwrite(pycuda::complex<float> var,surface<void, cudaSurfaceType2DLayered> surf,  float i,  float j, int layer, enum cudaSurfaceBoundaryMode mode)
+  {
+    fp_tex_cfloat auxvar;
+    auxvar.x =  __float_as_int(var._M_re);
+    auxvar.y =  __float_as_int(var._M_im);
+    surf2DLayeredwrite(auxvar, surf, i*sizeof(fp_tex_cfloat), j, layer,mode);
+  }
+
+  __device__ void fp_surf2DLayeredwrite(pycuda::complex<double> var,surface<void, cudaSurfaceType2DLayered> surf,  float i,  float j, int layer, enum cudaSurfaceBoundaryMode mode)
+  {
+    fp_tex_cdouble auxvar;
+    auxvar.x =  __double2loint(var._M_re);
+    auxvar.y =  __double2hiint(var._M_re);
+
+    auxvar.z = __double2loint(var._M_im);
+    auxvar.w = __double2hiint(var._M_im);
+    surf2DLayeredwrite(auxvar, surf, i*sizeof(fp_tex_cdouble), j, layer,mode);
+  }
+
+  __device__ void fp_surf3Dwrite(double var,surface<void, 3> surf,  float i,  float j,  float k, enum cudaSurfaceBoundaryMode mode)
+  {
+    fp_tex_double auxvar;
+    auxvar.x =  __double2loint(var);
+    auxvar.y =  __double2hiint(var);
+    surf3Dwrite(auxvar, surf, i*sizeof(fp_tex_double), j, k,mode);
+  }
+
+  __device__ void fp_surf3Dwrite(pycuda::complex<float> var,surface<void, 3> surf,  float i,  float j,  float k, enum cudaSurfaceBoundaryMode mode)
+  {
+    fp_tex_cfloat auxvar;
+    auxvar.x =  __float_as_int(var._M_re);
+    auxvar.y =  __float_as_int(var._M_im);
+
+    surf3Dwrite(auxvar, surf, i*sizeof(fp_tex_cfloat), j, k, mode);
+  }
+
+  __device__ void fp_surf3Dwrite(pycuda::complex<double> var,surface<void, 3> surf,  float i,  float j,  float k, enum cudaSurfaceBoundaryMode mode)
+  {
+    fp_tex_cdouble auxvar;
+    auxvar.x =  __double2loint(var._M_re);
+    auxvar.y =  __double2hiint(var._M_re);
+
+    auxvar.z = __double2loint(var._M_im);
+    auxvar.w = __double2hiint(var._M_im);
+    surf3Dwrite(auxvar, surf, i*sizeof(fp_tex_cdouble), j, k, mode);
+  }
+
+  __device__ void fp_surf2DLayeredread(double *var, surface<void, cudaSurfaceType2DLayered> surf,  float i,  float j, int layer, enum cudaSurfaceBoundaryMode mode)
+  {
+    fp_tex_double v;
+    surf2DLayeredread(&v, surf, i*sizeof(fp_tex_double), j, layer, mode);
+    *var = __hiloint2double(v.y, v.x);
+  }
+
+  __device__ void fp_surf2DLayeredread(pycuda::complex<float> *var, surface<void, cudaSurfaceType2DLayered> surf,  float i,  float j, int layer, enum cudaSurfaceBoundaryMode mode)
+  {
+    fp_tex_cfloat v;
+    surf2DLayeredread(&v, surf, i*sizeof(fp_tex_cfloat), j, layer, mode);
+    *var = pycuda::complex<float>(__int_as_float(v.x), __int_as_float(v.y));
+  }
+
+  __device__ void fp_surf2DLayeredread(pycuda::complex<double> *var, surface<void, cudaSurfaceType2DLayered> surf,  float i,  float j, int layer, enum cudaSurfaceBoundaryMode mode)
+  {
+    fp_tex_cdouble v;
+    surf2DLayeredread(&v, surf, i*sizeof(fp_tex_cdouble), j, layer, mode);
+    *var = pycuda::complex<double>(__hiloint2double(v.y, v.x), __hiloint2double(v.w, v.z));
+  }
+
+  __device__ void fp_surf3Dread(double *var, surface<void, 3> surf,  float i,  float j,  float k, enum cudaSurfaceBoundaryMode mode)
+  {
+    fp_tex_double v;
+    surf3Dread(&v, surf, i*sizeof(fp_tex_double), j, k, mode);
+    *var = __hiloint2double(v.y, v.x);
+  }
+
+  __device__ void fp_surf3Dread(pycuda::complex<float> *var, surface<void, 3> surf,  float i,  float j,  float k, enum cudaSurfaceBoundaryMode mode)
+  {
+    fp_tex_cfloat v;
+    surf3Dread(&v, surf, i*sizeof(fp_tex_cfloat), j, k, mode);
+    *var = pycuda::complex<float>(__int_as_float(v.x), __int_as_float(v.y));
+  }
+
+  __device__ void fp_surf3Dread(pycuda::complex<double> *var, surface<void, 3> surf,  float i,  float j,  float k, enum cudaSurfaceBoundaryMode mode)
+  {
+    fp_tex_cdouble v;
+    surf3Dread(&v, surf, i*sizeof(fp_tex_cdouble), j, k, mode);
+    *var = pycuda::complex<double>(__hiloint2double(v.y, v.x), __hiloint2double(v.w, v.z));
+  }
+#define PYCUDA_GENERATE_FP_TEX_FUNCS(TYPE) \
+  template <enum cudaTextureReadMode read_mode> \
+  __device__ TYPE fp_tex1Dfetch(texture<TYPE, 1, read_mode> tex,  float i) \
+  { \
+    return tex1Dfetch(tex, i); \
+  } \
+  template <enum cudaTextureReadMode read_mode> \
+  __device__ TYPE fp_tex2D(texture<TYPE, 2, read_mode> tex,  float i,  float j) \
+  { \
+    return tex2D(tex, i, j); \
+  } \
+  template <enum cudaTextureReadMode read_mode> \
+   __device__ TYPE fp_tex2DLayered(texture<TYPE, cudaTextureType2DLayered, read_mode> tex,  float i,  float j, int layer) \
+  { \
+    return tex2DLayered(tex, i, j, layer); \
+  } \
+  template <enum cudaTextureReadMode read_mode> \
+  __device__ TYPE fp_tex3D(texture<TYPE, 3, read_mode> tex,  float i,  float j,  float k) \
+  { \
+    return tex3D(tex, i, j, k); \
+  } \
+  __device__ void fp_surf2DLayeredwrite(TYPE var,surface<void, cudaSurfaceType2DLayered> surf,  float i,  float j, int layer,enum cudaSurfaceBoundaryMode mode) \
+  { \
+    surf2DLayeredwrite(var, surf, i*sizeof(TYPE), j, layer, mode); \
+  } \
+  __device__ void fp_surf2DLayeredread(TYPE *var, surface<void, cudaSurfaceType2DLayered> surf,  float i,  float j, int layer,enum cudaSurfaceBoundaryMode mode) \
+  { \
+    surf2DLayeredread(var, surf, i*sizeof(TYPE), j, layer, mode); \
+  } \
+  __device__ void fp_surf3Dwrite(TYPE var,surface<void, 3> surf,  float i,  float j,  float k, enum cudaSurfaceBoundaryMode mode) \
+  { \
+    surf3Dwrite(var, surf, i*sizeof(TYPE), j, k, mode); \
+  } \
+  __device__ void fp_surf3Dread(TYPE *var, surface<void, 3> surf,  float i,  float j,  float k, enum cudaSurfaceBoundaryMode mode) \
+  { \
+    surf3Dread(var, surf, i*sizeof(TYPE), j, k, mode); \
+  }
+  PYCUDA_GENERATE_FP_TEX_FUNCS(float)
+  PYCUDA_GENERATE_FP_TEX_FUNCS(int)
+  PYCUDA_GENERATE_FP_TEX_FUNCS(unsigned int)
+  PYCUDA_GENERATE_FP_TEX_FUNCS(short int)
+  PYCUDA_GENERATE_FP_TEX_FUNCS(unsigned short int)
+  PYCUDA_GENERATE_FP_TEX_FUNCS(char)
+  PYCUDA_GENERATE_FP_TEX_FUNCS(unsigned char)
+}
+
+#endif
diff --git a/pystencils/interpolation_astnodes.py b/pystencils/interpolation_astnodes.py
new file mode 100644
index 0000000000000000000000000000000000000000..b18c9c40e32c25668ef9d70fd6921f354bc1c261
--- /dev/null
+++ b/pystencils/interpolation_astnodes.py
@@ -0,0 +1,333 @@
+# -*- coding: utf-8 -*-
+#
+# Copyright © 2019 Stephan Seitz <stephan.seitz@fau.de>
+#
+# Distributed under terms of the GPLv3 license.
+
+"""
+
+"""
+
+import itertools
+from typing import Set
+
+import sympy as sp
+from sympy.core.cache import cacheit
+
+import pystencils
+from pystencils.astnodes import Node
+from pystencils.data_types import TypedSymbol, cast_func, create_type
+
+try:
+    import pycuda.driver
+except Exception:
+    pass
+
+
+class LinearInterpolator(object):
+    """
+    Implements non-integer accesses on fields using linear interpolation.
+
+    On GPU, this interpolator can be implemented by a `TextureCachedField` for hardware acceleration.
+
+    Address modes are different boundary handlings possible choices are like for CUDA textures
+
+        **CLAMP**
+
+        The signal c[k] is continued outside k=0,...,M-1 so that c[k] = c[0] for k < 0, and c[k] = c[M-1] for k >= M.
+
+        **BORDER**
+
+        The signal c[k] is continued outside k=0,...,M-1 so that c[k] = 0 for k < 0and for k >= M.
+
+        Now, to describe the last two address modes, we are forced to consider normalized coordinates,
+        so that the 1D input signal samples are assumed to be c[k / M], with k=0,...,M-1.
+
+        **WRAP**
+
+        The signal c[k / M] is continued outside k=0,...,M-1 so that it is periodic with period equal to M.
+        In other words, c[(k + p * M) / M] = c[k / M] for any (positive, negative or vanishing) integer p.
+
+        **MIRROR**
+
+        The signal c[k / M] is continued outside k=0,...,M-1 so that it is periodic with period equal to 2 * M - 2.
+        In other words, c[l / M] = c[k / M] for any l and k such that (l + k)mod(2 * M - 2) = 0.
+
+    Explanations from https://stackoverflow.com/questions/19020963/the-different-addressing-modes-of-cuda-textures
+    """
+
+    required_global_declarations = []
+
+    def __init__(self, parent_field: pystencils.Field,
+                 address_mode='BORDER',
+                 use_normalized_coordinates=False):
+        super().__init__()
+
+        self.field = parent_field.new_field_with_different_name(parent_field.name)
+        self.field.field_type = pystencils.field.FieldType.CUSTOM
+        self.address_mode = address_mode
+        self.use_normalized_coordinates = use_normalized_coordinates
+        hash_str = "%x" % abs(hash(self.field) + hash(address_mode))
+        self.symbol = TypedSymbol('dummy_symbol_carrying_field' + self.field.name + hash_str,
+                                  'dummy_symbol_carrying_field' + self.field.name + hash_str)
+        self.symbol.field = self.field
+        self.symbol.interpolator = self
+
+    def at(self, offset):
+        return InterpolatorAccess(self.symbol, *offset)
+
+    def __getitem__(self, offset):
+        return InterpolatorAccess(self.symbol, *offset)
+
+    def __str__(self):
+        return '%s_interpolator' % self.field.name
+
+    def __repr__(self):
+        return self.__str__()
+
+    def __hash__(self):
+        return hash((str(self.address_mode),
+                     str(type(self)),
+                     self.symbol,
+                     self.address_mode,
+                     self.use_normalized_coordinates))
+
+
+class InterpolatorAccess(TypedSymbol):
+    def __new__(cls, field, offsets, *args, **kwargs):
+        obj = TextureAccess.__xnew_cached_(cls, field, offsets, *args, **kwargs)
+        return obj
+
+    def __new_stage2__(self, symbol, *offsets):
+        assert offsets is not None
+        obj = super().__xnew__(self, '%s_interpolator_%x' %
+                               (symbol.field.name, abs(hash(tuple(offsets)))), symbol.field.dtype)
+        obj.offsets = offsets
+        obj.symbol = symbol
+        obj.field = symbol.field
+        obj.interpolator = symbol.interpolator
+        return obj
+
+    def __hash__(self):
+        return hash((self.symbol, self.field, tuple(self.offsets), self.interpolator))
+
+    def __str__(self):
+        return '%s_interpolator(%s)' % (self.field.name, ','.join(str(o) for o in self.offsets))
+
+    def __repr__(self):
+        return self.__str__()
+
+    def atoms(self, *types):
+        if self.offsets:
+            offsets = set(o for o in self.offsets if isinstance(o, types))
+            if isinstance(self, *types):
+                offsets.update([self])
+            for o in self.offsets:
+                if hasattr(o, 'atoms'):
+                    offsets.update(set(o.atoms(types)))
+            return offsets
+        else:
+            return set()
+
+    @property
+    def free_symbols(self):
+        symbols = set()
+        if self.offsets is not None:
+            for o in self.offsets:
+                if hasattr(o, 'free_symbols'):
+                    symbols.update(set(o.free_symbols))
+                # if hasattr(o, 'atoms'):
+                    # symbols.update(set(o.atoms(sp.Symbol)))
+
+        return symbols
+
+    @property
+    def args(self):
+        return [self.symbol, *self.offsets]
+
+    @property
+    def symbols_defined(self) -> Set[sp.Symbol]:
+        return {self}
+
+    def implementation_with_stencils(self):
+        field = self.field
+
+        default_int_type = create_type('uint32')
+        use_textures = isinstance(self, TextureAccess)
+        if use_textures:
+            def absolute_access(x, _):
+                return self.texture.at((o for o in x))
+        else:
+            absolute_access = field.absolute_access
+
+        sum = [0, ] * (field.shape[0] if field.index_dimensions else 1)
+
+        offsets = self.offsets
+        rounding_functions = (sp.floor, lambda x: sp.floor(x) + 1)
+
+        # TODO optimization: implement via lerp: https://devblogs.nvidia.com/lerp-faster-cuda/
+        for c in itertools.product(rounding_functions, repeat=field.spatial_dimensions):
+            weight = sp.Mul(*[1 - sp.Abs(f(offset) - offset) for (f, offset) in zip(c, offsets)])
+            index = [f(offset) for (f, offset) in zip(c, offsets)]
+            for channel_idx in range(field.shape[0] if field.index_dimensions else 1):
+                # Hardware boundary handling on GPU
+                if use_textures:
+                    weight = sp.Mul(*[1 - sp.Abs(f(offset) - offset) for (f, offset) in zip(c, offsets)])
+                    sum[channel_idx] += \
+                        weight * absolute_access(index, channel_idx if field.index_dimensions else ())
+                # else boundary handling using software
+                elif str(self.interpolator.address_mode).lower() == 'border':
+                    is_inside_field = sp.And(
+                        *itertools.chain([i >= 0 for i in index],
+                                         [idx < field.shape[dim] for (dim, idx) in enumerate(index)]))
+                    index = [cast_func(i, default_int_type) for i in index]
+                    sum[channel_idx] += sp.Piecewise(
+                        (weight * absolute_access(index, channel_idx if field.index_dimensions else ()),
+                            is_inside_field),
+                        (sp.simplify(0), True)
+                    )
+                elif str(self.interpolator.address_mode).lower() == 'clamp':
+                    index = [cast_func(sp.Min(sp.Max(0, i), field.shape[dim] - 1), default_int_type)
+                             for (dim, i) in enumerate(index)]
+                    sum[channel_idx] += weight * \
+                        absolute_access(index, channel_idx if field.index_dimensions else ())
+                elif str(self.interpolator.address_mode).lower() == 'wrap':
+                    index = [cast_func(sp.Piecewise((sp.Mod(i, field.shape[dim]), i >= 0),
+                                                    (field.shape[dim] + sp.Mod(i, field.shape[dim]), True)),
+                                       default_int_type)
+                             for (dim, i) in enumerate(index)]
+                    sum[channel_idx] += weight * \
+                        absolute_access(index, channel_idx if field.index_dimensions else ())
+                elif str(self.interpolator.address_mode).lower() == 'mirror':
+                    def triangle_fun(x, half_period):
+                        saw_tooth = sp.Abs(x) % (2 * half_period)
+                        return sp.Piecewise((saw_tooth, saw_tooth < half_period),
+                                            (2 * half_period - 1 - saw_tooth, True))
+                    index = [cast_func(triangle_fun(i, field.shape[dim]),
+                                       default_int_type) for (dim, i) in enumerate(index)]
+                    sum[channel_idx] += weight * absolute_access(index, channel_idx if field.index_dimensions else ())
+                else:
+                    raise NotImplementedError()
+
+        sum = [sp.factor(s) for s in sum]
+
+        if field.index_dimensions:
+            return sp.Matrix(sum)
+        else:
+            return sum[0]
+
+    # noinspection SpellCheckingInspection
+    __xnew__ = staticmethod(__new_stage2__)
+    # noinspection SpellCheckingInspection
+    __xnew_cached_ = staticmethod(cacheit(__new_stage2__))
+
+##########################################################################################
+# GPU-specific fast specializations (for precision GPUs can also use above nodes/symbols #
+##########################################################################################
+
+
+class TextureCachedField:
+
+    def __init__(self, parent_field,
+                 address_mode=None,
+                 filter_mode=None,
+                 use_normalized_coordinates=False,
+                 read_as_integer=False,
+                 cubic_bspline_interpolation=False):
+        if isinstance(address_mode, str):
+            address_mode = getattr(pycuda.driver.address_mode, address_mode.upper())
+
+        if address_mode is None:
+            address_mode = pycuda.driver.address_mode.BORDER
+        if filter_mode is None:
+            filter_mode = pycuda.driver.filter_mode.LINEAR
+
+        # self, field_name, field_type, dtype, layout, shape, strides
+        self.field = parent_field
+        self.required_global_declarations = [TextureDeclaration(self)]
+        self.address_mode = address_mode
+        self.filter_mode = filter_mode
+        self.read_as_integer = read_as_integer
+        self.use_normalized_coordinates = use_normalized_coordinates
+        self.symbol = TypedSymbol(str(self), self.field.dtype.numpy_dtype)
+        self.symbol.interpolator = self
+        self.symbol.field = self.field
+        self.cubic_bspline_interpolation = cubic_bspline_interpolation
+
+        # assert str(self.field.dtype) != 'double', "CUDA does not support double textures!"
+        # assert dtype_supports_textures(self.field.dtype), "CUDA only supports texture types with 32 bits or less"
+
+    @classmethod
+    def from_interpolator(cls, interpolator: LinearInterpolator):
+        obj = cls(interpolator.field, interpolator.address_mode)
+        return obj
+
+    def at(self, offset):
+        return TextureAccess(self.symbol, *offset)
+
+    def __getitem__(self, offset):
+        return TextureAccess(self.symbol, *offset)
+
+    def __str__(self):
+        return '%s_texture_%x' % (self.field.name, abs(hash(self.field) + hash(str(self.address_mode))))
+
+    def __repr__(self):
+        return self.__str__()
+
+
+class TextureAccess(InterpolatorAccess):
+    def __new__(cls, texture_symbol, offsets, *args, **kwargs):
+        obj = TextureAccess.__xnew_cached_(cls, texture_symbol, offsets, *args, **kwargs)
+        return obj
+
+    def __new_stage2__(self, symbol, *offsets):
+        obj = super().__xnew__(self, symbol, *offsets)
+        obj.required_global_declarations = symbol.interpolator.required_global_declarations
+        obj.required_global_declarations[0]._symbols_defined.add(obj)
+        return obj
+
+    def __str__(self):
+        return '%s_texture(%s)' % (self.interpolator.field.name, ','.join(str(o) for o in self.offsets))
+
+    @property
+    def texture(self):
+        return self.interpolator
+
+    # noinspection SpellCheckingInspection
+    __xnew__ = staticmethod(__new_stage2__)
+    # noinspection SpellCheckingInspection
+    __xnew_cached_ = staticmethod(cacheit(__new_stage2__))
+
+
+class TextureDeclaration(Node):
+
+    def __init__(self, parent_texture):
+        self.texture = parent_texture
+        self._symbols_defined = set()
+
+    @property
+    def symbols_defined(self) -> Set[sp.Symbol]:
+        return self._symbols_defined
+
+    @property
+    def args(self) -> Set[sp.Symbol]:
+        return set()
+
+
+def dtype_supports_textures(dtype):
+    """
+    Returns whether CUDA supports texture fetches with this numpy dtype.
+
+    The maximum word size for a texture fetch is four bytes.
+
+    With this trick also larger dtypes can be fetched:
+    https://github.com/inducer/pycuda/blob/master/pycuda/cuda/pycuda-helpers.hpp
+
+    """
+    if hasattr(dtype, 'numpy_dtype'):
+        dtype = dtype.numpy_dtype
+
+    if isinstance(dtype, type):
+        return dtype().itemsize <= 4
+
+    return dtype.itemsize <= 4
diff --git a/pystencils/kernelcreation.py b/pystencils/kernelcreation.py
index ade980f55969a4005f2c5055ad27f861ab5524de..866f1a4ef5357e6a1a74d21b174d24785c398677 100644
--- a/pystencils/kernelcreation.py
+++ b/pystencils/kernelcreation.py
@@ -12,10 +12,18 @@ from pystencils.transformations import (
     loop_blocking, move_constants_before_loop, remove_conditionals_in_staggered_kernel)
 
 
-def create_kernel(assignments, target='cpu', data_type="double", iteration_slice=None, ghost_layers=None,
+def create_kernel(assignments,
+                  target='cpu',
+                  data_type="double",
+                  iteration_slice=None,
+                  ghost_layers=None,
                   skip_independence_check=False,
-                  cpu_openmp=False, cpu_vectorize_info=None, cpu_blocking=None,
-                  gpu_indexing='block', gpu_indexing_params=MappingProxyType({})):
+                  cpu_openmp=False,
+                  cpu_vectorize_info=None,
+                  cpu_blocking=None,
+                  gpu_indexing='block',
+                  gpu_indexing_params=MappingProxyType({}),
+                  use_textures_for_interpolation=True):
     """
     Creates abstract syntax tree (AST) of kernel, using a list of update equations.
 
@@ -99,14 +107,22 @@ def create_kernel(assignments, target='cpu', data_type="double", iteration_slice
         ast = create_cuda_kernel(assignments, type_info=data_type,
                                  indexing_creator=indexing_creator_from_params(gpu_indexing, gpu_indexing_params),
                                  iteration_slice=iteration_slice, ghost_layers=ghost_layers,
-                                 skip_independence_check=skip_independence_check)
+                                 skip_independence_check=skip_independence_check,
+                                 use_textures_for_interpolation=use_textures_for_interpolation)
         return ast
     else:
         raise ValueError("Unknown target %s. Has to be one of 'cpu', 'gpu' or 'llvm' " % (target,))
 
 
-def create_indexed_kernel(assignments, index_fields, target='cpu', data_type="double", coordinate_names=('x', 'y', 'z'),
-                          cpu_openmp=True, gpu_indexing='block', gpu_indexing_params=MappingProxyType({})):
+def create_indexed_kernel(assignments,
+                          index_fields,
+                          target='cpu',
+                          data_type="double",
+                          coordinate_names=('x', 'y', 'z'),
+                          cpu_openmp=True,
+                          gpu_indexing='block',
+                          gpu_indexing_params=MappingProxyType({}),
+                          use_textures_for_interpolation=True):
     """
     Similar to :func:`create_kernel`, but here not all cells of a field are updated but only cells with
     coordinates which are stored in an index field. This traversal method can e.g. be used for boundary handling.
@@ -159,8 +175,12 @@ def create_indexed_kernel(assignments, index_fields, target='cpu', data_type="do
     elif target == 'gpu':
         from pystencils.gpucuda import created_indexed_cuda_kernel
         idx_creator = indexing_creator_from_params(gpu_indexing, gpu_indexing_params)
-        ast = created_indexed_cuda_kernel(assignments, index_fields, type_info=data_type,
-                                          coordinate_names=coordinate_names, indexing_creator=idx_creator)
+        ast = created_indexed_cuda_kernel(assignments,
+                                          index_fields,
+                                          type_info=data_type,
+                                          coordinate_names=coordinate_names,
+                                          indexing_creator=idx_creator,
+                                          use_textures_for_interpolation=use_textures_for_interpolation)
         return ast
     else:
         raise ValueError("Unknown target %s. Has to be either 'cpu' or 'gpu'" % (target,))
diff --git a/pystencils/spatial_coordinates.py b/pystencils/spatial_coordinates.py
new file mode 100644
index 0000000000000000000000000000000000000000..6c3ba4db7d98b50bce9442619fbbce07782004a1
--- /dev/null
+++ b/pystencils/spatial_coordinates.py
@@ -0,0 +1,18 @@
+
+import sympy
+
+import pystencils
+import pystencils.astnodes
+
+x_, y_, z_ = tuple(pystencils.astnodes.LoopOverCoordinate.get_loop_counter_symbol(i) for i in range(3))
+x_staggered, y_staggered, z_staggered = x_ + 0.5, y_ + 0.5, z_ + 0.5
+
+
+def x_vector(ndim):
+    return sympy.Matrix(tuple(pystencils.astnodes.LoopOverCoordinate.get_loop_counter_symbol(i) for i in range(ndim)))
+
+
+def x_staggered_vector(ndim):
+    return sympy.Matrix(tuple(
+        pystencils.astnodes.LoopOverCoordinate.get_loop_counter_symbol(i) + 0.5 for i in range(ndim)
+    ))
diff --git a/pystencils/test_gettypeofexpression.py b/pystencils/test_gettypeofexpression.py
new file mode 100644
index 0000000000000000000000000000000000000000..fa480409efc36a91fddfa8d7aff74d745d3624fd
--- /dev/null
+++ b/pystencils/test_gettypeofexpression.py
@@ -0,0 +1,34 @@
+# -*- coding: utf-8 -*-
+#
+# Copyright © 2019 Stephan Seitz <stephan.seitz@fau.de>
+#
+# Distributed under terms of the GPLv3 license.
+
+"""
+
+"""
+import sympy as sp
+
+from pystencils.data_types import create_type, get_type_of_expression
+
+
+def test_gettypeofexpression():
+    random_constant = sp.sqrt(sp.pi / 5)
+    type = get_type_of_expression(random_constant)
+    type == create_type('float64')
+
+    random_constant = sp.pi
+    type = get_type_of_expression(random_constant)
+    type == create_type('float64')
+
+    random_constant = sp.pi / sp.pi
+    type = get_type_of_expression(random_constant)
+    type == create_type('int64')
+
+
+def main():
+    test_gettypeofexpression()
+
+
+if __name__ == '__main__':
+    main()
diff --git a/pystencils/transformations.py b/pystencils/transformations.py
index 60a22d812578db85d7f375a74144352e431ffc62..e38027d2f4c7e7fb9eb40b88f48fc394f43fb8d4 100644
--- a/pystencils/transformations.py
+++ b/pystencils/transformations.py
@@ -9,11 +9,13 @@ import sympy as sp
 from sympy.logic.boolalg import Boolean
 
 import pystencils.astnodes as ast
+import pystencils.integer_functions
 from pystencils.assignment import Assignment
 from pystencils.data_types import (
     PointerType, StructType, TypedSymbol, cast_func, collate_types, create_type, get_base_type,
     get_type_of_expression, pointer_arithmetic_func, reinterpret_cast_func)
 from pystencils.field import AbstractField, Field, FieldType
+from pystencils.interpolation_astnodes import InterpolatorAccess, TextureAccess, TextureCachedField
 from pystencils.kernelparameters import FieldPointerSymbol
 from pystencils.simp.assignment_collection import AssignmentCollection
 from pystencils.slicing import normalize_slice
@@ -503,7 +505,10 @@ def resolve_field_accesses(ast_node, read_only_field_names=set(),
             if isinstance(expr, ast.ResolvedFieldAccess):
                 return expr
 
-            new_args = [visit_sympy_expr(e, enclosing_block, sympy_assignment) for e in expr.args]
+            if hasattr(expr, 'args'):
+                new_args = [visit_sympy_expr(e, enclosing_block, sympy_assignment) for e in expr.args]
+            else:
+                new_args = []
             kwargs = {'evaluate': False} if type(expr) in (sp.Add, sp.Mul, sp.Piecewise) else {}
             return expr.func(*new_args, **kwargs) if new_args else expr
 
@@ -521,7 +526,9 @@ def resolve_field_accesses(ast_node, read_only_field_names=set(),
             if sub_ast.false_block:
                 visit_node(sub_ast.false_block)
         else:
-            for i, a in enumerate(sub_ast.args):
+            if isinstance(sub_ast, (bool, int, float)):
+                return
+            for a in sub_ast.args:
                 visit_node(a)
 
     return visit_node(ast_node)
@@ -798,12 +805,26 @@ class KernelConstraintsCheck:
             self.fields_read.add(rhs.field)
             self.fields_read.update(rhs.indirect_addressing_fields)
             return rhs
+        elif isinstance(rhs, InterpolatorAccess):
+            new_args = [self.process_expression(arg, type_constants) for arg in rhs.offsets]
+            if new_args:
+                rhs.offsets = new_args
+            return rhs
         elif isinstance(rhs, TypedSymbol):
             return rhs
         elif isinstance(rhs, sp.Symbol):
             return TypedSymbol(rhs.name, self._type_for_symbol[rhs.name])
         elif type_constants and isinstance(rhs, sp.Number):
             return cast_func(rhs, create_type(self._type_for_symbol['_constant']))
+        elif isinstance(rhs, sp.boolalg.BooleanFunction) or \
+                type(rhs) in pystencils.integer_functions.__dict__.values():
+            new_args = [self.process_expression(a, type_constants) for a in rhs.args]
+            types_of_expressions = [get_type_of_expression(a) for a in new_args]
+            arg_type = collate_types(types_of_expressions, forbid_collation_to_float=True)
+            new_args = [a if not hasattr(a, 'dtype') or a.dtype == arg_type
+                        else cast_func(a, arg_type)
+                        for a in new_args]
+            return rhs.func(*new_args)
         elif isinstance(rhs, sp.Mul):
             new_args = [self.process_expression(arg, type_constants) if arg not in (-1, 1) else arg for arg in rhs.args]
             return rhs.func(*new_args) if new_args else rhs
@@ -955,7 +976,8 @@ def insert_casts(node):
         # TODO optimize pow, don't cast integer on double
         types = [get_type_of_expression(arg) for arg in args]
         assert len(types) > 0
-        target = collate_types(types)
+        # Never ever, ever collate to float type for boolean functions!
+        target = collate_types(types, forbid_collation_to_float=isinstance(node.func, sp.boolalg.BooleanFunction))
         zipped = list(zip(args, types))
         if target.func is PointerType:
             assert node.func is sp.Add
@@ -1150,7 +1172,6 @@ def replace_inner_stride_with_one(ast_node: ast.KernelFunction) -> None:
 
 def loop_blocking(ast_node: ast.KernelFunction, block_size) -> int:
     """Blocking of loops to enhance cache locality. Modifies the ast node in-place.
-
     Args:
         ast_node: kernel function node before vectorization transformation has been applied
         block_size: sequence defining block size in x, y, (z) direction
@@ -1195,3 +1216,48 @@ def loop_blocking(ast_node: ast.KernelFunction, block_size) -> int:
         inner_loop.start = block_ctr
         inner_loop.stop = stop
     return len(coordinates)
+
+
+def implement_interpolations(ast_node: ast.Node,
+                             implement_by_texture_accesses: bool = False,
+                             vectorize: bool = False,
+                             use_hardware_interpolation_for_f32=True):
+    # TODO: perform this function on assignments, when unify_shape_symbols allows differently sized fields
+
+    assert not(implement_by_texture_accesses and vectorize), \
+        "can only implement interpolations either by texture accesses or CPU vectorization"
+    FLOAT32_T = create_type('float32')
+
+    interpolation_accesses = ast_node.atoms(InterpolatorAccess)
+
+    def can_use_hw_interpolation(i):
+        return use_hardware_interpolation_for_f32 and i.dtype == FLOAT32_T and isinstance(i, TextureAccess)
+
+    if implement_by_texture_accesses:
+
+        interpolators = {a.symbol.interpolator for a in interpolation_accesses}
+        to_texture_map = {i: TextureCachedField.from_interpolator(i) for i in interpolators}
+
+        substitutions = {i: to_texture_map[i.symbol.interpolator].at(
+            [o for o in i.offsets]) for i in interpolation_accesses}
+
+        import pycuda.driver as cuda
+        for texture in substitutions.values():
+            if can_use_hw_interpolation(texture):
+                texture.filter_mode = cuda.filter_mode.LINEAR
+            else:
+                texture.filter_mode = cuda.filter_mode.POINT
+                texture.read_as_integer = True
+
+        ast_node.subs(substitutions)
+
+        # Update after replacements
+        interpolation_accesses = ast_node.atoms(InterpolatorAccess)
+
+    if vectorize:
+        # TODO can be done in _interpolator_access_to_stencils field.absolute_access == simd_gather
+        raise NotImplementedError()
+    else:
+        substitutions = {i: i.implementation_with_stencils()
+                         for i in interpolation_accesses if not can_use_hw_interpolation(i)}
+        ast_node.subs(substitutions)
diff --git a/pystencils_tests/test_cuda_known_functions.py b/pystencils_tests/test_cuda_known_functions.py
index 5742a3262806d60024824994d2baf28ae6f2d8ec..ce0bdfea47f368b466b42dd308f0443303cb6ea5 100644
--- a/pystencils_tests/test_cuda_known_functions.py
+++ b/pystencils_tests/test_cuda_known_functions.py
@@ -1,7 +1,7 @@
 import sympy
 
 import pystencils
-from pystencils.astnodes import get_dummy_symbol
+from pystencils.data_types import get_dummy_symbol
 from pystencils.backends.cuda_backend import CudaSympyPrinter
 from pystencils.data_types import address_of
 
diff --git a/pystencils_tests/test_cudasympyprinter.py b/pystencils_tests/test_cudasympyprinter.py
new file mode 100644
index 0000000000000000000000000000000000000000..08c14aa79b7ca5a699b476e5455eb4356ebedd1c
--- /dev/null
+++ b/pystencils_tests/test_cudasympyprinter.py
@@ -0,0 +1,49 @@
+# -*- coding: utf-8 -*-
+#
+# Copyright © 2019 Stephan Seitz <stephan.seitz@fau.de>
+#
+# Distributed under terms of the GPLv3 license.
+
+"""
+
+"""
+import sympy as sp
+
+import pystencils
+
+
+def test_cudasympyprinter():
+
+    for use_single_precision in (False, True):
+        x, y = pystencils.fields('x,y: float%s [2d]' % ('32' if use_single_precision else '64'))
+        a, b, c = sp.symbols('a,b,c')
+
+        random_constant_with_integer = sp.sqrt(sp.pi / 5)
+
+        assignments = pystencils.AssignmentCollection({
+            a: 1 / sp.sqrt(x[1, 1]),
+            b: random_constant_with_integer,
+            y.center(): sp.sqrt(x.center()) * (x[-1, 0] ** x[+1, 0]) % (x[-1, 0] ** x[+1, 0])
+        })
+        print(assignments)
+        ast = pystencils.create_kernel(assignments, target='gpu')
+        print(ast)
+        code = str(pystencils.show_code(ast))
+        print(code)
+
+        if use_single_precision:
+            assert 'fmodf(' in code
+            assert 'sqrtf(' in code
+            assert 'powf(' in code
+        else:
+            assert 'fmod(' in code
+            assert 'sqrt(' in code
+            assert 'pow(' in code
+
+
+def main():
+    test_cudasympyprinter()
+
+
+if __name__ == '__main__':
+    main()
diff --git a/pystencils_tests/test_data/lenna.png b/pystencils_tests/test_data/lenna.png
new file mode 100644
index 0000000000000000000000000000000000000000..59ef68aabd033341028ccd2b1f6c5871c02b9d7b
Binary files /dev/null and b/pystencils_tests/test_data/lenna.png differ
diff --git a/pystencils_tests/test_data/test_vessel2d_mask.png b/pystencils_tests/test_data/test_vessel2d_mask.png
new file mode 100644
index 0000000000000000000000000000000000000000..1e86b699b10cd8749f899a7600cd0a46fbe98b7b
Binary files /dev/null and b/pystencils_tests/test_data/test_vessel2d_mask.png differ
diff --git a/pystencils_tests/test_field_coordinates.py b/pystencils_tests/test_field_coordinates.py
new file mode 100644
index 0000000000000000000000000000000000000000..1aeed343bc0c77d6e6b56b3d48e68c100be3f56c
--- /dev/null
+++ b/pystencils_tests/test_field_coordinates.py
@@ -0,0 +1,99 @@
+# -*- coding: utf-8 -*-
+#
+# Copyright © 2019 Stephan Seitz <stephan.seitz@fau.de>
+#
+# Distributed under terms of the GPLv3 license.
+
+"""
+
+"""
+from os.path import dirname, join
+
+import numpy as np
+import sympy
+
+import pystencils
+from pystencils.interpolation_astnodes import LinearInterpolator
+
+try:
+    import pyconrad.autoinit
+except Exception:
+    import unittest.mock
+    pyconrad = unittest.mock.MagicMock()
+
+LENNA_FILE = join(dirname(__file__), 'test_data', 'lenna.png')
+
+try:
+    import skimage.io
+    lenna = skimage.io.imread(LENNA_FILE, as_gray=True).astype(np.float32)
+except Exception:
+    lenna = np.random.rand(20, 30)
+
+
+def test_rotate_center():
+    x, y = pystencils.fields('x, y:  float32[2d]')
+
+    # Rotate around center when setting coordindates origins to field centers
+    x.set_coordinate_origin_to_field_center()
+    y.set_coordinate_origin_to_field_center()
+
+    rotation_angle = sympy.pi / 5
+    transform_matrix = sympy.rot_axis3(rotation_angle)[:2, :2]
+
+    # Generic matrix transform works like that (for rotation it would be more clever to use transform_matrix.T)
+    inverse_matrix = transform_matrix.inv()
+    input_coordinate = x.physical_to_index(inverse_matrix @ y.physical_coordinates)
+
+    assignments = pystencils.AssignmentCollection({
+        y.center(): LinearInterpolator(x).at(input_coordinate)
+    })
+
+    kernel = pystencils.create_kernel(assignments).compile()
+    rotated = np.zeros_like(lenna)
+
+    kernel(x=lenna, y=rotated)
+
+    pyconrad.imshow(lenna, "lenna")
+    pyconrad.imshow(rotated, "rotated")
+
+    # If distance in input field is twice as close we will see a smaller image
+    x.coordinate_transform /= 2
+
+    input_coordinate = x.physical_to_index(inverse_matrix @ y.physical_coordinates)
+
+    assignments = pystencils.AssignmentCollection({
+        y.center(): LinearInterpolator(x).at(input_coordinate)
+    })
+
+    kernel = pystencils.create_kernel(assignments).compile()
+    rotated = np.zeros_like(lenna)
+
+    kernel(x=lenna, y=rotated)
+
+    pyconrad.imshow(rotated, "rotated smaller")
+
+    # Conversely, if output field has samples 3 times closer we will see a bigger image
+    y.coordinate_transform /= 3
+
+    input_coordinate = x.physical_to_index(inverse_matrix @ y.physical_coordinates)
+
+    assignments = pystencils.AssignmentCollection({
+        y.center(): LinearInterpolator(x).at(input_coordinate)
+    })
+
+    kernel = pystencils.create_kernel(assignments).compile()
+    rotated = np.zeros_like(lenna)
+
+    kernel(x=lenna, y=rotated)
+
+    pyconrad.imshow(rotated, "rotated bigger")
+
+    # coordinate_transform can be any matrix, also with symbols as entries
+
+
+def main():
+    test_rotate_center()
+
+
+if __name__ == '__main__':
+    main()
diff --git a/pystencils_tests/test_interpolation.py b/pystencils_tests/test_interpolation.py
new file mode 100644
index 0000000000000000000000000000000000000000..64ec853d45b6315d666be267ff1a821b5db6c65b
--- /dev/null
+++ b/pystencils_tests/test_interpolation.py
@@ -0,0 +1,224 @@
+# -*- coding: utf-8 -*-
+#
+# Copyright © 2019 Stephan Seitz <stephan.seitz@fau.de>
+#
+# Distributed under terms of the GPLv3 license.
+
+"""
+
+"""
+from os.path import dirname, join
+
+import numpy as np
+import sympy
+
+import pycuda.autoinit  # NOQA
+import pycuda.gpuarray as gpuarray
+import pystencils
+from pystencils.interpolation_astnodes import LinearInterpolator
+from pystencils.spatial_coordinates import x_, y_
+
+type_map = {
+    np.float32: 'float32',
+    np.float64: 'float64',
+    np.int32: 'int32',
+}
+
+try:
+    import pyconrad.autoinit
+except Exception:
+    import unittest.mock
+    pyconrad = unittest.mock.MagicMock()
+
+LENNA_FILE = join(dirname(__file__), 'test_data', 'lenna.png')
+
+try:
+    import skimage.io
+    lenna = skimage.io.imread(LENNA_FILE, as_gray=True).astype(np.float64)
+    pyconrad.imshow(lenna)
+except Exception:
+    lenna = np.random.rand(20, 30)
+
+
+def test_interpolation():
+    x_f, y_f = pystencils.fields('x,y: float64 [2d]')
+
+    assignments = pystencils.AssignmentCollection({
+        y_f.center(): LinearInterpolator(x_f).at([x_ + 2.7, y_ + 7.2])
+    })
+    print(assignments)
+    ast = pystencils.create_kernel(assignments)
+    print(ast)
+    print(pystencils.show_code(ast))
+    kernel = ast.compile()
+
+    pyconrad.imshow(lenna)
+
+    out = np.zeros_like(lenna)
+    kernel(x=lenna, y=out)
+    pyconrad.imshow(out, "out")
+
+
+def test_scale_interpolation():
+    x_f, y_f = pystencils.fields('x,y: float64 [2d]')
+
+    for address_mode in ['border', 'wrap', 'clamp', 'mirror']:
+        assignments = pystencils.AssignmentCollection({
+            y_f.center(): LinearInterpolator(x_f, address_mode=address_mode).at([0.5 * x_ + 2.7, 0.25 * y_ + 7.2])
+        })
+        print(assignments)
+        ast = pystencils.create_kernel(assignments)
+        print(ast)
+        print(pystencils.show_code(ast))
+        kernel = ast.compile()
+
+        out = np.zeros_like(lenna)
+        kernel(x=lenna, y=out)
+        pyconrad.imshow(out, "out " + address_mode)
+
+
+def test_rotate_interpolation():
+    x_f, y_f = pystencils.fields('x,y: float64 [2d]')
+
+    rotation_angle = sympy.pi / 5
+
+    for address_mode in ['border', 'wrap', 'clamp', 'mirror']:
+
+        transformed = sympy.rot_axis3(rotation_angle)[:2, :2] * sympy.Matrix((x_, y_))
+        assignments = pystencils.AssignmentCollection({
+            y_f.center(): LinearInterpolator(x_f, address_mode=address_mode).at(transformed)
+        })
+        print(assignments)
+        ast = pystencils.create_kernel(assignments)
+        print(ast)
+        print(pystencils.show_code(ast))
+        kernel = ast.compile()
+
+        out = np.zeros_like(lenna)
+        kernel(x=lenna, y=out)
+        pyconrad.imshow(out, "out " + address_mode)
+
+
+def test_rotate_interpolation_gpu():
+
+    rotation_angle = sympy.pi / 5
+    scale = 1
+
+    for address_mode in ['border', 'wrap', 'clamp', 'mirror']:
+        previous_result = None
+        for dtype in [np.int32, np.float32, np.float64]:
+            if dtype == np.int32:
+                lenna_gpu = gpuarray.to_gpu(
+                    np.ascontiguousarray(lenna * 255, dtype))
+            else:
+                lenna_gpu = gpuarray.to_gpu(
+                    np.ascontiguousarray(lenna, dtype))
+            for use_textures in [True, False]:
+                x_f, y_f = pystencils.fields('x,y: %s [2d]' % type_map[dtype], ghost_layers=0)
+
+                transformed = scale * \
+                    sympy.rot_axis3(rotation_angle)[:2, :2] * sympy.Matrix((x_, y_)) - sympy.Matrix([2, 2])
+                assignments = pystencils.AssignmentCollection({
+                    y_f.center(): LinearInterpolator(x_f, address_mode=address_mode).at(transformed)
+                })
+                print(assignments)
+                ast = pystencils.create_kernel(assignments, target='gpu', use_textures_for_interpolation=use_textures)
+                print(ast)
+                print(pystencils.show_code(ast))
+                kernel = ast.compile()
+
+                out = gpuarray.zeros_like(lenna_gpu)
+                kernel(x=lenna_gpu, y=out)
+                pyconrad.imshow(out,
+                                f"out {address_mode} texture:{use_textures} {type_map[dtype]}")
+                skimage.io.imsave(f"/tmp/out {address_mode} texture:{use_textures} {type_map[dtype]}.tif",
+                                  np.ascontiguousarray(out.get(), np.float32))
+                if previous_result is not None:
+                    try:
+                        assert np.allclose(previous_result[4:-4, 4:-4], out.get()[4:-4, 4:-4], rtol=100, atol=1e-3)
+                    except AssertionError as e:  # NOQA
+                        print("Max error: %f" % np.max(previous_result - out.get()))
+                        # pyconrad.imshow(previous_result - out.get(), "Difference image")
+                        # raise e
+                previous_result = out.get()
+
+
+def test_shift_interpolation_gpu():
+
+    rotation_angle = 0  # sympy.pi / 5
+    scale = 1
+    # shift = - sympy.Matrix([1.5, 1.5])
+    shift = sympy.Matrix((0.0, 0.0))
+
+    for address_mode in ['border', 'wrap', 'clamp', 'mirror']:
+        previous_result = None
+        for dtype in [np.float64, np.float32, np.int32]:
+            if dtype == np.int32:
+                lenna_gpu = gpuarray.to_gpu(
+                    np.ascontiguousarray(lenna * 255, dtype))
+            else:
+                lenna_gpu = gpuarray.to_gpu(
+                    np.ascontiguousarray(lenna, dtype))
+            for use_textures in [True, False]:
+                x_f, y_f = pystencils.fields('x,y: %s [2d]' % type_map[dtype], ghost_layers=0)
+
+                if use_textures:
+                    transformed = scale * sympy.rot_axis3(rotation_angle)[:2, :2] * sympy.Matrix((x_, y_)) + shift
+                else:
+                    transformed = scale * sympy.rot_axis3(rotation_angle)[:2, :2] * sympy.Matrix((x_, y_)) + shift
+                assignments = pystencils.AssignmentCollection({
+                    y_f.center(): LinearInterpolator(x_f, address_mode=address_mode).at(transformed)
+                })
+                # print(assignments)
+                ast = pystencils.create_kernel(assignments, target='gpu', use_textures_for_interpolation=use_textures)
+                # print(ast)
+                print(pystencils.show_code(ast))
+                kernel = ast.compile()
+
+                out = gpuarray.zeros_like(lenna_gpu)
+                kernel(x=lenna_gpu, y=out)
+                pyconrad.imshow(out,
+                                f"out {address_mode} texture:{use_textures} {type_map[dtype]}")
+                skimage.io.imsave(f"/tmp/out {address_mode} texture:{use_textures} {type_map[dtype]}.tif",
+                                  np.ascontiguousarray(out.get(), np.float32))
+                # if not (use_single_precision and use_textures):
+                # if previous_result is not None:
+                # try:
+                # assert np.allclose(previous_result[4:-4, 4:-4], out.get()
+                # [4:-4, 4:-4], rtol=1e-3, atol=1e-2)
+                # except AssertionError as e:
+                # print("Max error: %f" % np.max(np.abs(previous_result[4:-4, 4:-4] - out.get()[4:-4, 4:-4])))
+                # pyconrad.imshow(previous_result[4:-4, 4:-4] - out.get()[4:-4, 4:-4], "Difference image")
+                # raise e
+                # previous_result = out.get()
+
+
+def test_rotate_interpolation_size_change():
+    x_f, y_f = pystencils.fields('x,y: float64 [2d]')
+
+    rotation_angle = sympy.pi / 5
+
+    for address_mode in ['border', 'wrap', 'clamp', 'mirror']:
+
+        transformed = sympy.rot_axis3(rotation_angle)[:2, :2] * sympy.Matrix((x_, y_))
+        assignments = pystencils.AssignmentCollection({
+            y_f.center(): LinearInterpolator(x_f, address_mode=address_mode).at(transformed)
+        })
+        print(assignments)
+        ast = pystencils.create_kernel(assignments)
+        print(ast)
+        print(pystencils.show_code(ast))
+        kernel = ast.compile()
+
+        out = np.zeros((100, 150), np.float64)
+        kernel(x=lenna, y=out)
+        pyconrad.imshow(out, "small out " + address_mode)
+
+
+def main():
+
+    test_shift_interpolation_gpu()
+
+
+if __name__ == '__main__':
+    main()