From 16bfa1e4b9401b19bb2d3fede2a352d3e24e9aa3 Mon Sep 17 00:00:00 2001
From: Stephan Seitz <stephan.seitz@fau.de>
Date: Wed, 7 Aug 2019 13:56:59 +0200
Subject: [PATCH] Delete duplicate files in the wrong place

---
 tests/lbm/_assignment_transforms.py           |  54 ------
 tests/lbm/_layout_fixer.py                    |  29 ---
 tests/lbm/_utils.py                           |  37 ----
 tests/lbm/adjoint_field.py                    |  30 ---
 tests/lbm/backends/_pytorch.py                | 150 ---------------
 tests/lbm/backends/_tensorflow.py             |  72 -------
 tests/lbm/backends/_tensorflow_cpp.py         |  14 --
 tests/lbm/backends/_torch_native.py           | 175 ------------------
 tests/lbm/backends/torch_native_cpu.tmpl.cpp  |  76 --------
 tests/lbm/backends/torch_native_cuda.tmpl.cpp |  79 --------
 tests/lbm/backends/torch_native_cuda.tmpl.cu  | 103 -----------
 tests/lbm/losses.py                           |  27 ---
 12 files changed, 846 deletions(-)
 delete mode 100644 tests/lbm/_assignment_transforms.py
 delete mode 100644 tests/lbm/_layout_fixer.py
 delete mode 100644 tests/lbm/_utils.py
 delete mode 100644 tests/lbm/adjoint_field.py
 delete mode 100644 tests/lbm/backends/_pytorch.py
 delete mode 100644 tests/lbm/backends/_tensorflow.py
 delete mode 100644 tests/lbm/backends/_tensorflow_cpp.py
 delete mode 100644 tests/lbm/backends/_torch_native.py
 delete mode 100644 tests/lbm/backends/torch_native_cpu.tmpl.cpp
 delete mode 100644 tests/lbm/backends/torch_native_cuda.tmpl.cpp
 delete mode 100644 tests/lbm/backends/torch_native_cuda.tmpl.cu
 delete mode 100644 tests/lbm/losses.py

diff --git a/tests/lbm/_assignment_transforms.py b/tests/lbm/_assignment_transforms.py
deleted file mode 100644
index ca465fd..0000000
--- a/tests/lbm/_assignment_transforms.py
+++ /dev/null
@@ -1,54 +0,0 @@
-from typing import List
-import pystencils as ps
-
-
-def shift_assignments(assignments: List[ps.Assignment], shift_vector, shift_write_assignments=True, shift_read_assignments=True):
-    if hasattr(assignments, "main_assignments"):
-        assignments = assignments.main_assignments
-
-    shifted_assignments = assignments
-    for i, a in enumerate(shifted_assignments):
-
-        for symbol in [*a.free_symbols, a.rhs]:
-            if isinstance(symbol, ps.Field.Access):
-                shifted_access = symbol.get_shifted(
-                    *shift_vector)
-                shifted_assignments[i] = shifted_assignments[i].subs(
-                    symbol, shifted_access)
-    return shifted_assignments
-
-
-def transform_assignments_rhs(assignments, rhs_transform_function):
-    new_assignments = []
-    for _, a in enumerate(assignments):
-        new_rhs = rhs_transform_function(a)
-
-        new_assignments.append(
-            ps.Assignment(a.lhs, new_rhs))
-
-    return new_assignments
-
-
-def transform_assignments(assignments, transform_function):
-    new_assignments = []
-    for _, a in enumerate(assignments):
-        new_assignment = transform_function(a)
-
-        if isinstance(new_assignment, tuple):
-            lhs, rhs = new_assignment
-            new_assignment = ps.Assignment(lhs, rhs)
-
-        if new_assignment is not None:
-            new_assignments.append(new_assignment)
-
-    return new_assignments
-
-
-def replace_symbols_in_assignments(assignments: List[ps.Assignment], symbol, replace_symbol):
-    if hasattr(assignments, "main_assignments"):
-        assignments = assignments.main_assignments
-
-    shifted_assignments = assignments
-    for i, a in enumerate(shifted_assignments):
-        shifted_assignments[i] = a.subs(symbol, replace_symbol)
-    return shifted_assignments
diff --git a/tests/lbm/_layout_fixer.py b/tests/lbm/_layout_fixer.py
deleted file mode 100644
index 2ff9781..0000000
--- a/tests/lbm/_layout_fixer.py
+++ /dev/null
@@ -1,29 +0,0 @@
-import numpy as np
-import pystencils as ps
-from pystencils_autodiff.backends import AVAILABLE_BACKENDS
-
-
-def fix_layout(array, target_field, backend):
-    assert array.shape == target_field.shape, "Array %s's shape should be %s but is %s" % (
-        target_field.name, target_field.shape, array.shape)
-    assert backend.lower() in AVAILABLE_BACKENDS
-
-    # Is just the index coordinate in the wrong position?
-    swapped_array = np.swapaxes(array, 0, -1)
-    if swapped_array.strides == target_field.strides and swapped_array.shape == target_field.shape and target_field.index_dimensions == 1:
-        array = swapped_array
-
-    # Everything ok
-    everything_ok = (array.strides == target_field.strides
-                     and array.shape == target_field.shape)
-
-    if everything_ok:
-        rtn = array
-    else:  # no, fix it!
-        f = target_field
-        rtn = np.lib.stride_tricks.as_strided(np.zeros(f.shape, dtype=f.dtype.numpy_dtype),
-                                              f.shape,
-                                              [f.dtype.numpy_dtype.itemsize * a for a in f.strides])
-        rtn[...] = array
-
-    return rtn
diff --git a/tests/lbm/_utils.py b/tests/lbm/_utils.py
deleted file mode 100644
index 0f4a7fe..0000000
--- a/tests/lbm/_utils.py
+++ /dev/null
@@ -1,37 +0,0 @@
-import numpy as np
-
-try:
-    import tensorflow as tf
-except ImportError:
-    pass
-try:
-    import torch
-except ImportError:
-    pass
-
-
-def tf_constant_from_field(field, init_val=0):
-    return tf.constant(init_val, dtype=field.dtype.numpy_dtype, shape=field.shape, name=field.name + '_constant')
-
-
-def tf_scalar_variable_from_field(field, init_val, constraint=None):
-    var = tf.Variable(init_val, dtype=field.dtype.numpy_dtype, name=field.name + '_variable', constraint=constraint)
-    return var * tf_constant_from_field(field, 1)
-
-
-def tf_variable_from_field(field, init_val=0, constraint=None):
-    if isinstance(init_val, (int, float)):
-        init_val *= np.ones(field.shape, field.dtype.numpy_dtype)
-
-    return tf.Variable(init_val, dtype=field.dtype.numpy_dtype, name=field.name + '_variable', constraint=constraint)
-
-
-def tf_placeholder_from_field(field):
-    return tf.placeholder(dtype=field.dtype.numpy_dtype, name=field.name + '_placeholder', shape=field.shape)
-
-
-def torch_tensor_from_field(field, init_val=0, cuda=True, requires_grad=False):
-    if isinstance(init_val, (int, float)):
-        init_val *= np.ones(field.shape, field.dtype.numpy_dtype)
-    device = torch.device('cuda' if cuda else 'cpu')
-    return torch.tensor(init_val, requires_grad=requires_grad, device=device)
diff --git a/tests/lbm/adjoint_field.py b/tests/lbm/adjoint_field.py
deleted file mode 100644
index 3ac9392..0000000
--- a/tests/lbm/adjoint_field.py
+++ /dev/null
@@ -1,30 +0,0 @@
-import pystencils
-from pystencils.astnodes import FieldShapeSymbol, FieldStrideSymbol
-
-"""
-Determines how adjoint fields will be denoted in LaTeX output in terms of the forward field representation %s
-Default: "\\hat{%s}"
-"""
-ADJOINT_FIELD_LATEX_HIGHLIGHT = r"\hat{%s}"
-
-
-class AdjointField(pystencils.Field):
-    """Field representing adjoint variables to a Field representing the forward variables"""
-
-    def __init__(self, forward_field, name_prefix='diff'):
-        new_name = name_prefix + forward_field.name
-        super(AdjointField, self).__init__(new_name, forward_field.field_type, forward_field._dtype,
-                                           forward_field._layout, forward_field.shape, forward_field.strides)
-        self.corresponding_forward_field = forward_field
-        self.name_prefix = name_prefix
-
-        # Eliminate references to forward fields that might not be present in backward kernels
-        self.shape = tuple(FieldShapeSymbol([self.name], s.coordinate) if isinstance(
-            s, FieldShapeSymbol) else s for s in self.shape)
-        self.strides = tuple(FieldStrideSymbol(self.name, s.coordinate) if isinstance(
-            s, FieldStrideSymbol) else s for s in self.strides)
-
-        if forward_field.latex_name:
-            self.latex_name = ADJOINT_FIELD_LATEX_HIGHLIGHT % forward_field.latex_name
-        else:
-            self.latex_name = ADJOINT_FIELD_LATEX_HIGHLIGHT % forward_field.name
diff --git a/tests/lbm/backends/_pytorch.py b/tests/lbm/backends/_pytorch.py
deleted file mode 100644
index db53c3f..0000000
--- a/tests/lbm/backends/_pytorch.py
+++ /dev/null
@@ -1,150 +0,0 @@
-import uuid
-
-import numpy as np
-import torch
-
-try:
-    import pycuda.autoinit
-    import pycuda.gpuarray
-    import pycuda.driver
-    HAS_PYCUDA = True
-except Exception:
-    HAS_PYCUDA = False
-
-# Fails if different context/thread
-
-
-def tensor_to_gpuarray(tensor):
-    if not tensor.is_cuda:
-        raise ValueError(
-            'Cannot convert CPU tensor to GPUArray (call `cuda()` on it)')
-    else:
-        return pycuda.gpuarray.GPUArray(tensor.shape,
-                                        dtype=torch_dtype_to_numpy(tensor.dtype),
-                                        gpudata=tensor.data_ptr())
-
-
-def create_autograd_function(autodiff_obj, inputfield_to_tensor_dict, forward_loop, backward_loop,
-                             convert_tensors_to_arrays=True):
-    field_to_tensor_dict = inputfield_to_tensor_dict
-    backward_input_fields = autodiff_obj.backward_input_fields
-
-    # Allocate output tensor for forward and backward pass
-    for field in autodiff_obj.forward_output_fields + autodiff_obj.backward_output_fields:
-        field_to_tensor_dict[field] = torch.zeros(
-            *field.shape,
-            dtype=numpy_dtype_to_torch(field.dtype.numpy_dtype),
-            device=list(inputfield_to_tensor_dict.values())[0].device)
-
-    tensor_to_field_dict = {
-        v: k for k, v in field_to_tensor_dict.items()}
-
-    def _tensors_to_dict(is_cuda, args, additional_dict={}):
-        arrays = dict()
-        lookup_dict = {**tensor_to_field_dict, **additional_dict}
-        for a in args:
-
-            if convert_tensors_to_arrays:
-                if is_cuda:
-                    a = a.cuda()
-                    array = tensor_to_gpuarray(a)
-                else:
-                    a = a.cpu()
-                    array = a.data.numpy()
-
-                try:
-                    arrays[lookup_dict[a].name] = array
-                except Exception:
-                    pass
-            else:
-                array = a
-                try:
-                    arrays[lookup_dict[a].name] = array
-                except Exception:
-                    pass
-        return arrays
-
-    def forward(self, *input_tensors):
-
-        self.save_for_backward(*input_tensors)
-        all_tensors = field_to_tensor_dict.values()
-
-        is_cuda = all(a.is_cuda for a in all_tensors)
-        arrays = _tensors_to_dict(is_cuda, all_tensors)
-
-        forward_loop(**arrays, is_cuda=is_cuda)
-
-        return tuple(field_to_tensor_dict[f] for f in autodiff_obj.forward_output_fields)
-
-    def backward(self, *grad_outputs):
-        all_tensors = grad_outputs + tuple(field_to_tensor_dict.values())
-
-        is_cuda = all(a.is_cuda for a in all_tensors)
-        arrays = _tensors_to_dict(is_cuda, all_tensors, additional_dict={
-            f.name: grad_outputs[i] for i, f in enumerate(backward_input_fields)})
-        backward_loop(**arrays, is_cuda=is_cuda)
-        return tuple(field_to_tensor_dict[f] for f in autodiff_obj.backward_output_fields)
-
-    cls = type(str(uuid.uuid4()), (torch.autograd.Function,), {})
-    cls.forward = forward
-    cls.backward = backward
-    return cls
-
-
-# from: https://stackoverflow.com/questions/51438232/how-can-i-create-a-pycuda-gpuarray-from-a-gpu-memory-address
-
-
-def torch_dtype_to_numpy(dtype):
-    dtype_name = str(dtype).replace('torch.', '')     # remove 'torch.'
-    return getattr(np, dtype_name)
-
-
-def numpy_dtype_to_torch(dtype):
-    dtype_name = str(dtype)
-    return getattr(torch, dtype_name)
-
-
-def gpuarray_to_tensor(gpuarray, context=None):
-    """
-    Convert a :class:`pycuda.gpuarray.GPUArray` to a :class:`torch.Tensor`. The underlying
-    storage will NOT be shared, since a new copy must be allocated.
-    Parameters
-    ----------
-    gpuarray  :   pycuda.gpuarray.GPUArray
-    Returns
-    -------
-    torch.Tensor
-    """
-    if not context:
-        context = pycuda.autoinit.context
-    shape = gpuarray.shape
-    dtype = gpuarray.dtype
-    out_dtype = numpy_dtype_to_torch(dtype)
-    out = torch.zeros(shape, dtype=out_dtype).cuda()
-    gpuarray_copy = tensor_to_gpuarray(out)
-    byte_size = gpuarray.itemsize * gpuarray.size
-    pycuda.driver.memcpy_dtod(gpuarray_copy.gpudata,
-                              gpuarray.gpudata, byte_size)
-    return out
-
-
-if HAS_PYCUDA:
-    class GpuPointerHolder(pycuda.driver.PointerHolderBase):
-
-        def __init__(self, tensor):
-            super().__init__()
-            self.tensor = tensor
-            self.gpudata = tensor.data_ptr()
-
-        def get_pointer(self):
-            return self.tensor.data_ptr()
-
-        def __int__(self):
-            return self.__index__()
-
-        # Without an __index__ method, arithmetic on the GPUArray backed by this pointer fails.
-        # It apparently needs to return an integer, although the exact reason is unclear.
-        def __index__(self):
-            return self.gpudata
-else:
-    GpuPointerHolder = None
diff --git a/tests/lbm/backends/_tensorflow.py b/tests/lbm/backends/_tensorflow.py
deleted file mode 100644
index a4df191..0000000
--- a/tests/lbm/backends/_tensorflow.py
+++ /dev/null
@@ -1,72 +0,0 @@
-import tensorflow as tf
-import pystencils_autodiff
-import numpy as np
-from pystencils.utils import DotDict
-
-from tensorflow.compat.v1 import get_default_graph
-
-_num_generated_ops = 0
-
-
-def _py_func(func, inp, Tout, stateful=False, name=None, grad=None):
-    """
-    Copied from a forum post. It is important to give the PyFunc a random name
-    in the gradient override map so that gradients are registered properly.
-
-    PyFunc as defined by TensorFlow
-    :param func: Custom Function
-    :param inp: Function Inputs
-    :param Tout: Output Type of the Custom Function
-    :param stateful: Calculate Gradients when stateful is True
-    :param name: Name of the PyFunction
-    :param grad: Custom Gradient Function
-    :return:
-    """
-    # Generate Random Gradient name in order to avoid conflicts with inbuilt names
-    global _num_generated_ops
-    rnd_name = 'PyFuncGrad' + str(_num_generated_ops) + 'ABC@a1b2c3'
-    _num_generated_ops += 1
-
-    # Register Tensorflow Gradient
-    tf.RegisterGradient(rnd_name)(grad)
-
-    # Get current graph
-    g = get_default_graph()
-
-    # Add gradient override map
-    with g.gradient_override_map({"PyFunc": rnd_name, "PyFuncStateless": rnd_name}):
-        return tf.py_func(func, inp, Tout, stateful=stateful, name=name)
-
-
-def tensorflowop_from_autodiffop(autodiffop: pystencils_autodiff.AutoDiffOp, inputfield_tensor_dict, forward_function, backward_function):
-
-    def helper_forward(*args):
-        kwargs = dict()
-        for i in range(len(args)):
-            if args[i] is not None:
-                kwargs[autodiffop.forward_input_fields[i].name] = args[i]
-
-        rtn_dict = forward_function(**kwargs)
-        return [rtn_dict[o.name] for o in autodiffop._forward_output_fields]
-
-    def helper_backward(*args):
-        kwargs = dict()
-        for i in range(len(args)):
-            if i < len(autodiffop.forward_input_fields):
-                kwargs[autodiffop.forward_input_fields[i].name] = args[i]
-            else:
-                kwargs[autodiffop._backward_input_fields[i -
-                                                         len(autodiffop.forward_input_fields)].name] = args[i]
-        rtn_dict = backward_function(**kwargs)
-        return [rtn_dict[o.name] for o in autodiffop._backward_output_fields]
-
-    def backward(op, *grad):
-        return tf.py_func(helper_backward, [*op.inputs, *grad], [f.dtype.numpy_dtype for f in autodiffop._backward_output_fields], name=autodiffop.op_name + '_backward', stateful=False)
-
-    output_tensors = _py_func(helper_forward,
-                              [inputfield_tensor_dict[f]
-                               for f in autodiffop.forward_input_fields],
-                              [f.dtype.numpy_dtype for f in autodiffop._forward_output_fields],
-                              name=autodiffop.op_name, stateful=False, grad=backward)
-
-    return output_tensors
diff --git a/tests/lbm/backends/_tensorflow_cpp.py b/tests/lbm/backends/_tensorflow_cpp.py
deleted file mode 100644
index 541e122..0000000
--- a/tests/lbm/backends/_tensorflow_cpp.py
+++ /dev/null
@@ -1,14 +0,0 @@
-"""Implementing a custom Tensorflow Op in C++ has some advantages and disadvantages
-
-Advantages:
-- GPU support without any hacks
-- Access to raw tensors without conversion to numpy
-- Custom Ops will be serializable
-
-Disadvantages:
-- C++ code has to be built with the correct parameters and ABI for the present TensorFlow version (best integrated into the TensorFlow build)
-
-"""
-
-
-# raise NotImplementedError()
diff --git a/tests/lbm/backends/_torch_native.py b/tests/lbm/backends/_torch_native.py
deleted file mode 100644
index bb21e45..0000000
--- a/tests/lbm/backends/_torch_native.py
+++ /dev/null
@@ -1,175 +0,0 @@
-import os
-import types
-import uuid
-from itertools import chain
-from os.path import dirname, isdir, isfile, join
-
-import jinja2
-import torch
-from appdirs import user_cache_dir
-
-import pystencils
-import pystencils_autodiff
-import pystencils_autodiff.backends._pytorch
-from pystencils.astnodes import FieldShapeSymbol
-from pystencils.backends.cbackend import generate_c
-from pystencils.backends.cuda_backend import CudaSympyPrinter, generate_cuda
-from pystencils.cpu.kernelcreation import create_kernel
-from pystencils.gpucuda.kernelcreation import create_cuda_kernel
-from pystencils_autodiff.backends._pytorch import numpy_dtype_to_torch
-
-
-def _read_file(file):
-    with open(file, 'r') as f:
-        return f.read()
-
-
-def _write_file(filename, content):
-    with open(filename, 'w') as f:
-        return f.write(content)
-
-
-def generate_torch(destination_folder,
-                   autodiff: pystencils_autodiff.AutoDiffOp,
-                   is_cuda,
-                   dtype,
-                   forward_ast=None,
-                   backward_ast=None):
-    shape = autodiff.forward_output_fields[0].spatial_shape
-    operation_hash = abs(hash(autodiff) + hash(shape) + hash(str(dtype)))
-    operation_string = "%s_native_%s_%s_%x" % (
-        autodiff.op_name, 'cuda' if is_cuda else 'cpu', 'x'.join(str(s) for s in shape), operation_hash)
-
-    cpp_file = join(destination_folder, operation_string + '.cpp')
-    cuda_kernel_file = join(destination_folder, operation_string + '.cu')
-
-    required_files = [cpp_file, cuda_kernel_file] if is_cuda else [cpp_file]
-
-    if not all(isfile(x) for x in required_files):
-        generate_ast = create_cuda_kernel if is_cuda else create_kernel
-        generate_code = generate_cuda if is_cuda else generate_c
-
-        if not forward_ast:
-            forward_ast = generate_ast(autodiff.forward_assignments.all_assignments)
-        if not backward_ast:
-            backward_ast = generate_ast(autodiff.backward_assignments.all_assignments)
-
-        forward_ast.subs({s: FieldShapeSymbol(
-            [autodiff.forward_output_fields[0].name], s.coordinate) for s in forward_ast.atoms(FieldShapeSymbol)})
-        backward_ast.subs({s: FieldShapeSymbol(
-            [autodiff.backward_output_fields[0].name], s.coordinate) for s in backward_ast.atoms(FieldShapeSymbol)})
-        # backward_ast.subs({s: FieldStrideSymbol(
-        # autodiff.forward_input_fields[0].name, s.coordinate) for s in forward_ast.atoms(FieldStrideSymbol)})
-
-        forward_code = generate_code(forward_ast.body).replace(
-            'float *', 'scalar_t *').replace('double *', 'scalar_t *')
-        backward_code = generate_code(backward_ast.body).replace(
-            'float *', 'scalar_t *').replace('double *', 'scalar_t *')
-
-        if is_cuda:
-            printer = CudaSympyPrinter()
-            block_and_thread_numbers = forward_ast.indexing.call_parameters(shape)
-            forward_block = ', '.join(printer.doprint(i) for i in block_and_thread_numbers['block'])
-            forward_grid = ', '.join(printer.doprint(i) for i in block_and_thread_numbers['grid'])
-            backward_shape = autodiff.backward_output_fields[0].spatial_shape
-            block_and_thread_numbers = backward_ast.indexing.call_parameters(backward_shape)
-            backward_block = ', '.join(printer.doprint(i) for i in block_and_thread_numbers['block'])
-            backward_grid = ', '.join(printer.doprint(i) for i in block_and_thread_numbers['grid'])
-            cuda_globals = pystencils.backends.cbackend.get_global_declarations(forward_ast) | \
-                pystencils.backends.cbackend.get_global_declarations(backward_ast)
-            cuda_globals = [generate_cuda(g) for g in cuda_globals]
-        else:
-            backward_block = forward_block = "INVALID"
-            backward_grid = forward_grid = "INVALID"
-            cuda_globals = ""
-
-        render_dict = {
-            "forward_tensors": [f for f in autodiff.forward_fields],
-            "forward_input_tensors": [f for f in autodiff.forward_input_fields],
-            "forward_output_tensors": [f for f in autodiff.forward_output_fields],
-            "backward_tensors": [f for f in autodiff.backward_fields + autodiff.forward_input_fields],
-            "backward_input_tensors": [f for f in autodiff.backward_input_fields],
-            "backward_output_tensors": [f for f in autodiff.backward_output_fields],
-            "forward_kernel": forward_code,
-            "backward_kernel": backward_code,
-            "dimensions": range(autodiff.forward_fields[0].spatial_dimensions),
-            "kernel_name": operation_string,
-            "forward_threads": "{" + forward_block + "}",
-            "forward_blocks": "{" + forward_grid + "}",
-            "backward_threads": "{" + backward_block + "}",
-            "backward_blocks": "{" + backward_grid + "}",
-            "cuda_globals": cuda_globals,
-            "dtype": pystencils.data_types.BasicType(dtype)
-        }
-
-        if is_cuda:
-            template_string_cpp = _read_file(join(dirname(__file__),
-                                                  'torch_native_cuda.tmpl.cpp'))
-            template = jinja2.Template(template_string_cpp)
-            output = template.render(render_dict)
-            _write_file(join(destination_folder, operation_string + '.cpp'), output)
-
-            template_string = _read_file(join(dirname(__file__), 'torch_native_cuda.tmpl.cu'))
-            template = jinja2.Template(template_string)
-            output = template.render(render_dict)
-            _write_file(join(destination_folder, operation_string + '.cu'), output)
-        else:
-            template_string_cpp = _read_file(join(dirname(__file__),
-                                                  'torch_native_cpu.tmpl.cpp'))
-            template = jinja2.Template(template_string_cpp)
-            output = template.render(render_dict)
-            _write_file(join(destination_folder, operation_string + '.cpp'), output)
-
-    from torch.utils.cpp_extension import load
-    compiled_operation = load(operation_string, required_files, verbose=True,
-                              extra_cuda_cflags=[] if is_cuda else [])
-    return compiled_operation
-
-
-def create_autograd_function(autodiff_obj, inputfield_to_tensor_dict, forward_loop=None, backward_loop=None):
-    if forward_loop is None:
-        assert backward_loop is None
-        is_cuda = all(t.is_cuda for t in inputfield_to_tensor_dict.values())
-        assert all(t.is_cuda for t in inputfield_to_tensor_dict.values()) or \
-            all(not t.is_cuda for t in inputfield_to_tensor_dict.values()), "All tensors should be on GPU or all on CPU"
-        dtype = pystencils_autodiff.backends._pytorch.torch_dtype_to_numpy(
-            list(inputfield_to_tensor_dict.values())[0].dtype)
-
-        cache_dir = user_cache_dir('pystencils')
-        if not isdir(cache_dir):
-            os.mkdir(cache_dir)
-        # TODO: create function and stuff
-
-        compiled_operation = generate_torch(cache_dir, autodiff_obj, is_cuda,
-                                            dtype)
-        field_to_tensor_dict = inputfield_to_tensor_dict
-        # Allocate output tensor for forward and backward pass
-        for field in chain(autodiff_obj.forward_output_fields, autodiff_obj.backward_output_fields):
-            field_to_tensor_dict[field] = torch.zeros(
-                *field.shape,
-                dtype=numpy_dtype_to_torch(field.dtype.numpy_dtype),
-                device=list(inputfield_to_tensor_dict.values())[0].device)
-
-        def forward(self):
-            self.saved = {f: field_to_tensor_dict[f] for f in chain(
-                autodiff_obj.forward_input_fields, autodiff_obj.backward_output_fields)}
-            compiled_operation.forward(**{f.name: field_to_tensor_dict[f] for f in autodiff_obj.forward_fields})
-            return tuple(field_to_tensor_dict[f] for f in autodiff_obj.forward_output_fields)
-
-        def backward(self, *grad_outputs):
-            self.saved.update({f.name: grad_outputs[i] for i, f in enumerate(autodiff_obj.backward_input_fields)})
-            compiled_operation.backward(**{f.name: t for f, t in self.saved.items()})
-            return tuple(self.saved[f] for f in autodiff_obj.backward_output_fields)
-
-        cls = type(str(uuid.uuid4()), (torch.autograd.Function,), {})
-        cls.saved = None
-        cls.forward = forward
-        cls.backward = backward
-        return cls
-    else:
-        op = pystencils_autodiff.backends._pytorch.create_autograd_function(autodiff_obj,
-                                                                            inputfield_to_tensor_dict,
-                                                                            forward_loop,
-                                                                            backward_loop,
-                                                                            convert_tensors_to_arrays=False)
-        return op
diff --git a/tests/lbm/backends/torch_native_cpu.tmpl.cpp b/tests/lbm/backends/torch_native_cpu.tmpl.cpp
deleted file mode 100644
index 0f02323..0000000
--- a/tests/lbm/backends/torch_native_cpu.tmpl.cpp
+++ /dev/null
@@ -1,76 +0,0 @@
-#include <torch/extension.h>
-
-#include <vector>
-
-using namespace pybind11::literals;
-
-using scalar_t = {{ dtype }};
-
-
-
-std::vector<at::Tensor> {{ kernel_name }}_forward(
-{%- for tensor in forward_tensors -%}
-    at::Tensor {{ tensor }} {{- ", " if not loop.last -}}
-{%- endfor %})
-{
-    //{% for tensor in forward_output_tensors -%}
-    //auto {{tensor}} = at::zeros_like({{ forward_input_tensors[0] }});
-    //{% endfor %}
-
-    {% for i in dimensions -%}
-    int _size_{{ forward_tensors[0] }}_{{ i }} = {{ forward_tensors[0] }}.size({{ i }});
-    {% endfor %}
-
-    {% for tensor in forward_tensors -%}
-    {%- set last = loop.last -%}
-    scalar_t* _data_{{ tensor }} = {{ tensor }}.data<scalar_t>();
-    {% for i in dimensions -%}
-    int _stride_{{tensor}}_{{i}} = {{tensor}}.strides()[{{ i }}];
-    {% endfor -%}
-    {% endfor -%}
-
-    {{forward_kernel}}
-
-    return {
-    {%- for tensor in forward_output_tensors -%}
-    {{ tensor }} {{- "," if not loop.last -}}
-    {% endfor -%}
-    };
-}
-
-std::vector<at::Tensor> {{ kernel_name }}_backward(
-{%- for tensor in backward_tensors -%}
-    at::Tensor {{ tensor }} {{- ", " if not loop.last -}}
-{% endfor %})
-{
-    //{% for tensor in backward_output_tensors -%}
-    //auto {{tensor}} = at::zeros_like({{ backward_input_tensors[0] }});
-    //{% endfor %}
-
-    {% for tensor in backward_tensors -%}
-    {%- set last = loop.last -%}
-    scalar_t* _data_{{ tensor }} = {{ tensor }}.data<scalar_t>();
-    {% for i in dimensions -%}
-    int _stride_{{ tensor }}_{{i}} = {{ tensor }}.strides()[{{ i }}];
-    {% endfor -%}
-    {% endfor -%}
-
-    {{backward_kernel}}
-
-    return {
-    {%- for tensor in backward_output_tensors -%}
-    {{ tensor }} {{- "," if not loop.last -}}
-    {% endfor -%}
-    };
-}
-
-PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
-  m.def("forward", &{{ kernel_name }}_forward, "{{ kernel_name }} forward (CPU)",
-{%- for tensor in forward_tensors -%}
-    "{{ tensor }}"_a {{ ", " if not loop.last }}  
-{%- endfor -%} );
-  m.def("backward", &{{ kernel_name }}_backward, "{{ kernel_name }} backward (CPU)",
-{%- for tensor in backward_tensors -%}
-    "{{ tensor }}"_a {{ ", " if not loop.last }}  
-{%- endfor -%} );
-}
diff --git a/tests/lbm/backends/torch_native_cuda.tmpl.cpp b/tests/lbm/backends/torch_native_cuda.tmpl.cpp
deleted file mode 100644
index 0662090..0000000
--- a/tests/lbm/backends/torch_native_cuda.tmpl.cpp
+++ /dev/null
@@ -1,79 +0,0 @@
-#include <torch/extension.h>
-#include <vector>
-
-// CUDA forward declarations
-using namespace pybind11::literals;
-
-void {{ kernel_name }}_cuda_forward(
-{%- for tensor in forward_tensors %}
-    at::Tensor {{ tensor.name }} {{- ", " if not loop.last -}}
-{% endfor %});
-
-std::vector<at::Tensor> {{ kernel_name }}_cuda_backward(
-{%- for tensor in backward_tensors -%}
-    at::Tensor {{ tensor.name }} {{- ", " if not loop.last -}}
-{% endfor %});
-
-// C++ interface
-
-// NOTE: AT_ASSERT has become AT_CHECK on master after 0.4.
-#define CHECK_CUDA(x)                                                          \
-  AT_ASSERTM(x.type().is_cuda(), #x " must be a CUDA tensor")
-#define CHECK_CONTIGUOUS(x)                                                    \
-  //AT_ASSERTM(x.is_contiguous(), #x " must be contiguous")
-#define CHECK_INPUT(x) CHECK_CUDA(x);       
-  //CHECK_CONTIGUOUS(x)
-
-std::vector<at::Tensor> {{ kernel_name }}_forward(
-{%- for tensor in forward_tensors -%}
-    at::Tensor {{ tensor.name }} {{- ", " if not loop.last -}}
-{%- endfor %})
-{
-    {% for tensor in forward_tensors -%}
-    CHECK_INPUT({{ tensor.name }});
-    {% endfor %}
-
-    {{ kernel_name }}_cuda_forward(
-        {%- for tensor in forward_tensors %}
-        {{ tensor.name }} {{- ", " if not loop.last }}
-        {%- endfor %});
-
-    return std::vector<at::Tensor>{
-        {%- for tensor in forward_output_tensors %}
-        {{ tensor.name }} {{- ", " if not loop.last }}
-        {%- endfor %}
-    }
-        ;
-}
-
-std::vector<at::Tensor> {{ kernel_name }}_backward(
-{%- for tensor in backward_tensors -%}
-    at::Tensor {{ tensor.name }} {{- ", " if not loop.last -}}
-{% endfor %})
-{
-    {%- for tensor in forward_input_tensors + backward_input_tensors -%}
-    CHECK_INPUT({{ tensor }});
-    {% endfor %}
-    {{ kernel_name }}_cuda_backward(
-        {%- for tensor in backward_tensors -%}
-        {{ tensor.name }} {{- ", " if not loop.last }}
-        {%- endfor %});
-
-    return std::vector<at::Tensor>{
-        {%- for tensor in backward_output_tensors %}
-        {{ tensor.name }} {{- ", " if not loop.last }}
-        {%- endfor %}
-    }
-        ;
-}
-
-PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
-  m.def("forward", &{{ kernel_name }}_forward, "{{ kernel_name }} forward (CUDA)",
-{%- for tensor in forward_tensors -%}
-    "{{ tensor.name }}"_a {{ ", " if not loop.last }}  
-{%- endfor -%} );
-  m.def("backward", &{{ kernel_name }}_backward, "{{ kernel_name }} backward (CUDA)",
-{%- for tensor in backward_tensors -%}
-    "{{ tensor.name }}"_a {{ ", " if not loop.last }}  
-{%- endfor -%} );
-}
diff --git a/tests/lbm/backends/torch_native_cuda.tmpl.cu b/tests/lbm/backends/torch_native_cuda.tmpl.cu
deleted file mode 100644
index bd80ada..0000000
--- a/tests/lbm/backends/torch_native_cuda.tmpl.cu
+++ /dev/null
@@ -1,103 +0,0 @@
-
-#include <ATen/ATen.h>
-
-#include <cuda.h>
-#include <cuda_runtime.h>
-
-#include <vector>
-
-{% for g in cuda_globals -%}
-{{ g }}
-{% endfor %}
-
-template <typename scalar_t>
-__global__ void {{ kernel_name }}_cuda_forward_kernel(
-        {% for tensor in forward_tensors -%}
-        {%- set last = loop.last -%}
-        scalar_t* __restrict__ _data_{{ tensor.name }},
-        {% for i in range(tensor.spatial_dimensions )-%}
-        int _stride_{{ tensor.name }}_{{ i }} {{- ", " }} 
-        {% endfor -%} 
-        {% endfor -%}
-        {% for i in range(forward_output_tensors[0].spatial_dimensions )-%}
-        int _size_{{ forward_output_tensors[0] }}_{{ i }} {{- "," if not loop.last }}
-        {% endfor %})
-{
-    {{forward_kernel}}
-}
-    
-template <typename scalar_t>
-__global__ void {{ kernel_name }}_cuda_backward_kernel(
-        {% for tensor in backward_tensors -%}
-        {%- set last = loop.last -%}
-        scalar_t* __restrict__ _data_{{ tensor.name }},
-        {% for i in range(tensor.spatial_dimensions )-%}
-        int _stride_{{ tensor.name }}_{{ i }} {{- ", " }}
-        {% endfor -%}
-        {% endfor -%}
-        {% for i in range(forward_output_tensors[0].spatial_dimensions )-%}
-        int _size_{{ forward_output_tensors[0].name }}_{{ i }} {{- "," if not loop.last }}
-        {% endfor %})
-{
-    {{backward_kernel}}
-}
-
-void {{ kernel_name }}_cuda_forward(
-    {%- for tensor in forward_tensors -%}
-    at::Tensor {{ tensor.name }} {{- "," if not loop.last -}}
-    {%- endfor -%})
-{
-
-    {% for i in range(forward_output_tensors[0].spatial_dimensions )-%}
-    int _size_{{ forward_output_tensors[0].name }}_{{ i }} = {{ forward_output_tensors[0].name }}.size({{ i }});
-    {% endfor %}
-
-/*at:: at::device(at::kCUDA).dtype(k{{ dtype }})*/
-    AT_DISPATCH_FLOATING_TYPES({{ forward_input_tensors[0].name }}.type(), "{{ kernel_name }}_forward_cuda", ([&] {
-                {{ kernel_name }}_cuda_forward_kernel<scalar_t><<<dim3{{ forward_blocks }}, dim3{{ forward_threads }}>>>(
-                        {% for tensor in forward_tensors -%}
-                        {%- set last = loop.last -%}
-                        {{tensor.name}}.data<scalar_t>(),
-                        {% for i in range(tensor.spatial_dimensions) -%}
-                        {{tensor.name}}.strides()[{{ i }}] {{- "," }}
-                        {% endfor -%}
-                        {% endfor -%}
-                        {% for i in range(forward_output_tensors[0].spatial_dimensions) -%}
-                        {{ forward_output_tensors[0].name }}.size({{ i }}) {{- "," if not loop.last }}
-                        {% endfor %}
-                        );
-                }));
-     cudaError_t err = cudaGetLastError();
-     if (err) {
-        fprintf(stderr, "CUDA kernel failed : %s\n", cudaGetErrorString(err));
-        throw err;
-     }
-}
-
-void {{ kernel_name }}_cuda_backward(
-    {%- for tensor in backward_tensors -%}
-    at::Tensor {{ tensor.name }} {{- ", " if not loop.last -}}
-    {%- endfor %})
-{
-
-    {% for i in range(backward_output_tensors[0].spatial_dimensions )-%}
-    int _size_{{ backward_output_tensors[0].name }}_{{ i }} = {{ backward_output_tensors[0].name }}.size({{ i }});
-    {% endfor %}
-
-/*at:: at::device(at::kCUDA).dtype(k{{ dtype }})*/
-    AT_DISPATCH_FLOATING_TYPES({{ backward_input_tensors[0].name }}.type(), "{{ kernel_name }}_backward_cuda", ([&] {
-                {{ kernel_name }}_cuda_backward_kernel<scalar_t><<<dim3{{ backward_blocks }}, dim3{{ backward_threads }}>>>(
-                        {% for tensor in backward_tensors -%}
-                        {%- set last = loop.last -%}
-                        {{tensor.name}}.data<scalar_t>(),
-                        {% for i in range(tensor.spatial_dimensions )-%}
-                        {{tensor.name}}.strides()[{{ i }}]{{- ", " }}
-                        {% endfor -%}
-                        {% endfor -%}
-                        {% for i in range(backward_output_tensors[0].spatial_dimensions )-%}
-                        {{ backward_output_tensors[0].name }}.size({{ i }}) {{- "," if not loop.last }}
-                        {% endfor %}
-                        );
-                }));
-
-}
diff --git a/tests/lbm/losses.py b/tests/lbm/losses.py
deleted file mode 100644
index b30b44a..0000000
--- a/tests/lbm/losses.py
+++ /dev/null
@@ -1,27 +0,0 @@
-try:
-    import tensorflow as tf
-except Exception:
-    pass
-
-
-def masked_mse(a, b, mask):
-    """ Mean squared error within mask """
-    return tf.losses.mean_pairwise_squared_error(tf.boolean_mask(a, mask), tf.boolean_mask(b, mask))
-
-
-def total_var(tensor, norm=2):
-    pixel_dif1 = tensor[1:, :] - tensor[:-1, :]
-    pixel_dif2 = tensor[:, 1:] - tensor[:, :-1]
-    if norm == 2:
-        return tf.reduce_sum(pixel_dif1 * pixel_dif1) + tf.reduce_sum(pixel_dif2 * pixel_dif2)
-    if norm == 1:
-        return tf.reduce_sum(tf.abs(pixel_dif1)) + tf.reduce_sum(tf.abs(pixel_dif2))
-
-
-def mean_total_var(tensor, norm=2):
-    pixel_dif1 = tensor[1:, :] - tensor[:-1, :]
-    pixel_dif2 = tensor[:, 1:] - tensor[:, :-1]
-    if norm == 2:
-        return (tf.reduce_mean(pixel_dif1 * pixel_dif1) + tf.reduce_mean(pixel_dif2 * pixel_dif2)) / 2
-    if norm == 1:
-        return (tf.reduce_mean(tf.abs(pixel_dif1)) + tf.reduce_mean(tf.abs(pixel_dif2))) / 2
-- 
GitLab