diff --git a/.travis.yml b/.travis.yml
index f80f482198c5f1e9d563fc9415eb0782f61f55f9..bd5987b873f292c98119113fffed83867cfca901 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -7,8 +7,6 @@ sudo: false
 language: python
 virtualenv:
   system_site_packages: false
-before_install:
-  - sudo apt-get install -y ninja-build build-essential nvidia-cuda-toolkit
 addons:
   apt:
     update: true
@@ -17,19 +15,50 @@ matrix:
   include:
     - python: 3.6
       env: DISTRIB="ubuntu" TOX_PYTHON_VERSION="py36" COVERAGE="true" LINT="false"
-    - env: DISTRIB="conda" PYTHON_VERSION="3.6" COVERAGE="false" LINT="false"
-    - env: DISTRIB="ubuntu" TOX_PYTHON_VERSION="py36" COVERAGE="false" LINT="true"
+      before_install:
+        - sudo apt-get install -y ninja-build build-essential nvidia-cuda-toolkit
+    - python: 3.6
+      name: "Pystencils from pip"
+      env: DISTRIB="ubuntu" TOX_PYTHON_VERSION="py36" COVERAGE="true" LINT="false" PYSTENCIL_FROM_PIP="true"
+      before_install:
+        - sudo apt-get install -y ninja-build build-essential nvidia-cuda-toolkit
+    - name: "Conda"
+      env: DISTRIB="conda" PYTHON_VERSION="3.6" COVERAGE="false" LINT="false"
+      before_install:
+        - sudo apt-get install -y ninja-build build-essential nvidia-cuda-toolkit
+    - name: "Lint and documentation test"
+      env: DISTRIB="ubuntu" TOX_PYTHON_VERSION="py36" COVERAGE="false" LINT="true"
+    - name: "Python 3.7.2 on macOS"
+      os: osx
+      osx_image: xcode10.2  # Python 3.7.2 running on macOS 10.14.3
+      language: shell       # 'language: python' is an error on Travis CI macOS
+      before_install:
+       - brew update && brew upgrade python
+       - brew install ninja
+       - alias python=python3
+       - alias pip="python3 -m pip"
+       - shopt -s expand_aliases
+      before_cache:
+        - brew cleanup
+    - name: "Python 3.7.3 on Windows"
+      os: windows           # Windows 10.0.17134 N/A Build 17134
+      language: shell       # 'language: python' is an error on Travis CI Windows
+      before_install:
+       - choco install python
+       - python -m pip install --upgrade pip
+      env: PATH=/c/Python37:/c/Python37/Scripts:$PATH
 install:
   - source tests/travis_install.sh
+  - pip3 install -e .
+  - pip3 install tensorflow torch || echo "failed to install optional dependencies tensorflow and torch"
 before_script:
-  - git config --global user.email "you@example.com"
-  - git config --global user.name "Your Name"
+  - git config --global user.email "stephan.seitz@fau.de"
+  - git config --global user.name "Stephan Seitz"
 script:
   - export NO_GPU_EXECUTION=1
-  - pip install -e .
-  - pip install tensorflow torch
-  - if [[ "$LINT" == "false" ]]; then python setup.py test; fi
-  - if [[ "$LINT" == "true" ]]; then flake8 src;python setup.py doctest; fi
+  - if [[ "$LINT" == "true" ]]; then flake8 src;python setup.py doctest; exit 0; fi
+  - python setup.py test
+
 after_success:
   - if [[ "$COVERAGE" == "true" ]]; then coveralls || echo "failed"; codecov; fi
 after_script:
@@ -38,3 +67,5 @@ cache:
   pip: true
   directories:
     - $HOME/miniconda
+    - /c/Python37
+    - $HOME/Library/Caches/Homebrew
diff --git a/README.rst b/README.rst
index 21a02f01cac3d13dc3d467bf1e581b169876e2f3..10094f73358664901e55e90da4ae7bdd92c60187 100644
--- a/README.rst
+++ b/README.rst
@@ -19,7 +19,7 @@
 pystencils_autodiff
 ===================
 
-This repo adds automatic differentiation to `pystencils <https://i10git.cs.fau.de/seitz/pystencils>`_.
+This repo adds automatic differentiation to `pystencils <https://i10git.cs.fau.de/pycodegen/pystencils>`_.
 
 Installation
 ------------
@@ -30,7 +30,7 @@ Install via pip:
 
    pip install pystencils-autodiff
 
-or if you downloaded this `repository <https://github.com/theHamsta/pystencils_autodiff>`_ using:
+or if you downloaded this `repository <https://github.com/pycodegen/pystencils_autodiff>`_ using:
 
 .. code-block:: bash
 
@@ -52,7 +52,7 @@ Create a `pystencils.AssignmentCollection` with pystencils:
     import sympy
     import pystencils
 
-    z, x, y = pystencils.fields("z, y, x: [20,30]")
+    z, y, x = pystencils.fields("z, y, x: [20,30]")
 
     forward_assignments = pystencils.AssignmentCollection({
         z[0, 0]: x[0, 0] * sympy.log(x[0, 0] * y[0, 0])
@@ -65,7 +65,7 @@ Create a `pystencils.AssignmentCollection` with pystencils:
 
     Subexpressions:
     Main Assignments:
-         z[0,0] ← y_C*log(x_C*y_C)
+         z[0,0] ← x_C*log(x_C*y_C)
    
 You can then obtain the corresponding backward assignments:
 
@@ -82,8 +82,8 @@ You can see the derivatives with respective to the two inputs multiplied by the
 
     Subexpressions:
     Main Assignments:
-        \hat{y}[0,0] ← diffz_C*(log(x_C*y_C) + 1)
-        \hat{x}[0,0] ← diffz_C*y_C/x_C
+        \hat{x}[0,0] ← diffz_C*(log(x_C*y_C) + 1)
+        \hat{y}[0,0] ← diffz_C*x_C/y_C
 
 You can also use the class `AutoDiffOp` to obtain both the assignments (if you are curious) and auto-differentiable operations for Tensorflow...
 
diff --git a/docs/index.rst b/docs/index.rst
index e38fda823dcc9dc798d8fddcf2d442d04e7df3ec..ceb01f4933432e3183c2686fa721ef5653f6b53b 100644
--- a/docs/index.rst
+++ b/docs/index.rst
@@ -38,7 +38,7 @@ Create a :class:`pystencils.AssignmentCollection` with pystencils:
     import sympy
     import pystencils
 
-    z, x, y = pystencils.fields("z, y, x: [20,30]")
+    z, y, x = pystencils.fields("z, y, x: [20,30]")
 
     forward_assignments = pystencils.AssignmentCollection({
         z[0, 0]: x[0, 0] * sympy.log(x[0, 0] * y[0, 0])
@@ -52,7 +52,7 @@ Create a :class:`pystencils.AssignmentCollection` with pystencils:
 
     Subexpressions:
     Main Assignments:
-         z[0,0] ← y_C*log(x_C*y_C)
+         z[0,0] ← x_C*log(x_C*y_C)
    
 You can then obtain the corresponding backward assignments:
 
@@ -61,7 +61,7 @@ You can then obtain the corresponding backward assignments:
     from pystencils.autodiff import AutoDiffOp, create_backward_assignments
     backward_assignments = create_backward_assignments(forward_assignments)
 
-    # Sorting for reprudcible outputs
+    # Sorting for reproducible outputs
     backward_assignments.main_assignments = sorted(backward_assignments.main_assignments, key=lambda a: str(a))
 
     print(backward_assignments)
@@ -72,9 +72,10 @@ You can see the derivatives with respective to the two inputs multiplied by the
     :options: -ELLIPSIS, +NORMALIZE_WHITESPACE
 
     Subexpressions:
+
     Main Assignments:
-        \hat{x}[0,0] ← diffz_C*y_C/x_C
-        \hat{y}[0,0] ← diffz_C*(log(x_C*y_C) + 1)
+        \hat{x}[0,0] ← diffz_C*(log(x_C*y_C) + 1)
+        \hat{y}[0,0] ← diffz_C*x_C/y_C
 
 You can also use the class :class:`.autodiff.AutoDiffOp` to obtain both the assignments (if you are curious) and auto-differentiable operations for Tensorflow...
 
diff --git a/setup.cfg b/setup.cfg
index deccca96f9f8043a08ae84adb761407ab7d23bd0..6b56aeca25f159ed480ad414706ca74bed5a1faf 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -32,10 +32,16 @@ setup_requires = pyscaffold>=3.2a0,<3.3a0
 # Add here dependencies of your project (semicolon/line-separated), e.g.
 install_requires =
     numpy
-    pystencils>=0.2.6
+    pystencils>=0.2.7
     jinja2
     stringcase # for converting to camelcase for tensorflow
-#
+# The usage of test_requires is discouraged, see `Dependency Management` docs
+tests_require =
+    pytest
+    pytest-html
+    ansi2html
+    pytest-cov
+    tensorflow
 # Require a specific Python version, e.g. Python 2.7 or >= 3.4
 # python_requires = >=2.7,!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*
 
@@ -50,14 +56,11 @@ exclude =
 # PDF = ReportLab; RXP
 # Add here test requirements (semicolon/line-separated)
 testing =
-    pytest
-    pytest-cov
     pytest
     pytest-html
     ansi2html
     pytest-cov
     tensorflow
-    torch
 
 [options.entry_points]
 # Add here console scripts like:
diff --git a/src/pystencils_autodiff/backends/_pytorch.py b/src/pystencils_autodiff/backends/_pytorch.py
index 22242763a6830c571af6d155faf6c2f76e29e598..659a437db1b1babcda44be57379df2b3f81c24f1 100644
--- a/src/pystencils_autodiff/backends/_pytorch.py
+++ b/src/pystencils_autodiff/backends/_pytorch.py
@@ -1,7 +1,10 @@
 import uuid
 
 import numpy as np
-import torch
+try:
+    import torch
+except ImportError:
+    pass
 
 try:
     import pycuda.autoinit
diff --git a/src/pystencils_autodiff/backends/_torch_native.py b/src/pystencils_autodiff/backends/_torch_native.py
new file mode 100644
index 0000000000000000000000000000000000000000..fcd1ac9cd45b47f3ba7753622fd652040eb3daa3
--- /dev/null
+++ b/src/pystencils_autodiff/backends/_torch_native.py
@@ -0,0 +1,179 @@
+import os
+import uuid
+from itertools import chain
+from os.path import dirname, isdir, isfile, join
+
+import jinja2
+from appdirs import user_cache_dir
+
+import pystencils
+import pystencils_autodiff
+import pystencils_autodiff.backends._pytorch
+from pystencils.astnodes import FieldShapeSymbol
+from pystencils.backends.cbackend import generate_c
+from pystencils.backends.cuda_backend import CudaSympyPrinter, generate_cuda
+from pystencils.cpu.kernelcreation import create_kernel
+from pystencils.gpucuda.kernelcreation import create_cuda_kernel
+from pystencils_autodiff.backends._pytorch import numpy_dtype_to_torch
+
+try:
+    import torch
+except ImportError:
+    pass
+
+
+def _read_file(file):
+    with open(file, 'r') as f:
+        return f.read()
+
+
+def _write_file(filename, content):
+    with open(filename, 'w') as f:
+        return f.write(content)
+
+
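+# Renders the forward/backward kernels of an AutoDiffOp to C++ (and CUDA)
+# sources with Jinja2 and JIT-compiles them via torch.utils.cpp_extension.load.
+# Generated sources are cached on disk, keyed by op, shape and dtype.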
+def generate_torch(destination_folder,
+                   autodiff: pystencils_autodiff.AutoDiffOp,
+                   is_cuda,
+                   dtype,
+                   forward_ast=None,
+                   backward_ast=None):
+    shape = autodiff.forward_output_fields[0].spatial_shape
+    operation_hash = abs(hash(autodiff) + hash(shape) + hash(str(dtype)))
+    operation_string = "{}_native_{}_{}_{:x}".format(
+        autodiff.op_name, 'cuda' if is_cuda else 'cpu', 'x'.join(str(s) for s in shape), operation_hash)
+
+    cpp_file = join(destination_folder, operation_string + '.cpp')
+    cuda_kernel_file = join(destination_folder, operation_string + '.cu')
+
+    required_files = [cpp_file, cuda_kernel_file] if is_cuda else [cpp_file]
+
+    if not all(isfile(x) for x in required_files):
+        generate_ast = create_cuda_kernel if is_cuda else create_kernel
+        generate_code = generate_cuda if is_cuda else generate_c
+
+        if not forward_ast:
+            forward_ast = generate_ast(autodiff.forward_assignments.all_assignments)
+        if not backward_ast:
+            backward_ast = generate_ast(autodiff.backward_assignments.all_assignments)
+
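+        # Express all shape symbols in terms of a single output field so both
+        # kernels take one consistent set of size parameters.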
+        forward_ast.subs({s: FieldShapeSymbol(
+            [autodiff.forward_output_fields[0].name], s.coordinate) for s in forward_ast.atoms(FieldShapeSymbol)})
+        backward_ast.subs({s: FieldShapeSymbol(
+            [autodiff.backward_output_fields[0].name], s.coordinate) for s in backward_ast.atoms(FieldShapeSymbol)})
+        # backward_ast.subs({s: FieldStrideSymbol(
+        # autodiff.forward_input_fields[0].name, s.coordinate) for s in forward_ast.atoms(FieldStrideSymbol)})
+
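+        # The templates dispatch on PyTorch's scalar_t type, so rewrite the
+        # concrete float/double pointers emitted by the code generator.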
+        forward_code = generate_code(forward_ast.body).replace(
+            'float *', 'scalar_t *').replace('double *', 'scalar_t *')
+        backward_code = generate_code(backward_ast.body).replace(
+            'float *', 'scalar_t *').replace('double *', 'scalar_t *')
+
+        if is_cuda:
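+            # Derive the CUDA launch configuration (threads per block and grid
+            # size) from each kernel's indexing scheme.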
+            printer = CudaSympyPrinter()
+            block_and_thread_numbers = forward_ast.indexing.call_parameters(shape)
+            forward_block = ', '.join(printer.doprint(i) for i in block_and_thread_numbers['block'])
+            forward_grid = ', '.join(printer.doprint(i) for i in block_and_thread_numbers['grid'])
+            backward_shape = autodiff.backward_output_fields[0].spatial_shape
+            block_and_thread_numbers = backward_ast.indexing.call_parameters(backward_shape)
+            backward_block = ', '.join(printer.doprint(i) for i in block_and_thread_numbers['block'])
+            backward_grid = ', '.join(printer.doprint(i) for i in block_and_thread_numbers['grid'])
+            cuda_globals = pystencils.backends.cbackend.get_global_declarations(forward_ast) | \
+                pystencils.backends.cbackend.get_global_declarations(backward_ast)
+            cuda_globals = [generate_cuda(g) for g in cuda_globals]
+        else:
+            backward_block = forward_block = "INVALID"
+            backward_grid = forward_grid = "INVALID"
+            cuda_globals = ""
+
+        render_dict = {
+            "forward_tensors": [f for f in autodiff.forward_fields],
+            "forward_input_tensors": [f for f in autodiff.forward_input_fields],
+            "forward_output_tensors": [f for f in autodiff.forward_output_fields],
+            "backward_tensors": [f for f in autodiff.backward_fields + autodiff.forward_input_fields],
+            "backward_input_tensors": [f for f in autodiff.backward_input_fields],
+            "backward_output_tensors": [f for f in autodiff.backward_output_fields],
+            "forward_kernel": forward_code,
+            "backward_kernel": backward_code,
+            "dimensions": range(autodiff.forward_fields[0].spatial_dimensions),
+            "kernel_name": operation_string,
+            "forward_threads": "{" + forward_block + "}",
+            "forward_blocks": "{" + forward_grid + "}",
+            "backward_threads": "{" + backward_block + "}",
+            "backward_blocks": "{" + backward_grid + "}",
+            "cuda_globals": cuda_globals,
+            "dtype": pystencils.data_types.BasicType(dtype)
+        }
+
+        if is_cuda:
+            template_string_cpp = _read_file(join(dirname(__file__),
+                                                  'torch_native_cuda.tmpl.cpp'))
+            template = jinja2.Template(template_string_cpp)
+            output = template.render(render_dict)
+            _write_file(join(destination_folder, operation_string + '.cpp'), output)
+
+            template_string = _read_file(join(dirname(__file__), 'torch_native_cuda.tmpl.cu'))
+            template = jinja2.Template(template_string)
+            output = template.render(render_dict)
+            _write_file(join(destination_folder, operation_string + '.cu'), output)
+        else:
+            template_string_cpp = _read_file(join(dirname(__file__),
+                                                  'torch_native_cpu.tmpl.cpp'))
+            template = jinja2.Template(template_string_cpp)
+            output = template.render(render_dict)
+            _write_file(join(destination_folder, operation_string + '.cpp'), output)
+
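+    # JIT-compile the generated sources (torch caches builds between runs).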
+    from torch.utils.cpp_extension import load
+    compiled_operation = load(operation_string, required_files, verbose=True,
+                              extra_cuda_cflags=[])
+    # Read the source back from disk so `code` is also set on cache hits,
+    # where the rendering branch above is skipped.
+    compiled_operation.code = _read_file(cuda_kernel_file if is_cuda else cpp_file)
+    return compiled_operation
+
+
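+# Builds a torch.autograd.Function for an AutoDiffOp. Without explicit loop
+# callbacks the natively compiled extension is used; otherwise this falls back
+# to the pure-Python pystencils backend.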
+def create_autograd_function(autodiff_obj, inputfield_to_tensor_dict, forward_loop=None, backward_loop=None):
+    if forward_loop is None:
+        assert backward_loop is None
+        is_cuda = all(t.is_cuda for t in inputfield_to_tensor_dict.values())
+        assert is_cuda or all(not t.is_cuda for t in inputfield_to_tensor_dict.values()), \
+            "All tensors must be on GPU or all on CPU"
+        dtype = pystencils_autodiff.backends._pytorch.torch_dtype_to_numpy(
+            list(inputfield_to_tensor_dict.values())[0].dtype)
+
+        cache_dir = user_cache_dir('pystencils')
+        if not isdir(cache_dir):
+            os.makedirs(cache_dir)
+        # TODO: create function and stuff
+
+        compiled_operation = generate_torch(cache_dir, autodiff_obj, is_cuda, dtype)
+        field_to_tensor_dict = inputfield_to_tensor_dict
+        # Allocate output tensor for forward and backward pass
+        for field in chain(autodiff_obj.forward_output_fields, autodiff_obj.backward_output_fields):
+            field_to_tensor_dict[field] = torch.zeros(
+                *field.shape,
+                dtype=numpy_dtype_to_torch(field.dtype.numpy_dtype),
+                device=list(inputfield_to_tensor_dict.values())[0].device)
+
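+        # forward() runs the compiled forward kernel and saves the tensors that
+        # backward() needs to evaluate the compiled gradient kernel.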
+        def forward(self):
+            self.saved = {f: field_to_tensor_dict[f] for f in chain(
+                autodiff_obj.forward_input_fields, autodiff_obj.backward_output_fields)}
+            compiled_operation.forward(**{f.name: field_to_tensor_dict[f] for f in autodiff_obj.forward_fields})
+            return tuple(field_to_tensor_dict[f] for f in autodiff_obj.forward_output_fields)
+
+        def backward(self, *grad_outputs):
+            # Key the saved dict by Field (not by name) so the kwargs expansion
+            # in the backward call below stays uniform.
+            self.saved.update({f: grad_outputs[i] for i, f in enumerate(autodiff_obj.backward_input_fields)})
+            compiled_operation.backward(**{f.name: t for f, t in self.saved.items()})
+            return tuple(self.saved[f] for f in autodiff_obj.backward_output_fields)
+
+        cls = type(str(uuid.uuid4()), (torch.autograd.Function,), {})
+        cls.saved = None
+        cls.forward = forward
+        cls.backward = backward
+        cls.code = compiled_operation.code
+        return cls()
+    else:
+        op = pystencils_autodiff.backends._pytorch.create_autograd_function(autodiff_obj,
+                                                                            inputfield_to_tensor_dict,
+                                                                            forward_loop,
+                                                                            backward_loop,
+                                                                            convert_tensors_to_arrays=False)
+        return op
diff --git a/tests/backends/test_torch_native_compilation.py b/tests/backends/test_torch_native_compilation.py
index d9b350701cf013a3682af42359713310f6f1d0d4..2055941a1e92f25cdce6f6e62262543aef265e66 100644
--- a/tests/backends/test_torch_native_compilation.py
+++ b/tests/backends/test_torch_native_compilation.py
@@ -4,7 +4,7 @@
 #
 
 import os
-import tempfile
+import shutil
 from os.path import dirname, isfile, join
 
 import pytest
@@ -15,6 +15,11 @@ from pystencils_autodiff import create_backward_assignments
 from pystencils_autodiff._file_io import _write_file
 from pystencils_autodiff.backends.astnodes import TorchModule
 
+torch = pytest.importorskip('torch')
+pytestmark = pytest.mark.skipif(shutil.which('ninja') is None,
+                                reason='torch compilation requires ninja')
+
+# Assumed: the repository root lies two levels above this file's directory.
+PROJECT_ROOT = dirname(dirname(dirname(__file__)))
 
 @pytest.mark.skipif("TRAVIS" in os.environ, reason="nvcc compilation currently not working on TRAVIS")
 def test_torch_jit():
diff --git a/tests/test_autodiff.py b/tests/test_autodiff.py
index 25dc2293309edfd2c1bf7a012c95a873fac580f6..217cc3124ceb8e7aec35a163bef363e95180ade2 100644
--- a/tests/test_autodiff.py
+++ b/tests/test_autodiff.py
@@ -48,12 +48,3 @@ def test_simple_2d_check_raw_assignments():
     for diff_mode in DiffModes:
         pystencils_autodiff.create_backward_assignments(
             forward_assignments, diff_mode=diff_mode)
-
-
-def main():
-    test_simple_2d_check_assignment_collection()
-    test_simple_2d_check_raw_assignments()
-
-
-if __name__ == '__main__':
-    main()
diff --git a/tests/test_tfmad.py b/tests/test_tfmad.py
index 309ec94cf5ac920e48dfc7b8cb09a6a65ee6c08a..87efa0c974ccb342144decc7c53efa5e6cbb38d3 100644
--- a/tests/test_tfmad.py
+++ b/tests/test_tfmad.py
@@ -4,8 +4,6 @@ import os
 import numpy as np
 import pytest
 import sympy as sp
-import tensorflow as tf
-import torch
 
 import pystencils as ps
 import pystencils_autodiff
@@ -46,8 +44,8 @@ def test_tfmad_two_stencils():
     print(assignment_collection)
 
     print('Backward')
-    auto_diff = pystencils_autodiff.AutoDiffOp(
-        assignment_collection, diff_mode='transposed-forward')
+    auto_diff = pystencils_autodiff.AutoDiffOp(assignment_collection,
+                                               diff_mode='transposed-forward')
     backward = auto_diff.backward_assignments
     print(backward)
     print('Forward output fields (to check order)')
@@ -56,13 +54,15 @@ def test_tfmad_two_stencils():
     print(auto_diff)
 
 
-@pytest.mark.skipif("NO_TENSORFLOW_TEST" in os.environ, reason="Requires Tensorflow")
 @pytest.mark.skipif("TRAVIS" in os.environ, reason="Temporary skip")
 def test_tfmad_gradient_check():
+    tf = pytest.importorskip('tensorflow')
+
     a, b, out = ps.fields("a, b, out: double[21,13]")
     print(a.shape)
 
-    cont = ps.fd.Diff(a, 0) - ps.fd.Diff(a, 1) - ps.fd.Diff(b, 0) + ps.fd.Diff(b, 1)
+    cont = ps.fd.Diff(a, 0) - ps.fd.Diff(a, 1) - ps.fd.Diff(b, 0) + ps.fd.Diff(
+        b, 1)
     discretize = ps.fd.Discretization2ndOrder(dx=1)
     discretization = discretize(cont)
 
@@ -72,8 +72,8 @@ def test_tfmad_gradient_check():
     print(assignment_collection)
 
     print('Backward')
-    auto_diff = pystencils_autodiff.AutoDiffOp(
-        assignment_collection, diff_mode='transposed-forward')
+    auto_diff = pystencils_autodiff.AutoDiffOp(assignment_collection,
+                                               diff_mode='transposed-forward')
     backward = auto_diff.backward_assignments
     print(backward)
     print('Forward output fields (to check order)')
@@ -87,7 +87,11 @@ def test_tfmad_gradient_check():
         sess.run(tf.global_variables_initializer())
 
         gradient_error = compute_gradient_error_without_border(
-            [a_tensor, b_tensor], [a.shape, b.shape], out_tensor, out.shape, num_border_pixels=2, ndim=2)
+            [a_tensor, b_tensor], [a.shape, b.shape],
+            out_tensor,
+            out.shape,
+            num_border_pixels=2,
+            ndim=2)
         print('error: %s' % gradient_error.max_error)
 
         assert any(e < 1e-4 for e in gradient_error.values())
@@ -100,30 +104,31 @@ def check_tfmad_vector_input_data(args):
 
     # create arrays
     c_arr = np.zeros(domain_shape)
-    v_arr = np.zeros(domain_shape + (ndim,))
+    v_arr = np.zeros(domain_shape + (ndim, ))
 
     # create fields
-    c, v, c_next = ps.fields("c, v(2), c_next: % s[ % i, % i]" % ("float" if dtype == np.float32 else "double",
-                                                                  domain_shape[0],
-                                                                  domain_shape[1]),
+    c, v, c_next = ps.fields("c, v(2), c_next: % s[ % i, % i]" %
+                             ("float" if dtype == np.float32 else "double",
+                              domain_shape[0], domain_shape[1]),
                              c=c_arr,
                              v=v_arr,
                              c_next=c_arr)
 
     # write down advection diffusion pde
     # the equation is represented by a single term and an implicit "=0" is assumed.
-    adv_diff_pde = ps.fd.transient(
-        c) - ps.fd.diffusion(c, sp.Symbol("D")) + ps.fd.advection(c, v)
+    adv_diff_pde = ps.fd.transient(c) - ps.fd.diffusion(
+        c, sp.Symbol("D")) + ps.fd.advection(c, v)
 
     discretize = ps.fd.Discretization2ndOrder(args.dx, args.dt)
     discretization = discretize(adv_diff_pde)
-    discretization = discretization.subs(
-        sp.Symbol("D"), args.diffusion_coefficient)
+    discretization = discretization.subs(sp.Symbol("D"),
+                                         args.diffusion_coefficient)
     forward_assignments = ps.AssignmentCollection(
         [ps.Assignment(c_next.center(), discretization)], [])
 
     autodiff = pystencils_autodiff.AutoDiffOp(
-        forward_assignments, diff_mode='transposed-forward')  # , constant_fields=[v]
+        forward_assignments,
+        diff_mode='transposed-forward')  # , constant_fields=[v]
 
     print('Forward assignments:')
     print(autodiff.forward_assignments)
@@ -133,32 +138,31 @@ def check_tfmad_vector_input_data(args):
 
 def test_tfmad_vector_input_data():
     parser = argparse.ArgumentParser()
-    parser.add_argument(
-        '--domain_shape', default=(100, 30), nargs=2, type=int, help="")
-    parser.add_argument(
-        '--dx', default=1, type=float, help="")
-    parser.add_argument(
-        '--dt', default=0.01, type=float, help="")
-    parser.add_argument(
-        '--diffusion_coefficient', default=1, type=float, help="")
-    parser.add_argument(
-        '--num_total_time_steps', default=100, type=int)
-    parser.add_argument(
-        '--num_time_steps_for_op', default=1, type=int)
-    parser.add_argument(
-        '--learning_rate', default=1e-2, type=float)
-    parser.add_argument(
-        '--dtype', default=np.float64, type=np.dtype)
-    parser.add_argument(
-        '--num_optimization_steps', default=2000, type=int)
+    parser.add_argument('--domain_shape',
+                        default=(100, 30),
+                        nargs=2,
+                        type=int,
+                        help="")
+    parser.add_argument('--dx', default=1, type=float, help="")
+    parser.add_argument('--dt', default=0.01, type=float, help="")
+    parser.add_argument('--diffusion_coefficient',
+                        default=1,
+                        type=float,
+                        help="")
+    parser.add_argument('--num_total_time_steps', default=100, type=int)
+    parser.add_argument('--num_time_steps_for_op', default=1, type=int)
+    parser.add_argument('--learning_rate', default=1e-2, type=float)
+    parser.add_argument('--dtype', default=np.float64, type=np.dtype)
+    parser.add_argument('--num_optimization_steps', default=2000, type=int)
     parser.add_argument('vargs', nargs='*')
 
     args = parser.parse_args()
     check_tfmad_vector_input_data(args)
 
 
-@pytest.mark.skipif("NO_TORCH_TEST" in os.environ, reason="Requires PyTorch")
 def test_tfmad_gradient_check_torch():
+    torch = pytest.importorskip('torch')
+
     a, b, out = ps.fields("a, b, out: float[21,13]")
 
     cont = ps.fd.Diff(a, 0) - ps.fd.Diff(a, 1) - \
@@ -172,8 +176,8 @@ def test_tfmad_gradient_check_torch():
     print(assignment_collection)
 
     print('Backward')
-    auto_diff = pystencils_autodiff.AutoDiffOp(
-        assignment_collection, diff_mode='transposed-forward')
+    auto_diff = pystencils_autodiff.AutoDiffOp(assignment_collection,
+                                               diff_mode='transposed-forward')
     backward = auto_diff.backward_assignments
     print(backward)
     print('Forward output fields (to check order)')
@@ -182,7 +186,11 @@ def test_tfmad_gradient_check_torch():
     a_tensor = torch.zeros(*a.shape, dtype=torch.float64, requires_grad=True)
     b_tensor = torch.zeros(*b.shape, dtype=torch.float64, requires_grad=True)
 
-    function = auto_diff.create_tensorflow_op({a: a_tensor, b: b_tensor}, backend='torch')
+    function = auto_diff.create_tensorflow_op({a: a_tensor, b: b_tensor},
+                                              backend='torch')
 
     torch.autograd.gradcheck(function.apply, [a_tensor, b_tensor])
 
@@ -230,31 +238,21 @@ def get_curl(input_field: ps.Field, curl_field: ps.Field):
 def test_tfmad_two_outputs():
 
     domain_shape = (20, 30)
-    vector_shape = domain_shape + (2,)
+    vector_shape = domain_shape + (2, )
 
-    curl_input_for_u = ps.Field.create_fixed_size(
-        field_name='curl_input', shape=domain_shape, index_dimensions=0)
-    u_field = ps.Field.create_fixed_size(
-        field_name='curl', shape=vector_shape, index_dimensions=1)
+    curl_input_for_u = ps.Field.create_fixed_size(field_name='curl_input',
+                                                  shape=domain_shape,
+                                                  index_dimensions=0)
+    u_field = ps.Field.create_fixed_size(field_name='curl',
+                                         shape=vector_shape,
+                                         index_dimensions=1)
 
-    curl_op = pystencils_autodiff.AutoDiffOp(get_curl(
-        curl_input_for_u, u_field), diff_mode="transposed-forward")
+    curl_op = pystencils_autodiff.AutoDiffOp(get_curl(curl_input_for_u,
+                                                      u_field),
+                                             diff_mode="transposed-forward")
 
     print('Forward')
     print(curl_op.forward_assignments)
 
     print('Backward')
     print(curl_op.backward_assignments)
-
-
-def main():
-    # test_tfmad_stencil()
-    # test_tfmad_two_stencils()
-    # test_tfmad_gradient_check_torch()
-    test_tfmad_gradient_check()
-    # test_tfmad_vector_input_data()
-    # test_tfmad_two_outputs()
-
-
-if __name__ == '__main__':
-    main()
diff --git a/tests/travis_install.sh b/tests/travis_install.sh
index 5eaf44ffb12584b013fbfb0332e8e22d4fea2aaa..d37350e625281e087fce8aaa9654ad98d2a53b73 100644
--- a/tests/travis_install.sh
+++ b/tests/travis_install.sh
@@ -38,19 +38,25 @@ if [[ "$DISTRIB" == "conda" ]]; then
     # (prefer local venv, since the miniconda folder is cached)
     conda create -p ./.venv --yes python=${PYTHON_VERSION} pip virtualenv
     source activate ./.venv
+    alias pip3='python -m pip'
+    shopt -s expand_aliases
 fi
 
 # for all
-pip install -U pip setuptools
-pip install tox
-pip install codecov
-pip install sphinx
-# use newest pystencils
-pip install git+https://github.com/mabau/pystencils.git
+pip3 install -U pip wheel setuptools
+pip3 install tox
+pip3 install codecov
+pip3 install sphinx
+
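+# PYSTENCIL_FROM_PIP selects the pystencils release from PyPI instead of
+# the current development version from GitHub.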
+if [[ -z "$PYSTENCIL_FROM_PIP" ]]; then
+    pip install git+https://github.com/mabau/pystencils.git
+else
+    pip install pystencils
+fi
 pip install flake8
 
 if [[ "$COVERAGE" == "true" ]]; then
-    pip install -U pytest-cov pytest-virtualenv coverage coveralls flake8
+    pip3 install -U pytest-cov pytest-virtualenv coverage coveralls flake8
 fi