diff --git a/setup.cfg b/setup.cfg
index 6932c1fdc8c13e9eed8a2a926187a19eb16b4d7b..5347592e9860fdd40b5ac840f9fbc5107104b451 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -43,7 +43,7 @@ test_requires =
     pytest-html
     ansi2html
     pytest-cov
-    tensorflow
+    tensorflow-gpu
 
 # Require a specific Python version, e.g. Python 2.7 or >= 3.4
 # python_requires = >=2.7,!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*
@@ -62,7 +62,7 @@ testing =
     pytest-html
     ansi2html
     pytest-cov
-    tensorflow
+    tensorflow-gpu
 
 pybind11 =
     cppimport
diff --git a/src/pystencils_autodiff/_autodiff.py b/src/pystencils_autodiff/_autodiff.py
index b8f16fe587c838533a5e09fbf11553b3078313b2..8c78c84a466894863d5fe814f9524d2290681e9e 100644
--- a/src/pystencils_autodiff/_autodiff.py
+++ b/src/pystencils_autodiff/_autodiff.py
@@ -35,7 +35,7 @@ Backward:
 
     def __init__(self,
                  forward_assignments: List[ps.Assignment],
-                 op_name: str = "AutoDiffOp",
+                 op_name: str = "autodiffop",
                  time_constant_fields: List[ps.Field] = [],
                  constant_fields: List[ps.Field] = [],
                  diff_fields_prefix='diff',  # TODO: remove!
@@ -469,16 +469,17 @@ Backward:
         return self.create_tensorflow_op(*args, backend='torch_native', **kwags)
 
     def create_tensorflow_op(self,
-                             inputfield_tensor_dict,
+                             inputfield_tensor_dict={},
                              forward_loop=None,
                              backward_loop=None,
+                             use_cuda=True,
                              backend='tensorflow'):
         """
         Creates custom differentiable Tensorflow Op from assignments.
         Will return either a single output tensor or a OrderedDict[field_name -> tf.Tensor] in case of multiple outputs
         """
         backend = backend.lower()
-        assert backend in AVAILABLE_BACKENDS, "\"{}\" is not a. Available backends: {}".format(
+        assert backend in AVAILABLE_BACKENDS, "\"{}\" is not a valid backend. Available backends: {}".format(
             backend, AVAILABLE_BACKENDS)
 
         additional_fields = [f for f in inputfield_tensor_dict.keys(
@@ -536,17 +537,20 @@ Backward:
 
             backward_loop = backward_function
 
-        if backend == 'tensorflow':
+        if backend == 'tensorflow_native':
             import pystencils_autodiff.backends._tensorflow
-            op = pystencils_autodiff.backends._tensorflow.tensorflowop_from_autodiffop(
-                self, inputfield_tensor_dict, forward_loop, backward_loop)
+            op = pystencils_autodiff.backends._tensorflow.native_tensorflowop_from_autodiffop(self, use_cuda)
         elif backend == 'torch':
             import pystencils_autodiff.backends._pytorch
             op = pystencils_autodiff.backends._pytorch.create_autograd_function(
                 self, inputfield_tensor_dict, forward_loop, backward_loop)
         elif backend == 'torch_native':
             import pystencils_autodiff.backends._torch_native
-            op = pystencils_autodiff.backends._torch_native.create_autograd_function(self, inputfield_tensor_dict)
+            op = pystencils_autodiff.backends._torch_native.create_autograd_function(self, use_cuda)
+        elif backend == 'tensorflow':
+            import pystencils_autodiff.backends._tensorflow
+            op = pystencils_autodiff.backends._tensorflow.tensorflowop_from_autodiffop(
+                self, inputfield_tensor_dict, forward_loop, backward_loop)
         else:
             raise NotImplementedError()
 
diff --git a/src/pystencils_autodiff/backends/__init__.py b/src/pystencils_autodiff/backends/__init__.py
index d22da0fb454f507d5f00117e2f885dc48e2c4884..8744d1945f67f3651f259c66d69681a48941ddc5 100644
--- a/src/pystencils_autodiff/backends/__init__.py
+++ b/src/pystencils_autodiff/backends/__init__.py
@@ -6,4 +6,4 @@
 a Torch or a Tensorflow operation or we can compile a static library to be
 directly loaded into Torch/Tensorflow.
 """
-AVAILABLE_BACKENDS = ['tensorflow', 'torch', 'tensorflow_cpp', 'torch_native']
+AVAILABLE_BACKENDS = ['tensorflow', 'torch', 'tensorflow_native', 'torch_native']
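
Usage note (not part of the patch, added for review): the reworked create_tensorflow_op signature is exercised by the new tests at the end of this diff. A minimal sketch, with illustrative field names, of how the two new native backends are selected; they compile the kernels and take use_cuda instead of an inputfield_tensor_dict:

    import pystencils as ps
    import pystencils_autodiff

    # Build a trivial AutoDiffOp (mirrors the new tests further down in this diff).
    a, b, out = ps.fields("a, b, out: float64[21,13]")
    assignments = ps.AssignmentCollection(
        [ps.Assignment(out.center(), 1.2 * a.center + 0.1 * b.center)], [])
    auto_diff = pystencils_autodiff.AutoDiffOp(assignments, diff_mode='transposed-forward')

    # Native backends: no tensor dict needed at creation time, the target device
    # is chosen via use_cuda; 'torch' and 'tensorflow' keep the old dict-based API.
    torch_op = auto_diff.create_tensorflow_op(use_cuda=False, backend='torch_native')
    tf_op = auto_diff.create_tensorflow_op(use_cuda=False, backend='tensorflow_native')
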
""" -AVAILABLE_BACKENDS = ['tensorflow', 'torch', 'tensorflow_cpp', 'torch_native'] +AVAILABLE_BACKENDS = ['tensorflow', 'torch', 'tensorflow_native', 'torch_native'] diff --git a/src/pystencils_autodiff/backends/_tensorflow.py b/src/pystencils_autodiff/backends/_tensorflow.py index b24343781d40d3845b3b1a99c5b68b46e8d06040..59a66f48c7f78f04ecccc4d3226a84d5b726fb9a 100644 --- a/src/pystencils_autodiff/backends/_tensorflow.py +++ b/src/pystencils_autodiff/backends/_tensorflow.py @@ -1,7 +1,12 @@ +from collections.abc import Iterable + +import stringcase import tensorflow as tf from tensorflow.compat.v1 import get_default_graph, py_func import pystencils_autodiff +from pystencils_autodiff.backends.astnodes import TensorflowModule +from pystencils_autodiff.tensorflow_jit import _hash _num_generated_ops = 0 @@ -36,6 +41,44 @@ def _py_func(func, inp, Tout, stateful=False, name=None, grad=None): return py_func(func, inp, Tout, stateful=stateful, name=name) +def native_tensorflowop_from_autodiffop(autodiff_obj: pystencils_autodiff.AutoDiffOp, + use_cuda): + + if use_cuda: + forward_ast = autodiff_obj.forward_ast_gpu + backward_ast = autodiff_obj.backward_ast_gpu + else: + forward_ast = autodiff_obj.forward_ast_cpu + backward_ast = autodiff_obj.backward_ast_cpu + + autodiff_obj.op_name = f'{autodiff_obj.op_name}_hash{_hash(str(autodiff_obj).encode()).hexdigest()}' + if use_cuda: + autodiff_obj.op_name += '_cuda' + forward_ast.function_name = autodiff_obj.op_name + "_forward" + backward_ast.function_name = autodiff_obj.op_name + "_backward" + module = TensorflowModule(autodiff_obj.op_name, [forward_ast, backward_ast]) + compiled_op = module.compile() + + backward_func = getattr(compiled_op, stringcase.snakecase( + stringcase.pascalcase("call_" + backward_ast.function_name))) + + def gradient_calculation(op, grad): + if isinstance(grad, Iterable): + grad = [grad] + return backward_func(**{autodiff_obj.backward_input_fields[i].name: g for i, g in enumerate(grad)}, + **{autodiff_obj.forward_input_fields[i].name: inp for i, inp in enumerate(op.inputs) + if autodiff_obj.forward_input_fields[i] in backward_ast.fields_accessed}) + + try: + tf.RegisterGradient(stringcase.pascalcase("call_" + forward_ast.function_name))( + gradient_calculation + ) + except Exception: + pass + + return getattr(compiled_op, stringcase.snakecase(stringcase.pascalcase("call_" + forward_ast.function_name))) + + def tensorflowop_from_autodiffop(autodiffop: pystencils_autodiff.AutoDiffOp, inputfield_tensor_dict, forward_function, diff --git a/src/pystencils_autodiff/backends/_tensorflow_cpp.py b/src/pystencils_autodiff/backends/_tensorflow_cpp.py deleted file mode 100644 index d64f1765bc4756e84ddaa5da0cf2eb5cac7ea7bf..0000000000000000000000000000000000000000 --- a/src/pystencils_autodiff/backends/_tensorflow_cpp.py +++ /dev/null @@ -1,15 +0,0 @@ -"""Implementing a custom Tensorflow Op in C++ has some advantages and disadvantages - -Advantages: -- GPU support without any hacks -- Access to raw tensors without conversion to numpy -- Custom Ops will be serializable - -Disadavantages: -- C++ Code has to be build with correct parameters and ABI -for present Tensorflow version (best integrated into Tensorflow build) - -""" - - -# raise NotImplementedError() diff --git a/src/pystencils_autodiff/backends/_torch_native.py b/src/pystencils_autodiff/backends/_torch_native.py index 61022bdc49717b86fd024de299212b9c3da246c4..b8b2ff559d5344804ddb69f1a868eac6a74e948a 100644 --- a/src/pystencils_autodiff/backends/_torch_native.py +++ 
diff --git a/src/pystencils_autodiff/backends/_torch_native.py b/src/pystencils_autodiff/backends/_torch_native.py
index 61022bdc49717b86fd024de299212b9c3da246c4..b8b2ff559d5344804ddb69f1a868eac6a74e948a 100644
--- a/src/pystencils_autodiff/backends/_torch_native.py
+++ b/src/pystencils_autodiff/backends/_torch_native.py
@@ -1,8 +1,8 @@
-import uuid
 from collections import OrderedDict
 
 from pystencils_autodiff.backends._pytorch import numpy_dtype_to_torch
 from pystencils_autodiff.backends.astnodes import TorchModule
+from pystencils_autodiff.tensorflow_jit import _hash
 
 try:
     import torch
@@ -10,48 +10,83 @@ except ImportError:
     pass
 
 
-def create_autograd_function(autodiff_obj, inputfield_to_tensor_dict):
-    field_to_tensor_dict = inputfield_to_tensor_dict
-
+def create_autograd_function(autodiff_obj, use_cuda):
+    field_to_tensor_dict = dict()
     # Allocate output tensor for forward and backward pass
-    for field in autodiff_obj.forward_output_fields + autodiff_obj.backward_output_fields:
-        field_to_tensor_dict[field] = torch.zeros(
-            *field.shape,
-            dtype=numpy_dtype_to_torch(field.dtype.numpy_dtype),
-            device=list(inputfield_to_tensor_dict.values())[0].device)
-
-    all_tensors = field_to_tensor_dict.values()
-    is_cuda = all(a.is_cuda for a in all_tensors)
+    # for field in autodiff_obj.forward_output_fields + autodiff_obj.backward_output_fields:
+    #     field_to_tensor_dict[field] = torch.zeros(
+    #         *field.shape,
+    #         dtype=numpy_dtype_to_torch(field.dtype.numpy_dtype),
+    #         device=list(inputfield_to_tensor_dict.values())[0].device)
 
-    if is_cuda:
+    if use_cuda:
         forward_ast = autodiff_obj.forward_ast_gpu
         backward_ast = autodiff_obj.backward_ast_gpu
     else:
         forward_ast = autodiff_obj.forward_ast_cpu
         backward_ast = autodiff_obj.backward_ast_cpu
 
-    op_name = autodiff_obj.op_name + str(uuid.uuid4())
-    compiled_op = TorchModule(op_name, [forward_ast, backward_ast])
-
-    output_tensors = OrderedDict({f.name: field_to_tensor_dict[f] for f in autodiff_obj.forward_output_fields})
-    backward_output_tensors = OrderedDict(
-        {f.name: field_to_tensor_dict[f] for f in autodiff_obj.backward_output_fields})
-
-    def forward(self, **input_tensors):
-
-        self.save_for_backward(**input_tensors)
+    op_name = f'{autodiff_obj.op_name}_{_hash(str(autodiff_obj).encode()).hexdigest()}'
+    module = TorchModule(op_name, [forward_ast, backward_ast])
+    compiled_op = module.compile()
+
+    # print(TorchModule(op_name, [forward_ast, backward_ast]))
+    # wrapper = module.atoms(WrapperFunction)
+    # forward_wrapper_ast = [w for w in wrapper if w.function_name == "call_" + forward_ast.function_name][0]
+    # backward_wrapper_ast = [w for w in wrapper if w.function_name == "call_" + backward_ast.function_name][0]
+    # forward_parameters = [str(p.symbol) for p in forward_wrapper_ast.get_parameters()]
+    # backward_parameters = [str(p.symbol) for p in backward_wrapper_ast.get_parameters()]
+
+    def forward(self, *args):
+
+        if use_cuda:
+            args = [a.contiguous().cuda() for a in args]
+        else:
+            args = [a.contiguous().cpu() for a in args]
+
+        input_tensors = dict()
+        input_tensors.update({f.name: args[i] for i, f in enumerate(
+            autodiff_obj.forward_input_fields) if f in forward_ast.fields_accessed})
+        assert all(f.shape == args[i].shape for i, f in enumerate(autodiff_obj.forward_input_fields))
+        assert all(f.strides == tuple(args[i].stride(j) for j in range(args[i].ndim))
+                   for i, f in enumerate(autodiff_obj.forward_input_fields))
+        for field in autodiff_obj.forward_output_fields:
+            field_to_tensor_dict[field] = torch.zeros(
+                field.shape,
+                dtype=numpy_dtype_to_torch(field.dtype.numpy_dtype),
+                device=args[0].device)
+        output_tensors = OrderedDict({f.name: field_to_tensor_dict[f] for f in autodiff_obj.forward_output_fields})
+
+        self.save_for_backward(*args)
 
         getattr(compiled_op, "call_" + forward_ast.function_name)(**input_tensors, **output_tensors)
-        return output_tensors.values()
+        return tuple(output_tensors.values())
 
     def backward(self, *grad_outputs):
+        if use_cuda:
+            grad_outputs = [a.contiguous().cuda() for a in grad_outputs]
+        else:
+            grad_outputs = [a.contiguous().cpu() for a in grad_outputs]
         gradients = {f.name: grad_outputs[i] for i, f in enumerate(autodiff_obj.backward_input_fields)}
-        saved = self.saved_tensors
-
+        assert all(f.shape == grad_outputs[i].shape for i, f in enumerate(autodiff_obj.backward_input_fields))
+        assert all(f.strides == tuple(grad_outputs[i].stride(j) for j in range(grad_outputs[i].ndim))
+                   for i, f in enumerate(autodiff_obj.backward_input_fields))
+        assert all(a.is_cuda == use_cuda for a in grad_outputs), f"Some of the tensors were on the wrong device. " \
+            f"Op was compiled for CUDA: {str(use_cuda)}"
+        saved = {f.name: self.saved_tensors[i] for i, f in enumerate(
+            autodiff_obj.forward_input_fields) if f in backward_ast.fields_accessed}
+        for field in autodiff_obj.backward_output_fields:
+            field_to_tensor_dict[field] = torch.zeros(
+                field.shape,
+                dtype=numpy_dtype_to_torch(field.dtype.numpy_dtype),
+                device=grad_outputs[0].device)
+
+        backward_output_tensors = OrderedDict(
+            {f.name: field_to_tensor_dict[f] for f in autodiff_obj.backward_output_fields})
         getattr(compiled_op, "call_" + backward_ast.function_name)(**gradients, **saved, **backward_output_tensors)
-        return backward_output_tensors.values()
+        return tuple(backward_output_tensors.values())
 
     cls = type(op_name, (torch.autograd.Function,), {})
     cls.forward = forward
diff --git a/src/pystencils_autodiff/tensorflow_jit.py b/src/pystencils_autodiff/tensorflow_jit.py
index bd7b2f67915be2f480f668dbd3df62681ff630f3..909b980f43821ed70ccc21193a9210d70b5c4f4d 100644
--- a/src/pystencils_autodiff/tensorflow_jit.py
+++ b/src/pystencils_autodiff/tensorflow_jit.py
@@ -35,7 +35,7 @@ else:
     _do_not_link_flag = '-c'
     _output_flag = '-o'
     _shared_object_flag = '/DLL'
-    _include_flags = ['/I' + sysconfig.get_paths()['include'], '/I' + get_pystencils_include_path()]
+    _include_flags = ['-I' + sysconfig.get_paths()['include'], '-I' + get_pystencils_include_path()]
     _position_independent_flag = "/DTHIS_FLAG_DOES_NOTHING"
     get_compiler_config()['command'] = 'cl.exe'
     config_env = get_compiler_config()['env'] if 'env' in get_compiler_config() else {}
diff --git a/tests/backends/test_torch_native_compilation.py b/tests/backends/test_torch_native_compilation.py
index 2d01db730754f57f15013431a342232ae87478d0..5814ca50eb93dd1af056909636360f73fcbfcb02 100644
--- a/tests/backends/test_torch_native_compilation.py
+++ b/tests/backends/test_torch_native_compilation.py
@@ -13,7 +13,7 @@ import sympy
 
 import pystencils
 from pystencils_autodiff import create_backward_assignments
-from pystencils_autodiff._file_io import write_cached_content, write_file
+from pystencils_autodiff._file_io import write_cached_content
 from pystencils_autodiff.backends.astnodes import PybindModule, TorchModule
 
 torch = pytest.importorskip('torch')
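
Usage sketch for the reworked torch_native backend (not part of the patch; it mirrors the gradcheck test that follows). create_autograd_function now returns a torch.autograd.Function subclass: inputs are passed positionally to .apply() in forward_input_fields order, output tensors are allocated inside forward, and all tensors are made contiguous on the device selected by use_cuda:

    import pystencils as ps
    import pystencils_autodiff
    import torch

    a, b, out = ps.fields("a, b, out: float64[21,13]")
    assignments = ps.AssignmentCollection(
        [ps.Assignment(out.center(), 1.2 * a.center + 0.1 * b.center)], [])
    auto_diff = pystencils_autodiff.AutoDiffOp(assignments, diff_mode='transposed-forward')

    op = auto_diff.create_tensorflow_op(use_cuda=False, backend='torch_native')

    a_tensor = torch.zeros(*a.shape, dtype=torch.float64, requires_grad=True).contiguous()
    b_tensor = torch.zeros(*b.shape, dtype=torch.float64, requires_grad=True).contiguous()

    # Positional order must match auto_diff.forward_input_fields.
    inputs = tuple({a: a_tensor, b: b_tensor}[f] for f in auto_diff.forward_input_fields)
    (out_tensor,) = op.apply(*inputs)
    torch.autograd.gradcheck(op.apply, inputs, atol=1e-4)
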
diff --git a/tests/test_tfmad.py b/tests/test_tfmad.py
index 7dd10e1d6a642b127744f07593fa02332e3cccb4..8d14fa08229b85f22ea7bdca21b60a8b3e2d2541 100644
--- a/tests/test_tfmad.py
+++ b/tests/test_tfmad.py
@@ -1,4 +1,5 @@
 import argparse
+import itertools
 import os
 
 import numpy as np
@@ -165,10 +166,10 @@ def test_tfmad_gradient_check_torch():
 
     a, b, out = ps.fields("a, b, out: float[21,13]")
 
-    cont = ps.fd.Diff(a, 0) - ps.fd.Diff(a, 1) - \
-        ps.fd.Diff(b, 0) + ps.fd.Diff(b, 1)
+    cont = 2 * ps.fd.Diff(a, 0) - 1.5 * ps.fd.Diff(a, 1) \
+        - ps.fd.Diff(b, 0) + 3 * ps.fd.Diff(b, 1)
     discretize = ps.fd.Discretization2ndOrder(dx=1)
-    discretization = discretize(cont)
+    discretization = discretize(cont) + 1.2*a.center
 
     assignment = ps.Assignment(out.center(), discretization)
     assignment_collection = ps.AssignmentCollection([assignment], [])
@@ -189,12 +190,109 @@ def test_tfmad_gradient_check_torch():
     function = auto_diff.create_tensorflow_op({
         a: a_tensor,
         b: b_tensor
-    },
-        backend='torch')
+    }, backend='torch')
 
     torch.autograd.gradcheck(function.apply, [a_tensor, b_tensor])
 
 
+@pytest.mark.parametrize('with_offsets, with_cuda', itertools.product((False, True), repeat=2))
+def test_tfmad_gradient_check_torch_native(with_offsets, with_cuda):
+    torch = pytest.importorskip('torch')
+    import torch
+
+    a, b, out = ps.fields("a, b, out: float64[21,13]")
+
+    if with_offsets:
+        cont = 2*ps.fd.Diff(a, 0) - 1.5*ps.fd.Diff(a, 1) - ps.fd.Diff(b, 0) + 3 * ps.fd.Diff(b, 1)
+        discretize = ps.fd.Discretization2ndOrder(dx=1)
+        discretization = discretize(cont)
+
+        assignment = ps.Assignment(out.center(), discretization + 1.2*a.center())
+    else:
+        assignment = ps.Assignment(out.center(), 1.2*a.center + 0.1*b.center)
+
+    assignment_collection = ps.AssignmentCollection([assignment], [])
+    print('Forward')
+    print(assignment_collection)
+
+    print('Backward')
+    auto_diff = pystencils_autodiff.AutoDiffOp(assignment_collection,
+                                               diff_mode='transposed-forward')
+    backward = auto_diff.backward_assignments
+    print(backward)
+    print('Forward output fields (to check order)')
+    print(auto_diff.forward_input_fields)
+
+    a_tensor = torch.zeros(*a.shape, dtype=torch.float64, requires_grad=True).contiguous()
+    b_tensor = torch.zeros(*b.shape, dtype=torch.float64, requires_grad=True).contiguous()
+
+    if with_cuda:
+        a_tensor = a_tensor.cuda()
+        b_tensor = b_tensor.cuda()
+
+    function = auto_diff.create_tensorflow_op(use_cuda=with_cuda, backend='torch_native')
+
+    dict = {
+        a: a_tensor,
+        b: b_tensor
+    }
+    torch.autograd.gradcheck(function.apply, tuple(
+        [dict[f] for f in auto_diff.forward_input_fields]), atol=1e-4, raise_exception=True)
+
+
+@pytest.mark.parametrize('with_offsets, with_cuda, gradient_check', itertools.product((False, True), repeat=3))
+def test_tfmad_gradient_check_tensorflow_native(with_offsets, with_cuda, gradient_check):
+    pytest.importorskip('tensorflow')
+    import tensorflow as tf
+
+    a, b, out = ps.fields("a, b, out: double[21,13]")
+    print(a.shape)
+
+    if with_offsets:
+        cont = 2*ps.fd.Diff(a, 0) - 1.5*ps.fd.Diff(a, 1) - ps.fd.Diff(b, 0) + 3 * ps.fd.Diff(b, 1)
+        discretize = ps.fd.Discretization2ndOrder(dx=1)
+        discretization = discretize(cont)
+
+        assignment = ps.Assignment(out.center(), 1.2*a.center + 0.1*b[1, 0])
+    else:
+        assignment = ps.Assignment(out.center(), 1.2*a.center + 0.1*b.center)
+
+    assignment_collection = ps.AssignmentCollection([assignment], [])
+    print('Forward')
+    print(assignment_collection)
+
+    print('Backward')
+    auto_diff = pystencils_autodiff.AutoDiffOp(assignment_collection,
+                                               diff_mode='transposed')
+    backward = auto_diff.backward_assignments
+    print(backward)
+    print('Forward output fields (to check order)')
+    print(auto_diff.forward_input_fields)
+
+    a_tensor = tf.Variable(np.zeros(a.shape, a.dtype.numpy_dtype))
+    b_tensor = tf.Variable(np.zeros(a.shape, a.dtype.numpy_dtype))
+    # out_tensor = auto_diff.create_tensorflow_op(use_cuda=with_cuda, backend='tensorflow_native')
+    # print(out_tensor)
+
+    out_tensor = auto_diff.create_tensorflow_op(use_cuda=with_cuda,
+                                                backend='tensorflow_native')(a=a_tensor, b=b_tensor)
+
+    with tf.compat.v1.Session() as sess:
+        sess.run(tf.compat.v1.global_variables_initializer())
+        sess.run(out_tensor)
+
+        if gradient_check:
+            gradient_error = compute_gradient_error_without_border(
+                [a_tensor, b_tensor], [a.shape, b.shape],
+                out_tensor,
+                out.shape,
+                num_border_pixels=2,
+                ndim=2,
+                debug=False)
+            print('error: %s' % gradient_error.max_error)
+            print('avg error: %s' % gradient_error.avg_error)
+
+            assert any(e < 1e-4 for e in gradient_error.values())
+
+
 def get_curl(input_field: ps.Field, curl_field: ps.Field):
     """Return a ps.AssignmentCollection describing the calculation of
     the curl given a 2d or 3d vector field [z,y,x](f) or [y,x](f)