Commit 16bfa1e4 authored by Stephan Seitz

Delete duplicate files in wrong place

parent 72efef52

from typing import List

import pystencils as ps


def shift_assignments(assignments: List[ps.Assignment], shift_vector,
                      shift_write_assignments=True, shift_read_assignments=True):
    if hasattr(assignments, "main_assignments"):
        assignments = assignments.main_assignments

    shifted_assignments = assignments
    for i, a in enumerate(shifted_assignments):
        for symbol in [*a.free_symbols, a.rhs]:
            if isinstance(symbol, ps.Field.Access):
                shifted_access = symbol.get_shifted(*shift_vector)
                shifted_assignments[i] = shifted_assignments[i].subs(symbol, shifted_access)
    return shifted_assignments


def transform_assignments_rhs(assignments, rhs_transform_function):
    new_assignments = []
    for a in assignments:
        new_rhs = rhs_transform_function(a)
        new_assignments.append(ps.Assignment(a.lhs, new_rhs))
    return new_assignments


def transform_assignments(assignments, transform_function):
    new_assignments = []
    for a in assignments:
        new_assignment = transform_function(a)
        if isinstance(new_assignment, tuple):
            lhs, rhs = new_assignment
            new_assignment = ps.Assignment(lhs, rhs)
        if new_assignment is not None:
            new_assignments.append(new_assignment)
    return new_assignments

def replace_symbols_in_assignments(assignments: List[ps.Assignment], symbol, replace_symbol):
    if hasattr(assignments, "main_assignments"):
        assignments = assignments.main_assignments

    replaced_assignments = assignments
    for i, a in enumerate(replaced_assignments):
        replaced_assignments[i] = a.subs(symbol, replace_symbol)
    return replaced_assignments
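
# Minimal usage sketch for the helpers above (field names and the assignment are illustrative):
x, y = ps.fields("x, y: float64[2D]")
forward = [ps.Assignment(y.center, 2 * x.center)]

# Scale every right-hand side by 0.5 while keeping the left-hand sides.
scaled = transform_assignments_rhs(forward, lambda a: 0.5 * a.rhs)

# Shift all field accesses by one cell along the first spatial axis
# (note: shift_assignments modifies the given list in place).
shifted = shift_assignments([ps.Assignment(y.center, 2 * x.center)], (1, 0))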

import numpy as np

import pystencils as ps
from pystencils_autodiff.backends import AVAILABLE_BACKENDS


def fix_layout(array, target_field, backend):
    assert array.shape == target_field.shape, "Array %s's shape should be %s but is %s" % (
        target_field.name, target_field.shape, array.shape)
    assert backend.lower() in AVAILABLE_BACKENDS

    # Is just the index coordinate in the wrong place?
    swapped_array = np.swapaxes(array, 0, -1)
    if (swapped_array.strides == target_field.strides
            and swapped_array.shape == target_field.shape
            and target_field.index_dimensions == 1):
        array = swapped_array

    # Is everything ok already?
    everything_ok = (array.strides == target_field.strides
                     and array.shape == target_field.shape)

    if everything_ok:
        rtn = array
    else:  # no, fix it!
        f = target_field
        rtn = np.lib.stride_tricks.as_strided(np.zeros(f.shape, dtype=f.dtype.numpy_dtype),
                                              f.shape,
                                              [f.dtype.numpy_dtype.itemsize * a for a in f.strides])
        rtn[...] = array
    return rtn
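
# Minimal usage sketch for fix_layout (field name/shape are illustrative; the
# backend string is taken from AVAILABLE_BACKENDS rather than guessing a name):
example_field = ps.Field.create_fixed_size('f', (16, 16), dtype=np.float64)
example_array = np.random.rand(16, 16)
fixed = fix_layout(example_array, example_field, next(iter(AVAILABLE_BACKENDS)))
assert fixed.shape == example_field.shape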

import numpy as np

try:
    import tensorflow as tf
except ImportError:
    pass

try:
    import torch
except ImportError:
    pass


def tf_constant_from_field(field, init_val=0):
    return tf.constant(init_val, dtype=field.dtype.numpy_dtype, shape=field.shape, name=field.name + '_constant')


def tf_scalar_variable_from_field(field, init_val, constraint=None):
    var = tf.Variable(init_val, dtype=field.dtype.numpy_dtype, name=field.name + '_variable', constraint=constraint)
    return var * tf_constant_from_field(field, 1)


def tf_variable_from_field(field, init_val=0, constraint=None):
    if isinstance(init_val, (int, float)):
        init_val *= np.ones(field.shape, field.dtype.numpy_dtype)

    return tf.Variable(init_val, dtype=field.dtype.numpy_dtype, name=field.name + '_variable', constraint=constraint)


def tf_placeholder_from_field(field):
    return tf.placeholder(dtype=field.dtype.numpy_dtype, name=field.name + '_placeholder', shape=field.shape)


def torch_tensor_from_field(field, init_val=0, cuda=True, requires_grad=False):
    if isinstance(init_val, (int, float)):
        init_val *= np.ones(field.shape, field.dtype.numpy_dtype)
    device = torch.device('cuda' if cuda else 'cpu')
    return torch.tensor(init_val, requires_grad=requires_grad, device=device)
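
# Minimal usage sketch for torch_tensor_from_field (field name/shape are
# illustrative; requires pystencils and a CPU-only torch tensor here):
import pystencils as ps

example_field = ps.Field.create_fixed_size('f', (32, 32), dtype=np.float32)
example_tensor = torch_tensor_from_field(example_field, init_val=1.0, cuda=False)
assert tuple(example_tensor.shape) == example_field.shape
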
import pystencils
from pystencils.astnodes import FieldShapeSymbol, FieldStrideSymbol
"""
Determines how adjoint fields will be denoted in LaTeX output in terms of the forward field representation %s
Default: "\\hat{%s}"
"""
ADJOINT_FIELD_LATEX_HIGHLIGHT = r"\hat{%s}"


class AdjointField(pystencils.Field):
    """Field representing the adjoint variables of a Field holding the forward variables"""

    def __init__(self, forward_field, name_prefix='diff'):
        new_name = name_prefix + forward_field.name
        super(AdjointField, self).__init__(new_name, forward_field.field_type, forward_field._dtype,
                                           forward_field._layout, forward_field.shape, forward_field.strides)
        self.corresponding_forward_field = forward_field
        self.name_prefix = name_prefix

        # Eliminate references to forward fields that might not be present in backward kernels
        self.shape = tuple(FieldShapeSymbol([self.name], s.coordinate) if isinstance(
            s, FieldShapeSymbol) else s for s in self.shape)
        self.strides = tuple(FieldStrideSymbol(self.name, s.coordinate) if isinstance(
            s, FieldStrideSymbol) else s for s in self.strides)

        if forward_field.latex_name:
            self.latex_name = ADJOINT_FIELD_LATEX_HIGHLIGHT % forward_field.latex_name
        else:
            self.latex_name = ADJOINT_FIELD_LATEX_HIGHLIGHT % forward_field.name
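
# Minimal usage sketch for AdjointField (the field name 'u' is illustrative):
u = pystencils.fields("u: float64[2D]")
u_adjoint = AdjointField(u)               # named 'diffu', same dtype and layout as u
assert u_adjoint.latex_name == r"\hat{u}"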

import uuid

import numpy as np
import torch

try:
    import pycuda.autoinit
    import pycuda.gpuarray
    import pycuda.driver
    HAS_PYCUDA = True
except Exception:
    HAS_PYCUDA = False


# Fails if different context/thread
def tensor_to_gpuarray(tensor):
    if not tensor.is_cuda:
        raise ValueError(
            'Cannot convert CPU tensor to GPUArray (call `cuda()` on it)')
    else:
        return pycuda.gpuarray.GPUArray(tensor.shape,
                                        dtype=torch_dtype_to_numpy(tensor.dtype),
                                        gpudata=tensor.data_ptr())

def create_autograd_function(autodiff_obj, inputfield_to_tensor_dict, forward_loop, backward_loop,
                             convert_tensors_to_arrays=True):
    field_to_tensor_dict = inputfield_to_tensor_dict
    backward_input_fields = autodiff_obj.backward_input_fields

    # Allocate output tensors for the forward and the backward pass
    for field in autodiff_obj.forward_output_fields + autodiff_obj.backward_output_fields:
        field_to_tensor_dict[field] = torch.zeros(
            *field.shape,
            dtype=numpy_dtype_to_torch(field.dtype.numpy_dtype),
            device=list(inputfield_to_tensor_dict.values())[0].device)

    tensor_to_field_dict = {v: k for k, v in field_to_tensor_dict.items()}

    def _tensors_to_dict(is_cuda, args, additional_dict={}):
        arrays = dict()
        lookup_dict = {**tensor_to_field_dict, **additional_dict}
        for a in args:
            if convert_tensors_to_arrays:
                if is_cuda:
                    a.cuda()
                    array = tensor_to_gpuarray(a)
                else:
                    a.cpu()
                    array = a.data.numpy()
                try:
                    arrays[lookup_dict[a].name] = array
                except KeyError:
                    pass
            else:
                array = a
                try:
                    arrays[lookup_dict[a].name] = array
                except KeyError:
                    pass
        return arrays
    def forward(self, *input_tensors):
        self.save_for_backward(*input_tensors)

        all_tensors = field_to_tensor_dict.values()
        is_cuda = all(a.is_cuda for a in all_tensors)
        arrays = _tensors_to_dict(is_cuda, all_tensors)

        forward_loop(**arrays, is_cuda=is_cuda)

        return tuple(field_to_tensor_dict[f] for f in autodiff_obj.forward_output_fields)

    def backward(self, *grad_outputs):
        all_tensors = grad_outputs + tuple(field_to_tensor_dict.values())
        is_cuda = all(a.is_cuda for a in all_tensors)
        arrays = _tensors_to_dict(is_cuda, all_tensors, additional_dict={
            f.name: grad_outputs[i] for i, f in enumerate(backward_input_fields)})

        backward_loop(**arrays, is_cuda=is_cuda)

        return tuple(field_to_tensor_dict[f] for f in autodiff_obj.backward_output_fields)

    cls = type(str(uuid.uuid4()), (torch.autograd.Function,), {})
    cls.forward = forward
    cls.backward = backward
    return cls


# from: https://stackoverflow.com/questions/51438232/how-can-i-create-a-pycuda-gpuarray-from-a-gpu-memory-address
def torch_dtype_to_numpy(dtype):
    dtype_name = str(dtype).replace('torch.', '')  # remove 'torch.'
    return getattr(np, dtype_name)


def numpy_dtype_to_torch(dtype):
    dtype_name = str(dtype)
    return getattr(torch, dtype_name)

def gpuarray_to_tensor(gpuarray, context=None):
    """
    Convert a :class:`pycuda.gpuarray.GPUArray` to a :class:`torch.Tensor`. The underlying
    storage will NOT be shared, since a new copy must be allocated.

    Parameters
    ----------
    gpuarray : pycuda.gpuarray.GPUArray

    Returns
    -------
    torch.Tensor
    """
    if not context:
        context = pycuda.autoinit.context

    shape = gpuarray.shape
    dtype = gpuarray.dtype
    out_dtype = numpy_dtype_to_torch(dtype)  # torch.zeros expects a torch dtype, not a numpy one
    out = torch.zeros(shape, dtype=out_dtype).cuda()
    gpuarray_copy = tensor_to_gpuarray(out)
    byte_size = gpuarray.itemsize * gpuarray.size
    pycuda.driver.memcpy_dtod(gpuarray_copy.gpudata, gpuarray.gpudata, byte_size)
    return out

if HAS_PYCUDA:
    class GpuPointerHolder(pycuda.driver.PointerHolderBase):

        def __init__(self, tensor):
            super().__init__()
            self.tensor = tensor
            self.gpudata = tensor.data_ptr()

        def get_pointer(self):
            return self.tensor.data_ptr()

        def __int__(self):
            return self.__index__()

        # Without an __index__ method, arithmetic calls to the GPUArray backed by this pointer fail.
        # Not sure why, but this apparently needs to return some integer.
        def __index__(self):
            return self.gpudata
else:
    GpuPointerHolder = None
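
# Round-trip sketch for the two converters above (requires a CUDA device,
# a CUDA-enabled PyTorch build and pycuda; purely illustrative):
if HAS_PYCUDA and torch.cuda.is_available():
    t = torch.arange(16, dtype=torch.float32, device='cuda')
    g = tensor_to_gpuarray(t)       # zero-copy view of the tensor's memory
    t2 = gpuarray_to_tensor(g)      # new tensor, contents copied device-to-device
    assert torch.equal(t, t2)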

import tensorflow as tf
from tensorflow.compat.v1 import get_default_graph

import numpy as np

import pystencils_autodiff
from pystencils.utils import DotDict

_num_generated_ops = 0


def _py_func(func, inp, Tout, stateful=False, name=None, grad=None):
    """
    Copied from a random internet forum. It seems to be important to give the
    PyFunc a random name in the gradient override map to properly register gradients.

    PyFunc defined as given by Tensorflow

    :param func: Custom Function
    :param inp: Function Inputs
    :param Tout: Output Type of our Custom Function
    :param stateful: Calculate Gradients when stateful is True
    :param name: Name of the PyFunction
    :param grad: Custom Gradient Function
    :return:
    """
    # Generate a random gradient name to avoid conflicts with built-in names
    global _num_generated_ops
    rnd_name = 'PyFuncGrad' + str(_num_generated_ops) + 'ABC@a1b2c3'
    _num_generated_ops += 1

    # Register the Tensorflow gradient
    tf.RegisterGradient(rnd_name)(grad)

    # Get the current graph
    g = get_default_graph()

    # Add the gradient override map
    with g.gradient_override_map({"PyFunc": rnd_name, "PyFuncStateless": rnd_name}):
        return tf.py_func(func, inp, Tout, stateful=stateful, name=name)

def tensorflowop_from_autodiffop(autodiffop: pystencils_autodiff.AutoDiffOp,
                                 inputfield_tensor_dict,
                                 forward_function,
                                 backward_function):

    def helper_forward(*args):
        kwargs = dict()
        for i in range(len(args)):
            if args[i] is not None:
                kwargs[autodiffop.forward_input_fields[i].name] = args[i]

        rtn_dict = forward_function(**kwargs)
        return [rtn_dict[o.name] for o in autodiffop._forward_output_fields]

    def helper_backward(*args):
        kwargs = dict()
        for i in range(len(args)):
            if i < len(autodiffop.forward_input_fields):
                kwargs[autodiffop.forward_input_fields[i].name] = args[i]
            else:
                kwargs[autodiffop._backward_input_fields[
                    i - len(autodiffop.forward_input_fields)].name] = args[i]

        rtn_dict = backward_function(**kwargs)
        return [rtn_dict[o.name] for o in autodiffop._backward_output_fields]

    def backward(op, *grad):
        return tf.py_func(helper_backward,
                          [*op.inputs, *grad],
                          [f.dtype.numpy_dtype for f in autodiffop._backward_output_fields],
                          name=autodiffop.op_name + '_backward',
                          stateful=False)

    output_tensors = _py_func(helper_forward,
                              [inputfield_tensor_dict[f] for f in autodiffop.forward_input_fields],
                              [f.dtype.numpy_dtype for f in autodiffop._forward_output_fields],
                              name=autodiffop.op_name, stateful=False, grad=backward)

    return output_tensors
"""Implementing a custom Tensorflow Op in C++ has some advantages and disadvantages
Advantages:
- GPU support without any hacks
- Access to raw tensors without conversion to numpy
- Custom Ops will be serializable
Disadavantages:
- C++ Code has to be build with correct parameters and ABI for present Tensorflow version (best integrated into Tensorflow build)
"""
# raise NotImplementedError()

import os
import types
import uuid
from itertools import chain
from os.path import dirname, isdir, isfile, join

import jinja2
import torch
from appdirs import user_cache_dir

import pystencils
import pystencils_autodiff
import pystencils_autodiff.backends._pytorch
from pystencils.astnodes import FieldShapeSymbol
from pystencils.backends.cbackend import generate_c
from pystencils.backends.cuda_backend import CudaSympyPrinter, generate_cuda
from pystencils.cpu.kernelcreation import create_kernel
from pystencils.gpucuda.kernelcreation import create_cuda_kernel
from pystencils_autodiff.backends._pytorch import numpy_dtype_to_torch


def _read_file(file):
    with open(file, 'r') as f:
        return f.read()


def _write_file(filename, content):
    with open(filename, 'w') as f:
        return f.write(content)


def generate_torch(destination_folder,
                   autodiff: pystencils_autodiff.AutoDiffOp,
                   is_cuda,
                   dtype,
                   forward_ast=None,
                   backward_ast=None):
    shape = autodiff.forward_output_fields[0].spatial_shape
    operation_hash = abs(hash(autodiff) + hash(shape) + hash(str(dtype)))
    operation_string = "%s_native_%s_%s_%x" % (
        autodiff.op_name, 'cuda' if is_cuda else 'cpu', 'x'.join(str(s) for s in shape), operation_hash)

    cpp_file = join(destination_folder, operation_string + '.cpp')
    cuda_kernel_file = join(destination_folder, operation_string + '.cu')
    required_files = [cpp_file, cuda_kernel_file] if is_cuda else [cpp_file]

    if not all(isfile(x) for x in required_files):
        generate_ast = create_cuda_kernel if is_cuda else create_kernel
        generate_code = generate_cuda if is_cuda else generate_c

        if not forward_ast:
            forward_ast = generate_ast(autodiff.forward_assignments.all_assignments)
        if not backward_ast:
            backward_ast = generate_ast(autodiff.backward_assignments.all_assignments)

        forward_ast.subs({s: FieldShapeSymbol(
            [autodiff.forward_output_fields[0].name], s.coordinate) for s in forward_ast.atoms(FieldShapeSymbol)})
        backward_ast.subs({s: FieldShapeSymbol(
            [autodiff.backward_output_fields[0].name], s.coordinate) for s in backward_ast.atoms(FieldShapeSymbol)})
        # backward_ast.subs({s: FieldStrideSymbol(
        #     autodiff.forward_input_fields[0].name, s.coordinate) for s in forward_ast.atoms(FieldStrideSymbol)})

        forward_code = generate_code(forward_ast.body).replace(
            'float *', 'scalar_t *').replace('double *', 'scalar_t *')
        backward_code = generate_code(backward_ast.body).replace(
            'float *', 'scalar_t *').replace('double *', 'scalar_t *')

        if is_cuda:
            printer = CudaSympyPrinter()
            block_and_thread_numbers = forward_ast.indexing.call_parameters(shape)
            forward_block = ', '.join(printer.doprint(i) for i in block_and_thread_numbers['block'])
            forward_grid = ', '.join(printer.doprint(i) for i in block_and_thread_numbers['grid'])

            backward_shape = autodiff.backward_output_fields[0].spatial_shape
            block_and_thread_numbers = backward_ast.indexing.call_parameters(backward_shape)
            backward_block = ', '.join(printer.doprint(i) for i in block_and_thread_numbers['block'])
            backward_grid = ', '.join(printer.doprint(i) for i in block_and_thread_numbers['grid'])

            cuda_globals = pystencils.backends.cbackend.get_global_declarations(forward_ast) | \
                pystencils.backends.cbackend.get_global_declarations(backward_ast)
            cuda_globals = [generate_cuda(g) for g in cuda_globals]
        else:
            backward_block = forward_block = "INVALID"
            backward_grid = forward_grid = "INVALID"
            cuda_globals = ""

        render_dict = {
            "forward_tensors": [f for f in autodiff.forward_fields],
            "forward_input_tensors": [f for f in autodiff.forward_input_fields],
            "forward_output_tensors": [f for f in autodiff.forward_output_fields],
            "backward_tensors": [f for f in autodiff.backward_fields + autodiff.forward_input_fields],
            "backward_input_tensors": [f for f in autodiff.backward_input_fields],
            "backward_output_tensors": [f for f in autodiff.backward_output_fields],
            "forward_kernel": forward_code,
            "backward_kernel": backward_code,
            "dimensions": range(autodiff.forward_fields[0].spatial_dimensions),
            "kernel_name": operation_string,
            "forward_threads": "{" + forward_block + "}",
            "forward_blocks": "{" + forward_grid + "}",
            "backward_threads": "{" + backward_block + "}",
            "backward_blocks": "{" + backward_grid + "}",
            "cuda_globals": cuda_globals,
            "dtype": pystencils.data_types.BasicType(dtype)
        }

        if is_cuda:
            template_string_cpp = _read_file(join(dirname(__file__), 'torch_native_cuda.tmpl.cpp'))
            template = jinja2.Template(template_string_cpp)
            output = template.render(render_dict)
            _write_file(join(destination_folder, operation_string + '.cpp'), output)

            template_string = _read_file(join(dirname(__file__), 'torch_native_cuda.tmpl.cu'))
            template = jinja2.Template(template_string)
            output = template.render(render_dict)
            _write_file(join(destination_folder, operation_string + '.cu'), output)
        else:
            template_string_cpp = _read_file(join(dirname(__file__), 'torch_native_cpu.tmpl.cpp'))
            template = jinja2.Template(template_string_cpp)
            output = template.render(render_dict)
            _write_file(join(destination_folder, operation_string + '.cpp'), output)

    from torch.utils.cpp_extension import load
    compiled_operation = load(operation_string, required_files, verbose=True,
                              extra_cuda_cflags=[] if is_cuda else [])
    return compiled_operation

def create_autograd_function(autodiff_obj, inputfield_to_tensor_dict, forward_loop=None, backward_loop=None):
    if forward_loop is None:
        assert backward_loop is None

        is_cuda = all(t.is_cuda for t in inputfield_to_tensor_dict.values())
        assert all(t.is_cuda for t in inputfield_to_tensor_dict.values()) or \
            all(not t.is_cuda for t in inputfield_to_tensor_dict.values()), \
            "All tensors must be on the GPU or all on the CPU"
        dtype = pystencils_autodiff.backends._pytorch.torch_dtype_to_numpy(
            list(inputfield_to_tensor_dict.values())[0].dtype)

        cache_dir = user_cache_dir('pystencils')
        if not isdir(cache_dir):
            os.mkdir(cache_dir)
        # TODO: create function and stuff

        compiled_operation = generate_torch(cache_dir, autodiff_obj, is_cuda, dtype)

        field_to_tensor_dict = inputfield_to_tensor_dict
        # Allocate output tensors for the forward and the backward pass
        for field in chain(autodiff_obj.forward_output_fields, autodiff_obj.backward_output_fields):
            field_to_tensor_dict[field] = torch.zeros(
                *field.shape,
                dtype=numpy_dtype_to_torch(field.dtype.numpy_dtype),
                device=list(inputfield_to_tensor_dict.values())[0].device)

        def forward(self):
            self.saved = {f: field_to_tensor_dict[f] for f in chain(
                autodiff_obj.forward_input_fields, autodiff_obj.backward_output_fields)}
            compiled_operation.forward(**{f.name: field_to_tensor_dict[f] for f in autodiff_obj.forward_fields})
            return tuple(field_to_tensor_dict[f] for f in autodiff_obj.forward_output_fields)

        def backward(self, *grad_outputs):
            self.saved.update({f.name: grad_outputs[i] for i, f in enumerate(autodiff_obj.backward_input_fields)})
            compiled_operation.backward(**{f.name: t for f, t in self.saved.items()})
            return tuple(self.saved[f] for f in autodiff_obj.backward_output_fields)

        cls = type(str(uuid.uuid4()), (torch.autograd.Function,), {})
        cls.saved = None
        cls.forward = forward
        cls.backward = backward
        return cls
    else:
        op = pystencils_autodiff.backends._pytorch.create_autograd_function(autodiff_obj,
                                                                            inputfield_to_tensor_dict,
                                                                            forward_loop,
                                                                            backward_loop,
                                                                            convert_tensors_to_arrays=False)
        return op

#include <torch/extension.h>
#include <vector>
using namespace pybind11::literals;
using scalar_t = {{ dtype }};
std::vector<at::Tensor> {{ kernel_name }}_forward(
{%- for tensor in forward_tensors -%}
at::Tensor {{ tensor }} {{- ", " if not loop.last -}}
{%- endfor %})
{
//{% for tensor in forward_output_tensors -%}
//auto {{tensor}} = at::zeros_like({{ forward_input_tensors[0] }});
//{% endfor %}
{% for i in dimensions -%}
int _size_{{ forward_tensors[0] }}_{{ i }} = {{ forward_tensors[0] }}.size({{ i }});
{% endfor %}
{% for tensor in forward_tensors -%}
{%- set last = loop.last -%}
scalar_t* _data_{{ tensor }} = {{ tensor }}.data<scalar_t>();
{% for i in dimensions -%}
int _stride_{{tensor}}_{{i}} = {{tensor}}.strides()[{{ i }}];
{% endfor -%}
{% endfor -%}
{{forward_kernel}}
return {
{%- for tensor in forward_output_tensors -%}
{{ tensor }} {{- "," if not loop.last -}}
{% endfor -%}
};
}
std::vector<at::Tensor> {{ kernel_name }}_backward(
{%- for tensor in backward_tensors -%}
at::Tensor {{ tensor }} {{- ", " if not loop.last -}}
{% endfor %})
{
//{% for tensor in backward_output_tensors -%}
//auto {{tensor}} = at::zeros_like({{ backward_input_tensors[0] }});
//{% endfor %}
{% for tensor in backward_tensors -%}
{%- set last = loop.last -%}
scalar_t* _data_{{ tensor }} = {{ tensor }}.data<scalar_t>();
{% for i in dimensions -%}
int _stride_{{ tensor }}_{{i}} = {{ tensor }}.strides()[{{ i }}];
{% endfor -%}
{% endfor -%}
{{backward_kernel}}
return {
{%- for tensor in backward_output_tensors -%}
{{ tensor }} {{- "," if not loop.last -}}
{% endfor -%}
};
}
PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
m.def("forward", &{{ kernel_name }}_forward, "{{ kernel_name }} forward (CPU)",
{%- for tensor in forward_tensors -%}
"{{ tensor }}"_a {{ ", " if not loop.last }}
{%- endfor -%} );
m.def("backward", &{{ kernel_name }}_backward, "{{ kernel_name }} backward (CPU)",
{%- for tensor in backward_tensors -%}
"{{ tensor }}"_a {{ ", " if not loop.last }}
{%- endfor -%} );
}
#include <torch/extension.h>
#include <vector>
// CUDA forward declarations
using namespace pybind11::literals;
void {{ kernel_name }}_cuda_forward(
{%- for tensor in forward_tensors %}
at::Tensor {{ tensor.name }} {{- ", " if not loop.last -}}
{% endfor %});
std::vector<at::Tensor> {{ kernel_name }}_cuda_backward(
{%- for tensor in backward_tensors -%}
at::Tensor {{ tensor.name }} {{- ", " if not loop.last -}}
{% endfor %});
// C++ interface
// NOTE: AT_ASSERT has become AT_CHECK on master after 0.4.
#define CHECK_CUDA(x) \
AT_ASSERTM(x.type().is_cuda(), #x " must be a CUDA tensor")
#define CHECK_CONTIGUOUS(x) \
//AT_ASSERTM(x.is_contiguous(), #x " must be contiguous")
#define CHECK_INPUT(x) CHECK_CUDA(x);
//CHECK_CONTIGUOUS(x)
std::vector<at::Tensor> {{ kernel_name }}_forward(
{%- for tensor in forward_tensors -%}
at::Tensor {{ tensor.name }} {{- ", " if not loop.last -}}
{%- endfor %})
{
{% for tensor in forward_tensors -%}
CHECK_INPUT({{ tensor.name }});
{% endfor %}
{{ kernel_name }}_cuda_forward(
{%- for tensor in forward_tensors %}
{{ tensor.name }} {{- ", " if not loop.last }}
{%- endfor %});
return std::vector<at::Tensor>{
{%- for tensor in forward_output_tensors %}
{{ tensor.name }} {{- ", " if not loop.last }}
{%- endfor %}
}
;
}
std::vector<at::Tensor> {{ kernel_name }}_backward(
{%- for tensor in backward_tensors -%}
at::Tensor {{ tensor.name }} {{- ", " if not loop.last -}}
{% endfor %})
{
{%- for tensor in forward_input_tensors + backward_input_tensors -%}
CHECK_INPUT({{ tensor }});
{% endfor %}
{{ kernel_name }}_cuda_backward(
{%- for tensor in backward_tensors -%}
{{ tensor.name }} {{- ", " if not loop.last }}
{%- endfor %});
return std::vector<at::Tensor>{
{%- for tensor in backward_output_tensors %}
{{ tensor.name }} {{- ", " if not loop.last }}
{%- endfor %}
}
;
}
PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
m.def("forward", &{{ kernel_name }}_forward, "{{ kernel_name }} forward (CUDA)",
{%- for tensor in forward_tensors -%}
"{{ tensor.name }}"_a {{ ", " if not loop.last }}
{%- endfor -%} );
m.def("backward", &{{ kernel_name }}_backward, "{{ kernel_name }} backward (CUDA)",
{%- for tensor in backward_tensors -%}
"{{ tensor.name }}"_a {{ ", " if not loop.last }}
{%- endfor -%} );
}
#include <ATen/ATen.h>
#include <cuda.h>
#include <cuda_runtime.h>
#include <vector>
{% for g in cuda_globals -%}
{{ g }}
{% endfor %}
template <typename scalar_t>
__global__ void {{ kernel_name }}_cuda_forward_kernel(
{% for tensor in forward_tensors -%}
{%- set last = loop.last -%}
scalar_t* __restrict__ _data_{{ tensor.name }},
{% for i in range(tensor.spatial_dimensions )-%}
int _stride_{{ tensor.name }}_{{ i }} {{- ", " }}
{% endfor -%}
{% endfor -%}
{% for i in range(forward_output_tensors[0].spatial_dimensions )-%}
int _size_{{ forward_output_tensors[0] }}_{{ i }} {{- "," if not loop.last }}
{% endfor %})
{
{{forward_kernel}}
}
template <typename scalar_t>
__global__ void {{ kernel_name }}_cuda_backward_kernel(
{% for tensor in backward_tensors -%}
{%- set last = loop.last -%}
scalar_t* __restrict__ _data_{{ tensor.name }},
{% for i in range(tensor.spatial_dimensions )-%}
int _stride_{{ tensor.name }}_{{ i }} {{- ", " }}
{% endfor -%}
{% endfor -%}
{% for i in range(forward_output_tensors[0].spatial_dimensions )-%}
int _size_{{ forward_output_tensors[0].name }}_{{ i }} {{- "," if not loop.last }}
{% endfor %})
{
{{backward_kernel}}
}
void {{ kernel_name }}_cuda_forward(
{%- for tensor in forward_tensors -%}
at::Tensor {{ tensor.name }} {{- "," if not loop.last -}}
{%- endfor -%})
{
{% for i in range(forward_output_tensors[0].spatial_dimensions )-%}
int _size_{{ forward_output_tensors[0].name }}_{{ i }} = {{ forward_output_tensors[0].name }}.size({{ i }});
{% endfor %}
/*at:: at::device(at::kCUDA).dtype(k{{ dtype }})*/
AT_DISPATCH_FLOATING_TYPES({{ forward_input_tensors[0].name }}.type(), "{{ kernel_name }}_forward_cuda", ([&] {
{{ kernel_name }}_cuda_forward_kernel<scalar_t><<<dim3{{ forward_blocks }}, dim3{{ forward_threads }}>>>(
{% for tensor in forward_tensors -%}
{%- set last = loop.last -%}
{{tensor.name}}.data<scalar_t>(),
{% for i in range(tensor.spatial_dimensions) -%}
{{tensor.name}}.strides()[{{ i }}] {{- "," }}
{% endfor -%}
{% endfor -%}
{% for i in range(forward_output_tensors[0].spatial_dimensions) -%}
{{ forward_output_tensors[0].name }}.size({{ i }}) {{- "," if not loop.last }}
{% endfor %}
);
}));
cudaError_t err = cudaGetLastError();
if (err) {
fprintf(stderr, "CUDA kernel failed : %s\n", cudaGetErrorString(err));
throw err;
}
}
void {{ kernel_name }}_cuda_backward(
{%- for tensor in backward_tensors -%}
at::Tensor {{ tensor.name }} {{- ", " if not loop.last -}}
{%- endfor %})
{
{% for i in range(backward_output_tensors[0].spatial_dimensions )-%}
int _size_{{ backward_output_tensors[0].name }}_{{ i }} = {{ backward_output_tensors[0].name }}.size({{ i }});
{% endfor %}
/*at:: at::device(at::kCUDA).dtype(k{{ dtype }})*/
AT_DISPATCH_FLOATING_TYPES({{ backward_input_tensors[0].name }}.type(), "{{ kernel_name }}_backward_cuda", ([&] {
{{ kernel_name }}_cuda_backward_kernel<scalar_t><<<dim3{{ backward_blocks }}, dim3{{ backward_threads }}>>>(
{% for tensor in backward_tensors -%}
{%- set last = loop.last -%}
{{tensor.name}}.data<scalar_t>(),
{% for i in range(tensor.spatial_dimensions )-%}
{{tensor.name}}.strides()[{{ i }}]{{- ", " }}
{% endfor -%}
{% endfor -%}
{% for i in range(backward_output_tensors[0].spatial_dimensions )-%}
{{ backward_output_tensors[0].name }}.size({{ i }}) {{- "," if not loop.last }}
{% endfor %}
);
}));
}

try:
    import tensorflow as tf
except Exception:
    pass


def masked_mse(a, b, mask):
    """ Mean squared error within mask """
    return tf.losses.mean_pairwise_squared_error(tf.boolean_mask(a, mask), tf.boolean_mask(b, mask))


def total_var(tensor, norm=2):
    pixel_dif1 = tensor[1:, :] - tensor[:-1, :]
    pixel_dif2 = tensor[:, 1:] - tensor[:, :-1]

    if norm == 2:
        return tf.reduce_sum(pixel_dif1 * pixel_dif1) + tf.reduce_sum(pixel_dif2 * pixel_dif2)
    if norm == 1:
        return tf.reduce_sum(tf.abs(pixel_dif1)) + tf.reduce_sum(tf.abs(pixel_dif2))


def mean_total_var(tensor, norm=2):
    pixel_dif1 = tensor[1:, :] - tensor[:-1, :]
    pixel_dif2 = tensor[:, 1:] - tensor[:, :-1]

    if norm == 2:
        return (tf.reduce_mean(pixel_dif1 * pixel_dif1) + tf.reduce_mean(pixel_dif2 * pixel_dif2)) / 2
    if norm == 1:
        return (tf.reduce_mean(tf.abs(pixel_dif1)) + tf.reduce_mean(tf.abs(pixel_dif2))) / 2
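
# Minimal usage sketch for the total-variation regularizers above
# (requires TensorFlow; the example image values are illustrative):
example_img = tf.constant([[0., 1.], [2., 3.]])
tv_l2 = total_var(example_img, norm=2)             # sum of squared neighbour differences
tv_mean_l1 = mean_total_var(example_img, norm=1)   # averaged absolute neighbour differences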