import uuid

import numpy as np
import torch

try:
    import pycuda.autoinit
    import pycuda.gpuarray
    import pycuda.driver
    HAS_PYCUDA = True
except Exception:
    HAS_PYCUDA = False


# Fails if the tensor was allocated in a different CUDA context/thread
def tensor_to_gpuarray(tensor):
    """Wrap a CUDA tensor as a :class:`pycuda.gpuarray.GPUArray` without copying."""
    if not tensor.is_cuda:
        raise ValueError(
            'Cannot convert CPU tensor to GPUArray (call `cuda()` on it)')
    else:
        return pycuda.gpuarray.GPUArray(tensor.shape,
                                        dtype=torch_dtype_to_numpy(tensor.dtype),
                                        gpudata=tensor.data_ptr())
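

# Usage sketch for tensor_to_gpuarray (illustrative only; assumes a CUDA device
# and pycuda are available): the GPUArray is a zero-copy view, so writes through
# it show up in the original tensor.
def _example_tensor_to_gpuarray():
    t = torch.ones(4, dtype=torch.float32).cuda()
    g = tensor_to_gpuarray(t)
    g.fill(2.0)                    # writes into the memory owned by `t`
    assert bool((t == 2.0).all())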


def create_autograd_function(autodiff_obj, inputfield_to_tensor_dict, forward_loop, backward_loop,
                             convert_tensors_to_arrays=True):
    field_to_tensor_dict = inputfield_to_tensor_dict
    backward_input_fields = autodiff_obj.backward_input_fields

    # Allocate output tensors for the forward and the backward pass
    for field in autodiff_obj.forward_output_fields + autodiff_obj.backward_output_fields:
        field_to_tensor_dict[field] = torch.zeros(
            *field.shape,
            dtype=numpy_dtype_to_torch(field.dtype.numpy_dtype),
            device=list(inputfield_to_tensor_dict.values())[0].device)

    tensor_to_field_dict = {
        v: k for k, v in field_to_tensor_dict.items()}

    def _tensors_to_dict(is_cuda, args, additional_dict={}):
        arrays = dict()
        lookup_dict = {**tensor_to_field_dict, **additional_dict}
        for a in args:
            if convert_tensors_to_arrays:
                if is_cuda:
                    a = a.cuda()  # returns `a` itself if it is already on the GPU
                    array = tensor_to_gpuarray(a)
                else:
                    a = a.cpu()  # returns `a` itself if it is already on the host
                    array = a.data.numpy()
                try:
                    arrays[lookup_dict[a].name] = array
                except KeyError:
                    pass
            else:
                array = a
                try:
                    arrays[lookup_dict[a].name] = array
                except KeyError:
                    pass
        return arrays

    def forward(self, *input_tensors):
        self.save_for_backward(*input_tensors)
        all_tensors = field_to_tensor_dict.values()
        is_cuda = all(a.is_cuda for a in all_tensors)
        arrays = _tensors_to_dict(is_cuda, all_tensors)
        forward_loop(**arrays, is_cuda=is_cuda)
        return tuple(field_to_tensor_dict[f] for f in autodiff_obj.forward_output_fields)

    def backward(self, *grad_outputs):
        all_tensors = grad_outputs + tuple(field_to_tensor_dict.values())
        is_cuda = all(a.is_cuda for a in all_tensors)
        arrays = _tensors_to_dict(is_cuda, all_tensors, additional_dict={
            f.name: grad_outputs[i] for i, f in enumerate(backward_input_fields)})
        backward_loop(**arrays, is_cuda=is_cuda)
        return tuple(field_to_tensor_dict[f] for f in autodiff_obj.backward_output_fields)

    # Create a uniquely named subclass of torch.autograd.Function with these methods
    cls = type(str(uuid.uuid4()), (torch.autograd.Function,), {})
    cls.forward = forward
    cls.backward = backward
    return cls
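

# Usage sketch for create_autograd_function (illustrative only: `autodiff_obj`,
# the field-to-tensor mapping and the two loop callbacks come from
# pystencils-autodiff in real use; the names below are placeholders).
def _example_create_autograd_function(autodiff_obj, inputfield_to_tensor_dict,
                                      forward_loop, backward_loop):
    OpFunction = create_autograd_function(autodiff_obj,
                                          inputfield_to_tensor_dict,
                                          forward_loop,
                                          backward_loop)
    # The generated class uses the legacy autograd interface (non-static
    # forward/backward), so with older PyTorch versions it is instantiated
    # and then called with the input tensors.
    op = OpFunction()
    return op(*inputfield_to_tensor_dict.values())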


# from: https://stackoverflow.com/questions/51438232/how-can-i-create-a-pycuda-gpuarray-from-a-gpu-memory-address
def torch_dtype_to_numpy(dtype):
    """Map a torch dtype (e.g. ``torch.float32``) to the corresponding numpy type."""
    dtype_name = str(dtype).replace('torch.', '')  # e.g. 'torch.float32' -> 'float32'
    return getattr(np, dtype_name)


def numpy_dtype_to_torch(dtype):
    """Map a numpy dtype (e.g. ``numpy.float32``) to the corresponding torch dtype."""
    dtype_name = str(dtype)
    return getattr(torch, dtype_name)
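

# Round-trip sketch for the two dtype helpers above (illustrative only):
def _example_dtype_roundtrip():
    assert torch_dtype_to_numpy(torch.float32) is np.float32
    assert numpy_dtype_to_torch(np.dtype('float32')) is torch.float32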


def gpuarray_to_tensor(gpuarray, context=None):
    """
    Convert a :class:`pycuda.gpuarray.GPUArray` to a :class:`torch.Tensor`. The underlying
    storage will NOT be shared, since a new copy must be allocated.

    Parameters
    ----------
    gpuarray : pycuda.gpuarray.GPUArray

    Returns
    -------
    torch.Tensor
    """
    if not context:
        context = pycuda.autoinit.context
    shape = gpuarray.shape
    dtype = gpuarray.dtype
    out_dtype = numpy_dtype_to_torch(dtype)
    out = torch.zeros(shape, dtype=out_dtype).cuda()
    gpuarray_copy = tensor_to_gpuarray(out)
    byte_size = gpuarray.itemsize * gpuarray.size
    pycuda.driver.memcpy_dtod(gpuarray_copy.gpudata,
                              gpuarray.gpudata, byte_size)
    return out
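

# Usage sketch for gpuarray_to_tensor (illustrative only; assumes a CUDA device
# and pycuda): tensor_to_gpuarray gives a zero-copy view, gpuarray_to_tensor
# allocates a fresh tensor and copies device-to-device.
def _example_gpuarray_roundtrip():
    src = torch.arange(6, dtype=torch.float32).reshape(2, 3).cuda()
    view = tensor_to_gpuarray(src)     # shares src's memory
    copy = gpuarray_to_tensor(view)    # independent GPU copy of the same data
    assert torch.equal(src, copy)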


if HAS_PYCUDA:
    class GpuPointerHolder(pycuda.driver.PointerHolderBase):

        def __init__(self, tensor):
            super().__init__()
            self.tensor = tensor
            self.gpudata = tensor.data_ptr()

        def get_pointer(self):
            return self.tensor.data_ptr()

        def __int__(self):
            return self.__index__()

        # without an __index__ method, arithmetic calls to the GPUArray backed by this
        # pointer fail; not sure why, this needs to return some integer, apparently
        def __index__(self):
            return self.gpudata

else:
    GpuPointerHolder = None
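

# Usage sketch for GpuPointerHolder (illustrative only; assumes a CUDA tensor):
# wrapping the tensor's data pointer keeps a Python reference to the tensor
# alive while pycuda holds the pointer, unlike passing the raw data_ptr()
# integer as tensor_to_gpuarray does above.
def _example_pointer_holder(tensor):
    holder = GpuPointerHolder(tensor)
    return pycuda.gpuarray.GPUArray(tensor.shape,
                                    dtype=torch_dtype_to_numpy(tensor.dtype),
                                    gpudata=holder)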