Skip to content
Snippets Groups Projects
Commit 08ed41d1 authored by Markus Holzer's avatar Markus Holzer Committed by Michael Kuron
Browse files

Implement Pinned GPU memory

parent 9d897d24
No related merge requests found
...@@ -127,8 +127,12 @@ class SerialDataHandling(DataHandling): ...@@ -127,8 +127,12 @@ class SerialDataHandling(DataHandling):
# cpu_arr is always created - since there is no create_gpu_array_with_layout() # cpu_arr is always created - since there is no create_gpu_array_with_layout()
byte_offset = ghost_layers * np.dtype(dtype).itemsize byte_offset = ghost_layers * np.dtype(dtype).itemsize
cpu_arr = create_numpy_array_with_layout(layout=layout_tuple, alignment=alignment,
byte_offset=byte_offset, **kwargs) if gpu:
cpu_arr = self.array_handler.pinned_numpy_array(shape=kwargs['shape'], layout=layout_tuple, dtype=dtype)
else:
cpu_arr = create_numpy_array_with_layout(layout=layout_tuple, alignment=alignment,
byte_offset=byte_offset, **kwargs)
if alignment and gpu: if alignment and gpu:
raise NotImplementedError("Alignment for GPU fields not supported") raise NotImplementedError("Alignment for GPU fields not supported")
......
try: try:
import cupy as cp import cupy as cp
import cupyx as cpx
except ImportError: except ImportError:
cp = None cp = None
cpx = None
import numpy as np import numpy as np
import pystencils import pystencils
...@@ -25,27 +27,75 @@ class GPUArrayHandler: ...@@ -25,27 +27,75 @@ class GPUArrayHandler:
@staticmethod @staticmethod
def to_gpu(numpy_array): def to_gpu(numpy_array):
with cp.cuda.Device(pystencils.GPU_DEVICE): swaps = _get_index_swaps(numpy_array)
if numpy_array.base is not None and isinstance(numpy_array.base, np.ndarray):
with cp.cuda.Device(pystencils.GPU_DEVICE):
gpu_array = cp.asarray(numpy_array.base)
for a, b in reversed(swaps):
gpu_array = gpu_array.swapaxes(a, b)
return gpu_array
else:
return cp.asarray(numpy_array) return cp.asarray(numpy_array)
@staticmethod @staticmethod
def upload(array, numpy_array): def upload(array, numpy_array):
with cp.cuda.Device(pystencils.GPU_DEVICE): if numpy_array.base is not None and isinstance(numpy_array.base, np.ndarray):
array.set(numpy_array) with cp.cuda.Device(pystencils.GPU_DEVICE):
array.base.set(numpy_array.base)
else:
with cp.cuda.Device(pystencils.GPU_DEVICE):
array.set(numpy_array)
@staticmethod @staticmethod
def download(array, numpy_array): def download(array, numpy_array):
with cp.cuda.Device(pystencils.GPU_DEVICE): if numpy_array.base is not None and isinstance(numpy_array.base, np.ndarray):
numpy_array[:] = array.get() with cp.cuda.Device(pystencils.GPU_DEVICE):
numpy_array.base[:] = array.base.get()
else:
with cp.cuda.Device(pystencils.GPU_DEVICE):
numpy_array[:] = array.get()
@staticmethod @staticmethod
def randn(shape, dtype=np.float64): def randn(shape, dtype=np.float64):
with cp.cuda.Device(pystencils.GPU_DEVICE): with cp.cuda.Device(pystencils.GPU_DEVICE):
return cp.random.randn(*shape, dtype=dtype) return cp.random.randn(*shape, dtype=dtype)
@staticmethod
def pinned_numpy_array(layout, shape, dtype):
assert set(layout) == set(range(len(shape))), "Wrong layout descriptor"
cur_layout = list(range(len(shape)))
swaps = []
for i in range(len(layout)):
if cur_layout[i] != layout[i]:
index_to_swap_with = cur_layout.index(layout[i])
swaps.append((i, index_to_swap_with))
cur_layout[i], cur_layout[index_to_swap_with] = cur_layout[index_to_swap_with], cur_layout[i]
assert tuple(cur_layout) == tuple(layout)
shape = list(shape)
for a, b in swaps:
shape[a], shape[b] = shape[b], shape[a]
res = cpx.empty_pinned(tuple(shape), order='c', dtype=dtype)
for a, b in reversed(swaps):
res = res.swapaxes(a, b)
return res
from_numpy = to_gpu from_numpy = to_gpu
class GPUNotAvailableHandler:
    """Drop-in stand-in used when cupy cannot be imported.

    Every attribute access — including method lookups — raises immediately,
    so GPU code paths fail with a clear message rather than an obscure error.
    """

    def __getattribute__(self, name):
        # Intentionally unconditional: no attribute of this handler is usable.
        raise NotImplementedError("Unable to utilise cupy! Please make sure cupy works correctly in your setup!")
def _get_index_swaps(array):
swaps = []
if array.base is not None and isinstance(array.base, np.ndarray):
for stride in array.base.strides:
index_base = array.base.strides.index(stride)
index_view = array.strides.index(stride)
if index_base != index_view and (index_view, index_base) not in swaps:
swaps.append((index_base, index_view))
return swaps
...@@ -251,6 +251,20 @@ def test_add_arrays(): ...@@ -251,6 +251,20 @@ def test_add_arrays():
assert y == dh.fields['y'] assert y == dh.fields['y']
@pytest.mark.parametrize('shape', [(17, 12), (7, 11, 18)])
@pytest.mark.parametrize('layout', ['zyxf', 'fzyx'])
def test_add_arrays_with_layout(shape, layout):
    """The pinned CPU array and its GPU counterpart must agree in shape, strides and dtype."""
    pytest.importorskip('cupy')
    dh = create_data_handling(domain_size=shape, default_layout=layout, default_target=ps.Target.GPU)
    field = dh.add_array("f1", values_per_cell=19)
    dh.fill(field.name, 1.0)
    cpu_arr = dh.cpu_arrays[field.name]
    gpu_arr = dh.gpu_arrays[field.name]
    for attribute in ('shape', 'strides', 'dtype'):
        assert getattr(cpu_arr, attribute) == getattr(gpu_arr, attribute)
def test_get_kwarg(): def test_get_kwarg():
domain_shape = (10, 10) domain_shape = (10, 10)
field_description = 'src, dst' field_description = 'src, dst'
...@@ -373,3 +387,18 @@ def test_array_handler(): ...@@ -373,3 +387,18 @@ def test_array_handler():
random_array = array_handler.randn(size) random_array = array_handler.randn(size)
cpu_array = np.empty((20, 40), dtype=np.float64)
gpu_array = array_handler.to_gpu(cpu_array)
assert cpu_array.base is None
assert gpu_array.base is None
assert gpu_array.strides == cpu_array.strides
cpu_array2 = np.empty((20, 40), dtype=np.float64)
cpu_array2 = cpu_array2.swapaxes(0, 1)
gpu_array2 = array_handler.to_gpu(cpu_array2)
assert cpu_array2.base is not None
assert gpu_array2.base is not None
assert gpu_array2.strides == cpu_array2.strides
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment