diff --git a/pystencils/datahandling/serial_datahandling.py b/pystencils/datahandling/serial_datahandling.py index eb352202d13771e3ef77dc8fd31743577ba02eea..30be806b0adee90cbd61f47769023ee8f0e68743 100644 --- a/pystencils/datahandling/serial_datahandling.py +++ b/pystencils/datahandling/serial_datahandling.py @@ -127,8 +127,12 @@ class SerialDataHandling(DataHandling): # cpu_arr is always created - since there is no create_gpu_array_with_layout() byte_offset = ghost_layers * np.dtype(dtype).itemsize - cpu_arr = create_numpy_array_with_layout(layout=layout_tuple, alignment=alignment, - byte_offset=byte_offset, **kwargs) + + if gpu: + cpu_arr = self.array_handler.pinned_numpy_array(shape=kwargs['shape'], layout=layout_tuple, dtype=dtype) + else: + cpu_arr = create_numpy_array_with_layout(layout=layout_tuple, alignment=alignment, + byte_offset=byte_offset, **kwargs) if alignment and gpu: raise NotImplementedError("Alignment for GPU fields not supported") diff --git a/pystencils/gpu/gpu_array_handler.py b/pystencils/gpu/gpu_array_handler.py index 9aa0db386277cb7b7e06f486c92a8c79f619c009..9031036f42a29f5a8adb2e08905c990ec8b441e8 100644 --- a/pystencils/gpu/gpu_array_handler.py +++ b/pystencils/gpu/gpu_array_handler.py @@ -1,7 +1,9 @@ try: import cupy as cp + import cupyx as cpx except ImportError: cp = None + cpx = None import numpy as np import pystencils @@ -25,27 +27,75 @@ class GPUArrayHandler: @staticmethod def to_gpu(numpy_array): - with cp.cuda.Device(pystencils.GPU_DEVICE): + swaps = _get_index_swaps(numpy_array) + if numpy_array.base is not None and isinstance(numpy_array.base, np.ndarray): + with cp.cuda.Device(pystencils.GPU_DEVICE): + gpu_array = cp.asarray(numpy_array.base) + for a, b in reversed(swaps): + gpu_array = gpu_array.swapaxes(a, b) + return gpu_array + else: return cp.asarray(numpy_array) @staticmethod def upload(array, numpy_array): - with cp.cuda.Device(pystencils.GPU_DEVICE): - array.set(numpy_array) + if numpy_array.base is not None and isinstance(numpy_array.base, np.ndarray): + with cp.cuda.Device(pystencils.GPU_DEVICE): + array.base.set(numpy_array.base) + else: + with cp.cuda.Device(pystencils.GPU_DEVICE): + array.set(numpy_array) @staticmethod def download(array, numpy_array): - with cp.cuda.Device(pystencils.GPU_DEVICE): - numpy_array[:] = array.get() + if numpy_array.base is not None and isinstance(numpy_array.base, np.ndarray): + with cp.cuda.Device(pystencils.GPU_DEVICE): + numpy_array.base[:] = array.base.get() + else: + with cp.cuda.Device(pystencils.GPU_DEVICE): + numpy_array[:] = array.get() @staticmethod def randn(shape, dtype=np.float64): with cp.cuda.Device(pystencils.GPU_DEVICE): return cp.random.randn(*shape, dtype=dtype) + @staticmethod + def pinned_numpy_array(layout, shape, dtype): + assert set(layout) == set(range(len(shape))), "Wrong layout descriptor" + cur_layout = list(range(len(shape))) + swaps = [] + for i in range(len(layout)): + if cur_layout[i] != layout[i]: + index_to_swap_with = cur_layout.index(layout[i]) + swaps.append((i, index_to_swap_with)) + cur_layout[i], cur_layout[index_to_swap_with] = cur_layout[index_to_swap_with], cur_layout[i] + assert tuple(cur_layout) == tuple(layout) + + shape = list(shape) + for a, b in swaps: + shape[a], shape[b] = shape[b], shape[a] + + res = cpx.empty_pinned(tuple(shape), order='c', dtype=dtype) + + for a, b in reversed(swaps): + res = res.swapaxes(a, b) + return res + from_numpy = to_gpu class GPUNotAvailableHandler: def __getattribute__(self, name): raise NotImplementedError("Unable to utilise cupy! Please make sure cupy works correctly in your setup!") + + +def _get_index_swaps(array): + swaps = [] + if array.base is not None and isinstance(array.base, np.ndarray): + for stride in array.base.strides: + index_base = array.base.strides.index(stride) + index_view = array.strides.index(stride) + if index_base != index_view and (index_view, index_base) not in swaps: + swaps.append((index_base, index_view)) + return swaps diff --git a/pystencils_tests/test_datahandling.py b/pystencils_tests/test_datahandling.py index 0f48ac2ca046c6241665f4d66643689dab087f67..7f73a865b036986e899c233045fe0d6387953696 100644 --- a/pystencils_tests/test_datahandling.py +++ b/pystencils_tests/test_datahandling.py @@ -251,6 +251,20 @@ def test_add_arrays(): assert y == dh.fields['y'] +@pytest.mark.parametrize('shape', [(17, 12), (7, 11, 18)]) +@pytest.mark.parametrize('layout', ['zyxf', 'fzyx']) +def test_add_arrays_with_layout(shape, layout): + pytest.importorskip('cupy') + + dh = create_data_handling(domain_size=shape, default_layout=layout, default_target=ps.Target.GPU) + f1 = dh.add_array("f1", values_per_cell=19) + dh.fill(f1.name, 1.0) + + assert dh.cpu_arrays[f1.name].shape == dh.gpu_arrays[f1.name].shape + assert dh.cpu_arrays[f1.name].strides == dh.gpu_arrays[f1.name].strides + assert dh.cpu_arrays[f1.name].dtype == dh.gpu_arrays[f1.name].dtype + + def test_get_kwarg(): domain_shape = (10, 10) field_description = 'src, dst' @@ -373,3 +387,18 @@ def test_array_handler(): random_array = array_handler.randn(size) + cpu_array = np.empty((20, 40), dtype=np.float64) + gpu_array = array_handler.to_gpu(cpu_array) + + assert cpu_array.base is None + assert gpu_array.base is None + assert gpu_array.strides == cpu_array.strides + + cpu_array2 = np.empty((20, 40), dtype=np.float64) + cpu_array2 = cpu_array2.swapaxes(0, 1) + gpu_array2 = array_handler.to_gpu(cpu_array2) + + assert cpu_array2.base is not None + assert gpu_array2.base is not None + assert gpu_array2.strides == cpu_array2.strides +