Commit e6a30483 authored by Stephan Seitz

Extend SerialDataHandling to handle alternative GPU libraries

parent de753874
Merge request !119: Opencl datahandling
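The change threads an opencl_queue argument through create_data_handling and lets SerialDataHandling pick a pyopencl-backed array handler, so the existing GPU code path can run on OpenCL devices as well as on CUDA ones. A minimal usage sketch, modeled on the new parametrized test further down; pyopencl must be installed, and the domain size and field name are illustrative:

    from pystencils import create_data_handling
    from pystencils.opencl.opencljit import init_globally

    init_globally()   # create the global OpenCL context/queue used when no explicit queue is given

    # default_target='opencl' makes SerialDataHandling use PyOpenClArrayHandler internally
    dh = create_data_handling(domain_size=(16, 16), periodicity=True, default_target='opencl')
    dh.add_array('f', gpu=True)   # numpy array on the host plus a pyopencl array on the device
    dh.to_gpu('f')                # upload of the host contents goes through the array handler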
pystencils/datahandling/__init__.py

@@ -20,7 +20,8 @@ def create_data_handling(domain_size: Tuple[int, ...],
                          default_layout: str = 'SoA',
                          default_target: str = 'cpu',
                          parallel: bool = False,
-                         default_ghost_layers: int = 1) -> DataHandling:
+                         default_ghost_layers: int = 1,
+                         opencl_queue=None) -> DataHandling:
     """Creates a data handling instance.
     Args:
@@ -33,6 +34,7 @@ def create_data_handling(domain_size: Tuple[int, ...],
         default_ghost_layers: default number of ghost layers if not overwritten in 'add_array'
     """
     if parallel:
+        assert not opencl_queue, "OpenCL is only supported for SerialDataHandling"
         if wlb is None:
             raise ValueError("Cannot create parallel data handling because walberla module is not available")
@@ -56,8 +58,12 @@ def create_data_handling(domain_size: Tuple[int, ...],
         return ParallelDataHandling(blocks=block_storage, dim=dim, default_target=default_target,
                                     default_layout=default_layout, default_ghost_layers=default_ghost_layers)
     else:
-        return SerialDataHandling(domain_size, periodicity=periodicity, default_target=default_target,
-                                  default_layout=default_layout, default_ghost_layers=default_ghost_layers)
+        return SerialDataHandling(domain_size,
+                                  periodicity=periodicity,
+                                  default_target=default_target,
+                                  default_layout=default_layout,
+                                  default_ghost_layers=default_ghost_layers,
+                                  opencl_queue=opencl_queue)

 __all__ = ['create_data_handling']
New file: pystencils/datahandling/pycuda.py

try:
    import pycuda.gpuarray as gpuarray
except ImportError:
    gpuarray = None

import numpy as np

import pystencils


class PyCudaArrayHandler:

    def __init__(self):
        import pycuda.autoinit  # NOQA

    def zeros(self, shape, dtype=np.float64, order='C'):
        return gpuarray.zeros(shape, dtype, order)

    def ones(self, shape, dtype, order='C'):
        return gpuarray.ones(shape, dtype, order)

    def empty(self, shape, dtype=np.float64, layout=None):
        if layout:
            cpu_array = pystencils.field.create_numpy_array_with_layout(shape, dtype, layout)
            return self.from_numpy(cpu_array)
        else:
            return gpuarray.empty(shape, dtype)

    def to_gpu(self, array):
        return gpuarray.to_gpu(array)

    def upload(self, gpuarray, numpy_array):
        gpuarray.set(numpy_array)

    def download(self, gpuarray, numpy_array):
        gpuarray.get(numpy_array)

    def randn(self, shape, dtype=np.float64):
        cpu_array = np.random.randn(*shape).astype(dtype)
        return self.from_numpy(cpu_array)
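The handler wraps allocation and host/device transfer (zeros, ones, empty, to_gpu, upload, download, randn) behind a small interface that SerialDataHandling can call without depending on pycuda directly. A sketch of passing a handler explicitly through the new array_handler argument, assuming pycuda is installed; the SerialDataHandling module path is taken from the usual pystencils package layout, and shape and field name are illustrative:

    from pystencils.datahandling.serial_datahandling import SerialDataHandling
    from pystencils.datahandling.pycuda import PyCudaArrayHandler

    # Explicit handler; without this argument SerialDataHandling tries to build one itself.
    dh = SerialDataHandling(domain_size=(32, 32), default_target='gpu',
                            array_handler=PyCudaArrayHandler())
    dh.add_array('u', gpu=True)    # device copy allocated via the handler's to_gpu()
    assert dh.is_on_gpu('u')       # the field now has a GPU array managed by the handler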
New file: pystencils/datahandling/pyopencl.py

try:
    import pyopencl.array as gpuarray
except ImportError:
    gpuarray = None

import numpy as np

import pystencils


class PyOpenClArrayHandler:

    def __init__(self, queue):
        if not queue:
            from pystencils.opencl.opencljit import get_global_cl_queue
            queue = get_global_cl_queue()
        assert queue, "OpenCL queue missing"
        self.queue = queue

    def zeros(self, shape, dtype=np.float64, order='C'):
        return gpuarray.zeros(shape, dtype, order)

    def ones(self, shape, dtype, order='C'):
        return gpuarray.ones(self.queue, shape, dtype, order)

    def empty(self, shape, dtype=np.float64, layout=None):
        if layout:
            cpu_array = pystencils.field.create_numpy_array_with_layout(shape, dtype, layout)
            return self.from_numpy(cpu_array)
        else:
            return gpuarray.empty(self.queue, shape, dtype)

    def to_gpu(self, array):
        return gpuarray.to_device(self.queue, array)

    def upload(self, gpuarray, numpy_array):
        gpuarray.set(numpy_array, self.queue)

    def download(self, gpuarray, numpy_array):
        gpuarray.get(self.queue, numpy_array)

    def randn(self, shape, dtype=np.float64):
        cpu_array = np.random.randn(*shape).astype(dtype)
        return self.from_numpy(cpu_array)
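Unlike the CUDA handler, the OpenCL handler needs a pyopencl command queue; if none is passed it falls back to the global queue managed by pystencils.opencl.opencljit. A small sketch of constructing it with an explicit queue, using standard pyopencl calls (context and device selection are illustrative):

    import numpy as np
    import pyopencl as cl

    from pystencils.datahandling.pyopencl import PyOpenClArrayHandler

    ctx = cl.create_some_context()   # pick an OpenCL platform/device (may prompt interactively)
    queue = cl.CommandQueue(ctx)     # command queue used for all allocations and transfers

    handler = PyOpenClArrayHandler(queue)
    a = handler.empty((16, 16), dtype=np.float64)   # pyopencl array living on the device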
pystencils/datahandling/serial_datahandling.py

@@ -6,22 +6,24 @@ import numpy as np
 from pystencils.datahandling.blockiteration import SerialBlock
 from pystencils.datahandling.datahandling_interface import DataHandling
+from pystencils.datahandling.pycuda import PyCudaArrayHandler
+from pystencils.datahandling.pyopencl import PyOpenClArrayHandler
 from pystencils.field import (
     Field, FieldType, create_numpy_array_with_layout, layout_string_to_tuple, spatial_layout_string_to_tuple)
 from pystencils.slicing import normalize_slice, remove_ghost_layers
 from pystencils.utils import DotDict

-try:
-    import pycuda.gpuarray as gpuarray
-    import pycuda.autoinit  # NOQA
-except ImportError:
-    gpuarray = None

 class SerialDataHandling(DataHandling):

-    def __init__(self, domain_size: Sequence[int], default_ghost_layers: int = 1, default_layout: str = 'SoA',
-                 periodicity: Union[bool, Sequence[bool]] = False, default_target: str = 'cpu') -> None:
+    def __init__(self,
+                 domain_size: Sequence[int],
+                 default_ghost_layers: int = 1,
+                 default_layout: str = 'SoA',
+                 periodicity: Union[bool, Sequence[bool]] = False,
+                 default_target: str = 'cpu',
+                 opencl_queue=None,
+                 array_handler=None) -> None:
         """
         Creates a data handling for single node simulations.
@@ -43,6 +45,18 @@ class SerialDataHandling(DataHandling):
         self.custom_data_gpu = DotDict()
         self._custom_data_transfer_functions = {}

+        if array_handler:
+            self.array_handler = array_handler
+        else:
+            try:
+                self.array_handler = PyCudaArrayHandler()
+            except Exception:
+                self.array_handler = None
+
+            if default_target == 'opencl' or opencl_queue:
+                default_target = 'gpu'
+                self.array_handler = PyOpenClArrayHandler(opencl_queue)
+
         if periodicity is None or periodicity is False:
             periodicity = [False] * self.dim
         if periodicity is True:
@@ -126,7 +140,7 @@ class SerialDataHandling(DataHandling):
         if gpu:
             if name in self.gpu_arrays:
                 raise ValueError("GPU Field with this name already exists")
-            self.gpu_arrays[name] = gpuarray.to_gpu(cpu_arr)
+            self.gpu_arrays[name] = self.array_handler.to_gpu(cpu_arr)

         assert all(f.name != name for f in self.fields.values()), "Symbolic field with this name already exists"
         self.fields[name] = Field.create_from_numpy_array(name, cpu_arr, index_dimensions=index_dimensions,
@@ -222,7 +236,8 @@ class SerialDataHandling(DataHandling):
             self.to_gpu(name)

     def run_kernel(self, kernel_function, **kwargs):
-        arrays = self.gpu_arrays if kernel_function.ast.backend == 'gpucuda' else self.cpu_arrays
+        arrays = self.gpu_arrays if kernel_function.ast.backend == 'gpucuda' \
+            or kernel_function.ast.backend == 'opencl' else self.cpu_arrays
         kernel_function(**arrays, **kwargs)

     def get_kernel_kwargs(self, kernel_function, **kwargs):
@@ -236,14 +251,14 @@ class SerialDataHandling(DataHandling):
             transfer_func = self._custom_data_transfer_functions[name][1]
             transfer_func(self.custom_data_gpu[name], self.custom_data_cpu[name])
         else:
-            self.gpu_arrays[name].get(self.cpu_arrays[name])
+            self.array_handler.download(self.gpu_arrays[name], self.cpu_arrays[name])

     def to_gpu(self, name):
         if name in self._custom_data_transfer_functions:
             transfer_func = self._custom_data_transfer_functions[name][0]
             transfer_func(self.custom_data_gpu[name], self.custom_data_cpu[name])
         else:
-            self.gpu_arrays[name].set(self.cpu_arrays[name])
+            self.array_handler.upload(self.gpu_arrays[name], self.cpu_arrays[name])

     def is_on_gpu(self, name):
         return name in self.gpu_arrays
pystencils_tests/test_datahandling.py

@@ -6,6 +6,13 @@ import numpy as np
 import pystencils as ps
 from pystencils import create_data_handling, create_kernel

+try:
+    import pytest
+except ImportError:
+    import unittest.mock
+    pytest = unittest.mock.MagicMock()
+

 def basic_iteration(dh):
     dh.add_array('basic_iter_test_gl_default')
@@ -100,15 +107,9 @@ def synchronization(dh, test_gpu=False):
             np.testing.assert_equal(42, b[field_name])

-def kernel_execution_jacobi(dh, test_gpu=False):
-    if test_gpu:
-        try:
-            from pycuda import driver
-            import pycuda.autoinit
-        except ImportError:
-            print("Skipping kernel_execution_jacobi GPU version, because pycuda not available")
-            return
+def kernel_execution_jacobi(dh, target):
+    test_gpu = target == 'gpu' or target == 'opencl'
     dh.add_array('f', gpu=test_gpu)
     dh.add_array('tmp', gpu=test_gpu)
     stencil_2d = [(1, 0), (-1, 0), (0, 1), (0, -1)]
@@ -119,7 +120,7 @@ def kernel_execution_jacobi(dh, test_gpu=False):
     def jacobi():
         dh.fields.tmp.center @= sum(dh.fields.f.neighbors(stencil)) / len(stencil)

-    kernel = create_kernel(jacobi, target='gpu' if test_gpu else 'cpu').compile()
+    kernel = create_kernel(jacobi, target).compile()

     for b in dh.iterate(ghost_layers=1):
         b['f'].fill(42)
     dh.run_kernel(kernel)
@@ -196,17 +197,32 @@ def test_access_and_gather():
 def test_kernel():
     for domain_shape in [(4, 5), (3, 4, 5)]:
         dh = create_data_handling(domain_size=domain_shape, periodicity=True)
-        kernel_execution_jacobi(dh, test_gpu=True)
+        kernel_execution_jacobi(dh, 'cpu')
         reduction(dh)

         try:
             import pycuda
             dh = create_data_handling(domain_size=domain_shape, periodicity=True)
-            kernel_execution_jacobi(dh, test_gpu=False)
+            kernel_execution_jacobi(dh, 'gpu')
         except ImportError:
             pass

+@pytest.mark.parametrize('target', ('cpu', 'gpu', 'opencl'))
+def test_kernel_param(target):
+    for domain_shape in [(4, 5), (3, 4, 5)]:
+        if target == 'gpu':
+            pytest.importorskip('pycuda')
+        if target == 'opencl':
+            pytest.importorskip('pyopencl')
+            from pystencils.opencl.opencljit import init_globally
+            init_globally()
+        dh = create_data_handling(domain_size=domain_shape, periodicity=True, default_target=target)
+        kernel_execution_jacobi(dh, target)
+        reduction(dh)

 def test_vtk_output():
     for domain_shape in [(4, 5), (3, 4, 5)]:
         dh = create_data_handling(domain_size=domain_shape, periodicity=True)
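Because test_kernel_param is parametrized over ('cpu', 'gpu', 'opencl') and guards each backend with pytest.importorskip, the GPU and OpenCL cases are skipped automatically when pycuda or pyopencl is not installed. A single backend can be exercised with pytest's keyword filter, for example: pytest -k "test_kernel_param and opencl".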
...@@ -50,13 +50,13 @@ def test_kernel(): ...@@ -50,13 +50,13 @@ def test_kernel():
# 3D # 3D
blocks = wlb.createUniformBlockGrid(blocks=(3, 2, 4), cellsPerBlock=(3, 2, 5), oneBlockPerProcess=False) blocks = wlb.createUniformBlockGrid(blocks=(3, 2, 4), cellsPerBlock=(3, 2, 5), oneBlockPerProcess=False)
dh = ParallelDataHandling(blocks) dh = ParallelDataHandling(blocks)
kernel_execution_jacobi(dh, test_gpu=gpu) kernel_execution_jacobi(dh, 'gpu')
reduction(dh) reduction(dh)
# 2D # 2D
blocks = wlb.createUniformBlockGrid(blocks=(3, 2, 1), cellsPerBlock=(3, 2, 1), oneBlockPerProcess=False) blocks = wlb.createUniformBlockGrid(blocks=(3, 2, 1), cellsPerBlock=(3, 2, 1), oneBlockPerProcess=False)
dh = ParallelDataHandling(blocks, dim=2) dh = ParallelDataHandling(blocks, dim=2)
kernel_execution_jacobi(dh, test_gpu=gpu) kernel_execution_jacobi(dh, 'gpu')
reduction(dh) reduction(dh)
......