
Compare revisions

Changes are shown as if the source revision was being merged into the target revision.

Commits on Source (4)
Showing with 113 additions and 613 deletions
# Change Log
## Unreleased
### Removed
* LLVM backend because it was not used much and not well integrated into pystencils.
* OpenCL backend because it was not used much and not well integrated into pystencils.
...@@ -2,6 +2,7 @@ include README.md
include COPYING.txt
include AUTHORS.txt
include CONTRIBUTING.md
CHANGELOG.md
global-include *.pyx
include versioneer.py
include pystencils/_version.py
...@@ -53,14 +53,12 @@ Without `[interactive]` you get a minimal version with very little dependencies.
All options:
- `gpu`: use this if an NVIDIA GPU is available and CUDA is installed
- `opencl`: basic OpenCL support (experimental)
- `alltrafos`: pulls in additional dependencies for loop simplification e.g. libisl
- `bench_db`: functionality to store benchmark result in object databases
- `interactive`: installs dependencies to work in Jupyter including image I/O, plotting etc.
- `autodiff`: enable derivation of adjoint kernels and generation of Torch/Tensorflow operations
- `doc`: packages to build documentation
- `kerncraft`: use kerncraft for automatic performance analysis
- `llvm_jit`: llvmlite as additional CPU backend
Options can be combined e.g.
```bash
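# Added illustration (not part of this diff): the original example is cut off
# by the hunk boundary, so this is a hedged placeholder showing how the extras
# listed above can be combined in one install command.
pip install pystencils[interactive,alltrafos]
```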
...@@ -73,12 +71,12 @@ Documentation
-------------
Read the docs [here](https://pycodegen.pages.i10git.cs.fau.de/pystencils) and
check out the Jupyter notebooks in `doc/notebooks`.
check out the Jupyter notebooks in `doc/notebooks`. The **Changelog** of pystencils can be found [here](https://i10git.cs.fau.de/pycodegen/pystencils/-/blob/master/CHANGELOG.md).
Authors
-------
Many thanks go to the [contributors](AUTHORS.txt) of pystencils.
Many thanks go to the [contributors](https://i10git.cs.fau.de/pycodegen/pystencils/-/blob/master/AUTHORS.txt) of pystencils.
### Please cite us
......
This source diff could not be displayed because it is too large.
...@@ -6,9 +6,3 @@ try:
__all__.append('print_dot')
except ImportError:
pass
try:
from .llvm import generate_llvm # NOQA
__all__.append('generate_llvm')
except ImportError:
pass
...@@ -47,7 +47,7 @@ def generate_c(ast_node: Node,
Args:
ast_node: ast representation of kernel
signature_only: generate signature without function body
dialect: `Backend`: 'C', 'CUDA' or 'OPENCL'
dialect: `Backend`: 'C' or 'CUDA'
custom_backend: use own custom printer for code generation
with_globals: enable usage of global variables
Returns:
...@@ -71,9 +71,6 @@ def generate_c(ast_node: Node,
elif dialect == Backend.CUDA:
from pystencils.backends.cuda_backend import CudaBackend
printer = CudaBackend(signature_only=signature_only)
elif dialect == Backend.OPENCL:
from pystencils.backends.opencl_backend import OpenClBackend
printer = OpenClBackend(signature_only=signature_only)
else:
raise ValueError(f'Unknown {dialect=}')
code = printer(ast_node)
......
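For context (added here, not part of the diff): a minimal sketch of how `generate_c` might be called after this change. The field names `dst`/`src` and the update rule are illustrative only.

```python
import pystencils as ps
from pystencils.backends.cbackend import generate_c
from pystencils.enums import Backend

# illustrative fields and a simple averaging update (not taken from the MR)
dst, src = ps.fields('dst, src: double[2D]')
update = ps.Assignment(dst.center,
                       0.25 * (src[1, 0] + src[-1, 0] + src[0, 1] + src[0, -1]))

ast = ps.create_kernel(update)              # CPU kernel AST
code = generate_c(ast, dialect=Backend.C)   # Backend.OPENCL is no longer a valid dialect
print(code)
```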
acos
acosh
acospi
asin
asinh
asinpi
atan
atan2
atanh
atanpi
atan2pi
cbrt
ceil
copysign
cos
cosh
cospi
erfc
erf
exp
exp2
exp10
expm1
fabs
fdim
floor
fma
fmax
fmin
fmod
fract
frexp
hypot
ilogb
ldexp
lgamma
lgamma_r
log
log2
log10
log1p
logb
mad
maxmag
minmag
modf
nextafter
pow
pown
powr
remquo
rint
rootn
round
rsqrt
sin
sincos
sinh
sinpi
sqrt
tan
tanh
tanpi
tgamma
trunc
half_cos
half_divide
half_exp
half_exp2
half_exp10
half_log
half_log2
half_log10
half_powr
half_recip
half_rsqrt
half_sin
half_sqrt
half_tan
native_cos
native_divide
native_exp
native_exp2
native_exp10
native_log
native_log2
native_log10
native_powr
native_recip
native_rsqrt
native_sin
native_sqrt
native_tan
from os.path import dirname, join
import pystencils.data_types
from pystencils.astnodes import Node
from pystencils.backends.cbackend import CustomSympyPrinter, generate_c
from pystencils.backends.cuda_backend import CudaBackend, CudaSympyPrinter
from pystencils.enums import Backend
from pystencils.fast_approximation import fast_division, fast_inv_sqrt, fast_sqrt
with open(join(dirname(__file__), 'opencl1.1_known_functions.txt')) as f:
lines = f.readlines()
OPENCL_KNOWN_FUNCTIONS = {l.strip(): l.strip() for l in lines if l}
def generate_opencl(ast_node: Node, signature_only: bool = False, custom_backend=None, with_globals=True) -> str:
"""Prints an abstract syntax tree node (made for `Target` 'GPU') as OpenCL code. # TODO Backend instead of Target?
Args:
ast_node: ast representation of kernel
signature_only: generate signature without function body
custom_backend: use own custom printer for code generation
with_globals: enable usage of global variables
Returns:
OpenCL code for the ast node and its descendants
"""
return generate_c(ast_node, signature_only, dialect=Backend.OPENCL,
custom_backend=custom_backend, with_globals=with_globals)
class OpenClBackend(CudaBackend):
def __init__(self,
sympy_printer=None,
signature_only=False):
if not sympy_printer:
sympy_printer = OpenClSympyPrinter()
super().__init__(sympy_printer, signature_only)
self._dialect = Backend.OPENCL
def _print_Type(self, node):
code = super()._print_Type(node)
if isinstance(node, pystencils.data_types.PointerType):
return "__global " + code
else:
return code
def _print_ThreadBlockSynchronization(self, node):
raise NotImplementedError()
def _print_TextureDeclaration(self, node):
raise NotImplementedError()
class OpenClSympyPrinter(CudaSympyPrinter):
language = "OpenCL"
DIMENSION_MAPPING = {
'x': '0',
'y': '1',
'z': '2'
}
INDEXING_FUNCTION_MAPPING = {
'blockIdx': 'get_group_id',
'threadIdx': 'get_local_id',
'blockDim': 'get_local_size',
'gridDim': 'get_global_size'
}
def __init__(self):
CustomSympyPrinter.__init__(self)
self.known_functions = OPENCL_KNOWN_FUNCTIONS
def _print_Type(self, node):
code = super()._print_Type(node)
if isinstance(node, pystencils.data_types.PointerType):
return "__global " + code
else:
return code
def _print_ThreadIndexingSymbol(self, node):
symbol_name: str = node.name
function_name, dimension = tuple(symbol_name.split("."))
dimension = self.DIMENSION_MAPPING[dimension]
function_name = self.INDEXING_FUNCTION_MAPPING[function_name]
return f"(int64_t) {function_name}({dimension})"
def _print_TextureAccess(self, node):
raise NotImplementedError()
# For math functions, OpenCL is more similar to the C++ printer CustomSympyPrinter
# since built-in math functions are generic.
# In CUDA, you have to differentiate between `sin` and `sinf`
try:
_print_math_func = CustomSympyPrinter._print_math_func
except AttributeError:
pass
_print_Pow = CustomSympyPrinter._print_Pow
def _print_Function(self, expr):
if isinstance(expr, fast_division):
return "native_divide(%s, %s)" % tuple(self._print(a) for a in expr.args)
elif isinstance(expr, fast_sqrt):
return f"native_sqrt({tuple(self._print(a) for a in expr.args)})"
elif isinstance(expr, fast_inv_sqrt):
return f"native_rsqrt({tuple(self._print(a) for a in expr.args)})"
return CustomSympyPrinter._print_Function(self, expr)
...@@ -138,22 +138,11 @@ def create_folder(path, is_file):
pass
def get_llc_command():
"""Try to get executable for llvm's IR compiler llc
We try if one of the following is in PATH: llc, llc-10, llc-9, llc-8, llc-7, llc-6
"""
candidates = ['llc', 'llc-10', 'llc-9', 'llc-8', 'llc-7', 'llc-6']
found_executables = (e for e in candidates if shutil.which(e))
return next(found_executables, None)
def read_config():
if platform.system().lower() == 'linux':
default_compiler_config = OrderedDict([
('os', 'linux'),
('command', 'g++'),
('llc_command', get_llc_command() or 'llc'),
('flags', '-Ofast -DNDEBUG -fPIC -march=native -fopenmp -std=c++11'),
('restrict_qualifier', '__restrict__')
])
...@@ -164,7 +153,6 @@ def read_config():
default_compiler_config = OrderedDict([
('os', 'windows'),
('msvc_version', 'latest'),
('llc_command', get_llc_command() or 'llc'),
('arch', 'x64'),
('flags', '/Ox /fp:fast /OpenMP /arch:avx'),
('restrict_qualifier', '__restrict')
...@@ -173,7 +161,6 @@ def read_config():
default_compiler_config = OrderedDict([
('os', 'darwin'),
('command', 'clang++'),
('llc_command', get_llc_command() or 'llc'),
('flags', '-Ofast -DNDEBUG -fPIC -march=native -Xclang -fopenmp -std=c++11'),
('restrict_qualifier', '__restrict__')
])
......
...@@ -13,12 +13,6 @@ import pystencils
from pystencils.cache import memorycache, memorycache_if_hashable
from pystencils.utils import all_equal
try:
import llvmlite.ir as ir
except ImportError as e:
ir = None
_ir_importerror = e
def typed_symbols(names, dtype, *args):
symbols = sp.symbols(names, *args)
...@@ -373,67 +367,6 @@ to_ctypes.map = {
}
def ctypes_from_llvm(data_type):
if not ir:
raise _ir_importerror
if isinstance(data_type, ir.PointerType):
ctype = ctypes_from_llvm(data_type.pointee)
if ctype is None:
return ctypes.c_void_p
else:
return ctypes.POINTER(ctype)
elif isinstance(data_type, ir.IntType):
if data_type.width == 8:
return ctypes.c_int8
elif data_type.width == 16:
return ctypes.c_int16
elif data_type.width == 32:
return ctypes.c_int32
elif data_type.width == 64:
return ctypes.c_int64
else:
raise ValueError("Int width %d is not supported" % data_type.width)
elif isinstance(data_type, ir.FloatType):
return ctypes.c_float
elif isinstance(data_type, ir.DoubleType):
return ctypes.c_double
elif isinstance(data_type, ir.VoidType):
return None # Void type is not supported by ctypes
else:
raise NotImplementedError(f'Data type {type(data_type)} of {data_type} is not supported yet')
def to_llvm_type(data_type, nvvm_target=False):
"""
Transforms a given type into ctypes
:param data_type: Subclass of Type
:return: llvmlite type object
"""
if not ir:
raise _ir_importerror
if isinstance(data_type, PointerType):
return to_llvm_type(data_type.base_type).as_pointer(1 if nvvm_target else 0)
else:
return to_llvm_type.map[data_type.numpy_dtype]
if ir:
to_llvm_type.map = {
np.dtype(np.int8): ir.IntType(8),
np.dtype(np.int16): ir.IntType(16),
np.dtype(np.int32): ir.IntType(32),
np.dtype(np.int64): ir.IntType(64),
np.dtype(np.uint8): ir.IntType(8),
np.dtype(np.uint16): ir.IntType(16),
np.dtype(np.uint32): ir.IntType(32),
np.dtype(np.uint64): ir.IntType(64),
np.dtype(np.float32): ir.FloatType(),
np.dtype(np.float64): ir.DoubleType(),
}
def peel_off_type(dtype, type_to_peel_off):
while type(dtype) is type_to_peel_off:
dtype = dtype.base_type
......
...@@ -23,8 +23,7 @@ def create_data_handling(domain_size: Tuple[int, ...],
default_layout: str = 'SoA',
default_target: Target = Target.CPU,
parallel: bool = False,
default_ghost_layers: int = 1,
default_ghost_layers: int = 1) -> DataHandling:
opencl_queue=None) -> DataHandling:
"""Creates a data handling instance.
Args:
...@@ -43,7 +42,6 @@ def create_data_handling(domain_size: Tuple[int, ...],
default_target = new_target
if parallel:
assert not opencl_queue, "OpenCL is only supported for SerialDataHandling"
if wlb is None:
raise ValueError("Cannot create parallel data handling because walberla module is not available")
...@@ -71,8 +69,7 @@ def create_data_handling(domain_size: Tuple[int, ...],
periodicity=periodicity,
default_target=default_target,
default_layout=default_layout,
default_ghost_layers=default_ghost_layers,
default_ghost_layers=default_ghost_layers)
opencl_queue=opencl_queue)
__all__ = ['create_data_handling']
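Added usage sketch (not from the MR): how the simplified `create_data_handling` call looks after the `opencl_queue` parameter was dropped. The domain size and field name `f` are illustrative.

```python
import pystencils as ps

# serial data handling; only CPU and CUDA targets remain after this MR
dh = ps.create_data_handling(domain_size=(32, 32), periodicity=(True, True),
                             default_target=ps.Target.CPU)
dh.add_array('f', values_per_cell=1)

for block in dh.iterate():
    block['f'].fill(0.0)   # initialize the field block-wise
```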
...@@ -17,8 +17,8 @@ class DataHandling(ABC):
'gather' function that collects (parts of the) distributed data on a single process.
"""
_GPU_LIKE_TARGETS = [Target.GPU, Target.OPENCL]
_GPU_LIKE_TARGETS = [Target.GPU]
_GPU_LIKE_BACKENDS = [Backend.CUDA, Backend.OPENCL]
_GPU_LIKE_BACKENDS = [Backend.CUDA]
# ---------------------------- Adding and accessing data -----------------------------------------------------------
@property
......
try:
import pyopencl.array as gpuarray
except ImportError:
gpuarray = None
import numpy as np
import pystencils
class PyOpenClArrayHandler:
def __init__(self, queue):
if not queue:
from pystencils.opencl.opencljit import get_global_cl_queue
queue = get_global_cl_queue()
assert queue, "OpenCL queue missing!\n" \
"Use `import pystencils.opencl.autoinit` if you want it to be automatically created"
self.queue = queue
def zeros(self, shape, dtype=np.float64, order='C'):
cpu_array = np.zeros(shape=shape, dtype=dtype, order=order)
return self.to_gpu(cpu_array)
def ones(self, shape, dtype=np.float64, order='C'):
cpu_array = np.ones(shape=shape, dtype=dtype, order=order)
return self.to_gpu(cpu_array)
def empty(self, shape, dtype=np.float64, layout=None):
if layout:
cpu_array = pystencils.field.create_numpy_array_with_layout(shape=shape, dtype=dtype, layout=layout)
return self.to_gpu(cpu_array)
else:
return gpuarray.empty(self.queue, shape, dtype)
def to_gpu(self, array):
return gpuarray.to_device(self.queue, array)
def upload(self, gpuarray, numpy_array):
gpuarray.set(numpy_array, self.queue)
def download(self, gpuarray, numpy_array):
gpuarray.get(self.queue, numpy_array)
def randn(self, shape, dtype=np.float64):
cpu_array = np.random.randn(*shape).astype(dtype)
return self.from_numpy(cpu_array)
from_numpy = to_gpu
...@@ -7,7 +7,6 @@ import numpy as np
from pystencils.datahandling.blockiteration import SerialBlock
from pystencils.datahandling.datahandling_interface import DataHandling
from pystencils.datahandling.pycuda import PyCudaArrayHandler, PyCudaNotAvailableHandler
from pystencils.datahandling.pyopencl import PyOpenClArrayHandler
from pystencils.enums import Target
from pystencils.field import (
Field, FieldType, create_numpy_array_with_layout, layout_string_to_tuple,
...@@ -24,8 +23,6 @@ class SerialDataHandling(DataHandling):
default_layout: str = 'SoA',
periodicity: Union[bool, Sequence[bool]] = False,
default_target: Target = Target.CPU,
opencl_queue=None,
opencl_ctx=None,
array_handler=None) -> None:
"""
Creates a data handling for single node simulations.
...@@ -48,17 +45,12 @@ class SerialDataHandling(DataHandling):
self.custom_data_cpu = DotDict()
self.custom_data_gpu = DotDict()
self._custom_data_transfer_functions = {}
self._opencl_queue = opencl_queue
self._opencl_ctx = opencl_ctx
if not array_handler:
try:
self.array_handler = PyCudaArrayHandler()
except Exception:
self.array_handler = PyCudaNotAvailableHandler()
if default_target == Target.OPENCL or opencl_queue:
self.array_handler = PyOpenClArrayHandler(opencl_queue)
else:
self.array_handler = array_handler
...@@ -280,8 +272,6 @@ class SerialDataHandling(DataHandling):
def synchronization_function(self, names, stencil=None, target=None, functor=None, **_):
if target is None:
target = self.default_target
if target == Target.OPENCL: # TODO potential misuse between Target and Backend
target = Target.GPU
assert target in (Target.CPU, Target.GPU)
if not hasattr(names, '__len__') or type(names) is str:
names = [names]
...@@ -324,16 +314,13 @@ class SerialDataHandling(DataHandling):
else:
if functor is None:
from pystencils.gpucuda.periodicity import get_periodic_boundary_functor as functor
target = Target.GPU if not isinstance(self.array_handler,
target = Target.GPU
PyOpenClArrayHandler) else Target.OPENCL
result.append(functor(filtered_stencil, self._domainSize,
index_dimensions=self.fields[name].index_dimensions,
index_dim_shape=values_per_cell,
dtype=self.fields[name].dtype.numpy_dtype,
ghost_layers=gls,
target=target,
target=target))
opencl_queue=self._opencl_queue,
opencl_ctx=self._opencl_ctx))
if target == Target.CPU:
def result_functor():
......
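A short usage sketch (added, not from the diff): with the OpenCL branch removed, `synchronization_function` only distinguishes CPU and GPU targets. The field name `f` is illustrative.

```python
import pystencils as ps

dh = ps.create_data_handling((16, 16), periodicity=(True, True))
dh.add_array('f')
sync_f = dh.synchronization_function(['f'])  # target defaults to dh.default_target (CPU here)
sync_f()                                     # performs the periodic ghost-layer exchange for 'f'
```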
...@@ -46,7 +46,7 @@ def get_code_obj(ast: Union[KernelFunction, KernelWrapper], custom_backend=None)
if isinstance(ast, KernelWrapper):
ast = ast.ast
if ast.backend not in {Backend.C, Backend.CUDA, Backend.OPENCL}:
if ast.backend not in {Backend.C, Backend.CUDA}:
raise NotImplementedError(f'get_code_obj is not implemented for backend {ast.backend}')
dialect = ast.backend
......
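For illustration (not part of the MR): generated code can still be inspected via `show_code`/`get_code_obj`, which now accept only the C and CUDA backends. The fields and assignment below are made up.

```python
import pystencils as ps

a, b = ps.fields('a, b: float64[2D]')                      # illustrative fields
ast = ps.create_kernel(ps.Assignment(a.center, 2 * b.center))
ps.show_code(ast)                                          # renders the generated C code
```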
...@@ -13,10 +13,6 @@ class Target(Enum):
"""
Target GPU architecture.
"""
OPENCL = auto()
"""
Target all architectures OpenCL covers (Thus both, Target and Backend)
"""
class Backend(Enum):
...@@ -28,16 +24,7 @@ class Backend(Enum):
"""
Use the C Backend of pystencils.
"""
LLVM = auto()
r"""
Use the ``llvmlite`` package to transform the pystencils AST to LLVM IR.
From this point, all of LLVM's optimisations can be used.
"""
CUDA = auto()
"""
Use the CUDA backend to generate code for NVIDIA GPUs.
"""
OPENCL = auto()
"""
Use the OpenCL backend to generate code for OpenCL.
"""
...@@ -2,7 +2,6 @@ import numpy as np
from itertools import product
import pystencils.gpucuda
import pystencils.opencl
from pystencils import Assignment, Field
from pystencils.gpucuda.kernelcreation import create_cuda_kernel
from pystencils.enums import Target
...@@ -32,19 +31,14 @@ def create_copy_kernel(domain_size, from_slice, to_slice, index_dimensions=0, in
def get_periodic_boundary_functor(stencil, domain_size, index_dimensions=0, index_dim_shape=1, ghost_layers=1,
thickness=None, dtype=float, target=Target.GPU, opencl_queue=None, opencl_ctx=None):
thickness=None, dtype=float, target=Target.GPU):
assert target in {Target.GPU, Target.OPENCL}
assert target in {Target.GPU}
src_dst_slice_tuples = get_periodic_boundary_src_dst_slices(stencil, ghost_layers, thickness)
kernels = []
for src_slice, dst_slice in src_dst_slice_tuples:
ast = create_copy_kernel(domain_size, src_slice, dst_slice, index_dimensions, index_dim_shape, dtype)
if target == pystencils.Target.GPU:
kernels.append(pystencils.gpucuda.make_python_function(ast))
kernels.append(pystencils.gpucuda.make_python_function(ast))
else:
ast._target = pystencils.Target.OPENCL
ast._backend = pystencils.Backend.OPENCL
kernels.append(pystencils.opencl.make_python_function(ast, opencl_queue, opencl_ctx))
def functor(pdfs, **_):
for kernel in kernels:
......
# -*- coding: utf-8 -*-
#
# Copyright © 2019 Stephan Seitz <stephan.seitz@fau.de>
#
# Distributed under terms of the GPLv3 license.
"""
"""
from typing import Union
import numpy as np
try:
import pycuda.driver as cuda
from pycuda import gpuarray
import pycuda
except Exception:
pass
def ndarray_to_tex(tex_ref, # type: Union[cuda.TextureReference, cuda.SurfaceReference]
ndarray,
address_mode=None,
filter_mode=None,
use_normalized_coordinates=False,
read_as_integer=False):
if isinstance(address_mode, str):
address_mode = getattr(pycuda.driver.address_mode, address_mode.upper())
if address_mode is None:
address_mode = cuda.address_mode.BORDER
if filter_mode is None:
filter_mode = cuda.filter_mode.LINEAR
if isinstance(ndarray, np.ndarray):
cu_array = cuda.np_to_array(ndarray, 'C')
elif isinstance(ndarray, gpuarray.GPUArray):
cu_array = cuda.gpuarray_to_array(ndarray, 'C')
else:
raise TypeError(
'ndarray must be numpy.ndarray or pycuda.gpuarray.GPUArray')
tex_ref.set_array(cu_array)
tex_ref.set_address_mode(0, address_mode)
if ndarray.ndim >= 2:
tex_ref.set_address_mode(1, address_mode)
if ndarray.ndim >= 3:
tex_ref.set_address_mode(2, address_mode)
tex_ref.set_filter_mode(filter_mode)
if not use_normalized_coordinates:
tex_ref.set_flags(tex_ref.get_flags() & ~cuda.TRSF_NORMALIZED_COORDINATES)
if not read_as_integer:
tex_ref.set_flags(tex_ref.get_flags() & ~cuda.TRSF_READ_AS_INTEGER)
import functools
import itertools
import warnings
from dataclasses import dataclass, field
...@@ -105,14 +104,6 @@ class CreateKernelConfig:
"""
If set to `True`, auto can be used in the generated code for data types. This makes the type system more robust.
"""
opencl_queue: Any = None
"""
OpenCL queue if OpenCL target is used.
"""
opencl_ctx: Any = None
"""
OpenCL context if OpenCL target is used.
"""
index_fields: List[Field] = None
"""
List of index fields, i.e. 1D fields with struct data type. If not `None`, `create_index_kernel`
...@@ -136,8 +127,6 @@ class CreateKernelConfig:
self.backend = Backend.C
elif self.target == Target.GPU:
self.backend = Backend.CUDA
elif self.target == Target.OPENCL:
self.backend = Backend.OPENCL
else:
raise NotImplementedError(f'Target {self.target} has no default backend')
...@@ -274,25 +263,14 @@ def create_domain_kernel(assignments: List[Assignment], *, config: CreateKernelC
raise ValueError("Blocking cannot be combined with cacheline-zeroing")
else:
raise ValueError("Invalid value for cpu_vectorize_info")
elif config.backend == Backend.LLVM:
elif config.target == Target.GPU:
from pystencils.llvm import create_kernel
if config.backend == Backend.CUDA:
ast = create_kernel(assignments, function_name=config.function_name, type_info=config.data_type,
split_groups=split_groups, iteration_slice=config.iteration_slice,
ghost_layers=config.ghost_layers)
elif config.target == Target.GPU or config.target == Target.OPENCL:
if config.backend == Backend.CUDA or config.backend == Backend.OPENCL:
from pystencils.gpucuda import create_cuda_kernel
ast = create_cuda_kernel(assignments, function_name=config.function_name, type_info=config.data_type,
indexing_creator=indexing_creator_from_params(config.gpu_indexing,
config.gpu_indexing_params),
iteration_slice=config.iteration_slice, ghost_layers=config.ghost_layers,
skip_independence_check=config.skip_independence_check)
if config.backend == Backend.OPENCL:
from pystencils.opencl.opencljit import make_python_function
ast._backend = config.backend
ast.compile = functools.partial(make_python_function, ast, config.opencl_queue, config.opencl_ctx)
ast._target = config.target
ast._backend = config.backend
if not ast:
raise NotImplementedError(
...@@ -354,8 +332,8 @@ def create_indexed_kernel(assignments: List[Assignment], *, config: CreateKernel
coordinate_names=config.coordinate_names)
if config.cpu_openmp:
add_openmp(ast, num_threads=config.cpu_openmp)
elif config.target == Target.GPU or config.target == Target.OPENCL:
elif config.target == Target.GPU:
if config.backend == Backend.CUDA or config.backend == Backend.OPENCL:
if config.backend == Backend.CUDA:
from pystencils.gpucuda import created_indexed_cuda_kernel
idx_creator = indexing_creator_from_params(config.gpu_indexing, config.gpu_indexing_params)
ast = created_indexed_cuda_kernel(assignments,
...@@ -363,12 +341,6 @@ def create_indexed_kernel(assignments: List[Assignment], *, config: CreateKernel
type_info=config.data_type,
coordinate_names=config.coordinate_names,
indexing_creator=idx_creator)
if config.backend == Backend.OPENCL:
from pystencils.opencl.opencljit import make_python_function
ast._backend = config.backend
ast.compile = functools.partial(make_python_function, ast, config.opencl_queue, config.opencl_ctx)
ast._target = config.target
ast._backend = config.backend
if not ast:
raise NotImplementedError(f'Indexed kernels are not yet supported for {config.target} with {config.backend}')
......
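To close (added illustration, not from the MR): after this change the remaining target/backend pairs are `Target.CPU` with `Backend.C` and `Target.GPU` with `Backend.CUDA`, selected via `CreateKernelConfig`. The assignment below is illustrative, and the GPU variant assumes pycuda plus a CUDA-capable device.

```python
import pystencils as ps

src, dst = ps.fields('src, dst: double[2D]')
copy_rule = ps.Assignment(dst.center, src.center)

cpu_cfg = ps.CreateKernelConfig(target=ps.Target.CPU)     # backend defaults to Backend.C
cpu_kernel = ps.create_kernel(copy_rule, config=cpu_cfg).compile()

# GPU variant (requires pycuda and a CUDA device):
# gpu_cfg = ps.CreateKernelConfig(target=ps.Target.GPU)   # backend defaults to Backend.CUDA
# gpu_kernel = ps.create_kernel(copy_rule, config=gpu_cfg).compile()
```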