Commit 30da6576 authored by Jan Hönig

Merge branch 'RemoveOpenCL' into 'master'

Removed OpenCL

See merge request !278
parents 0ed1a87b 9afc38bb
Pipeline #42282 failed
Showing with 102 additions and 619 deletions
@@ -4,3 +4,4 @@
 ### Removed
 * LLVM backend because it was not used much and not well integrated in pystencils.
+* OpenCL backend because it was not used much and not well integrated in pystencils.
@@ -53,7 +53,6 @@ Without `[interactive]` you get a minimal version with very few dependencies.
 All options:
 - `gpu`: use this if an NVIDIA GPU is available and CUDA is installed
-- `opencl`: basic OpenCL support (experimental)
 - `alltrafos`: pulls in additional dependencies for loop simplification, e.g. libisl
 - `bench_db`: functionality to store benchmark results in object databases
 - `interactive`: installs dependencies to work in Jupyter including image I/O, plotting etc.
...
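For reference, an extra is selected at install time, e.g. (a hedged example, assuming the package name on PyPI):

pip install pystencils[interactive]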
This diff is collapsed.
@@ -47,7 +47,7 @@ def generate_c(ast_node: Node,
     Args:
         ast_node: ast representation of kernel
         signature_only: generate signature without function body
-        dialect: `Backend`: 'C', 'CUDA' or 'OPENCL'
+        dialect: `Backend`: 'C' or 'CUDA'
         custom_backend: use own custom printer for code generation
         with_globals: enable usage of global variables
     Returns:
@@ -71,9 +71,6 @@ def generate_c(ast_node: Node,
     elif dialect == Backend.CUDA:
         from pystencils.backends.cuda_backend import CudaBackend
         printer = CudaBackend(signature_only=signature_only)
-    elif dialect == Backend.OPENCL:
-        from pystencils.backends.opencl_backend import OpenClBackend
-        printer = OpenClBackend(signature_only=signature_only)
     else:
         raise ValueError(f'Unknown {dialect=}')
     code = printer(ast_node)
...
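After this change the dispatch above ends at the CUDA branch. A minimal sketch of reaching generate_c through the public API (the kernel and field names are made up for illustration):

import pystencils as ps

src, dst = ps.fields("src, dst: float64[2D]")  # hypothetical fields
ast = ps.create_kernel(ps.Assignment(dst.center, (src[1, 0] + src[-1, 0]) / 2))
print(ps.get_code_str(ast))  # reaches generate_c with dialect=Backend.C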
acos
acosh
acospi
asin
asinh
asinpi
atan
atan2
atanh
atanpi
atan2pi
cbrt
ceil
copysign
cos
cosh
cospi
erfc
erf
exp
exp2
exp10
expm1
fabs
fdim
floor
fma
fmax
fmin
fmod
fract
frexp
hypot
ilogb
ldexp
lgamma
lgamma_r
log
log2
log10
log1p
logb
mad
maxmag
minmag
modf
nextafter
pow
pown
powr
remquo
rint
rootn
round
rsqrt
sin
sincos
sinh
sinpi
sqrt
tan
tanh
tanpi
tgamma
trunc
half_cos
half_divide
half_exp
half_exp2
half_exp10
half_log
half_log2
half_log10
half_powr
half_recip
half_rsqrt
half_sin
half_sqrt
half_tan
native_cos
native_divide
native_exp
native_exp2
native_exp10
native_log
native_log2
native_log10
native_powr
native_recip
native_rsqrt
native_sin
native_sqrt
native_tan
from os.path import dirname, join

import pystencils.data_types
from pystencils.astnodes import Node
from pystencils.backends.cbackend import CustomSympyPrinter, generate_c
from pystencils.backends.cuda_backend import CudaBackend, CudaSympyPrinter
from pystencils.enums import Backend
from pystencils.fast_approximation import fast_division, fast_inv_sqrt, fast_sqrt

with open(join(dirname(__file__), 'opencl1.1_known_functions.txt')) as f:
    lines = f.readlines()
    OPENCL_KNOWN_FUNCTIONS = {l.strip(): l.strip() for l in lines if l.strip()}
def generate_opencl(ast_node: Node, signature_only: bool = False, custom_backend=None, with_globals=True) -> str:
    """Prints an abstract syntax tree node (made for `Target` 'GPU') as OpenCL code.  # TODO Backend instead of Target?

    Args:
        ast_node: ast representation of kernel
        signature_only: generate signature without function body
        custom_backend: use own custom printer for code generation
        with_globals: enable usage of global variables

    Returns:
        OpenCL code for the ast node and its descendants
    """
    return generate_c(ast_node, signature_only, dialect=Backend.OPENCL,
                      custom_backend=custom_backend, with_globals=with_globals)


class OpenClBackend(CudaBackend):

    def __init__(self,
                 sympy_printer=None,
                 signature_only=False):
        if not sympy_printer:
            sympy_printer = OpenClSympyPrinter()

        super().__init__(sympy_printer, signature_only)
        self._dialect = Backend.OPENCL

    def _print_Type(self, node):
        code = super()._print_Type(node)
        if isinstance(node, pystencils.data_types.PointerType):
            return "__global " + code
        else:
            return code

    def _print_ThreadBlockSynchronization(self, node):
        raise NotImplementedError()

    def _print_TextureDeclaration(self, node):
        raise NotImplementedError()


class OpenClSympyPrinter(CudaSympyPrinter):
    language = "OpenCL"

    DIMENSION_MAPPING = {
        'x': '0',
        'y': '1',
        'z': '2'
    }
    INDEXING_FUNCTION_MAPPING = {
        'blockIdx': 'get_group_id',
        'threadIdx': 'get_local_id',
        'blockDim': 'get_local_size',
        'gridDim': 'get_global_size'
    }

    def __init__(self):
        CustomSympyPrinter.__init__(self)
        self.known_functions = OPENCL_KNOWN_FUNCTIONS

    def _print_Type(self, node):
        code = super()._print_Type(node)
        if isinstance(node, pystencils.data_types.PointerType):
            return "__global " + code
        else:
            return code

    def _print_ThreadIndexingSymbol(self, node):
        symbol_name: str = node.name
        function_name, dimension = tuple(symbol_name.split("."))
        dimension = self.DIMENSION_MAPPING[dimension]
        function_name = self.INDEXING_FUNCTION_MAPPING[function_name]
        return f"(int64_t) {function_name}({dimension})"
    def _print_TextureAccess(self, node):
        raise NotImplementedError()

    # For math functions, OpenCL is more similar to the C++ printer CustomSympyPrinter
    # since built-in math functions are generic.
    # In CUDA, you have to differentiate between `sin` and `sinf`
    try:
        _print_math_func = CustomSympyPrinter._print_math_func
    except AttributeError:
        pass
    _print_Pow = CustomSympyPrinter._print_Pow

    def _print_Function(self, expr):
        if isinstance(expr, fast_division):
            return "native_divide(%s, %s)" % tuple(self._print(a) for a in expr.args)
        elif isinstance(expr, fast_sqrt):
            # fast_sqrt and fast_inv_sqrt are unary, so print the single argument
            # directly instead of interpolating a Python tuple into the source
            return f"native_sqrt({self._print(expr.args[0])})"
        elif isinstance(expr, fast_inv_sqrt):
            return f"native_rsqrt({self._print(expr.args[0])})"
        return CustomSympyPrinter._print_Function(self, expr)
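For context, a hedged sketch of how this printer was typically driven, reusing create_cuda_kernel as elsewhere in this diff (field names are made up):

from pystencils import Assignment, fields
from pystencils.gpucuda.kernelcreation import create_cuda_kernel

src, dst = fields("src, dst: float64[2D]")  # hypothetical fields
ast = create_cuda_kernel([Assignment(dst.center, 2 * src.center)])
opencl_code = generate_opencl(ast)  # FUNC_PREFIX/__kernel is defined later by the JIT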
@@ -23,8 +23,7 @@ def create_data_handling(domain_size: Tuple[int, ...],
                          default_layout: str = 'SoA',
                          default_target: Target = Target.CPU,
                          parallel: bool = False,
-                         default_ghost_layers: int = 1,
-                         opencl_queue=None) -> DataHandling:
+                         default_ghost_layers: int = 1) -> DataHandling:
     """Creates a data handling instance.

     Args:
@@ -43,7 +42,6 @@ def create_data_handling(domain_size: Tuple[int, ...],
         default_target = new_target

     if parallel:
-        assert not opencl_queue, "OpenCL is only supported for SerialDataHandling"
         if wlb is None:
             raise ValueError("Cannot create parallel data handling because walberla module is not available")
@@ -71,8 +69,7 @@ def create_data_handling(domain_size: Tuple[int, ...],
                                   periodicity=periodicity,
                                   default_target=default_target,
                                   default_layout=default_layout,
-                                  default_ghost_layers=default_ghost_layers,
-                                  opencl_queue=opencl_queue)
+                                  default_ghost_layers=default_ghost_layers)

 __all__ = ['create_data_handling']
@@ -17,8 +17,8 @@ class DataHandling(ABC):
     'gather' function that collects (parts of the) distributed data on a single process.
     """

-    _GPU_LIKE_TARGETS = [Target.GPU, Target.OPENCL]
-    _GPU_LIKE_BACKENDS = [Backend.CUDA, Backend.OPENCL]
+    _GPU_LIKE_TARGETS = [Target.GPU]
+    _GPU_LIKE_BACKENDS = [Backend.CUDA]

     # ---------------------------- Adding and accessing data -----------------------------------------------------------

     @property
...
try:
    import pyopencl.array as gpuarray
except ImportError:
    gpuarray = None

import numpy as np

import pystencils


class PyOpenClArrayHandler:

    def __init__(self, queue):
        if not queue:
            from pystencils.opencl.opencljit import get_global_cl_queue
            queue = get_global_cl_queue()
        assert queue, "OpenCL queue missing!\n" \
                      "Use `import pystencils.opencl.autoinit` if you want it to be automatically created"
        self.queue = queue

    def zeros(self, shape, dtype=np.float64, order='C'):
        cpu_array = np.zeros(shape=shape, dtype=dtype, order=order)
        return self.to_gpu(cpu_array)

    def ones(self, shape, dtype=np.float64, order='C'):
        cpu_array = np.ones(shape=shape, dtype=dtype, order=order)
        return self.to_gpu(cpu_array)

    def empty(self, shape, dtype=np.float64, layout=None):
        if layout:
            cpu_array = pystencils.field.create_numpy_array_with_layout(shape=shape, dtype=dtype, layout=layout)
            return self.to_gpu(cpu_array)
        else:
            return gpuarray.empty(self.queue, shape, dtype)

    def to_gpu(self, array):
        return gpuarray.to_device(self.queue, array)

    def upload(self, gpuarray, numpy_array):
        gpuarray.set(numpy_array, self.queue)

    def download(self, gpuarray, numpy_array):
        gpuarray.get(self.queue, numpy_array)

    def randn(self, shape, dtype=np.float64):
        cpu_array = np.random.randn(*shape).astype(dtype)
        return self.from_numpy(cpu_array)

    from_numpy = to_gpu
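A hedged round-trip through the handler (assumes pyopencl is installed and a global queue exists, e.g. via `import pystencils.opencl.autoinit`):

import numpy as np

handler = PyOpenClArrayHandler(queue=None)  # None falls back to the global queue
gpu_arr = handler.zeros((16, 16))           # allocate on host, copy to device
host = np.empty((16, 16))
handler.download(gpu_arr, host)             # device -> host copy
assert np.all(host == 0.0)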
@@ -7,7 +7,6 @@ import numpy as np
 from pystencils.datahandling.blockiteration import SerialBlock
 from pystencils.datahandling.datahandling_interface import DataHandling
 from pystencils.datahandling.pycuda import PyCudaArrayHandler, PyCudaNotAvailableHandler
-from pystencils.datahandling.pyopencl import PyOpenClArrayHandler
 from pystencils.enums import Target
 from pystencils.field import (
     Field, FieldType, create_numpy_array_with_layout, layout_string_to_tuple,
@@ -24,8 +23,6 @@ class SerialDataHandling(DataHandling):
                  default_layout: str = 'SoA',
                  periodicity: Union[bool, Sequence[bool]] = False,
                  default_target: Target = Target.CPU,
-                 opencl_queue=None,
-                 opencl_ctx=None,
                  array_handler=None) -> None:
         """
         Creates a data handling for single node simulations.
@@ -48,17 +45,12 @@ class SerialDataHandling(DataHandling):
         self.custom_data_cpu = DotDict()
         self.custom_data_gpu = DotDict()
         self._custom_data_transfer_functions = {}
-        self._opencl_queue = opencl_queue
-        self._opencl_ctx = opencl_ctx

         if not array_handler:
             try:
                 self.array_handler = PyCudaArrayHandler()
             except Exception:
                 self.array_handler = PyCudaNotAvailableHandler()
-
-            if default_target == Target.OPENCL or opencl_queue:
-                self.array_handler = PyOpenClArrayHandler(opencl_queue)
         else:
             self.array_handler = array_handler
@@ -280,8 +272,6 @@ class SerialDataHandling(DataHandling):
     def synchronization_function(self, names, stencil=None, target=None, functor=None, **_):
         if target is None:
             target = self.default_target
-        if target == Target.OPENCL:  # TODO potential misuse between Target and Backend
-            target = Target.GPU
         assert target in (Target.CPU, Target.GPU)
         if not hasattr(names, '__len__') or type(names) is str:
             names = [names]
@@ -324,16 +314,13 @@ class SerialDataHandling(DataHandling):
             else:
                 if functor is None:
                     from pystencils.gpucuda.periodicity import get_periodic_boundary_functor as functor
-                    target = Target.GPU if not isinstance(self.array_handler,
-                                                          PyOpenClArrayHandler) else Target.OPENCL
+                    target = Target.GPU

                 result.append(functor(filtered_stencil, self._domainSize,
                                       index_dimensions=self.fields[name].index_dimensions,
                                       index_dim_shape=values_per_cell,
                                       dtype=self.fields[name].dtype.numpy_dtype,
                                       ghost_layers=gls,
-                                      target=target,
-                                      opencl_queue=self._opencl_queue,
-                                      opencl_ctx=self._opencl_ctx))
+                                      target=target))

         if target == Target.CPU:
             def result_functor():
...
@@ -46,7 +46,7 @@ def get_code_obj(ast: Union[KernelFunction, KernelWrapper], custom_backend=None)
     if isinstance(ast, KernelWrapper):
         ast = ast.ast

-    if ast.backend not in {Backend.C, Backend.CUDA, Backend.OPENCL}:
+    if ast.backend not in {Backend.C, Backend.CUDA}:
         raise NotImplementedError(f'get_code_obj is not implemented for backend {ast.backend}')
     dialect = ast.backend
...
@@ -13,10 +13,6 @@ class Target(Enum):
     """
     Target GPU architecture.
     """
-    OPENCL = auto()
-    """
-    Target all architectures OpenCL covers (Thus both, Target and Backend)
-    """

 class Backend(Enum):
@@ -32,7 +28,3 @@ class Backend(Enum):
     """
     Use the CUDA backend to generate code for NVIDIA GPUs.
     """
-    OPENCL = auto()
-    """
-    Use the OpenCL backend to generate code for OpenCL.
-    """
@@ -2,7 +2,6 @@ import numpy as np
 from itertools import product

 import pystencils.gpucuda
-import pystencils.opencl
 from pystencils import Assignment, Field
 from pystencils.gpucuda.kernelcreation import create_cuda_kernel
 from pystencils.enums import Target
@@ -32,19 +31,14 @@ def create_copy_kernel(domain_size, from_slice, to_slice, index_dimensions=0, in
 def get_periodic_boundary_functor(stencil, domain_size, index_dimensions=0, index_dim_shape=1, ghost_layers=1,
-                                  thickness=None, dtype=float, target=Target.GPU, opencl_queue=None, opencl_ctx=None):
-    assert target in {Target.GPU, Target.OPENCL}
+                                  thickness=None, dtype=float, target=Target.GPU):
+    assert target in {Target.GPU}
     src_dst_slice_tuples = get_periodic_boundary_src_dst_slices(stencil, ghost_layers, thickness)
     kernels = []

     for src_slice, dst_slice in src_dst_slice_tuples:
         ast = create_copy_kernel(domain_size, src_slice, dst_slice, index_dimensions, index_dim_shape, dtype)
-        if target == pystencils.Target.GPU:
-            kernels.append(pystencils.gpucuda.make_python_function(ast))
-        else:
-            ast._target = pystencils.Target.OPENCL
-            ast._backend = pystencils.Backend.OPENCL
-            kernels.append(pystencils.opencl.make_python_function(ast, opencl_queue, opencl_ctx))
+        kernels.append(pystencils.gpucuda.make_python_function(ast))

     def functor(pdfs, **_):
         for kernel in kernels:
...
# -*- coding: utf-8 -*-
#
# Copyright © 2019 Stephan Seitz <stephan.seitz@fau.de>
#
# Distributed under terms of the GPLv3 license.
"""
"""
from typing import Union

import numpy as np

try:
    import pycuda.driver as cuda
    from pycuda import gpuarray
    import pycuda
except Exception:
    pass


def ndarray_to_tex(tex_ref,  # type: Union[cuda.TextureReference, cuda.SurfaceReference]
                   ndarray,
                   address_mode=None,
                   filter_mode=None,
                   use_normalized_coordinates=False,
                   read_as_integer=False):
    if isinstance(address_mode, str):
        address_mode = getattr(pycuda.driver.address_mode, address_mode.upper())
    if address_mode is None:
        address_mode = cuda.address_mode.BORDER
    if filter_mode is None:
        filter_mode = cuda.filter_mode.LINEAR

    if isinstance(ndarray, np.ndarray):
        cu_array = cuda.np_to_array(ndarray, 'C')
    elif isinstance(ndarray, gpuarray.GPUArray):
        cu_array = cuda.gpuarray_to_array(ndarray, 'C')
    else:
        raise TypeError(
            'ndarray must be numpy.ndarray or pycuda.gpuarray.GPUArray')

    tex_ref.set_array(cu_array)
    tex_ref.set_address_mode(0, address_mode)
    if ndarray.ndim >= 2:
        tex_ref.set_address_mode(1, address_mode)
    if ndarray.ndim >= 3:
        tex_ref.set_address_mode(2, address_mode)
    tex_ref.set_filter_mode(filter_mode)

    if not use_normalized_coordinates:
        tex_ref.set_flags(tex_ref.get_flags() & ~cuda.TRSF_NORMALIZED_COORDINATES)

    if not read_as_integer:
        tex_ref.set_flags(tex_ref.get_flags() & ~cuda.TRSF_READ_AS_INTEGER)
-import functools
 import itertools
 import warnings
 from dataclasses import dataclass, field
@@ -105,14 +104,6 @@ class CreateKernelConfig:
     """
     If set to `True`, auto can be used in the generated code for data types. This makes the type system more robust.
     """
-    opencl_queue: Any = None
-    """
-    OpenCL queue if OpenCL target is used.
-    """
-    opencl_ctx: Any = None
-    """
-    OpenCL context if OpenCL target is used.
-    """
     index_fields: List[Field] = None
     """
     List of index fields, i.e. 1D fields with struct data type. If not `None`, `create_index_kernel`
@@ -136,8 +127,6 @@ class CreateKernelConfig:
             self.backend = Backend.C
         elif self.target == Target.GPU:
             self.backend = Backend.CUDA
-        elif self.target == Target.OPENCL:
-            self.backend = Backend.OPENCL
         else:
             raise NotImplementedError(f'Target {self.target} has no default backend')
@@ -274,20 +263,14 @@ def create_domain_kernel(assignments: List[Assignment], *, config: CreateKernelC
                 raise ValueError("Blocking cannot be combined with cacheline-zeroing")
             else:
                 raise ValueError("Invalid value for cpu_vectorize_info")
-    elif config.target == Target.GPU or config.target == Target.OPENCL:
-        if config.backend == Backend.CUDA or config.backend == Backend.OPENCL:
+    elif config.target == Target.GPU:
+        if config.backend == Backend.CUDA:
             from pystencils.gpucuda import create_cuda_kernel
             ast = create_cuda_kernel(assignments, function_name=config.function_name, type_info=config.data_type,
                                      indexing_creator=indexing_creator_from_params(config.gpu_indexing,
                                                                                    config.gpu_indexing_params),
                                      iteration_slice=config.iteration_slice, ghost_layers=config.ghost_layers,
                                      skip_independence_check=config.skip_independence_check)
-            if config.backend == Backend.OPENCL:
-                from pystencils.opencl.opencljit import make_python_function
-                ast._backend = config.backend
-                ast.compile = functools.partial(make_python_function, ast, config.opencl_queue, config.opencl_ctx)
-            ast._target = config.target
-            ast._backend = config.backend

     if not ast:
         raise NotImplementedError(
@@ -349,8 +332,8 @@ def create_indexed_kernel(assignments: List[Assignment], *, config: CreateKernel
                                  coordinate_names=config.coordinate_names)
         if config.cpu_openmp:
             add_openmp(ast, num_threads=config.cpu_openmp)
-    elif config.target == Target.GPU or config.target == Target.OPENCL:
-        if config.backend == Backend.CUDA or config.backend == Backend.OPENCL:
+    elif config.target == Target.GPU:
+        if config.backend == Backend.CUDA:
             from pystencils.gpucuda import created_indexed_cuda_kernel
             idx_creator = indexing_creator_from_params(config.gpu_indexing, config.gpu_indexing_params)
             ast = created_indexed_cuda_kernel(assignments,
@@ -358,12 +341,6 @@ def create_indexed_kernel(assignments: List[Assignment], *, config: CreateKernel
                                               type_info=config.data_type,
                                               coordinate_names=config.coordinate_names,
                                               indexing_creator=idx_creator)
-            if config.backend == Backend.OPENCL:
-                from pystencils.opencl.opencljit import make_python_function
-                ast._backend = config.backend
-                ast.compile = functools.partial(make_python_function, ast, config.opencl_queue, config.opencl_ctx)
-            ast._target = config.target
-            ast._backend = config.backend

     if not ast:
         raise NotImplementedError(f'Indexed kernels are not yet supported for {config.target} with {config.backend}')
...
"""
"""
from pystencils.opencl.opencljit import (
clear_global_ctx, init_globally, init_globally_with_context, make_python_function)
__all__ = ['init_globally', 'init_globally_with_context', 'clear_global_ctx', 'make_python_function']
"""
Automatically initializes OpenCL context using any device.
Use `pystencils.opencl.{init_globally_with_context,init_globally}` if you want to use a specific device.
"""
from pystencils.opencl.opencljit import (
clear_global_ctx, init_globally, init_globally_with_context, make_python_function)
__all__ = ['init_globally', 'init_globally_with_context', 'clear_global_ctx', 'make_python_function']
try:
init_globally()
except Exception as e:
import warnings
warnings.warn(str(e))
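For explicit device control instead of autoinit, the context and queue can be supplied by hand; a sketch using standard pyopencl calls:

import pyopencl as cl
from pystencils.opencl.opencljit import init_globally_with_context

ctx = cl.create_some_context()  # or construct a Context for a specific device
queue = cl.CommandQueue(ctx)
init_globally_with_context(ctx, queue)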
import numpy as np

from pystencils.backends.cbackend import get_headers
from pystencils.backends.opencl_backend import generate_opencl
from pystencils.gpucuda.cudajit import _build_numpy_argument_list, _check_arguments
from pystencils.include import get_pystencils_include_path
from pystencils.kernel_wrapper import KernelWrapper

USE_FAST_MATH = True

_global_cl_ctx = None
_global_cl_queue = None


def get_global_cl_queue():
    return _global_cl_queue


def get_global_cl_ctx():
    return _global_cl_ctx


def init_globally(device_index=0):
    import pyopencl as cl
    global _global_cl_ctx
    global _global_cl_queue
    _global_cl_ctx = cl.create_some_context(device_index)
    _global_cl_queue = cl.CommandQueue(_global_cl_ctx)


def init_globally_with_context(opencl_ctx, opencl_queue):
    global _global_cl_ctx
    global _global_cl_queue
    _global_cl_ctx = opencl_ctx
    _global_cl_queue = opencl_queue


def clear_global_ctx():
    global _global_cl_ctx
    global _global_cl_queue
    _global_cl_ctx = None
    _global_cl_queue = None


def make_python_function(kernel_function_node, opencl_queue, opencl_ctx, argument_dict=None, custom_backend=None):
    """
    Creates an **OpenCL** kernel function from an abstract syntax tree which
    was created for ``target='Target.GPU'``, e.g. by :func:`pystencils.gpucuda.create_cuda_kernel`
    or :func:`pystencils.gpucuda.created_indexed_cuda_kernel`.

    Args:
        opencl_queue: a valid :class:`pyopencl.CommandQueue`
        opencl_ctx: a valid :class:`pyopencl.Context`
        kernel_function_node: the abstract syntax tree
        argument_dict: parameters passed here are already fixed. Remaining parameters have to be passed to the
                       returned kernel functor.

    Returns:
        compiled kernel as Python function
    """
    import pyopencl as cl

    if not opencl_ctx:
        opencl_ctx = _global_cl_ctx
    if not opencl_queue:
        opencl_queue = _global_cl_queue

    assert opencl_ctx, "No valid OpenCL context!\n" \
                       "Use `import pystencils.opencl.autoinit` if you want it to be automatically created"
    assert opencl_queue, "No valid OpenCL queue!\n" \
                         "Use `import pystencils.opencl.autoinit` if you want it to be automatically created"

    if argument_dict is None:
        argument_dict = {}

    # check if double precision is supported and required
    if any([d.double_fp_config == 0 for d in opencl_ctx.devices]):
        for param in kernel_function_node.get_parameters():
            if param.symbol.dtype.base_type:
                if param.symbol.dtype.base_type.numpy_dtype == np.float64:
                    raise ValueError('OpenCL device does not support double precision')
            else:
                if param.symbol.dtype.numpy_dtype == np.float64:
                    raise ValueError('OpenCL device does not support double precision')

    # Changing of kernel name necessary since compilation with default name "kernel" is not possible (OpenCL keyword!)
    kernel_function_node.function_name = "opencl_" + kernel_function_node.function_name
    header_list = ['"opencl_stdint.h"'] + list(get_headers(kernel_function_node))
    includes = "\n".join(["#include %s" % (include_file,) for include_file in header_list])

    code = includes + "\n"
    code += "#define FUNC_PREFIX __kernel\n"
    code += "#define RESTRICT restrict\n\n"
    code += str(generate_opencl(kernel_function_node, custom_backend=custom_backend))
    options = []
    if USE_FAST_MATH:
        options.append("-cl-unsafe-math-optimizations")
        options.append("-cl-mad-enable")
        options.append("-cl-fast-relaxed-math")
        options.append("-cl-finite-math-only")
    options.append("-I")
    options.append(get_pystencils_include_path())
    mod = cl.Program(opencl_ctx, code).build(options=options)
    func = getattr(mod, kernel_function_node.function_name)

    parameters = kernel_function_node.get_parameters()

    cache = {}
    cache_values = []
    def wrapper(**kwargs):
        key = hash(tuple((k, v.ctypes.data, v.strides, v.shape) if isinstance(v, np.ndarray) else (k, id(v))
                         for k, v in kwargs.items()))
        try:
            args, block_and_thread_numbers = cache[key]
            func(opencl_queue, block_and_thread_numbers['grid'], block_and_thread_numbers['block'], *args)
        except KeyError:
            full_arguments = argument_dict.copy()
            full_arguments.update(kwargs)
            assert not any(isinstance(a, np.ndarray)
                           for a in full_arguments.values()), 'Calling an OpenCL kernel with a numpy array!'
            assert not any('pycuda' in str(type(a))
                           for a in full_arguments.values()), 'Calling an OpenCL kernel with a PyCUDA array!'
            shape = _check_arguments(parameters, full_arguments)
            indexing = kernel_function_node.indexing
            block_and_thread_numbers = indexing.call_parameters(shape)
            block_and_thread_numbers['block'] = tuple(int(i) for i in block_and_thread_numbers['block'])
            block_and_thread_numbers['grid'] = tuple(int(b * g) for (b, g) in zip(block_and_thread_numbers['block'],
                                                                                  block_and_thread_numbers['grid']))
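            # pyopencl launches take the *global* work size rather than CUDA's
            # grid of blocks, hence 'grid' was rescaled above to block * grid.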
            args = _build_numpy_argument_list(parameters, full_arguments)
            args = [a.data if hasattr(a, 'data') else a for a in args]
            cache[key] = (args, block_and_thread_numbers)
            cache_values.append(kwargs)  # keep objects alive such that ids remain unique
            func(opencl_queue, block_and_thread_numbers['grid'], block_and_thread_numbers['block'], *args)

    wrapper.ast = kernel_function_node
    wrapper.parameters = kernel_function_node.get_parameters()
    wrapper = KernelWrapper(wrapper, parameters, kernel_function_node)
    return wrapper
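Putting the removed pieces together, user code drove this JIT roughly as follows (reconstructed from this diff; assumes pyopencl and an OpenCL device; field names are made up):

import pystencils as ps
import pystencils.opencl.autoinit  # creates the global context and queue

src, dst = ps.fields("src, dst: float32[2D]")  # hypothetical fields
config = ps.CreateKernelConfig(target=ps.Target.OPENCL)
ast = ps.create_kernel(ps.Assignment(dst.center, 2 * src.center), config=config)
kernel = ast.compile()  # the functools.partial around make_python_function seen above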
@@ -5,7 +5,6 @@ import sympy as sp
 from pystencils.data_types import TypedSymbol, cast_func
 from pystencils.astnodes import LoopOverCoordinate
 from pystencils.backends.cbackend import CustomCodeNode
-from pystencils.enums import Backend
 from pystencils.sympyextensions import fast_subs
@@ -54,8 +53,7 @@ class RNGBase(CustomCodeNode):
         else:
             code += f"{vector_instruction_set[r.dtype.base_name] if vector_instruction_set else r.dtype} " + \
                     f"{r.name};\n"
-        args = [print_arg(a) for a in self.args] + \
-               [('&' if dialect == Backend.OPENCL else '') + r.name for r in self.result_symbols]
+        args = [print_arg(a) for a in self.args] + ['' + r.name for r in self.result_symbols]
         code += (self._name + "(" + ", ".join(args) + ");\n")
         return code
...
@@ -10,9 +10,6 @@ def test_create_kernel_config():
     c = ps.CreateKernelConfig(target=ps.Target.GPU)
     assert c.backend == ps.Backend.CUDA

-    c = ps.CreateKernelConfig(target=ps.Target.OPENCL)
-    assert c.backend == ps.Backend.OPENCL
-
     c = ps.CreateKernelConfig(backend=ps.Backend.CUDA)
     assert c.target == ps.Target.CPU
     assert c.backend == ps.Backend.CUDA
...