Commit 65c8db0a authored by Stephan Seitz

Improve OpenCL integration via create_kernel

parent 974febd7
@@ -45,7 +45,12 @@ def show_code(ast: KernelFunction, custom_backend=None):
     if isinstance(ast, KernelWrapper):
         ast = ast.ast
-    dialect = 'cuda' if ast.backend == 'gpucuda' else 'c'
+    if ast.backend == 'gpucuda':
+        dialect = 'cuda'
+    elif ast.backend == 'opencl':
+        dialect = 'opencl'
+    else:
+        dialect = 'c'

     class CodeDisplay:
         def __init__(self, ast_input):
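With this change, `show_code` picks the OpenCL dialect automatically for kernels created with `target='opencl'` instead of falling back to plain C. A minimal sketch of the effect (the field shapes and the update rule are illustrative only):

```python
import pystencils

# Any simple update rule works; this one mirrors the test added below.
z, x = pystencils.fields("z, x: [20,30]")
ast = pystencils.create_kernel(
    pystencils.AssignmentCollection({z[0, 0]: 2 * x[0, 0]}),
    target='opencl')

# The generated source is now rendered with the 'opencl' dialect.
code = str(pystencils.show_code(ast))
print(code)
```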
@@ -10,6 +10,7 @@ from pystencils.gpucuda.indexing import indexing_creator_from_params
 from pystencils.simp.assignment_collection import AssignmentCollection
 from pystencils.transformations import (
     loop_blocking, move_constants_before_loop, remove_conditionals_in_staggered_kernel)
+import functools


 def create_kernel(assignments,
@@ -23,13 +24,15 @@ def create_kernel(assignments,
                   cpu_blocking=None,
                   gpu_indexing='block',
                   gpu_indexing_params=MappingProxyType({}),
-                  use_textures_for_interpolation=True):
+                  use_textures_for_interpolation=True,
+                  opencl_queue=None,
+                  opencl_ctx=None):
     """
     Creates abstract syntax tree (AST) of kernel, using a list of update equations.

     Args:
         assignments: can be a single assignment, sequence of assignments or an `AssignmentCollection`
-        target: 'cpu', 'llvm' or 'gpu'
+        target: 'cpu', 'llvm', 'gpu' or 'opencl'
         data_type: data type used for all untyped symbols (i.e. non-fields), can also be a dict from symbol name
                    to type
         iteration_slice: rectangular subset to iterate over, if not specified the complete non-ghost layer \
@@ -102,13 +105,18 @@ def create_kernel(assignments,
         ast = create_kernel(assignments, type_info=data_type, split_groups=split_groups,
                             iteration_slice=iteration_slice, ghost_layers=ghost_layers)
         return ast
-    elif target == 'gpu':
+    elif target == 'gpu' or target == 'opencl':
         from pystencils.gpucuda import create_cuda_kernel
         ast = create_cuda_kernel(assignments, type_info=data_type,
                                  indexing_creator=indexing_creator_from_params(gpu_indexing, gpu_indexing_params),
                                  iteration_slice=iteration_slice, ghost_layers=ghost_layers,
                                  skip_independence_check=skip_independence_check,
                                  use_textures_for_interpolation=use_textures_for_interpolation)
+        if target == 'opencl':
+            from pystencils.opencl.opencljit import make_python_function
+            ast._target = 'opencl'
+            ast._backend = 'opencl'
+            ast.compile = functools.partial(make_python_function, ast, opencl_queue, opencl_ctx)
         return ast
     else:
         raise ValueError("Unknown target %s. Has to be one of 'cpu', 'gpu' or 'llvm' " % (target,))
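The `compile` member bound here is a `functools.partial`, so a later `ast.compile()` produces the kernel without the caller threading the queue and context through again. A usage sketch, assuming an explicitly created pyopencl context rather than the module globals:

```python
import pyopencl as cl
import pystencils

# Create an explicit context/queue instead of relying on init_globally().
ctx = cl.create_some_context()
queue = cl.CommandQueue(ctx)

z, x = pystencils.fields("z, x: [20,30]")
ast = pystencils.create_kernel(
    pystencils.AssignmentCollection({z[0, 0]: 2 * x[0, 0]}),
    target='opencl', opencl_queue=queue, opencl_ctx=ctx)

# ast.compile is functools.partial(make_python_function, ast, queue, ctx),
# so no further OpenCL plumbing is needed at compile time.
kernel = ast.compile()
```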
@@ -3,10 +3,31 @@ import numpy as np
 from pystencils.backends.cbackend import generate_c, get_headers
 from pystencils.gpucuda.cudajit import _build_numpy_argument_list, _check_arguments
 from pystencils.include import get_pystencils_include_path
+from pystencils.kernel_wrapper import KernelWrapper

 USE_FAST_MATH = True

+_global_cl_ctx = None
+_global_cl_queue = None
+
+
+def get_global_cl_queue():
+    return _global_cl_queue
+
+
+def get_global_cl_ctx():
+    return _global_cl_ctx
+
+
+def init_globally(device_index=0):
+    import pyopencl as cl
+    global _global_cl_ctx
+    global _global_cl_queue
+    _global_cl_ctx = cl.create_some_context(device_index)
+    _global_cl_queue = cl.CommandQueue(_global_cl_ctx)
+
+
 def make_python_function(kernel_function_node, opencl_queue, opencl_ctx, argument_dict=None, custom_backend=None):
     """
     Creates an **OpenCL** kernel function from an abstract syntax tree which
@@ -24,6 +45,12 @@ def make_python_function(kernel_function_node, opencl_queue, opencl_ctx, argument_dict=None, custom_backend=None):
         compiled kernel as Python function
     """
     import pyopencl as cl
+    if not opencl_ctx:
+        opencl_ctx = _global_cl_ctx
+    if not opencl_queue:
+        opencl_queue = _global_cl_queue
+    assert opencl_ctx, "No valid OpenCL context"
+    assert opencl_queue, "No valid OpenCL queue"
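Together these pieces let callers omit the queue and context entirely: `init_globally()` creates a shared pair once, and `make_python_function` falls back to it whenever its `opencl_queue`/`opencl_ctx` arguments are left as `None`. A short sketch of that flow:

```python
from pystencils.opencl.opencljit import (
    get_global_cl_ctx, get_global_cl_queue, init_globally, make_python_function)

# One-time setup of the module-level context and queue.
init_globally()
assert get_global_cl_ctx() is not None
assert get_global_cl_queue() is not None

# With the globals set, None is now a valid queue/context argument:
# kernel = make_python_function(some_ast, None, None)
```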
@@ -90,4 +117,5 @@ def make_python_function(kernel_function_node, opencl_queue, opencl_ctx, argument_dict=None, custom_backend=None):
     wrapper.ast = kernel_function_node
     wrapper.parameters = kernel_function_node.get_parameters()
+    wrapper = KernelWrapper(wrapper, parameters, kernel_function_node)
     return wrapper
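Returning a `KernelWrapper` instead of tacking attributes onto the raw function gives CPU, CUDA, and OpenCL kernels one uniform interface; it is also what lets `show_code` unwrap the AST (`ast = ast.ast`) in the first hunk above. Roughly, the interface this relies on looks like the following sketch (not the actual pystencils implementation):

```python
class KernelWrapper:
    """Callable that carries kernel metadata alongside the compiled function."""

    def __init__(self, kernel, parameters, ast_node):
        self.kernel = kernel          # the compiled callable
        self.parameters = parameters  # kernel parameter descriptions
        self.ast = ast_node           # unwrapped by show_code()

    def __call__(self, **kwargs):
        return self.kernel(**kwargs)
```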
@@ -5,7 +5,7 @@ import pystencils
 import sympy as sp
 from pystencils.backends.cuda_backend import CudaBackend
 from pystencils.backends.opencl_backend import OpenClBackend
-from pystencils.opencl.opencljit import make_python_function
+from pystencils.opencl.opencljit import make_python_function, init_globally, get_global_cl_queue

 try:
     import pyopencl as cl
@@ -233,3 +233,41 @@ def test_without_cuda():
     opencl_kernel = make_python_function(ast, queue, ctx)
     assert opencl_kernel is not None
     opencl_kernel(x=x, y=y, z=z)
+
+
+@pytest.mark.skipif(not HAS_OPENCL, reason="Test requires pyopencl")
+def test_kernel_creation():
+    z, y, x = pystencils.fields("z, y, x: [20,30]")
+
+    assignments = pystencils.AssignmentCollection({
+        z[0, 0]: x[0, 0] * sp.log(x[0, 0] * y[0, 0])
+    })
+
+    print(assignments)
+
+    init_globally()
+    ast = pystencils.create_kernel(assignments, target='opencl')
+    print(ast.backend)
+
+    code = str(pystencils.show_code(ast))
+    print(code)
+    assert 'get_local_size' in code
+
+    opencl_kernel = ast.compile()
+
+    x_cpu = np.random.rand(20, 30)
+    y_cpu = np.random.rand(20, 30)
+    z_cpu = np.random.rand(20, 30)
+
+    import pyopencl.array as array
+    assert get_global_cl_queue()
+
+    x = array.to_device(get_global_cl_queue(), x_cpu)
+    y = array.to_device(get_global_cl_queue(), y_cpu)
+    z = array.to_device(get_global_cl_queue(), z_cpu)
+
+    assert opencl_kernel is not None
+    opencl_kernel(x=x, y=y, z=z)
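The test only checks that the kernel runs; one way to also verify the computed values, assuming pyopencl's array API (`Array.get()` copies device data back to the host):

```python
import numpy as np

# Copy the result back and compare against a NumPy reference of the
# update rule z = x * log(x * y).
np.testing.assert_allclose(z.get(), x_cpu * np.log(x_cpu * y_cpu), rtol=1e-7)
```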