Commit 65c8db0a authored by Stephan Seitz

Improve OpenCL integration via create_kernel

parent 974febd7
@@ -45,7 +45,12 @@ def show_code(ast: KernelFunction, custom_backend=None):
     if isinstance(ast, KernelWrapper):
         ast = ast.ast
-    dialect = 'cuda' if ast.backend == 'gpucuda' else 'c'
+    if ast.backend == 'gpucuda':
+        dialect = 'cuda'
+    elif ast.backend == 'opencl':
+        dialect = 'opencl'
+    else:
+        dialect = 'c'

     class CodeDisplay:
         def __init__(self, ast_input):
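With this change, `show_code` picks the OpenCL dialect automatically for kernels created with `target='opencl'` instead of falling back to plain C. A minimal sketch of the effect (the field shapes and the update rule are illustrative only):

```python
import pystencils

# Any simple update rule works; this one mirrors the test added below.
z, x = pystencils.fields("z, x: [20,30]")
ast = pystencils.create_kernel(
    pystencils.AssignmentCollection({z[0, 0]: 2 * x[0, 0]}),
    target='opencl')

# The generated source is now rendered with the 'opencl' dialect.
code = str(pystencils.show_code(ast))
print(code)
```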
@@ -10,6 +10,7 @@ from pystencils.gpucuda.indexing import indexing_creator_from_params
 from pystencils.simp.assignment_collection import AssignmentCollection
 from pystencils.transformations import (
     loop_blocking, move_constants_before_loop, remove_conditionals_in_staggered_kernel)
+import functools


 def create_kernel(assignments,
@@ -23,13 +24,15 @@ def create_kernel(assignments,
                   cpu_blocking=None,
                   gpu_indexing='block',
                   gpu_indexing_params=MappingProxyType({}),
-                  use_textures_for_interpolation=True):
+                  use_textures_for_interpolation=True,
+                  opencl_queue=None,
+                  opencl_ctx=None):
     """
     Creates abstract syntax tree (AST) of kernel, using a list of update equations.

     Args:
         assignments: can be a single assignment, sequence of assignments or an `AssignmentCollection`
-        target: 'cpu', 'llvm' or 'gpu'
+        target: 'cpu', 'llvm', 'gpu' or 'opencl'
         data_type: data type used for all untyped symbols (i.e. non-fields), can also be a dict from symbol name
                    to type
         iteration_slice: rectangular subset to iterate over, if not specified the complete non-ghost layer \
@@ -102,13 +105,18 @@ def create_kernel(assignments,
         ast = create_kernel(assignments, type_info=data_type, split_groups=split_groups,
                             iteration_slice=iteration_slice, ghost_layers=ghost_layers)
         return ast
-    elif target == 'gpu':
+    elif target == 'gpu' or target == 'opencl':
         from pystencils.gpucuda import create_cuda_kernel
         ast = create_cuda_kernel(assignments, type_info=data_type,
                                  indexing_creator=indexing_creator_from_params(gpu_indexing, gpu_indexing_params),
                                  iteration_slice=iteration_slice, ghost_layers=ghost_layers,
                                  skip_independence_check=skip_independence_check,
                                  use_textures_for_interpolation=use_textures_for_interpolation)
+        if target == 'opencl':
+            from pystencils.opencl.opencljit import make_python_function
+            ast._target = 'opencl'
+            ast._backend = 'opencl'
+            ast.compile = functools.partial(make_python_function, ast, opencl_queue, opencl_ctx)
         return ast
     else:
         raise ValueError("Unknown target %s. Has to be one of 'cpu', 'gpu' or 'llvm' " % (target,))
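The `compile` member bound here is a `functools.partial`, so a later `ast.compile()` produces the kernel without the caller threading the queue and context through again. A usage sketch, assuming an explicitly created pyopencl context rather than the module globals:

```python
import pyopencl as cl
import pystencils

# Create an explicit context/queue instead of relying on init_globally().
ctx = cl.create_some_context()
queue = cl.CommandQueue(ctx)

z, x = pystencils.fields("z, x: [20,30]")
ast = pystencils.create_kernel(
    pystencils.AssignmentCollection({z[0, 0]: 2 * x[0, 0]}),
    target='opencl', opencl_queue=queue, opencl_ctx=ctx)

# ast.compile is functools.partial(make_python_function, ast, queue, ctx),
# so no further OpenCL plumbing is needed at compile time.
kernel = ast.compile()
```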
@@ -3,10 +3,31 @@ import numpy as np
 from pystencils.backends.cbackend import generate_c, get_headers
 from pystencils.gpucuda.cudajit import _build_numpy_argument_list, _check_arguments
 from pystencils.include import get_pystencils_include_path
+from pystencils.kernel_wrapper import KernelWrapper

 USE_FAST_MATH = True

+_global_cl_ctx = None
+_global_cl_queue = None
+
+
+def get_global_cl_queue():
+    return _global_cl_queue
+
+
+def get_global_cl_ctx():
+    return _global_cl_ctx
+
+
+def init_globally(device_index=0):
+    import pyopencl as cl
+    global _global_cl_ctx
+    global _global_cl_queue
+    _global_cl_ctx = cl.create_some_context(device_index)
+    _global_cl_queue = cl.CommandQueue(_global_cl_ctx)
+
+
 def make_python_function(kernel_function_node, opencl_queue, opencl_ctx, argument_dict=None, custom_backend=None):
     """
     Creates an **OpenCL** kernel function from an abstract syntax tree which
@@ -24,6 +45,12 @@ def make_python_function(kernel_function_node, opencl_queue, opencl_ctx, argument_dict=None, custom_backend=None):
         compiled kernel as Python function
     """
     import pyopencl as cl
+    if not opencl_ctx:
+        opencl_ctx = _global_cl_ctx
+    if not opencl_queue:
+        opencl_queue = _global_cl_queue
+    assert opencl_ctx, "No valid OpenCL context"
+    assert opencl_queue, "No valid OpenCL queue"
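Together these pieces let callers omit the queue and context entirely: `init_globally()` creates a shared pair once, and `make_python_function` falls back to it whenever its `opencl_queue`/`opencl_ctx` arguments are left as `None`. A short sketch of that flow:

```python
from pystencils.opencl.opencljit import (
    get_global_cl_ctx, get_global_cl_queue, init_globally, make_python_function)

# One-time setup of the module-level context and queue.
init_globally()
assert get_global_cl_ctx() is not None
assert get_global_cl_queue() is not None

# With the globals set, None is now a valid queue/context argument:
# kernel = make_python_function(some_ast, None, None)
```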
@@ -90,4 +117,5 @@ def make_python_function(kernel_function_node, opencl_queue, opencl_ctx, argument_dict=None, custom_backend=None):
     wrapper.ast = kernel_function_node
     wrapper.parameters = kernel_function_node.get_parameters()
+    wrapper = KernelWrapper(wrapper, parameters, kernel_function_node)
     return wrapper
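Returning a `KernelWrapper` instead of tacking attributes onto the raw function gives CPU, CUDA, and OpenCL kernels one uniform interface; it is also what lets `show_code` unwrap the AST (`ast = ast.ast`) in the first hunk above. Roughly, the interface this relies on looks like the following sketch (not the actual pystencils implementation):

```python
class KernelWrapper:
    """Callable that carries kernel metadata alongside the compiled function."""

    def __init__(self, kernel, parameters, ast_node):
        self.kernel = kernel          # the compiled callable
        self.parameters = parameters  # kernel parameter descriptions
        self.ast = ast_node           # unwrapped by show_code()

    def __call__(self, **kwargs):
        return self.kernel(**kwargs)
```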
@@ -5,7 +5,7 @@ import pystencils
 import sympy as sp
 from pystencils.backends.cuda_backend import CudaBackend
 from pystencils.backends.opencl_backend import OpenClBackend
-from pystencils.opencl.opencljit import make_python_function
+from pystencils.opencl.opencljit import make_python_function, init_globally, get_global_cl_queue

 try:
     import pyopencl as cl
@@ -233,3 +233,41 @@ def test_without_cuda():
     opencl_kernel = make_python_function(ast, queue, ctx)
     assert opencl_kernel is not None
     opencl_kernel(x=x, y=y, z=z)
+
+
+@pytest.mark.skipif(not HAS_OPENCL, reason="Test requires pyopencl")
+def test_kernel_creation():
+    z, y, x = pystencils.fields("z, y, x: [20,30]")
+
+    assignments = pystencils.AssignmentCollection({
+        z[0, 0]: x[0, 0] * sp.log(x[0, 0] * y[0, 0])
+    })
+
+    print(assignments)
+
+    init_globally()
+    ast = pystencils.create_kernel(assignments, target='opencl')
+    print(ast.backend)
+
+    code = str(pystencils.show_code(ast))
+    print(code)
+    assert 'get_local_size' in code
+
+    opencl_kernel = ast.compile()
+
+    x_cpu = np.random.rand(20, 30)
+    y_cpu = np.random.rand(20, 30)
+    z_cpu = np.random.rand(20, 30)
+
+    import pyopencl.array as array
+    assert get_global_cl_queue()
+
+    x = array.to_device(get_global_cl_queue(), x_cpu)
+    y = array.to_device(get_global_cl_queue(), y_cpu)
+    z = array.to_device(get_global_cl_queue(), z_cpu)
+
+    assert opencl_kernel is not None
+    opencl_kernel(x=x, y=y, z=z)
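The test only checks that the kernel runs; one way to also verify the computed values, assuming pyopencl's array API (`Array.get()` copies device data back to the host):

```python
import numpy as np

# Copy the result back and compare against a NumPy reference of the
# update rule z = x * log(x * y).
np.testing.assert_allclose(z.get(), x_cpu * np.log(x_cpu * y_cpu), rtol=1e-7)
```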