Commit fa30c13c authored by Stephan Seitz

Improve OpenCL integration via create_kernel

parent 296de5db
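With this change an OpenCL kernel can be generated and compiled directly through `create_kernel`. A minimal sketch of the intended workflow, assuming pyopencl is installed (the field shapes and the assignment mirror the new test added in this commit):

import sympy as sp
import pystencils
from pystencils.opencl.opencljit import init_globally

init_globally()  # set up a module-global OpenCL context and command queue

z, y, x = pystencils.fields("z, y, x: [20,30]")
assignments = pystencils.AssignmentCollection({
    z[0, 0]: x[0, 0] * sp.log(x[0, 0] * y[0, 0])
})

ast = pystencils.create_kernel(assignments, target='opencl')
print(pystencils.show_code(ast))  # now emits the 'opencl' dialect
kernel = ast.compile()            # wraps opencljit.make_python_function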
@@ -45,7 +45,12 @@ def show_code(ast: KernelFunction, custom_backend=None):
     if isinstance(ast, KernelWrapper):
         ast = ast.ast
-    dialect = 'cuda' if ast.backend == 'gpucuda' else 'c'
+    if ast.backend == 'gpucuda':
+        dialect = 'cuda'
+    elif ast.backend == 'opencl':
+        dialect = 'opencl'
+    else:
+        dialect = 'c'

     class CodeDisplay:
         def __init__(self, ast_input):
...
-from types import MappingProxyType
+import functools
 from itertools import combinations
+from types import MappingProxyType

 import sympy as sp

@@ -27,13 +28,15 @@ def create_kernel(assignments,
                   gpu_indexing_params=MappingProxyType({}),
                   use_textures_for_interpolation=True,
                   cpu_prepend_optimizations=[],
-                  use_auto_for_assignments=False):
+                  use_auto_for_assignments=False,
+                  opencl_queue=None,
+                  opencl_ctx=None):
     """
     Creates abstract syntax tree (AST) of kernel, using a list of update equations.

     Args:
         assignments: can be a single assignment, sequence of assignments or an `AssignmentCollection`
-        target: 'cpu', 'llvm' or 'gpu'
+        target: 'cpu', 'llvm', 'gpu' or 'opencl'
         data_type: data type used for all untyped symbols (i.e. non-fields), can also be a dict from symbol name
                    to type
         iteration_slice: rectangular subset to iterate over, if not specified the complete non-ghost layer \
@@ -105,15 +108,22 @@ def create_kernel(assignments,
             raise ValueError("Invalid value for cpu_vectorize_info")
     elif target == 'llvm':
         from pystencils.llvm import create_kernel
         ast = create_kernel(assignments, type_info=data_type, split_groups=split_groups,
                             iteration_slice=iteration_slice, ghost_layers=ghost_layers)
-    elif target == 'gpu':
+    elif target == 'gpu' or target == 'opencl':
         from pystencils.gpucuda import create_cuda_kernel
         ast = create_cuda_kernel(assignments, type_info=data_type,
                                  indexing_creator=indexing_creator_from_params(gpu_indexing, gpu_indexing_params),
                                  iteration_slice=iteration_slice, ghost_layers=ghost_layers,
                                  skip_independence_check=skip_independence_check,
                                  use_textures_for_interpolation=use_textures_for_interpolation)
+        if target == 'opencl':
+            from pystencils.opencl.opencljit import make_python_function
+            ast._backend = 'opencl'
+            ast.compile = functools.partial(make_python_function, ast, opencl_queue, opencl_ctx)
+            ast._target = 'opencl'
+            ast._backend = 'opencl'
     else:
         raise ValueError("Unknown target %s. Has to be one of 'cpu', 'gpu' or 'llvm' " % (target,))
...
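Instead of relying on the global context, the new `opencl_queue`/`opencl_ctx` parameters can be supplied explicitly. A sketch under the assumption that pyopencl is available; the field names, shapes, and the update rule are placeholders:

import numpy as np
import pyopencl as cl
import pyopencl.array as array
import pystencils

ctx = cl.create_some_context()
queue = cl.CommandQueue(ctx)

src, dst = pystencils.fields("src, dst: [20,30]")  # placeholder fields
update = pystencils.AssignmentCollection({dst[0, 0]: 2 * src[0, 0]})

ast = pystencils.create_kernel(update, target='opencl',
                               opencl_queue=queue, opencl_ctx=ctx)
kernel = ast.compile()

src_dev = array.to_device(queue, np.random.rand(20, 30))
dst_dev = array.to_device(queue, np.zeros((20, 30)))
kernel(src=src_dev, dst=dst_dev)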
@@ -3,10 +3,31 @@ import numpy as np
 from pystencils.backends.cbackend import generate_c, get_headers
 from pystencils.gpucuda.cudajit import _build_numpy_argument_list, _check_arguments
 from pystencils.include import get_pystencils_include_path
+from pystencils.kernel_wrapper import KernelWrapper

 USE_FAST_MATH = True

+_global_cl_ctx = None
+_global_cl_queue = None
+
+
+def get_global_cl_queue():
+    return _global_cl_queue
+
+
+def get_global_cl_ctx():
+    return _global_cl_ctx
+
+
+def init_globally(device_index=0):
+    import pyopencl as cl
+    global _global_cl_ctx
+    global _global_cl_queue
+    _global_cl_ctx = cl.create_some_context(device_index)
+    _global_cl_queue = cl.CommandQueue(_global_cl_ctx)
+
+
 def make_python_function(kernel_function_node, opencl_queue, opencl_ctx, argument_dict=None, custom_backend=None):
     """
     Creates a **OpenCL** kernel function from an abstract syntax tree which
@@ -24,6 +45,12 @@ def make_python_function(kernel_function_node, opencl_queue, opencl_ctx, argumen
         compiled kernel as Python function
     """
     import pyopencl as cl

+    if not opencl_ctx:
+        opencl_ctx = _global_cl_ctx
+    if not opencl_queue:
+        opencl_queue = _global_cl_queue
+
     assert opencl_ctx, "No valid OpenCL context"
     assert opencl_queue, "No valid OpenCL queue"
@@ -90,4 +117,5 @@ def make_python_function(kernel_function_node, opencl_queue, opencl_ctx, argumen
     wrapper.ast = kernel_function_node
     wrapper.parameters = kernel_function_node.get_parameters()
+    wrapper = KernelWrapper(wrapper, parameters, kernel_function_node)
     return wrapper
@@ -5,7 +5,7 @@ import pystencils
 import sympy as sp
 from pystencils.backends.cuda_backend import CudaBackend
 from pystencils.backends.opencl_backend import OpenClBackend
-from pystencils.opencl.opencljit import make_python_function
+from pystencils.opencl.opencljit import make_python_function, init_globally, get_global_cl_queue

 try:
     import pyopencl as cl

@@ -233,3 +233,41 @@ def test_without_cuda():
     opencl_kernel = make_python_function(ast, queue, ctx)
     assert opencl_kernel is not None
     opencl_kernel(x=x, y=y, z=z)
+
+
+@pytest.mark.skipif(not HAS_OPENCL, reason="Test requires pyopencl")
+def test_kernel_creation():
+    z, y, x = pystencils.fields("z, y, x: [20,30]")
+
+    assignments = pystencils.AssignmentCollection({
+        z[0, 0]: x[0, 0] * sp.log(x[0, 0] * y[0, 0])
+    })
+
+    print(assignments)
+
+    init_globally()
+    ast = pystencils.create_kernel(assignments, target='opencl')
+
+    print(ast.backend)
+
+    code = str(pystencils.show_code(ast))
+    print(code)
+    assert 'get_local_size' in code
+
+    opencl_kernel = ast.compile()
+
+    x_cpu = np.random.rand(20, 30)
+    y_cpu = np.random.rand(20, 30)
+    z_cpu = np.random.rand(20, 30)
+
+    import pyopencl.array as array
+    assert get_global_cl_queue()
+
+    x = array.to_device(get_global_cl_queue(), x_cpu)
+    y = array.to_device(get_global_cl_queue(), y_cpu)
+    z = array.to_device(get_global_cl_queue(), z_cpu)
+
+    assert opencl_kernel is not None
+    opencl_kernel(x=x, y=y, z=z)