-
Michael Kuron authoredMichael Kuron authored
test_opencl.py 6.92 KiB
import numpy as np
import pytest
import sympy as sp
import pystencils
from pystencils.backends.cuda_backend import CudaBackend
from pystencils.backends.opencl_backend import OpenClBackend
from pystencils.opencl.opencljit import get_global_cl_queue, make_python_function
try:
import pyopencl as cl
HAS_OPENCL = True
import pystencils.opencl.autoinit
except Exception:
HAS_OPENCL = False
def test_print_opencl():
z, y, x = pystencils.fields("z, y, x: [2d]")
assignments = pystencils.AssignmentCollection({
z[0, 0]: x[0, 0] * sp.log(x[0, 0] * y[0, 0])
})
print(assignments)
ast = pystencils.create_kernel(assignments, target='gpu')
print(ast)
pystencils.show_code(ast, custom_backend=CudaBackend())
opencl_code = pystencils.get_code_str(ast, custom_backend=OpenClBackend())
print(opencl_code)
assert "__global double * RESTRICT const _data_x" in str(opencl_code)
assert "__global double * RESTRICT" in str(opencl_code)
assert "get_local_id(0)" in str(opencl_code)
@pytest.mark.skipif(not HAS_OPENCL, reason="Test requires pyopencl")
def test_opencl_jit_fixed_size():
pytest.importorskip('pycuda')
z, y, x = pystencils.fields("z, y, x: [20,30]")
assignments = pystencils.AssignmentCollection({
z[0, 0]: x[0, 0] * sp.log(x[0, 0] * y[0, 0])
})
print(assignments)
ast = pystencils.create_kernel(assignments, target='gpu')
print(ast)
code = pystencils.show_code(ast, custom_backend=CudaBackend())
print(code)
opencl_code = pystencils.show_code(ast, custom_backend=OpenClBackend())
print(opencl_code)
cuda_kernel = ast.compile()
assert cuda_kernel is not None
import pycuda.gpuarray as gpuarray
x_cpu = np.random.rand(20, 30)
y_cpu = np.random.rand(20, 30)
z_cpu = np.random.rand(20, 30)
x = gpuarray.to_gpu(x_cpu)
y = gpuarray.to_gpu(y_cpu)
z = gpuarray.to_gpu(z_cpu)
cuda_kernel(x=x, y=y, z=z)
result_cuda = z.get()
import pyopencl.array as array
ctx = cl.create_some_context(0)
queue = cl.CommandQueue(ctx)
x = array.to_device(queue, x_cpu)
y = array.to_device(queue, y_cpu)
z = array.to_device(queue, z_cpu)
opencl_kernel = make_python_function(ast, queue, ctx)
assert opencl_kernel is not None
opencl_kernel(x=x, y=y, z=z)
result_opencl = z.get(queue)
assert np.allclose(result_cuda, result_opencl)
@pytest.mark.skipif(not HAS_OPENCL, reason="Test requires pyopencl")
def test_opencl_jit():
pytest.importorskip('pycuda')
z, y, x = pystencils.fields("z, y, x: [2d]")
assignments = pystencils.AssignmentCollection({
z[0, 0]: x[0, 0] * sp.log(x[0, 0] * y[0, 0])
})
print(assignments)
ast = pystencils.create_kernel(assignments, target='gpu')
print(ast)
pystencils.show_code(ast, custom_backend=CudaBackend())
pystencils.show_code(ast, custom_backend=OpenClBackend())
cuda_kernel = ast.compile()
assert cuda_kernel is not None
import pycuda.gpuarray as gpuarray
x_cpu = np.random.rand(20, 30)
y_cpu = np.random.rand(20, 30)
z_cpu = np.random.rand(20, 30)
x = gpuarray.to_gpu(x_cpu)
y = gpuarray.to_gpu(y_cpu)
z = gpuarray.to_gpu(z_cpu)
cuda_kernel(x=x, y=y, z=z)
result_cuda = z.get()
import pyopencl.array as array
ctx = cl.create_some_context(0)
queue = cl.CommandQueue(ctx)
x = array.to_device(queue, x_cpu)
y = array.to_device(queue, y_cpu)
z = array.to_device(queue, z_cpu)
opencl_kernel = make_python_function(ast, queue, ctx)
assert opencl_kernel is not None
opencl_kernel(x=x, y=y, z=z)
result_opencl = z.get(queue)
assert np.allclose(result_cuda, result_opencl)
@pytest.mark.skipif(not HAS_OPENCL, reason="Test requires pyopencl")
def test_opencl_jit_with_parameter():
pytest.importorskip('pycuda')
z, y, x = pystencils.fields("z, y, x: [2d]")
a = sp.Symbol('a')
assignments = pystencils.AssignmentCollection({
z[0, 0]: x[0, 0] * sp.log(x[0, 0] * y[0, 0]) + a
})
print(assignments)
ast = pystencils.create_kernel(assignments, target='gpu')
print(ast)
code = pystencils.show_code(ast, custom_backend=CudaBackend())
print(code)
opencl_code = pystencils.show_code(ast, custom_backend=OpenClBackend())
print(opencl_code)
cuda_kernel = ast.compile()
assert cuda_kernel is not None
import pycuda.gpuarray as gpuarray
x_cpu = np.random.rand(20, 30)
y_cpu = np.random.rand(20, 30)
z_cpu = np.random.rand(20, 30)
x = gpuarray.to_gpu(x_cpu)
y = gpuarray.to_gpu(y_cpu)
z = gpuarray.to_gpu(z_cpu)
cuda_kernel(x=x, y=y, z=z, a=5.)
result_cuda = z.get()
import pyopencl.array as array
ctx = cl.create_some_context(0)
queue = cl.CommandQueue(ctx)
x = array.to_device(queue, x_cpu)
y = array.to_device(queue, y_cpu)
z = array.to_device(queue, z_cpu)
opencl_kernel = make_python_function(ast, queue, ctx)
assert opencl_kernel is not None
opencl_kernel(x=x, y=y, z=z, a=5.)
result_opencl = z.get(queue)
assert np.allclose(result_cuda, result_opencl)
@pytest.mark.skipif(not HAS_OPENCL, reason="Test requires pyopencl")
def test_without_cuda():
z, y, x = pystencils.fields("z, y, x: [20,30]")
assignments = pystencils.AssignmentCollection({
z[0, 0]: x[0, 0] * sp.log(x[0, 0] * y[0, 0])
})
print(assignments)
ast = pystencils.create_kernel(assignments, target='gpu')
print(ast)
opencl_code = pystencils.show_code(ast, custom_backend=OpenClBackend())
print(opencl_code)
x_cpu = np.random.rand(20, 30)
y_cpu = np.random.rand(20, 30)
z_cpu = np.random.rand(20, 30)
import pyopencl.array as array
ctx = cl.create_some_context(0)
queue = cl.CommandQueue(ctx)
x = array.to_device(queue, x_cpu)
y = array.to_device(queue, y_cpu)
z = array.to_device(queue, z_cpu)
opencl_kernel = make_python_function(ast, queue, ctx)
assert opencl_kernel is not None
opencl_kernel(x=x, y=y, z=z)
@pytest.mark.skipif(not HAS_OPENCL, reason="Test requires pyopencl")
def test_kernel_creation():
global pystencils
z, y, x = pystencils.fields("z, y, x: [20,30]")
assignments = pystencils.AssignmentCollection({
z[0, 0]: x[0, 0] * sp.log(x[0, 0] * y[0, 0])
})
print(assignments)
import pystencils.opencl.autoinit
ast = pystencils.create_kernel(assignments, target='opencl')
print(ast.backend)
code = pystencils.get_code_str(ast)
print(code)
assert 'get_local_size' in code
opencl_kernel = ast.compile()
x_cpu = np.random.rand(20, 30)
y_cpu = np.random.rand(20, 30)
z_cpu = np.random.rand(20, 30)
import pyopencl.array as array
assert get_global_cl_queue()
x = array.to_device(get_global_cl_queue(), x_cpu)
y = array.to_device(get_global_cl_queue(), y_cpu)
z = array.to_device(get_global_cl_queue(), z_cpu)
assert opencl_kernel is not None
opencl_kernel(x=x, y=y, z=z)