Skip to content
Snippets Groups Projects

Remove OpenCL support

Merged Markus Holzer requested to merge holzer/lbmpy:RemoveOpenCL into master
Viewing commit 5a091f03
Next
Show latest version
8 files
+ 27
66
Preferences
Compare changes
Files
8
@@ -104,8 +104,7 @@ def get_communication_slices(
def periodic_pdf_copy_kernel(pdf_field, src_slice, dst_slice,
domain_size=None, target=Target.GPU,
opencl_queue=None, opencl_ctx=None):
domain_size=None, target=Target.GPU):
"""Copies a rectangular array slice onto another non-overlapping array slice"""
from pystencils.gpucuda.kernelcreation import create_cuda_kernel
@@ -136,9 +135,6 @@ def periodic_pdf_copy_kernel(pdf_field, src_slice, dst_slice,
if target == Target.GPU:
from pystencils.gpucuda import make_python_function
return make_python_function(ast)
elif target == Target.OPENCL:
from pystencils.opencl import make_python_function
return make_python_function(ast, opencl_queue, opencl_ctx)
else:
raise ValueError('Invalid target:', target)
@@ -147,22 +143,17 @@ class LBMPeriodicityHandling:
def __init__(self, stencil, data_handling, pdf_field_name,
streaming_pattern='pull', ghost_layers=1,
opencl_queue=None, opencl_ctx=None,
pycuda_direct_copy=True):
"""
Periodicity Handling for Lattice Boltzmann Streaming.
**On the usage with cuda/opencl:**
**On the usage with cuda:**
- pycuda allows the copying of sliced arrays within device memory using the numpy syntax,
e.g. `dst[:,0] = src[:,-1]`. In this implementation, this is the default for periodicity
handling. Alternatively, if you set `pycuda_direct_copy=False`, GPU kernels are generated and
compiled. The compiled kernels are almost twice as fast in execution as pycuda array copying,
but especially for large stencils like D3Q27, their compilation can take up to 20 seconds.
Choose your weapon depending on your use case.
- pyopencl does not support copying of non-contiguous sliced arrays, so the usage of compiled
copy kernels is forced on us. On the positive side, compilation of the OpenCL kernels appears
to be about four times faster.
"""
if not isinstance(data_handling, SerialDataHandling):
raise ValueError('Only serial data handling is supported!')
@@ -172,7 +163,7 @@ class LBMPeriodicityHandling:
self.dh = data_handling
target = data_handling.default_target
assert target in [Target.CPU, Target.GPU, Target.OPENCL]
assert target in [Target.CPU, Target.GPU]
self.pdf_field_name = pdf_field_name
self.ghost_layers = ghost_layers
@@ -180,8 +171,6 @@ class LBMPeriodicityHandling:
self.inplace_pattern = is_inplace(streaming_pattern)
self.target = target
self.cpu = target == Target.CPU
self.opencl_queue = opencl_queue
self.opencl_ctx = opencl_ctx
self.pycuda_direct_copy = target == Target.GPU and pycuda_direct_copy
def is_copy_direction(direction):
@@ -205,7 +194,7 @@ class LBMPeriodicityHandling:
ghost_layers=ghost_layers)
self.comm_slices.append(list(chain.from_iterable(v for k, v in slices_per_comm_dir.items())))
if target == Target.OPENCL or (target == Target.GPU and not pycuda_direct_copy):
if target == Target.GPU and not pycuda_direct_copy:
self.device_copy_kernels = []
for timestep in timesteps:
self.device_copy_kernels.append(self._compile_copy_kernels(timestep))
@@ -227,9 +216,7 @@ class LBMPeriodicityHandling:
kernels = []
for src, dst in self.comm_slices[timestep.idx]:
kernels.append(
periodic_pdf_copy_kernel(
pdf_field, src, dst, target=self.target,
opencl_queue=self.opencl_queue, opencl_ctx=self.opencl_ctx))
periodic_pdf_copy_kernel(pdf_field, src, dst, target=self.target))
return kernels
def _periodicity_handling_gpu(self, prev_timestep):