Skip to content
Snippets Groups Projects

WIP: CUDA autotune

Status: Closed. Stephan Seitz requested to merge seitz/pystencils:cuda-autotune into master.
@@ -131,7 +131,7 @@ class AbstractIndexing(abc.ABC):
current_best = block_and_thread_numbers
print(f'{current_best} is the best out of {self._autotune_block_sizes or self.AUTOTUNE_BLOCK_SIZES}')
- self._block_size = current_best
+ self._block_size = current_best['block']
return current_best
return _autotune_call_parameters(self,
call_shape,
@@ -158,7 +158,7 @@ class BlockIndexing(AbstractIndexing):
AUTOTUNE_NUM_CALLS = 10
def __init__(self, field, iteration_slice,
- block_size=get_cuda_config()['preferred_block_size'],
+ block_size=tuple(get_cuda_config()['preferred_block_size']),
permute_block_size_dependent_on_layout=True,
compile_time_block_size=False,
@@ -308,6 +308,7 @@ class LineIndexing(AbstractIndexing):
self._coordinates = coordinates
self._iterationSlice = normalize_slice(iteration_slice, field.spatial_shape)
self._symbolicShape = [e if isinstance(e, sp.Basic) else None for e in field.spatial_shape]
+ self._autotune_block_sizes = None
@property
def coordinates(self):