Skip to content
Snippets Groups Projects

WIP: Cuda autotune

Closed Stephan Seitz requested to merge seitz/pystencils:cuda-autotune into master
@@ -290,6 +290,8 @@ class LineIndexing(AbstractIndexing):
@@ -290,6 +290,8 @@ class LineIndexing(AbstractIndexing):
This indexing scheme supports up to 4 spatial dimensions, where the innermost dimension is not larger than the
This indexing scheme supports up to 4 spatial dimensions, where the innermost dimension is not larger than the
maximum amount of threads allowed in a CUDA block (which depends on device).
maximum amount of threads allowed in a CUDA block (which depends on device).
"""
"""
 
# Tuple of 3-element integer tuples; only the first component varies (16..192),
# the other two are always 1.
# NOTE(review): presumably the candidate CUDA block sizes (x, y, z) tried during
# autotuning — the code that consumes this is not visible here; confirm.
AUTOTUNE_BLOCK_SIZES = ((16, 1, 1), (32, 1, 1), (64, 1, 1), (96, 1, 1), (128, 1, 1), (160, 1, 1), (192, 1, 1),)
 
# NOTE(review): presumably the number of kernel calls timed per candidate block
# size when autotuning — confirm against the code that reads this constant.
AUTOTUNE_NUM_CALLS = 10
def __init__(self, field, iteration_slice):
def __init__(self, field, iteration_slice):
available_indices = [THREAD_IDX[0]] + BLOCK_IDX
available_indices = [THREAD_IDX[0]] + BLOCK_IDX