Skip to content
Snippets Groups Projects

GPU Indexing Schemes and Launch Configurations

Merged Frederik Hennig requested to merge fhennig/lambdas into v2.0-dev
All threads resolved!
5 files
+ 121
- 78
Compare changes
  • Side-by-side
  • Inline
Files
5
@@ -113,11 +113,11 @@ class Blockwise4DMapping(ThreadToIndexMapping):
@@ -113,11 +113,11 @@ class Blockwise4DMapping(ThreadToIndexMapping):
"""Blockwise index mapping for up to 4D iteration spaces, where the outer three dimensions
"""Blockwise index mapping for up to 4D iteration spaces, where the outer three dimensions
are mapped to block indices."""
are mapped to block indices."""
_indices_in_loop_order = [ # slowest to fastest
_indices_fastest_first = [ # fastest to slowest
BLOCK_IDX[2],
BLOCK_IDX[1],
BLOCK_IDX[0],
THREAD_IDX[0],
THREAD_IDX[0],
 
BLOCK_IDX[0],
 
BLOCK_IDX[1],
 
BLOCK_IDX[2]
]
]
def __call__(self, ispace: IterationSpace) -> dict[PsSymbol, PsExpression]:
def __call__(self, ispace: IterationSpace) -> dict[PsSymbol, PsExpression]:
@@ -141,7 +141,7 @@ class Blockwise4DMapping(ThreadToIndexMapping):
@@ -141,7 +141,7 @@ class Blockwise4DMapping(ThreadToIndexMapping):
dimensions = ispace.dimensions_in_loop_order()
dimensions = ispace.dimensions_in_loop_order()
idx_map: dict[PsSymbol, PsExpression] = dict()
idx_map: dict[PsSymbol, PsExpression] = dict()
for dim, tid in zip(dimensions, self._indices_in_loop_order):
for dim, tid in zip(dimensions[::-1], self._indices_fastest_first):
idx_map[dim.counter] = dim.start + dim.step * PsCast(
idx_map[dim.counter] = dim.start + dim.step * PsCast(
deconstify(dim.counter.get_dtype()), tid
deconstify(dim.counter.get_dtype()), tid
)
)
@@ -152,7 +152,7 @@ class Blockwise4DMapping(ThreadToIndexMapping):
@@ -152,7 +152,7 @@ class Blockwise4DMapping(ThreadToIndexMapping):
self, ispace: SparseIterationSpace
self, ispace: SparseIterationSpace
) -> dict[PsSymbol, PsExpression]:
) -> dict[PsSymbol, PsExpression]:
sparse_ctr = PsExpression.make(ispace.sparse_counter)
sparse_ctr = PsExpression.make(ispace.sparse_counter)
thread_idx = self._indices_in_loop_order[-1]
thread_idx = self._indices_fastest_first[0]
idx_map: dict[PsSymbol, PsExpression] = {
idx_map: dict[PsSymbol, PsExpression] = {
ispace.sparse_counter: PsCast(
ispace.sparse_counter: PsCast(
deconstify(sparse_ctr.get_dtype()), thread_idx
deconstify(sparse_ctr.get_dtype()), thread_idx
Loading