Skip to content
Snippets Groups Projects

GPU Indexing Schemes and Launch Configurations

Merged Frederik Hennig requested to merge fhennig/lambdas into v2.0-dev
4 files
+ 149
3
Compare changes
  • Side-by-side
  • Inline
Files
4
from __future__ import annotations
from typing import TYPE_CHECKING
from abc import abstractmethod
from abc import ABC, abstractmethod
from ..ast.expressions import PsExpression
from ..ast.structural import PsBlock
@@ -16,6 +16,26 @@ if TYPE_CHECKING:
from ...codegen.kernel import GpuThreadsRange
class WorkItemMapping(ABC):
    """Interface for callables that map GPU block/thread indices to work-item indices.

    Concrete mappings override `__call__` in order to modify the behavior of
    the thread index mapping.
    """

    @abstractmethod
    def __call__(
        self,
        block_idx: tuple[PsExpression, PsExpression, PsExpression],
        thread_idx: tuple[PsExpression, PsExpression, PsExpression],
        ispace_rank: int,
    ) -> tuple[PsExpression, ...]:
        """Derive the compressed work-item index for a GPU block/thread pair.

        Args:
            block_idx: Expression triple for the current GPU block index.
            thread_idx: Expression triple for the current GPU thread index.
            ispace_rank: Dimensionality of the iteration space.

        Returns:
            A tuple with exactly ``ispace_rank`` entries, holding expressions
            for the compressed index of the work item identified by the given
            block and thread index triples. The *compressed index* is the work
            item's index before the iteration space's lower limits and strides
            are applied.
        """
class GenericGpu(Platform):
@abstractmethod
def materialize_iteration_space(
@@ -38,13 +58,13 @@ class GenericGpu(Platform):
@classmethod
def _threads_from_full_ispace(cls, ispace: FullIterationSpace) -> GpuThreadsRange:
from ...codegen.kernel import GpuThreadsRange
dimensions = ispace.dimensions_in_loop_order()[::-1]
if len(dimensions) > 3:
raise NotImplementedError(
f"Cannot create a GPU threads range for an {len(dimensions)}-dimensional iteration space"
)
from ..ast.analysis import collect_undefined_symbols as collect
for dim in dimensions:
Loading