Skip to content
Snippets Groups Projects
Commit 9a15d1f7 authored by Frederik Hennig's avatar Frederik Hennig
Browse files

Implement Lambda and GpuLaunchGridConstraints. Start on backend WorkItemMapping.

parent 4876f0b7
No related branches found
No related tags found
1 merge request: !449 "GPU Indexing Schemes and Launch Configurations"
Pipeline #73846 passed
from __future__ import annotations
from typing import TYPE_CHECKING
from abc import abstractmethod
from abc import ABC, abstractmethod
from ..ast.expressions import PsExpression
from ..ast.structural import PsBlock
......@@ -16,6 +16,26 @@ if TYPE_CHECKING:
from ...codegen.kernel import GpuThreadsRange
class WorkItemMapping(ABC):
    """Interface for work-item mappings that customize the GPU thread-index translation."""

    @abstractmethod
    def __call__(
        self,
        block_idx: tuple[PsExpression, PsExpression, PsExpression],
        thread_idx: tuple[PsExpression, PsExpression, PsExpression],
        ispace_rank: int,
    ) -> tuple[PsExpression, ...]:
        """Map a GPU block/thread index pair onto a work-item index.

        Args:
            block_idx: Expressions for the x, y and z components of the block index.
            thread_idx: Expressions for the x, y and z components of the thread index.
            ispace_rank: Dimensionality of the iteration space.

        Returns:
            A tuple of exactly ``ispace_rank`` expressions giving the *compressed*
            index of the work item identified by the given block and thread index
            triples — i.e. its index before the iteration space's lower limits
            and strides are applied.
        """
class GenericGpu(Platform):
@abstractmethod
def materialize_iteration_space(
......@@ -38,13 +58,13 @@ class GenericGpu(Platform):
@classmethod
def _threads_from_full_ispace(cls, ispace: FullIterationSpace) -> GpuThreadsRange:
from ...codegen.kernel import GpuThreadsRange
dimensions = ispace.dimensions_in_loop_order()[::-1]
if len(dimensions) > 3:
raise NotImplementedError(
f"Cannot create a GPU threads range for an {len(dimensions)}-dimensional iteration space"
)
from ..ast.analysis import collect_undefined_symbols as collect
for dim in dimensions:
......
......@@ -3,6 +3,7 @@ from __future__ import annotations
from warnings import warn
from abc import ABC
from collections.abc import Collection
from enum import Enum, auto
from typing import TYPE_CHECKING, Sequence, Generic, TypeVar, Callable, Any, cast
from dataclasses import dataclass, InitVar, fields
......@@ -331,6 +332,35 @@ class CpuOptions(ConfigBase):
"""
class GpuIndexingScheme(Enum):
    """Available index translation schemes for GPU kernels."""

    Linear3D = auto()
    """Map coordinates to global thread indices.

    Supports up to three-dimensional iteration spaces.
    For each dimension (with known start, stop and step values), compute the current iteration
    point as ``start + step * (blockIdx.c * blockDim.c + threadIdx.c)``
    (where c :math:`\\in` (x, y, z)).
    """

    Blockwise4D = auto()
    """On a 3D grid of 1D blocks, map the fastest coordinate onto the intra-block thread index,
    and slower coordinates onto the block index.

    Supports up to four-dimensional iteration spaces.
    Using this indexing scheme, the iteration counters of up to four dimensions are assigned
    like follows, from slowest to fastest:

    .. code-block:: C++

        ctr_3 = blockIdx.z;
        ctr_2 = blockIdx.y;
        ctr_1 = blockIdx.x;
        ctr_0 = threadIdx.x;
    """
@dataclass
class GpuOptions(ConfigBase):
"""Configuration options specific to GPU targets."""
......
from __future__ import annotations
from itertools import chain
from .lambdas import Lambda
from .parameters import Parameter
_ConstraintTriple = tuple[Lambda | None, Lambda | None, Lambda | None]
class GpuLaunchGridConstraints:
    """Constraints on the number of threads and blocks on the GPU launch grid for a given kernel.

    An instance of this class fixes all or a subset of the per-block thread
    counts and the per-grid block counts, either statically or as functions
    of runtime parameters.
    """

    def __init__(
        self,
        block_size: _ConstraintTriple | None = None,
        grid_size: _ConstraintTriple | None = None,
    ) -> None:
        # Normalize missing triples to "fully unconstrained".
        self._block_size: _ConstraintTriple = (
            block_size if block_size is not None else (None, None, None)
        )
        self._grid_size: _ConstraintTriple = (
            grid_size if grid_size is not None else (None, None, None)
        )

        # Union of all parameters referenced by any non-trivial constraint.
        self._params = frozenset(
            param
            for constraint in chain(self._block_size, self._grid_size)
            if constraint is not None
            for param in constraint.parameters
        )

    @property
    def parameters(self) -> frozenset[Parameter]:
        """Parameters to this set of constraints"""
        return self._params

    @property
    def block_size(self) -> _ConstraintTriple:
        """Constraints on the number of threads per block"""
        return self._block_size

    @property
    def grid_size(self) -> _ConstraintTriple:
        """Constraints on the number of blocks on the grid"""
        return self._grid_size
from __future__ import annotations
from typing import Sequence
import numpy as np
from .parameters import Parameter
from ..types import PsType
from ..backend.ast.expressions import PsExpression
class Lambda:
    """An auxiliary one-line function emitted by the code generator."""

    def __init__(self, expr: PsExpression, params: Sequence[Parameter]):
        self._expr = expr
        self._params = tuple(params)
        # The lambda's return type is taken directly from its body expression.
        self._return_type = expr.get_dtype()

    @property
    def parameters(self) -> tuple[Parameter, ...]:
        """The parameters this lambda takes, in positional order."""
        return self._params

    @property
    def return_type(self) -> PsType:
        """The data type of the value this lambda evaluates to."""
        return self._return_type

    def __call__(self, **kwargs) -> np.generic:
        """Evaluate this lambda with the given arguments.

        A value must be supplied for every parameter listed in `parameters`.
        """
        from ..backend.ast.expressions import evaluate_expression

        return evaluate_expression(self._expr, kwargs)

    def __str__(self) -> str:
        return str(self._expr)

    def c_code(self) -> str:
        """Render this lambda's body expression as C code."""
        from ..backend.emission import CAstPrinter

        return CAstPrinter()(self._expr)
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment