Commit 210f768a authored by Frederik Hennig

WIP refactor launch configuration and gpu indexing

parent e67b5e23
1 merge request: !449 GPU Indexing Schemes and Launch Configurations
Pipeline #74042 failed
@@ -380,7 +380,7 @@ class GpuOptions(ConfigBase):
     This check can be discarded through this option, at your own peril.
     """

-    block_size: BasicOption[tuple[int, int, int]] = BasicOption()
+    block_size: BasicOption[tuple[int, int, int] | _AUTO_TYPE] = BasicOption(AUTO)
     """Desired block size for the execution of GPU kernels. May be overridden later by the runtime system."""

     manual_launch_grid: BasicOption[bool] = BasicOption(False)
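The new default is the AUTO sentinel rather than an unset option. A minimal standalone sketch of that sentinel-default pattern, using stand-in classes rather than the pystencils BasicOption machinery:

    class _AutoType:
        # Stand-in for pystencils' AUTO sentinel type.
        def __repr__(self) -> str:
            return "AUTO"

    AUTO = _AutoType()

    class Options:
        # Defaulting to the AUTO sentinel lets "choose a block size for me"
        # be distinguished from an explicit user-provided tuple.
        def __init__(self, block_size=AUTO):
            self.block_size = block_size

    print(Options().block_size)             # AUTO -> code generator decides
    print(Options((128, 1, 1)).block_size)  # explicit tuple wins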
...
@@ -516,26 +516,27 @@ def create_gpu_kernel_function(
     return kfunc


-def _get_function_params(
-    ctx: KernelCreationContext, symbols: Iterable[PsSymbol]
-) -> list[Parameter]:
-    params: list[Parameter] = []
-
+def _symbol_to_param(ctx: KernelCreationContext, symbol: PsSymbol):
     from pystencils.backend.memory import BufferBasePtr, BackendPrivateProperty

-    for symb in symbols:
-        props: set[PsSymbolProperty] = set()
-        for prop in symb.properties:
-            match prop:
-                case BufferBasePtr(buf):
-                    field = ctx.find_field(buf.name)
-                    props.add(FieldBasePtr(field))
-                case BackendPrivateProperty():
-                    pass
-                case _:
-                    props.add(prop)
-        params.append(Parameter(symb.name, symb.get_dtype(), props))
+    props: set[PsSymbolProperty] = set()
+    for prop in symbol.properties:
+        match prop:
+            case BufferBasePtr(buf):
+                field = ctx.find_field(buf.name)
+                props.add(FieldBasePtr(field))
+            case BackendPrivateProperty():
+                pass
+            case _:
+                props.add(prop)
+
+    return Parameter(symbol.name, symbol.get_dtype(), props)
+
+
+def _get_function_params(
+    ctx: KernelCreationContext, symbols: Iterable[PsSymbol]
+) -> list[Parameter]:
+    params: list[Parameter] = [_symbol_to_param(ctx, s) for s in symbols]
     params.sort(key=lambda p: p.name)
     return params
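The `case BufferBasePtr(buf)` arm relies on positional capture, which requires the property class to expose `__match_args__` (dataclasses provide this automatically). A standalone illustration with a hypothetical stand-in property type:

    from dataclasses import dataclass

    @dataclass(frozen=True)
    class BasePtrProp:  # stand-in for BufferBasePtr
        buffer_name: str

    def translate(prop):
        match prop:
            case BasePtrProp(name):  # positional capture binds `name` to buffer_name
                return f"field:{name}"
            case _:
                return prop

    print(translate(BasePtrProp("u")))  # -> field:u
    print(translate("unchanged"))       # -> unchanged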
...
 from __future__ import annotations

 from abc import ABC, abstractmethod
-from typing import cast, Any
+from typing import cast, Any, Callable
 from itertools import chain

 from .functions import Lambda
 from .kernel import GpuKernel
 from .parameters import Parameter
 from .errors import CodegenError
+from .config import GpuIndexingScheme, _AUTO_TYPE

 from ..backend.kernelcreation import (
     KernelCreationContext,
@@ -18,6 +19,8 @@ from ..backend.platforms.cuda import ThreadToIndexMapping
 from ..backend.ast.expressions import PsExpression

+dim3 = tuple[int, int, int]
+_Dim3Params = tuple[Parameter, Parameter, Parameter]
 _Dim3Lambda = tuple[Lambda, Lambda, Lambda]
@@ -48,29 +51,179 @@ class GpuLaunchConfiguration:
         """Parameters to this set of constraints"""
         return self._params

-    @property
-    def parameter_values(self) -> dict[Parameter, Any]:
+    def get_valuation(self) -> dict[Parameter, Any]:
         """Values for all parameters that are specific to the launch grid configuration and not
         also kernel parameters."""
         return self._valuation

-    @property
-    def block_size(self) -> _Dim3Lambda:
-        """Constraints on the number of threads per block"""
+    def get_block_size(self) -> _Dim3Lambda:
         return self._block_size

-    @property
-    def grid_size(self) -> _Dim3Lambda:
-        """Constraints on the number of blocks on the grid"""
+    def get_grid_size(self) -> _Dim3Lambda:
         return self._grid_size
+
+class ManualLaunchConfiguration(GpuLaunchConfiguration):
+    """Manual GPU launch configuration.
+
+    This launch configuration requires the user to set block and grid size.
+    """
+
+    def __init__(
+        self,
+        block_size: _Dim3Lambda,
+        grid_size: _Dim3Lambda,
+        block_size_params: _Dim3Params,
+        grid_size_params: _Dim3Params,
+    ):
+        super().__init__(
+            cast(_Dim3Lambda, block_size),
+            cast(_Dim3Lambda, grid_size),
+            set(block_size_params).union(grid_size_params),
+        )
+
+        self._block_size_params = block_size_params
+        self._grid_size_params = grid_size_params
+
+        self._user_block_size: dim3 | None = None
+        self._user_grid_size: dim3 | None = None
+
+    @property
+    def block_size(self) -> dim3 | None:
+        return self._user_block_size
+
+    @block_size.setter
+    def block_size(self, val: dim3):
+        self._user_block_size = val
+
+    @property
+    def grid_size(self) -> dim3 | None:
+        return self._user_grid_size
+
+    @grid_size.setter
+    def grid_size(self, val: dim3):
+        self._user_grid_size = val
+
+    def get_valuation(self) -> dict[Parameter, Any]:
+        if self._user_block_size is None:
+            raise AttributeError("No GPU block size was specified")
+
+        if self._user_grid_size is None:
+            raise AttributeError("No GPU grid size was specified")
+
+        valuation: dict[Parameter, Any] = dict()
+
+        for bs_param, bs in zip(self._block_size_params, self._user_block_size):
+            valuation[bs_param] = bs
+
+        for gs_param, gs in zip(self._grid_size_params, self._user_grid_size):
+            valuation[gs_param] = gs
+
+        return valuation
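A hedged usage sketch for this configuration; `factory` stands for the callable produced by `GpuIndexing._manual_config_factory` below, and the snippet is not runnable outside the codebase:

    launch_config = factory()
    launch_config.block_size = (128, 1, 1)   # must be set before launch
    launch_config.grid_size = (256, 1, 1)    # must be set before launch
    # Maps the gpuBlockSize_0..2 and gpuGridSize_0..2 parameters to these values:
    valuation = launch_config.get_valuation()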
+
+class GridFromBlockSizeConfiguration(GpuLaunchConfiguration):
+    """GPU launch configuration that computes the grid size from a user-defined block size."""
+
+    def __init__(
+        self,
+        block_size: _Dim3Lambda,
+        grid_size: _Dim3Lambda,
+        block_size_params: _Dim3Params,
+        default_block_size: dim3 | None = None,
+    ) -> None:
+        super().__init__(block_size, grid_size, set(block_size_params))
+
+        self._block_size_params = block_size_params
+        self._user_block_size: dim3 | None = default_block_size
+
+    @property
+    def block_size(self) -> dim3 | None:
+        return self._user_block_size
+
+    @block_size.setter
+    def block_size(self, val: dim3):
+        self._user_block_size = val
+
+    def get_valuation(self) -> dict[Parameter, Any]:
+        if self._user_block_size is None:
+            raise AttributeError("No GPU block size was specified")
+
+        valuation: dict[Parameter, Any] = dict()
+
+        for bs_param, bs in zip(self._block_size_params, self._user_block_size):
+            valuation[bs_param] = bs
+
+        return valuation
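Here only the block size is user-settable; the grid-size lambdas are supplied by the indexing scheme. Assuming the usual ceil-division relation between work items and block size (the actual expressions are built in the omitted parts of Linear3DGpuIndexing and may differ), a standalone illustration of the arithmetic:

    import math

    def grid_from_block(work_items: tuple[int, ...], block: tuple[int, ...]) -> tuple[int, ...]:
        # One block per ceil(work / block) threads along each axis.
        return tuple(math.ceil(w / b) for w, b in zip(work_items, block))

    print(grid_from_block((130, 130, 1), (16, 16, 1)))  # -> (9, 9, 1)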
 class GpuIndexing(ABC):
-    @abstractmethod
-    def get_thread_mapping(self) -> ThreadToIndexMapping | None: ...
-
-    @abstractmethod
-    def get_launch_config(self, kernel: GpuKernel) -> GpuLaunchConfiguration: ...
+    def __init__(
+        self,
+        ctx: KernelCreationContext,
+        scheme: GpuIndexingScheme,
+        block_size: dim3 | _AUTO_TYPE,
+        manual_launch_grid: bool,
+    ) -> None:
+        self._ctx = ctx
+        self._scheme = scheme
+        self._block_size = block_size
+        self._manual_launch_grid = manual_launch_grid
+
+        from ..backend.kernelcreation import AstFactory
+
+        self._factory = AstFactory(self._ctx)
+
+    def get_thread_mapping(self):
+        from ..backend.platforms.cuda import Linear3DMapping, Blockwise4DMapping
+
+        match self._scheme:
+            case GpuIndexingScheme.Linear3D:
+                return Linear3DMapping()
+            case GpuIndexingScheme.Blockwise4D:
+                return Blockwise4DMapping()
+
+    def get_launch_config_factory(
+        self, scheme: GpuIndexingScheme
+    ) -> Callable[[], GpuLaunchConfiguration]:
+        if self._manual_launch_grid:
+            return self._manual_config_factory()
+
+        raise NotImplementedError()
+
+    def _manual_config_factory(self) -> Callable[[], ManualLaunchConfiguration]:
+        ctx = self._ctx
+        block_size_symbols = [
+            ctx.get_new_symbol(f"gpuBlockSize_{c}", ctx.index_dtype) for c in range(3)
+        ]
+        grid_size_symbols = [
+            ctx.get_new_symbol(f"gpuGridSize_{c}", ctx.index_dtype) for c in range(3)
+        ]
+
+        block_size = tuple(
+            Lambda.from_expression(ctx, PsExpression.make(bs))
+            for bs in block_size_symbols
+        )
+        grid_size = tuple(
+            Lambda.from_expression(ctx, PsExpression.make(gs))
+            for gs in grid_size_symbols
+        )
+
+        from .driver import _symbol_to_param
+
+        bs_params = [_symbol_to_param(ctx, s) for s in block_size_symbols]
+        gs_params = [_symbol_to_param(ctx, s) for s in grid_size_symbols]
+
+        def factory():
+            return ManualLaunchConfiguration(
+                cast(_Dim3Lambda, block_size),
+                cast(_Dim3Lambda, grid_size),
+                cast(_Dim3Params, bs_params),
+                cast(_Dim3Params, gs_params),
+            )
+
+        return factory
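The indexing object hands out a zero-argument factory instead of a finished configuration, so each compiled kernel can own an independent, mutable launch configuration. A hedged usage sketch (variable names are illustrative only):

    config_factory = indexing.get_launch_config_factory(GpuIndexingScheme.Linear3D)
    launch_config = config_factory()   # fresh ManualLaunchConfiguration per kernel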
 class Linear3DGpuIndexing(GpuIndexing):

@@ -88,23 +241,6 @@ class Linear3DGpuIndexing(GpuIndexing):
         return Linear3DMapping()

     def get_launch_config(self, kernel: GpuKernel) -> GpuLaunchConfiguration:
-        block_size, grid_size = self._prepare_launch_grid()
-
-        kernel_params = set(kernel.parameters)
-        launch_config_params = (
-            set().union(
-                *(lb.parameters for lb in chain(block_size, grid_size))
-            )
-            - kernel_params
-        )
-
-        return GpuLaunchConfiguration(
-            block_size=cast(_Dim3Lambda, tuple(block_size)),
-            grid_size=cast(_Dim3Lambda, tuple(grid_size)),
-            config_parameters=launch_config_params,
-        )
-
-    def _prepare_launch_grid(self):
         work_items = self._get_work_items()
         rank = len(work_items)

@@ -138,7 +274,17 @@ class Linear3DGpuIndexing(GpuIndexing):
             for _ in range(3 - rank)
         ]

-        return block_size, grid_size
+        from .driver import _symbol_to_param
+
+        block_size_params = tuple(
+            _symbol_to_param(self._ctx, s) for s in block_size_symbols
+        )
+
+        return GridFromBlockSizeConfiguration(
+            cast(_Dim3Lambda, tuple(block_size)),
+            cast(_Dim3Lambda, tuple(grid_size)),
+            cast(tuple[Parameter, Parameter, Parameter], block_size_params),
+        )

     def _get_work_items(self) -> tuple[PsExpression, ...]:
         ispace = self._ctx.get_iteration_space()
...
@@ -67,7 +67,7 @@ class CupyKernelWrapper(KernelWrapper):
         return devices.pop()

     def _get_cached_args(self, **kwargs):
-        launch_config_params = self._launch_config.parameter_values
+        launch_config_params = self._launch_config.get_valuation()
         key = tuple(
             (k, v) for k, v in launch_config_params.items()
         ) + tuple((k, id(v)) for k, v in kwargs.items())
@@ -195,7 +195,7 @@ class CupyKernelWrapper(KernelWrapper):
         launch_cfg_valuation.update(
             {
                 param.name: value
-                for param, value in self._launch_config.parameter_values.items()
+                for param, value in self._launch_config.get_valuation().items()
             }
         )
@@ -203,7 +203,7 @@ class CupyKernelWrapper(KernelWrapper):
             tuple[int, int, int],
             tuple(
                 int(component(**launch_cfg_valuation))
-                for component in self._launch_config.block_size
+                for component in self._launch_config.get_block_size()
             ),
         )
@@ -211,7 +211,7 @@ class CupyKernelWrapper(KernelWrapper):
             tuple[int, int, int],
             tuple(
                 int(component(**launch_cfg_valuation))
-                for component in self._launch_config.grid_size
+                for component in self._launch_config.get_grid_size()
             ),
         )
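Each block- and grid-size component is a Lambda that is evaluated with the launch-config valuation passed as keyword arguments. A standalone sketch of that evaluation step, with plain Python lambdas standing in for the Lambda objects:

    # Stand-ins for the three block-size components.
    components = (
        lambda **v: v["gpuBlockSize_0"],
        lambda **v: v["gpuBlockSize_1"],
        lambda **v: v["gpuBlockSize_2"],
    )
    valuation = {"gpuBlockSize_0": 128, "gpuBlockSize_1": 1, "gpuBlockSize_2": 1}
    block = tuple(int(c(**valuation)) for c in components)
    print(block)  # -> (128, 1, 1)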
...