Commit b73cdcfc authored by Frederik Hennig

Add AUTO option to config, use it for ghost layers

parent f2d82bf0
Merge requests: !433 "Consolidate codegen and JIT modules", !430 "Jupyter Inspection Framework, Book Theme, and Initial Drafts for Codegen Reference Guides", !429 "Iteration Slices: Extended GPU support + bugfixes"
Pipeline #70301 passed
@@ -6,6 +6,7 @@ from functools import reduce
from operator import mul
from ...defaults import DEFAULTS
from ...config import _AUTO_TYPE, AUTO
from ...simp import AssignmentCollection
from ...field import Field, FieldType
@@ -60,6 +61,7 @@ class FullIterationSpace(IterationSpace):
@dataclass
class Dimension:
"""One dimension of a dense iteration space"""
start: PsExpression
stop: PsExpression
step: PsExpression
@@ -180,7 +182,7 @@ class FullIterationSpace(IterationSpace):
def __init__(
self,
ctx: KernelCreationContext,
- dimensions: Sequence[Dimension],
+ dimensions: Sequence[FullIterationSpace.Dimension],
archetype_field: Field | None = None,
):
super().__init__(tuple(dim.counter for dim in dimensions))
@@ -192,22 +194,27 @@ class FullIterationSpace(IterationSpace):
@property
def dimensions(self):
"""The dimensions of this iteration space"""
return self._dimensions
@property
def lower(self):
"""Lower limits of each dimension"""
return (dim.start for dim in self._dimensions)
@property
def upper(self):
"""Upper limits of each dimension"""
return (dim.stop for dim in self._dimensions)
@property
def steps(self):
"""Iteration steps of each dimension"""
return (dim.step for dim in self._dimensions)
@property
def archetype_field(self) -> Field | None:
"""Field whose shape and memory layout act as archetypes for this iteration space's dimensions."""
return self._archetype_field
@property
@@ -230,6 +237,13 @@ class FullIterationSpace(IterationSpace):
def actual_iterations(
self, dimension: int | FullIterationSpace.Dimension | None = None
) -> PsExpression:
"""Construct an expression representing the actual number of unique points inside the iteration space.
Args:
dimension: If an integer or a `Dimension` object is given, the number of iterations in that
dimension is computed. If `None`, the total number of iterations inside the entire space
is computed.
"""
from .typification import Typifier
from ..transformations import EliminateConstants
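For orientation, the per-dimension count that `actual_iterations` documents is the usual counted-loop formula. The sketch below mirrors that arithmetic in plain Python; it illustrates the formula only and is not the typified AST expression the method actually constructs:

```python
def iterations_in_dimension(start: int, stop: int, step: int) -> int:
    # Unique points of a counted loop with positive step:
    # ceil((stop - start) / step), clamped at zero for empty ranges.
    return max(0, -((start - stop) // step))

assert iterations_in_dimension(0, 10, 3) == 4   # visits 0, 3, 6, 9
assert iterations_in_dimension(5, 5, 1) == 0    # empty range
```

The total for the whole space (the `dimension=None` case) is the product of these per-dimension counts.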
@@ -399,7 +413,7 @@ def create_sparse_iteration_space(
def create_full_iteration_space(
ctx: KernelCreationContext,
assignments: AssignmentCollection,
- ghost_layers: None | int | Sequence[int | tuple[int, int]] = None,
+ ghost_layers: None | _AUTO_TYPE | int | Sequence[int | tuple[int, int]] = None,
iteration_slice: None | int | slice | tuple[int | slice, ...] = None,
) -> IterationSpace:
assert not ctx.fields.index_fields
@@ -439,16 +453,7 @@ def create_full_iteration_space(
# Otherwise, if an iteration slice was specified, use that
# Otherwise, use the inferred ghost layers
if ghost_layers is not None:
ctx.metadata["ghost_layers"] = ghost_layers
return FullIterationSpace.create_with_ghost_layers(
ctx, ghost_layers, archetype_field
)
elif iteration_slice is not None:
return FullIterationSpace.create_from_slice(
ctx, iteration_slice, archetype_field
)
else:
if ghost_layers is AUTO:
if len(domain_field_accesses) > 0:
inferred_gls = max(
[fa.required_ghost_layers for fa in domain_field_accesses]
@@ -460,3 +465,15 @@
return FullIterationSpace.create_with_ghost_layers(
ctx, inferred_gls, archetype_field
)
elif ghost_layers is not None:
assert not isinstance(ghost_layers, _AUTO_TYPE)
ctx.metadata["ghost_layers"] = ghost_layers
return FullIterationSpace.create_with_ghost_layers(
ctx, ghost_layers, archetype_field
)
elif iteration_slice is not None:
return FullIterationSpace.create_from_slice(
ctx, iteration_slice, archetype_field
)
else:
assert False, "unreachable code"
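Taken together, the reordered dispatch resolves the iteration region with this precedence: `ghost_layers is AUTO` infers the ghost layers from the field accesses, an explicit `ghost_layers` value is used verbatim and recorded in `ctx.metadata`, otherwise the `iteration_slice` drives the space, and the closing assertion documents that the caller always supplies one of the three. A hedged illustration of the resulting call patterns (context and assignment setup are omitted, so this is not directly runnable and not code from this commit):

```python
from pystencils import make_slice
from pystencils.config import AUTO

# assuming `ctx` is a KernelCreationContext and `asms` an AssignmentCollection
ispace = create_full_iteration_space(ctx, asms, ghost_layers=AUTO)         # inferred from accesses
ispace = create_full_iteration_space(ctx, asms, ghost_layers=[(0, 1), 2])  # explicit, stored in ctx.metadata
ispace = create_full_iteration_space(ctx, asms, iteration_slice=make_slice[1:-1, :])  # slice-driven
```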
@@ -28,6 +28,14 @@ class PsOptionsError(Exception):
"""Indicates an option clash in the `CreateKernelConfig`."""
class _AUTO_TYPE:
...
AUTO = _AUTO_TYPE()
"""Special value that can be passed to some options for invoking automatic behaviour."""
@dataclass
class OpenMpConfig:
"""Parameters controlling kernel parallelization using OpenMP."""
@@ -68,8 +76,8 @@ class CpuOptimConfig:
openmp: bool | OpenMpConfig = False
"""Enable OpenMP parallelization.
- If set to `True`, the kernel will be parallelized using OpenMP according to the default settings in `OpenMpParams`.
- To customize OpenMP parallelization, pass an instance of `OpenMpParams` instead.
+ If set to `True`, the kernel will be parallelized using OpenMP according to the default settings in `OpenMpConfig`.
+ To customize OpenMP parallelization, pass an instance of `OpenMpConfig` instead.
"""
vectorize: bool | VectorizationConfig = False
@@ -188,11 +196,11 @@ class GpuIndexingConfig:
If set to `True`, the kernel is generated for execution via
`parallel_for <https://registry.khronos.org/SYCL/specs/sycl-2020/html/sycl-2020.html#_parallel_for_invoke>`_
-dispatch using
- a flat `sycl::range`. In this case, the GPU block size will be inferred by the SYCL runtime.
+ a flat ``sycl::range``. In this case, the GPU block size will be inferred by the SYCL runtime.
- If set to `False`, the kernel will receive an `nd_item` and has to be executed using
+ If set to `False`, the kernel will receive an ``nd_item`` and has to be executed using
`parallel_for <https://registry.khronos.org/SYCL/specs/sycl-2020/html/sycl-2020.html#_parallel_for_invoke>`_
- with an `nd_range`. This allows manual specification of the block size.
+ with an ``nd_range``. This allows manual specification of the block size.
"""
@@ -207,38 +215,49 @@ class CreateKernelConfig:
"""Just-in-time compiler used to compile and load the kernel for invocation from the current Python environment.
If left at `None`, a default just-in-time compiler will be inferred from the `target` parameter.
- To explicitly disable JIT compilation, pass `pystencils.nbackend.jit.no_jit`.
+ To explicitly disable JIT compilation, pass `pystencils.backend.jit.no_jit`.
"""
function_name: str = "kernel"
"""Name of the generated function"""
- ghost_layers: None | int | Sequence[int | tuple[int, int]] = None
+ ghost_layers: None | _AUTO_TYPE | int | Sequence[int | tuple[int, int]] = None
"""Specifies the number of ghost layers of the iteration region.
Options:
- - `None`: Required ghost layers are inferred from field accesses
+ - :py:data:`AUTO <pystencils.config.AUTO>`: Required ghost layers are inferred from field accesses
- `int`: A uniform number of ghost layers in each spatial coordinate is applied
- ``Sequence[int | tuple[int, int]]``: Ghost layers are specified for each spatial coordinate.
In each coordinate, a single integer specifies the ghost layers at both the lower and upper iteration limit,
while a pair of integers specifies the lower and upper ghost layers separately.
When manually specifying ghost layers, it is the user's responsibility to avoid out-of-bounds memory accesses.
If ``ghost_layers=None`` is specified, the iteration region may otherwise be set using the `iteration_slice` option.
.. note::
At most one of `ghost_layers`, `iteration_slice`, and `index_field` may be set.
"""
- iteration_slice: None | Sequence[slice] = None
+ iteration_slice: None | int | slice | tuple[int | slice, ...] = None
"""Specifies the kernel's iteration slice.
`iteration_slice` may only be set if ``ghost_layers=None``.
If it is set, a slice must be specified for each spatial coordinate.
TODO: Specification of valid slices and their behaviour
Example:
>>> cfg = CreateKernelConfig(
... iteration_slice=ps.make_slice[3:14, 2:-2]
... )
>>> cfg.iteration_slice
(slice(3, 14, None), slice(2, -2, None))
.. note::
At most one of `ghost_layers`, `iteration_slice`, and `index_field` may be set.
"""
index_field: Field | None = None
"""Index field for a sparse kernel.
If this option is set, a sparse kernel with the given field as index field will be generated.
.. note::
At most one of `ghost_layers`, `iteration_slice`, and `index_field` may be set.
"""
"""Data Types"""
@@ -288,10 +307,10 @@ class CreateKernelConfig:
"""Deprecated; use `default_dtype` instead"""
cpu_openmp: InitVar[bool | int | None] = None
"""Deprecated; use `cpu_optim.openmp` instead."""
"""Deprecated; use `cpu_optim.openmp <CpuOptimConfig.openmp>` instead."""
cpu_vectorize_info: InitVar[dict | None] = None
"""Deprecated; use `cpu_optim.vectorize` instead."""
"""Deprecated; use `cpu_optim.vectorize <CpuOptimConfig.vectorize>` instead."""
gpu_indexing_params: InitVar[dict | None] = None
"""Deprecated; use `gpu_indexing` instead."""
@@ -6,6 +6,7 @@ from .config import (
CreateKernelConfig,
OpenMpConfig,
VectorizationConfig,
AUTO
)
from .backend import KernelFunction
from .types import create_numeric_type, PsIntegerType, PsScalarType
@@ -91,49 +92,18 @@ class DefaultKernelCreationDriver:
self,
assignments: AssignmentCollection | Sequence[AssignmentBase] | AssignmentBase,
):
if isinstance(assignments, AssignmentBase):
assignments = [assignments]
if not isinstance(assignments, AssignmentCollection):
assignments = AssignmentCollection(assignments) # type: ignore
_ = _parse_simplification_hints(assignments)
analysis = KernelAnalysis(
self._ctx,
not self._cfg.skip_independence_check,
not self._cfg.allow_double_writes,
kernel_body = self.parse_kernel_body(
assignments
)
analysis(assignments)
if len(self._ctx.fields.index_fields) > 0 or self._cfg.index_field is not None:
ispace = create_sparse_iteration_space(
self._ctx, assignments, index_field=self._cfg.index_field
)
else:
ispace = create_full_iteration_space(
self._ctx,
assignments,
ghost_layers=self._cfg.ghost_layers,
iteration_slice=self._cfg.iteration_slice,
)
self._ctx.set_iteration_space(ispace)
freeze = FreezeExpressions(self._ctx)
kernel_body = freeze(assignments)
typify = Typifier(self._ctx)
kernel_body = typify(kernel_body)
match self._platform:
case GenericCpu():
kernel_ast = self._platform.materialize_iteration_space(
- kernel_body, ispace
+ kernel_body, self._ctx.get_iteration_space()
)
case GenericGpu():
kernel_ast, gpu_threads = self._platform.materialize_iteration_space(
- kernel_body, ispace
+ kernel_body, self._ctx.get_iteration_space()
)
# Fold and extract constants
@@ -179,6 +149,53 @@ class DefaultKernelCreationDriver:
self._cfg.get_jit(),
)
def parse_kernel_body(
self,
assignments: AssignmentCollection | Sequence[AssignmentBase] | AssignmentBase,
) -> PsBlock:
if isinstance(assignments, AssignmentBase):
assignments = [assignments]
if not isinstance(assignments, AssignmentCollection):
assignments = AssignmentCollection(assignments) # type: ignore
_ = _parse_simplification_hints(assignments)
analysis = KernelAnalysis(
self._ctx,
not self._cfg.skip_independence_check,
not self._cfg.allow_double_writes,
)
analysis(assignments)
if self._cfg.index_field is not None:
ispace = create_sparse_iteration_space(
self._ctx, assignments, index_field=self._cfg.index_field
)
else:
gls = self._cfg.ghost_layers
islice = self._cfg.iteration_slice
if gls is None and islice is None:
gls = AUTO
ispace = create_full_iteration_space(
self._ctx,
assignments,
ghost_layers=gls,
iteration_slice=islice,
)
self._ctx.set_iteration_space(ispace)
freeze = FreezeExpressions(self._ctx)
kernel_body = freeze(assignments)
typify = Typifier(self._ctx)
kernel_body = typify(kernel_body)
return kernel_body
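A user-visible consequence of `parse_kernel_body`: when neither `ghost_layers` nor `iteration_slice` is given, the driver substitutes `AUTO`, so the default configuration behaves exactly like requesting automatic ghost-layer inference explicitly. A small sketch of that equivalence (kernel and field setup omitted):

```python
import pystencils as ps
from pystencils.config import AUTO

cfg_default  = ps.CreateKernelConfig()                   # ghost_layers=None, iteration_slice=None
cfg_explicit = ps.CreateKernelConfig(ghost_layers=AUTO)
# Both configurations lead the driver to infer the ghost layers
# from the field accesses of the kernel's assignments.
```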
def _transform_for_cpu(self, kernel_ast: PsBlock):
canonicalize = CanonicalizeSymbols(self._ctx, True)
kernel_ast = cast(PsBlock, canonicalize(kernel_ast))
@@ -57,7 +57,7 @@ class SimplificationStrategy:
def __str__(self):
try:
- import tabulate
+ from tabulate import tabulate
return tabulate(self.elements, headers=['Name', 'Runtime', 'Adds', 'Muls', 'Divs', 'Total'])
except ImportError:
result = "Name, Adds, Muls, Divs, Runtime\n"