Skip to content
Snippets Groups Projects
Commit b73cdcfc authored by Frederik Hennig's avatar Frederik Hennig
Browse files

Add AUTO option to config, use it for ghost layers

parent f2d82bf0
No related branches found
No related tags found
3 merge requests!433Consolidate codegen and JIT modules.,!430Jupyter Inspection Framework, Book Theme, and Initial Drafts for Codegen Reference Guides,!429Iteration Slices: Extended GPU support + bugfixes
Pipeline #70301 passed
...@@ -6,6 +6,7 @@ from functools import reduce ...@@ -6,6 +6,7 @@ from functools import reduce
from operator import mul from operator import mul
from ...defaults import DEFAULTS from ...defaults import DEFAULTS
from ...config import _AUTO_TYPE, AUTO
from ...simp import AssignmentCollection from ...simp import AssignmentCollection
from ...field import Field, FieldType from ...field import Field, FieldType
...@@ -60,6 +61,7 @@ class FullIterationSpace(IterationSpace): ...@@ -60,6 +61,7 @@ class FullIterationSpace(IterationSpace):
@dataclass @dataclass
class Dimension: class Dimension:
"""One dimension of a dense iteration space"""
start: PsExpression start: PsExpression
stop: PsExpression stop: PsExpression
step: PsExpression step: PsExpression
...@@ -180,7 +182,7 @@ class FullIterationSpace(IterationSpace): ...@@ -180,7 +182,7 @@ class FullIterationSpace(IterationSpace):
def __init__( def __init__(
self, self,
ctx: KernelCreationContext, ctx: KernelCreationContext,
dimensions: Sequence[Dimension], dimensions: Sequence[FullIterationSpace.Dimension],
archetype_field: Field | None = None, archetype_field: Field | None = None,
): ):
super().__init__(tuple(dim.counter for dim in dimensions)) super().__init__(tuple(dim.counter for dim in dimensions))
...@@ -192,22 +194,27 @@ class FullIterationSpace(IterationSpace): ...@@ -192,22 +194,27 @@ class FullIterationSpace(IterationSpace):
@property @property
def dimensions(self): def dimensions(self):
"""The dimensions of this iteration space"""
return self._dimensions return self._dimensions
@property @property
def lower(self): def lower(self):
"""Lower limits of each dimension"""
return (dim.start for dim in self._dimensions) return (dim.start for dim in self._dimensions)
@property @property
def upper(self): def upper(self):
"""Upper limits of each dimension"""
return (dim.stop for dim in self._dimensions) return (dim.stop for dim in self._dimensions)
@property @property
def steps(self): def steps(self):
"""Iteration steps of each dimension"""
return (dim.step for dim in self._dimensions) return (dim.step for dim in self._dimensions)
@property @property
def archetype_field(self) -> Field | None: def archetype_field(self) -> Field | None:
"""Field whose shape and memory layout act as archetypes for this iteration space's dimensions."""
return self._archetype_field return self._archetype_field
@property @property
...@@ -230,6 +237,13 @@ class FullIterationSpace(IterationSpace): ...@@ -230,6 +237,13 @@ class FullIterationSpace(IterationSpace):
def actual_iterations( def actual_iterations(
self, dimension: int | FullIterationSpace.Dimension | None = None self, dimension: int | FullIterationSpace.Dimension | None = None
) -> PsExpression: ) -> PsExpression:
"""Construct an expression representing the actual number of unique points inside the iteration space.
Args:
dimension: If an integer or a `Dimension` object is given, the number of iterations in that
dimension is computed. If `None`, the total number of iterations inside the entire space
is computed.
"""
from .typification import Typifier from .typification import Typifier
from ..transformations import EliminateConstants from ..transformations import EliminateConstants
...@@ -399,7 +413,7 @@ def create_sparse_iteration_space( ...@@ -399,7 +413,7 @@ def create_sparse_iteration_space(
def create_full_iteration_space( def create_full_iteration_space(
ctx: KernelCreationContext, ctx: KernelCreationContext,
assignments: AssignmentCollection, assignments: AssignmentCollection,
ghost_layers: None | int | Sequence[int | tuple[int, int]] = None, ghost_layers: None | _AUTO_TYPE | int | Sequence[int | tuple[int, int]] = None,
iteration_slice: None | int | slice | tuple[int | slice, ...] = None, iteration_slice: None | int | slice | tuple[int | slice, ...] = None,
) -> IterationSpace: ) -> IterationSpace:
assert not ctx.fields.index_fields assert not ctx.fields.index_fields
...@@ -439,16 +453,7 @@ def create_full_iteration_space( ...@@ -439,16 +453,7 @@ def create_full_iteration_space(
# Otherwise, if an iteration slice was specified, use that # Otherwise, if an iteration slice was specified, use that
# Otherwise, use the inferred ghost layers # Otherwise, use the inferred ghost layers
if ghost_layers is not None: if ghost_layers is AUTO:
ctx.metadata["ghost_layers"] = ghost_layers
return FullIterationSpace.create_with_ghost_layers(
ctx, ghost_layers, archetype_field
)
elif iteration_slice is not None:
return FullIterationSpace.create_from_slice(
ctx, iteration_slice, archetype_field
)
else:
if len(domain_field_accesses) > 0: if len(domain_field_accesses) > 0:
inferred_gls = max( inferred_gls = max(
[fa.required_ghost_layers for fa in domain_field_accesses] [fa.required_ghost_layers for fa in domain_field_accesses]
...@@ -460,3 +465,15 @@ def create_full_iteration_space( ...@@ -460,3 +465,15 @@ def create_full_iteration_space(
return FullIterationSpace.create_with_ghost_layers( return FullIterationSpace.create_with_ghost_layers(
ctx, inferred_gls, archetype_field ctx, inferred_gls, archetype_field
) )
elif ghost_layers is not None:
assert not isinstance(ghost_layers, _AUTO_TYPE)
ctx.metadata["ghost_layers"] = ghost_layers
return FullIterationSpace.create_with_ghost_layers(
ctx, ghost_layers, archetype_field
)
elif iteration_slice is not None:
return FullIterationSpace.create_from_slice(
ctx, iteration_slice, archetype_field
)
else:
assert False, "unreachable code"
...@@ -28,6 +28,14 @@ class PsOptionsError(Exception): ...@@ -28,6 +28,14 @@ class PsOptionsError(Exception):
"""Indicates an option clash in the `CreateKernelConfig`.""" """Indicates an option clash in the `CreateKernelConfig`."""
class _AUTO_TYPE:
    """Sentinel type for the `AUTO` marker value.

    Options of this type are compared by identity (``option is AUTO``),
    so only the single module-level `AUTO` instance should be used.
    """

    ...


AUTO = _AUTO_TYPE()
"""Special value that can be passed to some options for invoking automatic behaviour."""
@dataclass @dataclass
class OpenMpConfig: class OpenMpConfig:
"""Parameters controlling kernel parallelization using OpenMP.""" """Parameters controlling kernel parallelization using OpenMP."""
...@@ -68,8 +76,8 @@ class CpuOptimConfig: ...@@ -68,8 +76,8 @@ class CpuOptimConfig:
openmp: bool | OpenMpConfig = False openmp: bool | OpenMpConfig = False
"""Enable OpenMP parallelization. """Enable OpenMP parallelization.
If set to `True`, the kernel will be parallelized using OpenMP according to the default settings in `OpenMpParams`. If set to `True`, the kernel will be parallelized using OpenMP according to the default settings in `OpenMpConfig`.
To customize OpenMP parallelization, pass an instance of `OpenMpParams` instead. To customize OpenMP parallelization, pass an instance of `OpenMpConfig` instead.
""" """
vectorize: bool | VectorizationConfig = False vectorize: bool | VectorizationConfig = False
...@@ -188,11 +196,11 @@ class GpuIndexingConfig: ...@@ -188,11 +196,11 @@ class GpuIndexingConfig:
If set to `True`, the kernel is generated for execution via If set to `True`, the kernel is generated for execution via
`parallel_for <https://registry.khronos.org/SYCL/specs/sycl-2020/html/sycl-2020.html#_parallel_for_invoke>`_ `parallel_for <https://registry.khronos.org/SYCL/specs/sycl-2020/html/sycl-2020.html#_parallel_for_invoke>`_
-dispatch using -dispatch using
a flat `sycl::range`. In this case, the GPU block size will be inferred by the SYCL runtime. a flat ``sycl::range``. In this case, the GPU block size will be inferred by the SYCL runtime.
If set to `False`, the kernel will receive an `nd_item` and has to be executed using If set to `False`, the kernel will receive an ``nd_item`` and has to be executed using
`parallel_for <https://registry.khronos.org/SYCL/specs/sycl-2020/html/sycl-2020.html#_parallel_for_invoke>`_ `parallel_for <https://registry.khronos.org/SYCL/specs/sycl-2020/html/sycl-2020.html#_parallel_for_invoke>`_
with an `nd_range`. This allows manual specification of the block size. with an ``nd_range``. This allows manual specification of the block size.
""" """
...@@ -207,38 +215,49 @@ class CreateKernelConfig: ...@@ -207,38 +215,49 @@ class CreateKernelConfig:
"""Just-in-time compiler used to compile and load the kernel for invocation from the current Python environment. """Just-in-time compiler used to compile and load the kernel for invocation from the current Python environment.
If left at `None`, a default just-in-time compiler will be inferred from the `target` parameter. If left at `None`, a default just-in-time compiler will be inferred from the `target` parameter.
To explicitly disable JIT compilation, pass `pystencils.nbackend.jit.no_jit`. To explicitly disable JIT compilation, pass `pystencils.backend.jit.no_jit`.
""" """
function_name: str = "kernel" function_name: str = "kernel"
"""Name of the generated function""" """Name of the generated function"""
ghost_layers: None | int | Sequence[int | tuple[int, int]] = None ghost_layers: None | _AUTO_TYPE | int | Sequence[int | tuple[int, int]] = None
"""Specifies the number of ghost layers of the iteration region. """Specifies the number of ghost layers of the iteration region.
Options: Options:
- `None`: Required ghost layers are inferred from field accesses - :py:data:`AUTO <pystencils.config.AUTO>`: Required ghost layers are inferred from field accesses
- `int`: A uniform number of ghost layers in each spatial coordinate is applied - `int`: A uniform number of ghost layers in each spatial coordinate is applied
- ``Sequence[int, tuple[int, int]]``: Ghost layers are specified for each spatial coordinate. - ``Sequence[int, tuple[int, int]]``: Ghost layers are specified for each spatial coordinate.
In each coordinate, a single integer specifies the ghost layers at both the lower and upper iteration limit, In each coordinate, a single integer specifies the ghost layers at both the lower and upper iteration limit,
while a pair of integers specifies the lower and upper ghost layers separately. while a pair of integers specifies the lower and upper ghost layers separately.
When manually specifying ghost layers, it is the user's responsibility to avoid out-of-bounds memory accesses. When manually specifying ghost layers, it is the user's responsibility to avoid out-of-bounds memory accesses.
If ``ghost_layers=None`` is specified, the iteration region may otherwise be set using the `iteration_slice` option.
.. note::
At most one of `ghost_layers`, `iteration_slice`, and `index_field` may be set.
""" """
iteration_slice: None | Sequence[slice] = None iteration_slice: None | int | slice | tuple[int | slice] = None
"""Specifies the kernel's iteration slice. """Specifies the kernel's iteration slice.
`iteration_slice` may only be set if ``ghost_layers=None``. Example:
If it is set, a slice must be specified for each spatial coordinate. >>> cfg = CreateKernelConfig(
TODO: Specification of valid slices and their behaviour ... iteration_slice=ps.make_slice[3:14, 2:-2]
... )
>>> cfg.iteration_slice
(slice(3, 14, None), slice(2, -2, None))
.. note::
At most one of `ghost_layers`, `iteration_slice`, and `index_field` may be set.
""" """
index_field: Field | None = None index_field: Field | None = None
"""Index field for a sparse kernel. """Index field for a sparse kernel.
If this option is set, a sparse kernel with the given field as index field will be generated. If this option is set, a sparse kernel with the given field as index field will be generated.
.. note::
At most one of `ghost_layers`, `iteration_slice`, and `index_field` may be set.
""" """
"""Data Types""" """Data Types"""
...@@ -288,10 +307,10 @@ class CreateKernelConfig: ...@@ -288,10 +307,10 @@ class CreateKernelConfig:
"""Deprecated; use `default_dtype` instead""" """Deprecated; use `default_dtype` instead"""
cpu_openmp: InitVar[bool | int | None] = None cpu_openmp: InitVar[bool | int | None] = None
"""Deprecated; use `cpu_optim.openmp` instead.""" """Deprecated; use `cpu_optim.openmp <CpuOptimConfig.openmp>` instead."""
cpu_vectorize_info: InitVar[dict | None] = None cpu_vectorize_info: InitVar[dict | None] = None
"""Deprecated; use `cpu_optim.vectorize` instead.""" """Deprecated; use `cpu_optim.vectorize <CpuOptimConfig.vectorize>` instead."""
gpu_indexing_params: InitVar[dict | None] = None gpu_indexing_params: InitVar[dict | None] = None
"""Deprecated; use `gpu_indexing` instead.""" """Deprecated; use `gpu_indexing` instead."""
......
...@@ -6,6 +6,7 @@ from .config import ( ...@@ -6,6 +6,7 @@ from .config import (
CreateKernelConfig, CreateKernelConfig,
OpenMpConfig, OpenMpConfig,
VectorizationConfig, VectorizationConfig,
AUTO
) )
from .backend import KernelFunction from .backend import KernelFunction
from .types import create_numeric_type, PsIntegerType, PsScalarType from .types import create_numeric_type, PsIntegerType, PsScalarType
...@@ -91,49 +92,18 @@ class DefaultKernelCreationDriver: ...@@ -91,49 +92,18 @@ class DefaultKernelCreationDriver:
self, self,
assignments: AssignmentCollection | Sequence[AssignmentBase] | AssignmentBase, assignments: AssignmentCollection | Sequence[AssignmentBase] | AssignmentBase,
): ):
if isinstance(assignments, AssignmentBase): kernel_body = self.parse_kernel_body(
assignments = [assignments] assignments
if not isinstance(assignments, AssignmentCollection):
assignments = AssignmentCollection(assignments) # type: ignore
_ = _parse_simplification_hints(assignments)
analysis = KernelAnalysis(
self._ctx,
not self._cfg.skip_independence_check,
not self._cfg.allow_double_writes,
) )
analysis(assignments)
if len(self._ctx.fields.index_fields) > 0 or self._cfg.index_field is not None:
ispace = create_sparse_iteration_space(
self._ctx, assignments, index_field=self._cfg.index_field
)
else:
ispace = create_full_iteration_space(
self._ctx,
assignments,
ghost_layers=self._cfg.ghost_layers,
iteration_slice=self._cfg.iteration_slice,
)
self._ctx.set_iteration_space(ispace)
freeze = FreezeExpressions(self._ctx)
kernel_body = freeze(assignments)
typify = Typifier(self._ctx)
kernel_body = typify(kernel_body)
match self._platform: match self._platform:
case GenericCpu(): case GenericCpu():
kernel_ast = self._platform.materialize_iteration_space( kernel_ast = self._platform.materialize_iteration_space(
kernel_body, ispace kernel_body, self._ctx.get_iteration_space()
) )
case GenericGpu(): case GenericGpu():
kernel_ast, gpu_threads = self._platform.materialize_iteration_space( kernel_ast, gpu_threads = self._platform.materialize_iteration_space(
kernel_body, ispace kernel_body, self._ctx.get_iteration_space()
) )
# Fold and extract constants # Fold and extract constants
...@@ -179,6 +149,53 @@ class DefaultKernelCreationDriver: ...@@ -179,6 +149,53 @@ class DefaultKernelCreationDriver:
self._cfg.get_jit(), self._cfg.get_jit(),
) )
def parse_kernel_body(
    self,
    assignments: AssignmentCollection | Sequence[AssignmentBase] | AssignmentBase,
) -> PsBlock:
    """Normalize and analyze the given assignments, set up the iteration space
    on the kernel creation context, and return the frozen, typified kernel body.

    Args:
        assignments: A single assignment, a sequence of assignments, or an
            `AssignmentCollection` describing the kernel.

    Returns:
        The kernel body as a typified backend AST block.
    """
    #   Normalize the input to an AssignmentCollection
    if isinstance(assignments, AssignmentBase):
        assignments = [assignments]
    if not isinstance(assignments, AssignmentCollection):
        assignments = AssignmentCollection(assignments)  # type: ignore

    _ = _parse_simplification_hints(assignments)

    #   Run the semantic analysis (independence / double-write checks per config)
    KernelAnalysis(
        self._ctx,
        not self._cfg.skip_independence_check,
        not self._cfg.allow_double_writes,
    )(assignments)

    #   Choose the iteration space: sparse if an index field is configured,
    #   dense otherwise. For the dense case, fall back to automatic ghost-layer
    #   inference when neither ghost layers nor an iteration slice were given.
    if self._cfg.index_field is None:
        ghost_layers = self._cfg.ghost_layers
        iter_slice = self._cfg.iteration_slice
        if ghost_layers is None and iter_slice is None:
            ghost_layers = AUTO
        ispace = create_full_iteration_space(
            self._ctx,
            assignments,
            ghost_layers=ghost_layers,
            iteration_slice=iter_slice,
        )
    else:
        ispace = create_sparse_iteration_space(
            self._ctx, assignments, index_field=self._cfg.index_field
        )

    self._ctx.set_iteration_space(ispace)

    #   Freeze the symbolic assignments into the backend AST, then typify it
    frozen_body = FreezeExpressions(self._ctx)(assignments)
    return Typifier(self._ctx)(frozen_body)
def _transform_for_cpu(self, kernel_ast: PsBlock): def _transform_for_cpu(self, kernel_ast: PsBlock):
canonicalize = CanonicalizeSymbols(self._ctx, True) canonicalize = CanonicalizeSymbols(self._ctx, True)
kernel_ast = cast(PsBlock, canonicalize(kernel_ast)) kernel_ast = cast(PsBlock, canonicalize(kernel_ast))
......
...@@ -57,7 +57,7 @@ class SimplificationStrategy: ...@@ -57,7 +57,7 @@ class SimplificationStrategy:
def __str__(self): def __str__(self):
try: try:
import tabulate from tabulate import tabulate
return tabulate(self.elements, headers=['Name', 'Runtime', 'Adds', 'Muls', 'Divs', 'Total']) return tabulate(self.elements, headers=['Name', 'Runtime', 'Adds', 'Muls', 'Divs', 'Total'])
except ImportError: except ImportError:
result = "Name, Adds, Muls, Divs, Runtime\n" result = "Name, Adds, Muls, Divs, Runtime\n"
......
Loading…
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment