Commit b73cdcfc authored by Frederik Hennig

Add AUTO option to config, use it for ghost layers

parent f2d82bf0
Merge requests: !433 "Consolidate codegen and JIT modules", !430 "Jupyter Inspection Framework, Book Theme, and Initial Drafts for Codegen Reference Guides", !429 "Iteration Slices: Extended GPU support + bugfixes"
Pipeline #70301 passed
@@ -6,6 +6,7 @@ from functools import reduce
from operator import mul
from ...defaults import DEFAULTS
from ...config import _AUTO_TYPE, AUTO
from ...simp import AssignmentCollection
from ...field import Field, FieldType
@@ -60,6 +61,7 @@ class FullIterationSpace(IterationSpace):
@dataclass
class Dimension:
"""One dimension of a dense iteration space"""
start: PsExpression
stop: PsExpression
step: PsExpression
@@ -180,7 +182,7 @@ class FullIterationSpace(IterationSpace):
def __init__(
self,
ctx: KernelCreationContext,
- dimensions: Sequence[Dimension],
+ dimensions: Sequence[FullIterationSpace.Dimension],
archetype_field: Field | None = None,
):
super().__init__(tuple(dim.counter for dim in dimensions))
@@ -192,22 +194,27 @@ class FullIterationSpace(IterationSpace):
@property
def dimensions(self):
"""The dimensions of this iteration space"""
return self._dimensions
@property
def lower(self):
"""Lower limits of each dimension"""
return (dim.start for dim in self._dimensions)
@property
def upper(self):
"""Upper limits of each dimension"""
return (dim.stop for dim in self._dimensions)
@property
def steps(self):
"""Iteration steps of each dimension"""
return (dim.step for dim in self._dimensions)
@property
def archetype_field(self) -> Field | None:
"""Field whose shape and memory layout act as archetypes for this iteration space's dimensions."""
return self._archetype_field
@property
@@ -230,6 +237,13 @@ class FullIterationSpace(IterationSpace):
def actual_iterations(
self, dimension: int | FullIterationSpace.Dimension | None = None
) -> PsExpression:
"""Construct an expression representing the actual number of unique points inside the iteration space.
Args:
dimension: If an integer or a `Dimension` object is given, the number of iterations in that
dimension is computed. If `None`, the total number of iterations inside the entire space
is computed.
"""
from .typification import Typifier
from ..transformations import EliminateConstants
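For orientation, the per-dimension count that `actual_iterations` documents is the usual counted-loop formula. The sketch below mirrors that arithmetic in plain Python; it illustrates the formula only and is not the typified AST expression the method actually constructs:

```python
def iterations_in_dimension(start: int, stop: int, step: int) -> int:
    # Unique points of a counted loop with positive step:
    # ceil((stop - start) / step), clamped at zero for empty ranges.
    return max(0, -((start - stop) // step))

assert iterations_in_dimension(0, 10, 3) == 4   # visits 0, 3, 6, 9
assert iterations_in_dimension(5, 5, 1) == 0    # empty range
```

The total for the whole space (the `dimension=None` case) is the product of these per-dimension counts.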
@@ -399,7 +413,7 @@ def create_sparse_iteration_space(
def create_full_iteration_space(
ctx: KernelCreationContext,
assignments: AssignmentCollection,
- ghost_layers: None | int | Sequence[int | tuple[int, int]] = None,
+ ghost_layers: None | _AUTO_TYPE | int | Sequence[int | tuple[int, int]] = None,
iteration_slice: None | int | slice | tuple[int | slice, ...] = None,
) -> IterationSpace:
assert not ctx.fields.index_fields
@@ -439,16 +453,7 @@ def create_full_iteration_space(
# Otherwise, if an iteration slice was specified, use that
# Otherwise, use the inferred ghost layers
if ghost_layers is not None:
ctx.metadata["ghost_layers"] = ghost_layers
return FullIterationSpace.create_with_ghost_layers(
ctx, ghost_layers, archetype_field
)
elif iteration_slice is not None:
return FullIterationSpace.create_from_slice(
ctx, iteration_slice, archetype_field
)
else:
if ghost_layers is AUTO:
if len(domain_field_accesses) > 0:
inferred_gls = max(
[fa.required_ghost_layers for fa in domain_field_accesses]
@@ -460,3 +465,15 @@
return FullIterationSpace.create_with_ghost_layers(
ctx, inferred_gls, archetype_field
)
elif ghost_layers is not None:
assert not isinstance(ghost_layers, _AUTO_TYPE)
ctx.metadata["ghost_layers"] = ghost_layers
return FullIterationSpace.create_with_ghost_layers(
ctx, ghost_layers, archetype_field
)
elif iteration_slice is not None:
return FullIterationSpace.create_from_slice(
ctx, iteration_slice, archetype_field
)
else:
assert False, "unreachable code"
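Taken together, the reordered dispatch resolves the iteration region with this precedence: `ghost_layers is AUTO` infers the ghost layers from the field accesses, an explicit `ghost_layers` value is used verbatim and recorded in `ctx.metadata`, otherwise the `iteration_slice` drives the space, and the closing assertion documents that the caller always supplies one of the three. A hedged illustration of the resulting call patterns (context and assignment setup are omitted, so this is not directly runnable and not code from this commit):

```python
from pystencils import make_slice
from pystencils.config import AUTO

# assuming `ctx` is a KernelCreationContext and `asms` an AssignmentCollection
ispace = create_full_iteration_space(ctx, asms, ghost_layers=AUTO)         # inferred from accesses
ispace = create_full_iteration_space(ctx, asms, ghost_layers=[(0, 1), 2])  # explicit, stored in ctx.metadata
ispace = create_full_iteration_space(ctx, asms, iteration_slice=make_slice[1:-1, :])  # slice-driven
```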
@@ -28,6 +28,14 @@ class PsOptionsError(Exception):
"""Indicates an option clash in the `CreateKernelConfig`."""
class _AUTO_TYPE:
...
AUTO = _AUTO_TYPE()
"""Special value that can be passed to some options for invoking automatic behaviour."""
@dataclass
class OpenMpConfig:
"""Parameters controlling kernel parallelization using OpenMP."""
@@ -68,8 +76,8 @@ class CpuOptimConfig:
openmp: bool | OpenMpConfig = False
"""Enable OpenMP parallelization.
- If set to `True`, the kernel will be parallelized using OpenMP according to the default settings in `OpenMpParams`.
- To customize OpenMP parallelization, pass an instance of `OpenMpParams` instead.
+ If set to `True`, the kernel will be parallelized using OpenMP according to the default settings in `OpenMpConfig`.
+ To customize OpenMP parallelization, pass an instance of `OpenMpConfig` instead.
"""
vectorize: bool | VectorizationConfig = False
@@ -188,11 +196,11 @@ class GpuIndexingConfig:
If set to `True`, the kernel is generated for execution via
`parallel_for <https://registry.khronos.org/SYCL/specs/sycl-2020/html/sycl-2020.html#_parallel_for_invoke>`_
-dispatch using
- a flat `sycl::range`. In this case, the GPU block size will be inferred by the SYCL runtime.
+ a flat ``sycl::range``. In this case, the GPU block size will be inferred by the SYCL runtime.
- If set to `False`, the kernel will receive an `nd_item` and has to be executed using
+ If set to `False`, the kernel will receive an ``nd_item`` and has to be executed using
`parallel_for <https://registry.khronos.org/SYCL/specs/sycl-2020/html/sycl-2020.html#_parallel_for_invoke>`_
- with an `nd_range`. This allows manual specification of the block size.
+ with an ``nd_range``. This allows manual specification of the block size.
"""
@@ -207,38 +215,49 @@ class CreateKernelConfig:
"""Just-in-time compiler used to compile and load the kernel for invocation from the current Python environment.
If left at `None`, a default just-in-time compiler will be inferred from the `target` parameter.
- To explicitly disable JIT compilation, pass `pystencils.nbackend.jit.no_jit`.
+ To explicitly disable JIT compilation, pass `pystencils.backend.jit.no_jit`.
"""
function_name: str = "kernel"
"""Name of the generated function"""
- ghost_layers: None | int | Sequence[int | tuple[int, int]] = None
+ ghost_layers: None | _AUTO_TYPE | int | Sequence[int | tuple[int, int]] = None
"""Specifies the number of ghost layers of the iteration region.
Options:
- - `None`: Required ghost layers are inferred from field accesses
+ - :py:data:`AUTO <pystencils.config.AUTO>`: Required ghost layers are inferred from field accesses
- `int`: A uniform number of ghost layers in each spatial coordinate is applied
- ``Sequence[int | tuple[int, int]]``: Ghost layers are specified for each spatial coordinate.
In each coordinate, a single integer specifies the ghost layers at both the lower and upper iteration limit,
while a pair of integers specifies the lower and upper ghost layers separately.
When manually specifying ghost layers, it is the user's responsibility to avoid out-of-bounds memory accesses.
If ``ghost_layers=None`` is specified, the iteration region may otherwise be set using the `iteration_slice` option.
.. note::
At most one of `ghost_layers`, `iteration_slice`, and `index_field` may be set.
"""
- iteration_slice: None | Sequence[slice] = None
+ iteration_slice: None | int | slice | tuple[int | slice, ...] = None
"""Specifies the kernel's iteration slice.
`iteration_slice` may only be set if ``ghost_layers=None``.
If it is set, a slice must be specified for each spatial coordinate.
TODO: Specification of valid slices and their behaviour
Example:
>>> cfg = CreateKernelConfig(
... iteration_slice=ps.make_slice[3:14, 2:-2]
... )
>>> cfg.iteration_slice
(slice(3, 14, None), slice(2, -2, None))
.. note::
At most one of `ghost_layers`, `iteration_slice`, and `index_field` may be set.
"""
index_field: Field | None = None
"""Index field for a sparse kernel.
If this option is set, a sparse kernel with the given field as index field will be generated.
.. note::
At most one of `ghost_layers`, `iteration_slice`, and `index_field` may be set.
"""
"""Data Types"""
@@ -288,10 +307,10 @@ class CreateKernelConfig:
"""Deprecated; use `default_dtype` instead"""
cpu_openmp: InitVar[bool | int | None] = None
"""Deprecated; use `cpu_optim.openmp` instead."""
"""Deprecated; use `cpu_optim.openmp <CpuOptimConfig.openmp>` instead."""
cpu_vectorize_info: InitVar[dict | None] = None
"""Deprecated; use `cpu_optim.vectorize` instead."""
"""Deprecated; use `cpu_optim.vectorize <CpuOptimConfig.vectorize>` instead."""
gpu_indexing_params: InitVar[dict | None] = None
"""Deprecated; use `gpu_indexing` instead."""
@@ -6,6 +6,7 @@ from .config import (
CreateKernelConfig,
OpenMpConfig,
VectorizationConfig,
AUTO
)
from .backend import KernelFunction
from .types import create_numeric_type, PsIntegerType, PsScalarType
@@ -91,49 +92,18 @@ class DefaultKernelCreationDriver:
self,
assignments: AssignmentCollection | Sequence[AssignmentBase] | AssignmentBase,
):
if isinstance(assignments, AssignmentBase):
assignments = [assignments]
if not isinstance(assignments, AssignmentCollection):
assignments = AssignmentCollection(assignments) # type: ignore
_ = _parse_simplification_hints(assignments)
analysis = KernelAnalysis(
self._ctx,
not self._cfg.skip_independence_check,
not self._cfg.allow_double_writes,
kernel_body = self.parse_kernel_body(
assignments
)
analysis(assignments)
if len(self._ctx.fields.index_fields) > 0 or self._cfg.index_field is not None:
ispace = create_sparse_iteration_space(
self._ctx, assignments, index_field=self._cfg.index_field
)
else:
ispace = create_full_iteration_space(
self._ctx,
assignments,
ghost_layers=self._cfg.ghost_layers,
iteration_slice=self._cfg.iteration_slice,
)
self._ctx.set_iteration_space(ispace)
freeze = FreezeExpressions(self._ctx)
kernel_body = freeze(assignments)
typify = Typifier(self._ctx)
kernel_body = typify(kernel_body)
match self._platform:
case GenericCpu():
kernel_ast = self._platform.materialize_iteration_space(
- kernel_body, ispace
+ kernel_body, self._ctx.get_iteration_space()
)
case GenericGpu():
kernel_ast, gpu_threads = self._platform.materialize_iteration_space(
- kernel_body, ispace
+ kernel_body, self._ctx.get_iteration_space()
)
# Fold and extract constants
@@ -179,6 +149,53 @@ class DefaultKernelCreationDriver:
self._cfg.get_jit(),
)
def parse_kernel_body(
self,
assignments: AssignmentCollection | Sequence[AssignmentBase] | AssignmentBase,
) -> PsBlock:
if isinstance(assignments, AssignmentBase):
assignments = [assignments]
if not isinstance(assignments, AssignmentCollection):
assignments = AssignmentCollection(assignments) # type: ignore
_ = _parse_simplification_hints(assignments)
analysis = KernelAnalysis(
self._ctx,
not self._cfg.skip_independence_check,
not self._cfg.allow_double_writes,
)
analysis(assignments)
if self._cfg.index_field is not None:
ispace = create_sparse_iteration_space(
self._ctx, assignments, index_field=self._cfg.index_field
)
else:
gls = self._cfg.ghost_layers
islice = self._cfg.iteration_slice
if gls is None and islice is None:
gls = AUTO
ispace = create_full_iteration_space(
self._ctx,
assignments,
ghost_layers=gls,
iteration_slice=islice,
)
self._ctx.set_iteration_space(ispace)
freeze = FreezeExpressions(self._ctx)
kernel_body = freeze(assignments)
typify = Typifier(self._ctx)
kernel_body = typify(kernel_body)
return kernel_body
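A user-visible consequence of `parse_kernel_body`: when neither `ghost_layers` nor `iteration_slice` is given, the driver substitutes `AUTO`, so the default configuration behaves exactly like requesting automatic ghost-layer inference explicitly. A small sketch of that equivalence (kernel and field setup omitted):

```python
import pystencils as ps
from pystencils.config import AUTO

cfg_default  = ps.CreateKernelConfig()                   # ghost_layers=None, iteration_slice=None
cfg_explicit = ps.CreateKernelConfig(ghost_layers=AUTO)
# Both configurations lead the driver to infer the ghost layers
# from the field accesses of the kernel's assignments.
```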
def _transform_for_cpu(self, kernel_ast: PsBlock):
canonicalize = CanonicalizeSymbols(self._ctx, True)
kernel_ast = cast(PsBlock, canonicalize(kernel_ast))
@@ -57,7 +57,7 @@ class SimplificationStrategy:
def __str__(self):
try:
- import tabulate
+ from tabulate import tabulate
return tabulate(self.elements, headers=['Name', 'Runtime', 'Adds', 'Muls', 'Divs', 'Total'])
except ImportError:
result = "Name, Adds, Muls, Divs, Runtime\n"