diff --git a/src/pystencils/backend/kernelcreation/iteration_space.py b/src/pystencils/backend/kernelcreation/iteration_space.py
index 05e7153bf63eb8580e7ff7c07b7d8499ec9b32db..c3c9eaa7a8825ed036cda92b4926d9abbb29a0a9 100644
--- a/src/pystencils/backend/kernelcreation/iteration_space.py
+++ b/src/pystencils/backend/kernelcreation/iteration_space.py
@@ -6,6 +6,7 @@ from functools import reduce
 from operator import mul
 
 from ...defaults import DEFAULTS
+from ...config import _AUTO_TYPE, AUTO
 from ...simp import AssignmentCollection
 from ...field import Field, FieldType
 
@@ -60,6 +61,7 @@ class FullIterationSpace(IterationSpace):
 
     @dataclass
     class Dimension:
+        """One dimension of a dense iteration space"""
         start: PsExpression
         stop: PsExpression
         step: PsExpression
@@ -180,7 +182,7 @@ class FullIterationSpace(IterationSpace):
     def __init__(
         self,
         ctx: KernelCreationContext,
-        dimensions: Sequence[Dimension],
+        dimensions: Sequence[FullIterationSpace.Dimension],
         archetype_field: Field | None = None,
     ):
         super().__init__(tuple(dim.counter for dim in dimensions))
@@ -192,22 +194,27 @@ class FullIterationSpace(IterationSpace):
 
     @property
     def dimensions(self):
+        """The dimensions of this iteration space"""
         return self._dimensions
 
     @property
     def lower(self):
+        """Lower limits of each dimension"""
         return (dim.start for dim in self._dimensions)
 
     @property
     def upper(self):
+        """Upper limits of each dimension"""
         return (dim.stop for dim in self._dimensions)
 
     @property
     def steps(self):
+        """Iteration steps of each dimension"""
         return (dim.step for dim in self._dimensions)
 
     @property
     def archetype_field(self) -> Field | None:
+        """Field whose shape and memory layout act as archetypes for this iteration space's dimensions."""
         return self._archetype_field
 
     @property
@@ -230,6 +237,13 @@ class FullIterationSpace(IterationSpace):
     def actual_iterations(
         self, dimension: int | FullIterationSpace.Dimension | None = None
     ) -> PsExpression:
+        """Construct an expression representing the actual number of unique points inside the iteration space.
+
+        Args:
+            dimension: If an integer or a `Dimension` object is given, the number of iterations in that
+                dimension is computed. If `None`, the total number of iterations inside the entire space
+                is computed.
+        """
         from .typification import Typifier
         from ..transformations import EliminateConstants
 
@@ -399,7 +413,7 @@ def create_sparse_iteration_space(
 def create_full_iteration_space(
     ctx: KernelCreationContext,
     assignments: AssignmentCollection,
-    ghost_layers: None | int | Sequence[int | tuple[int, int]] = None,
+    ghost_layers: None | _AUTO_TYPE | int | Sequence[int | tuple[int, int]] = None,
     iteration_slice: None | int | slice | tuple[int | slice, ...] = None,
 ) -> IterationSpace:
     assert not ctx.fields.index_fields
@@ -439,16 +453,7 @@ def create_full_iteration_space(
     #   Otherwise, if an iteration slice was specified, use that
     #   Otherwise, use the inferred ghost layers
-    if ghost_layers is not None:
-        ctx.metadata["ghost_layers"] = ghost_layers
-        return FullIterationSpace.create_with_ghost_layers(
-            ctx, ghost_layers, archetype_field
-        )
-    elif iteration_slice is not None:
-        return FullIterationSpace.create_from_slice(
-            ctx, iteration_slice, archetype_field
-        )
-    else:
+    if ghost_layers is AUTO:
         if len(domain_field_accesses) > 0:
             inferred_gls = max(
                 [fa.required_ghost_layers for fa in domain_field_accesses]
             )
@@ -460,3 +465,15 @@
         return FullIterationSpace.create_with_ghost_layers(
             ctx, inferred_gls, archetype_field
         )
+    elif ghost_layers is not None:
+        assert not isinstance(ghost_layers, _AUTO_TYPE)
+        ctx.metadata["ghost_layers"] = ghost_layers
+        return FullIterationSpace.create_with_ghost_layers(
+            ctx, ghost_layers, archetype_field
+        )
+    elif iteration_slice is not None:
+        return FullIterationSpace.create_from_slice(
+            ctx, iteration_slice, archetype_field
+        )
+    else:
+        assert False, "unreachable code"
diff --git a/src/pystencils/config.py b/src/pystencils/config.py
index 9e2af1b7e3cebe1e24b3a103ba72d677fc2d6d38..c688530aecd497ddced30cc47e83bf4e314f9ac8 100644
--- a/src/pystencils/config.py
+++ b/src/pystencils/config.py
@@ -28,6 +28,14 @@ class PsOptionsError(Exception):
     """Indicates an option clash in the `CreateKernelConfig`."""
 
 
+class _AUTO_TYPE:
+    ...
+
+
+AUTO = _AUTO_TYPE()
+"""Special value that can be passed to some options for invoking automatic behaviour."""
+
+
 @dataclass
 class OpenMpConfig:
     """Parameters controlling kernel parallelization using OpenMP."""
@@ -68,8 +76,8 @@ class CpuOptimConfig:
     openmp: bool | OpenMpConfig = False
     """Enable OpenMP parallelization.
 
-    If set to `True`, the kernel will be parallelized using OpenMP according to the default settings in `OpenMpParams`.
-    To customize OpenMP parallelization, pass an instance of `OpenMpParams` instead.
+    If set to `True`, the kernel will be parallelized using OpenMP according to the default settings in `OpenMpConfig`.
+    To customize OpenMP parallelization, pass an instance of `OpenMpConfig` instead.
     """
 
     vectorize: bool | VectorizationConfig = False
@@ -188,11 +196,11 @@ class GpuIndexingConfig:
 
    If set to `True`, the kernel is generated for execution via
    `parallel_for <https://registry.khronos.org/SYCL/specs/sycl-2020/html/sycl-2020.html#_parallel_for_invoke>`_ -dispatch using
-   a flat `sycl::range`. In this case, the GPU block size will be inferred by the SYCL runtime.
+   a flat ``sycl::range``. In this case, the GPU block size will be inferred by the SYCL runtime.
 
-   If set to `False`, the kernel will receive an `nd_item` and has to be executed using
+   If set to `False`, the kernel will receive an ``nd_item`` and has to be executed using
    `parallel_for <https://registry.khronos.org/SYCL/specs/sycl-2020/html/sycl-2020.html#_parallel_for_invoke>`_
-   with an `nd_range`. This allows manual specification of the block size.
+   with an ``nd_range``. This allows manual specification of the block size.
    """
 
 
@@ -207,38 +215,49 @@ class CreateKernelConfig:
    """Just-in-time compiler used to compile and load the kernel for invocation from the current Python environment.
 
    If left at `None`, a default just-in-time compiler will be inferred from the `target` parameter.
-   To explicitly disable JIT compilation, pass `pystencils.nbackend.jit.no_jit`.
+   To explicitly disable JIT compilation, pass `pystencils.backend.jit.no_jit`.
    """
 
    function_name: str = "kernel"
    """Name of the generated function"""
 
-   ghost_layers: None | int | Sequence[int | tuple[int, int]] = None
+   ghost_layers: None | _AUTO_TYPE | int | Sequence[int | tuple[int, int]] = None
    """Specifies the number of ghost layers of the iteration region.
 
    Options:
-    - `None`: Required ghost layers are inferred from field accesses
+    - :py:data:`AUTO <pystencils.config.AUTO>`: Required ghost layers are inferred from field accesses
     - `int`: A uniform number of ghost layers in each spatial coordinate is applied
     - ``Sequence[int, tuple[int, int]]``: Ghost layers are specified for each spatial coordinate.
       In each coordinate, a single integer specifies the ghost layers at both the lower and upper iteration limit,
       while a pair of integers specifies the lower and upper ghost layers separately.
 
    When manually specifying ghost layers, it is the user's responsibility to avoid out-of-bounds memory accesses.
-   If ``ghost_layers=None`` is specified, the iteration region may otherwise be set using the `iteration_slice` option.
+
+   .. note::
+       At most one of `ghost_layers`, `iteration_slice`, and `index_field` may be set.
    """
 
-   iteration_slice: None | Sequence[slice] = None
+   iteration_slice: None | int | slice | tuple[int | slice] = None
    """Specifies the kernel's iteration slice.
-
-   `iteration_slice` may only be set if ``ghost_layers=None``.
-   If it is set, a slice must be specified for each spatial coordinate.
-   TODO: Specification of valid slices and their behaviour
+
+   Example:
+       >>> cfg = CreateKernelConfig(
+       ...     iteration_slice=ps.make_slice[3:14, 2:-2]
+       ... )
+       >>> cfg.iteration_slice
+       (slice(3, 14, None), slice(2, -2, None))
+
+   .. note::
+       At most one of `ghost_layers`, `iteration_slice`, and `index_field` may be set.
    """
 
    index_field: Field | None = None
    """Index field for a sparse kernel.
 
    If this option is set, a sparse kernel with the given field as index field will be generated.
+
+   .. note::
+       At most one of `ghost_layers`, `iteration_slice`, and `index_field` may be set.
""" """Data Types""" @@ -288,10 +307,10 @@ class CreateKernelConfig: """Deprecated; use `default_dtype` instead""" cpu_openmp: InitVar[bool | int | None] = None - """Deprecated; use `cpu_optim.openmp` instead.""" + """Deprecated; use `cpu_optim.openmp <CpuOptimConfig.openmp>` instead.""" cpu_vectorize_info: InitVar[dict | None] = None - """Deprecated; use `cpu_optim.vectorize` instead.""" + """Deprecated; use `cpu_optim.vectorize <CpuOptimConfig.vectorize>` instead.""" gpu_indexing_params: InitVar[dict | None] = None """Deprecated; use `gpu_indexing` instead.""" diff --git a/src/pystencils/kernelcreation.py b/src/pystencils/kernelcreation.py index 651a67cf2092a93e9e1cab3f393c2edd1baf15a9..548fbc9bba8c1606fbba2929324e9cea273b73b3 100644 --- a/src/pystencils/kernelcreation.py +++ b/src/pystencils/kernelcreation.py @@ -6,6 +6,7 @@ from .config import ( CreateKernelConfig, OpenMpConfig, VectorizationConfig, + AUTO ) from .backend import KernelFunction from .types import create_numeric_type, PsIntegerType, PsScalarType @@ -91,49 +92,18 @@ class DefaultKernelCreationDriver: self, assignments: AssignmentCollection | Sequence[AssignmentBase] | AssignmentBase, ): - if isinstance(assignments, AssignmentBase): - assignments = [assignments] - - if not isinstance(assignments, AssignmentCollection): - assignments = AssignmentCollection(assignments) # type: ignore - - _ = _parse_simplification_hints(assignments) - - analysis = KernelAnalysis( - self._ctx, - not self._cfg.skip_independence_check, - not self._cfg.allow_double_writes, + kernel_body = self.parse_kernel_body( + assignments ) - analysis(assignments) - - if len(self._ctx.fields.index_fields) > 0 or self._cfg.index_field is not None: - ispace = create_sparse_iteration_space( - self._ctx, assignments, index_field=self._cfg.index_field - ) - else: - ispace = create_full_iteration_space( - self._ctx, - assignments, - ghost_layers=self._cfg.ghost_layers, - iteration_slice=self._cfg.iteration_slice, - ) - - self._ctx.set_iteration_space(ispace) - - freeze = FreezeExpressions(self._ctx) - kernel_body = freeze(assignments) - - typify = Typifier(self._ctx) - kernel_body = typify(kernel_body) match self._platform: case GenericCpu(): kernel_ast = self._platform.materialize_iteration_space( - kernel_body, ispace + kernel_body, self._ctx.get_iteration_space() ) case GenericGpu(): kernel_ast, gpu_threads = self._platform.materialize_iteration_space( - kernel_body, ispace + kernel_body, self._ctx.get_iteration_space() ) # Fold and extract constants @@ -179,6 +149,53 @@ class DefaultKernelCreationDriver: self._cfg.get_jit(), ) + def parse_kernel_body( + self, + assignments: AssignmentCollection | Sequence[AssignmentBase] | AssignmentBase, + ) -> PsBlock: + if isinstance(assignments, AssignmentBase): + assignments = [assignments] + + if not isinstance(assignments, AssignmentCollection): + assignments = AssignmentCollection(assignments) # type: ignore + + _ = _parse_simplification_hints(assignments) + + analysis = KernelAnalysis( + self._ctx, + not self._cfg.skip_independence_check, + not self._cfg.allow_double_writes, + ) + analysis(assignments) + + if self._cfg.index_field is not None: + ispace = create_sparse_iteration_space( + self._ctx, assignments, index_field=self._cfg.index_field + ) + else: + gls = self._cfg.ghost_layers + islice = self._cfg.iteration_slice + + if gls is None and islice is None: + gls = AUTO + + ispace = create_full_iteration_space( + self._ctx, + assignments, + ghost_layers=gls, + iteration_slice=islice, + ) + + 
+        self._ctx.set_iteration_space(ispace)
+
+        freeze = FreezeExpressions(self._ctx)
+        kernel_body = freeze(assignments)
+
+        typify = Typifier(self._ctx)
+        kernel_body = typify(kernel_body)
+
+        return kernel_body
+
     def _transform_for_cpu(self, kernel_ast: PsBlock):
         canonicalize = CanonicalizeSymbols(self._ctx, True)
         kernel_ast = cast(PsBlock, canonicalize(kernel_ast))
diff --git a/src/pystencils/simp/simplificationstrategy.py b/src/pystencils/simp/simplificationstrategy.py
index 22ffa34d04bc2731f615bd685137c8abebf9d58b..7cba94f8bb80e69afe039a1bc88822e627781a23 100644
--- a/src/pystencils/simp/simplificationstrategy.py
+++ b/src/pystencils/simp/simplificationstrategy.py
@@ -57,7 +57,7 @@ class SimplificationStrategy:
 
     def __str__(self):
         try:
-            import tabulate
+            from tabulate import tabulate
             return tabulate(self.elements, headers=['Name', 'Runtime', 'Adds', 'Muls', 'Divs', 'Total'])
         except ImportError:
             result = "Name, Adds, Muls, Divs, Runtime\n"
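
The user-visible change in this patch is the new AUTO sentinel: automatic ghost layer
inference must now be requested explicitly (or is implied by the driver when neither
ghost_layers nor iteration_slice is given), and ghost_layers, iteration_slice, and
index_field have become mutually exclusive. A minimal usage sketch follows, assuming the
standard pystencils front-end API (ps.fields, ps.Assignment, ps.make_slice,
ps.create_kernel); the field names and the Jacobi-style update are illustrative only,
not part of the patch:

    import pystencils as ps
    from pystencils.config import AUTO  # sentinel introduced by this patch

    # A simple 4-point stencil update on two 2D fields
    src, dst = ps.fields("src, dst: double[2D]")
    update = ps.Assignment(
        dst[0, 0],
        (src[1, 0] + src[-1, 0] + src[0, 1] + src[0, -1]) / 4,
    )

    # Ghost layers inferred from the stencil's field accesses; this is the behaviour
    # that plain ghost_layers=None used to trigger and that the driver now falls back
    # to when neither ghost_layers nor iteration_slice is set.
    cfg_auto = ps.CreateKernelConfig(ghost_layers=AUTO)
    kernel = ps.create_kernel(update, config=cfg_auto)

    # Alternatively, restrict the iteration region with an explicit slice.
    # Per the new docstrings, this must not be combined with ghost_layers.
    cfg_slice = ps.CreateKernelConfig(iteration_slice=ps.make_slice[3:14, 2:-2])
    kernel_sliced = ps.create_kernel(update, config=cfg_slice)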