Skip to content
Snippets Groups Projects
Commit 7bc419c0 authored by Frederik Hennig's avatar Frederik Hennig
Browse files

reimplemented kernel constraints check as KernelAnalysis

parent 1344ac46
No related branches found
No related tags found
No related merge requests found
Pipeline #61008 failed
...@@ -569,6 +569,8 @@ class Field: ...@@ -569,6 +569,8 @@ class Field:
""" """
_iterable = False # see https://i10git.cs.fau.de/pycodegen/pystencils/-/merge_requests/166#note_10680 _iterable = False # see https://i10git.cs.fau.de/pycodegen/pystencils/-/merge_requests/166#note_10680
__match_args__ = ("field", "offsets", "index")
def __new__(cls, name, *args, **kwargs): def __new__(cls, name, *args, **kwargs):
obj = Field.Access.__xnew_cached_(cls, name, *args, **kwargs) obj = Field.Access.__xnew_cached_(cls, name, *args, **kwargs)
return obj return obj
......
from __future__ import annotations
from collections import namedtuple, defaultdict
from typing import Any, Sequence
from itertools import chain
import sympy as sp
from .context import KernelCreationContext
from ...field import Field
from ...assignment import Assignment
from ...simp import AssignmentCollection
from ...transformations import NestedScopes
from ..exceptions import PsInternalCompilerError
class KernelConstraintsError(Exception):
    """Raised when the SymPy kernel input violates a consistency constraint
    (e.g. SSA form, write independence) checked by `KernelAnalysis`."""
class KernelAnalysis:
    """General analysis pass over a kernel expressed using the SymPy frontend.

    The kernel analysis fulfills two tasks. It checks the SymPy input for consistency,
    and populates the context with required knowledge about the kernel.

    A `KernelAnalysis` object may be called at most once.

    Consistency and Constraints
    ---------------------------

    The following checks are performed:

    - **SSA Form:** The given assignments must be in single-assignment form; each symbol must be written at most once.
    - **Independence of Accesses:** To avoid loop-carried dependencies, each field may be written at most once at
      each index, and if a field is written at some location with index `i`, it may only be read with index `i` in
      the same location.
    - **Independence of Writes:** A weaker requirement than access independence; each field may only be written once
      at each index.

    Knowledge Collection
    --------------------

    The following knowledge is collected into the context:

    - The set of fields accessed in the kernel
    """

    # Key identifying a write target: a field paired with the accessed index.
    FieldAndIndex = namedtuple("FieldAndIndex", ["field", "index"])

    def __init__(self, ctx: KernelCreationContext):
        # Context that receives the collected knowledge (fields) after the pass runs.
        self._ctx = ctx

        # Both optional checks are currently disabled; see the TODO below.
        self._check_access_independence = False
        self._check_double_writes = (
            False  # TODO: Access independence check implies double writes check
        )

        # Map pairs of fields and indices to offsets
        self._field_writes: dict[KernelAnalysis.FieldAndIndex, set[Any]] = defaultdict(
            set
        )

        # Fields appearing on assignment left-hand / right-hand sides, respectively.
        self._fields_written: set[Field] = set()
        self._fields_read: set[Field] = set()

        # Tracks symbol definitions and accesses for the SSA-form checks.
        self._scopes = NestedScopes()

        # Guards against running this analysis object more than once.
        self._called = False

    def __call__(self, obj: AssignmentCollection | Sequence[Assignment] | Assignment):
        """Run the analysis over `obj` and register all accessed fields with the context.

        Raises:
            PsInternalCompilerError: If this analysis object was already invoked.
            KernelConstraintsError: If any consistency check fails during the visit.
        """
        if self._called:
            raise PsInternalCompilerError("KernelAnalysis called twice!")
        self._called = True

        self._visit(obj)

        # Knowledge collection: publish every field that was read or written.
        for field in chain(self._fields_written, self._fields_read):
            self._ctx.add_field(field)

    def _visit(self, obj: Any):
        # Structural dispatch over the supported kernel input shapes.
        match obj:
            case AssignmentCollection(main_asms, subexps):
                # Subexpressions are visited before the main assignments,
                # so their definitions are recorded first.
                self._visit(subexps)
                self._visit(main_asms)

            case [*asms]:  # lists and tuples are unpacked
                for asm in asms:
                    self._visit(asm)

            case Assignment(lhs, rhs):
                # The rhs is processed before the lhs; reads are recorded before
                # the write, which the "written after read" check below relies on.
                self._handle_rhs(rhs)
                self._handle_lhs(lhs)

            case unknown:
                raise KernelConstraintsError(
                    f"Don't know how to interpret {unknown} in a kernel."
                )

    def _handle_lhs(self, lhs: sp.Basic):
        """Record a write target and enforce SSA / write-independence constraints."""
        if not isinstance(lhs, sp.Symbol):
            raise KernelConstraintsError(
                f"Invalid expression on assignment left-hand side: {lhs}"
            )

        match lhs:
            case Field.Access(field, offsets, index):
                self._fields_written.add(field)
                # Fields used inside the access (indirect addressing) count as reads.
                self._fields_read.update(lhs.indirect_addressing_fields)

                fai = self.FieldAndIndex(field, index)
                # Same (field, index) written at an identical offset -> duplicate write.
                if self._check_double_writes and offsets in self._field_writes[fai]:
                    raise KernelConstraintsError(
                        f"Field {field.name} is written twice at the same location"
                    )

                self._field_writes[fai].add(offsets)

                # Same (field, index) written at differing offsets -> write independence violated.
                if self._check_double_writes and len(self._field_writes[fai]) > 1:
                    raise KernelConstraintsError(
                        f"Field {field.name} is written at two different locations"
                    )

            case sp.Symbol():
                if self._scopes.is_defined_locally(lhs):
                    raise KernelConstraintsError(
                        f"Assignments not in SSA form, multiple assignments to {lhs.name}"
                    )
                if lhs in self._scopes.free_parameters:
                    raise KernelConstraintsError(
                        f"Symbol {lhs.name} is written, after it has been read"
                    )
                self._scopes.define_symbol(lhs)

    def _handle_rhs(self, rhs: sp.Basic):
        """Recursively record all reads on an assignment right-hand side."""
        def rec(expr: sp.Basic):
            match expr:
                case Field.Access(field, offsets, index):
                    self._fields_read.add(field)
                    self._fields_read.update(expr.indirect_addressing_fields)
                    # TODO: Should we recurse into the arguments of the field access?

                    if self._check_access_independence:
                        writes = self._field_writes[
                            KernelAnalysis.FieldAndIndex(field, index)
                        ]
                        # The double-write check guarantees at most one write offset here.
                        assert len(writes) <= 1
                        for write_offset in writes:
                            if write_offset != offsets:
                                raise KernelConstraintsError(
                                    f"Violation of loop independence condition. Field "
                                    f"{field} is read at {offsets} and written at {write_offset}"
                                )
                case sp.Symbol():
                    self._scopes.access_symbol(expr)

            # Recurse into subexpressions of every node.
            for arg in expr.args:
                rec(arg)

        rec(rhs)
...@@ -5,7 +5,8 @@ from dataclasses import dataclass ...@@ -5,7 +5,8 @@ from dataclasses import dataclass
from functools import reduce from functools import reduce
from operator import mul from operator import mul
from ...field import Field from ...simp import AssignmentCollection
from ...field import Field, FieldType
from ..typed_expressions import ( from ..typed_expressions import (
PsTypedVariable, PsTypedVariable,
...@@ -15,6 +16,7 @@ from ..typed_expressions import ( ...@@ -15,6 +16,7 @@ from ..typed_expressions import (
) )
from ..arrays import PsLinearizedArray from ..arrays import PsLinearizedArray
from .defaults import Pymbolic as Defaults from .defaults import Pymbolic as Defaults
from ..exceptions import PsInputError, PsInternalCompilerError
if TYPE_CHECKING: if TYPE_CHECKING:
from .context import KernelCreationContext from .context import KernelCreationContext
...@@ -141,3 +143,61 @@ class SparseIterationSpace(IterationSpace): ...@@ -141,3 +143,61 @@ class SparseIterationSpace(IterationSpace):
@property @property
def index_list(self) -> PsLinearizedArray: def index_list(self) -> PsLinearizedArray:
return self._index_list return self._index_list
def create_sparse_iteration_space(
    ctx: KernelCreationContext, assignments: AssignmentCollection
) -> IterationSpace:
    """Create an iteration space over an index list for a sparse kernel.

    Not implemented yet.

    Raises:
        NotImplementedError: Always; sparse iteration spaces are not supported yet.
    """
    # `NotImplemented` is a sentinel reserved for binary special methods; returning
    # it from a regular function hands callers a bogus value. A stub must raise
    # `NotImplementedError` so the failure is loud and immediate.
    raise NotImplementedError("Sparse iteration spaces are not implemented yet")
def create_full_iteration_space(
    ctx: KernelCreationContext, assignments: AssignmentCollection
) -> IterationSpace:
    """Create a dense (full) iteration space for the given kernel.

    The iteration region is derived from an archetype field: the common domain
    field if any domain fields are accessed, otherwise the common custom field.
    Ghost layers come from the context options if specified, otherwise they are
    inferred from the widest relative field access.

    Raises:
        PsInputError: If the kernel accesses neither domain nor custom fields.
        PsInternalCompilerError: If an iteration slice was requested (unsupported yet).
    """
    # This path must not see index fields (those belong to sparse kernels).
    assert not ctx.fields.index_fields

    # Collect all relative accesses into domain fields
    def access_filter(acc: Field.Access):
        return acc.field.field_type in (
            FieldType.GENERIC,
            FieldType.STAGGERED,
            FieldType.STAGGERED_FLUX,
        )

    domain_field_accesses = assignments.atoms(Field.Access)
    domain_field_accesses = set(filter(access_filter, domain_field_accesses))

    # The following scenarios exist:
    # - We have at least one domain field -> find the common field and use it to determine the iteration region
    # - We have no domain fields, but at least one custom field -> determine common field from custom fields
    # - We have neither domain nor custom fields -> Error

    from ...transformations import get_common_field

    if len(domain_field_accesses) > 0:
        archetype_field = get_common_field(ctx.fields.domain_fields)
        # Generator expression avoids materializing a throwaway list for `max`.
        inferred_gls = max(fa.required_ghost_layers for fa in domain_field_accesses)
    elif len(ctx.fields.custom_fields) > 0:
        archetype_field = get_common_field(ctx.fields.custom_fields)
        inferred_gls = 0
    else:
        raise PsInputError(
            "Unable to construct iteration space: The kernel contains no accesses to domain or custom fields."
        )

    # If the user provided a ghost layer specification, use that
    # Otherwise, if an iteration slice was specified, use that
    # Otherwise, use the inferred ghost layers

    from .iteration_space import FullIterationSpace

    if ctx.options.ghost_layers is not None:
        return FullIterationSpace.create_with_ghost_layers(
            ctx, archetype_field, ctx.options.ghost_layers
        )
    elif ctx.options.iteration_slice is not None:
        raise PsInternalCompilerError("Iteration slices not supported yet")
    else:
        return FullIterationSpace.create_with_ghost_layers(
            ctx, archetype_field, inferred_gls
        )
from typing import Sequence
from itertools import chain from itertools import chain
from ...simp import AssignmentCollection from ...simp import AssignmentCollection
from ...field import Field, FieldType
from ...kernel_contrains_check import KernelConstraintsCheck
from ..ast import PsBlock from ..ast import PsBlock
from .context import KernelCreationContext, IterationSpace from .context import KernelCreationContext
from .analysis import KernelAnalysis
from .freeze import FreezeExpressions from .freeze import FreezeExpressions
from .typification import Typifier from .typification import Typifier
from .options import KernelCreationOptions from .options import KernelCreationOptions
from ..exceptions import PsInputError, PsInternalCompilerError from .iteration_space import (
IterationSpace,
create_sparse_iteration_space,
create_full_iteration_space,
)
# flake8: noqa # flake8: noqa
...@@ -20,26 +22,9 @@ def create_kernel(assignments: AssignmentCollection, options: KernelCreationOpti ...@@ -20,26 +22,9 @@ def create_kernel(assignments: AssignmentCollection, options: KernelCreationOpti
# 1. Prepare context # 1. Prepare context
ctx = KernelCreationContext(options) ctx = KernelCreationContext(options)
# 2. Check kernel constraints and collect all fields # 2. Check kernel constraints and collect knowledge
analysis = KernelAnalysis(ctx)
""" analysis(assignments)
TODO: Replace the KernelConstraintsCheck by a KernelAnalysis pass.
The kernel analysis should:
- Check constraints on the assignments (SSA form, independence conditions, ...)
- Collect all fields and register them in the context
- Maybe collect all field accesses and register them at the context
Running all this analysis in a single pass will likely improve compiler performance
since no additional searches, e.g. for field accesses, are necessary later.
"""
check = KernelConstraintsCheck() # TODO: config
check.visit(assignments)
# Collect all fields
for f in chain(check.fields_written, check.fields_read):
ctx.add_field(f)
# 3. Create iteration space # 3. Create iteration space
ispace: IterationSpace = ( ispace: IterationSpace = (
...@@ -74,63 +59,3 @@ def create_kernel(assignments: AssignmentCollection, options: KernelCreationOpti ...@@ -74,63 +59,3 @@ def create_kernel(assignments: AssignmentCollection, options: KernelCreationOpti
# - Loop Splitting, Tiling, Blocking # - Loop Splitting, Tiling, Blocking
# 8. Create and return kernel function. # 8. Create and return kernel function.
def create_sparse_iteration_space(
    ctx: KernelCreationContext, assignments: AssignmentCollection
) -> IterationSpace:
    """Stub for sparse-kernel iteration space construction; not implemented yet."""
    return NotImplemented
def create_full_iteration_space(
    ctx: KernelCreationContext, assignments: AssignmentCollection
) -> IterationSpace:
    """Create a dense iteration space spanning an archetype field.

    The archetype is the common domain field when domain fields are accessed,
    otherwise the common custom field; ghost layers come from the context options
    or are inferred from the kernel's relative accesses.

    Raises:
        PsInputError: If the kernel accesses neither domain nor custom fields.
        PsInternalCompilerError: If an iteration slice was requested (unsupported yet).
    """
    # This path must not see index fields.
    assert not ctx.fields.index_fields

    # Collect all relative accesses into domain fields
    def access_filter(acc: Field.Access):
        # Only these field types participate in determining the iteration region.
        return acc.field.field_type in (
            FieldType.GENERIC,
            FieldType.STAGGERED,
            FieldType.STAGGERED_FLUX,
        )

    domain_field_accesses = assignments.atoms(Field.Access)
    domain_field_accesses = set(filter(access_filter, domain_field_accesses))

    # The following scenarios exist:
    # - We have at least one domain field -> find the common field and use it to determine the iteration region
    # - We have no domain fields, but at least one custom field -> determine common field from custom fields
    # - We have neither domain nor custom fields -> Error

    from ...transformations import get_common_field

    if len(domain_field_accesses) > 0:
        archetype_field = get_common_field(ctx.fields.domain_fields)
        # Infer ghost layers from the widest relative access offset.
        inferred_gls = max(
            [fa.required_ghost_layers for fa in domain_field_accesses]
        )
    elif len(ctx.fields.custom_fields) > 0:
        archetype_field = get_common_field(ctx.fields.custom_fields)
        inferred_gls = 0
    else:
        raise PsInputError(
            "Unable to construct iteration space: The kernel contains no accesses to domain or custom fields."
        )

    # If the user provided a ghost layer specification, use that
    # Otherwise, if an iteration slice was specified, use that
    # Otherwise, use the inferred ghost layers

    from .iteration_space import FullIterationSpace

    if ctx.options.ghost_layers is not None:
        return FullIterationSpace.create_with_ghost_layers(
            ctx, archetype_field, ctx.options.ghost_layers
        )
    elif ctx.options.iteration_slice is not None:
        raise PsInternalCompilerError("Iteration slices not supported yet")
    else:
        return FullIterationSpace.create_with_ghost_layers(
            ctx, archetype_field, inferred_gls
        )
...@@ -31,6 +31,8 @@ class AssignmentCollection: ...@@ -31,6 +31,8 @@ class AssignmentCollection:
""" """
__match_args__ = ("main_assignments", "subexpressions")
# ------------------------------- Creation & Inplace Manipulation -------------------------------------------------- # ------------------------------- Creation & Inplace Manipulation --------------------------------------------------
def __init__(self, main_assignments: Union[List[Assignment], Dict[sp.Expr, sp.Expr]], def __init__(self, main_assignments: Union[List[Assignment], Dict[sp.Expr, sp.Expr]],
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment