diff --git a/src/pystencils/__init__.py b/src/pystencils/__init__.py index b2cdeca07d6040e198f23f1b5666352fdfb991be..4374ccda4ef8c508a909975f254cbf32936912ae 100644 --- a/src/pystencils/__init__.py +++ b/src/pystencils/__init__.py @@ -1,6 +1,13 @@ """Module to generate stencil kernels in C or CUDA using sympy expressions and call them as Python functions""" -from .target import Target +from .codegen import ( + Target, + CreateKernelConfig, + CpuOptimConfig, + VectorizationConfig, + OpenMpConfig, + GpuIndexingConfig, +) from .defaults import DEFAULTS from . import fd from . import stencil as stencil @@ -9,16 +16,9 @@ from .inspection import inspect from .field import Field, FieldType, fields from .types import create_type, create_numeric_type from .cache import clear_cache -from .config import ( - CreateKernelConfig, - CpuOptimConfig, - VectorizationConfig, - OpenMpConfig, - GpuIndexingConfig, -) from .kernel_decorator import kernel, kernel_config from .kernelcreation import create_kernel, create_staggered_kernel -from .backend.kernelfunction import KernelFunction +from .codegen import Kernel from .backend.jit import no_jit from .backend.exceptions import KernelConstraintsError from .slicing import make_slice @@ -55,7 +55,7 @@ __all__ = [ "OpenMpConfig", "create_kernel", "create_staggered_kernel", - "KernelFunction", + "Kernel", "KernelConstraintsError", "Target", "no_jit", diff --git a/src/pystencils/backend/__init__.py b/src/pystencils/backend/__init__.py index b947a112ecb2be7762fefdf54afd4dffc185c319..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 100644 --- a/src/pystencils/backend/__init__.py +++ b/src/pystencils/backend/__init__.py @@ -1,14 +0,0 @@ -from .kernelfunction import ( - KernelParameter, - KernelFunction, - GpuKernelFunction, -) - -from .constraints import KernelParamsConstraint - -__all__ = [ - "KernelParameter", - "KernelFunction", - "GpuKernelFunction", - "KernelParamsConstraint", -] diff --git a/src/pystencils/backend/emission/base_printer.py 
b/src/pystencils/backend/emission/base_printer.py index 50cd1bfeaf7f97295e4c6c557fa26acf933c04b7..d721b9f895c79ebdd6a58858cc0408613fefa6e4 100644 --- a/src/pystencils/backend/emission/base_printer.py +++ b/src/pystencils/backend/emission/base_printer.py @@ -1,8 +1,9 @@ from __future__ import annotations from enum import Enum from abc import ABC, abstractmethod +from typing import TYPE_CHECKING -from ...target import Target +from ...codegen import Target from ..ast.structural import ( PsAstNode, @@ -59,7 +60,8 @@ from ..memory import PsSymbol from ..constants import PsConstant from ...types import PsType -from ..kernelfunction import KernelFunction, GpuKernelFunction +if TYPE_CHECKING: + from ...codegen import Kernel, GpuKernel class EmissionError(Exception): @@ -172,8 +174,8 @@ class BasePrinter(ABC): def __init__(self, indent_width=3): self._indent_width = indent_width - def __call__(self, obj: PsAstNode | KernelFunction) -> str: - if isinstance(obj, KernelFunction): + def __call__(self, obj: PsAstNode | Kernel) -> str: + if isinstance(obj, Kernel): sig = self.print_signature(obj) body_code = self.visit(obj.body, PrinterCtx()) return f"{sig}\n{body_code}" @@ -372,7 +374,7 @@ class BasePrinter(ABC): f"BasePrinter does not know how to print {type(node)}" ) - def print_signature(self, func: KernelFunction) -> str: + def print_signature(self, func: Kernel) -> str: prefix = self._func_prefix(func) params_str = ", ".join( f"{self._type_str(p.dtype)} {p.name}" for p in func.parameters @@ -380,8 +382,8 @@ class BasePrinter(ABC): signature = " ".join([prefix, "void", func.name, f"({params_str})"]) return signature - def _func_prefix(self, func: KernelFunction): - if isinstance(func, GpuKernelFunction) and func.target == Target.CUDA: + def _func_prefix(self, func: Kernel): + if isinstance(func, GpuKernel) and func.target == Target.CUDA: return "__global__" else: return "FUNC_PREFIX" diff --git a/src/pystencils/backend/kernelcreation/context.py 
b/src/pystencils/backend/kernelcreation/context.py index 1cf159cf4a07b85122cb574f334b99509cee000b..bb7bd708d0d1b3315603b2b152a95cfbed98b28b 100644 --- a/src/pystencils/backend/kernelcreation/context.py +++ b/src/pystencils/backend/kernelcreation/context.py @@ -10,7 +10,6 @@ from ...field import Field, FieldType from ...sympyextensions.typed_sympy import TypedSymbol, DynamicType from ..memory import PsSymbol, PsBuffer -from ..properties import FieldShape, FieldStride from ..constants import PsConstant from ...types import ( PsType, @@ -371,6 +370,8 @@ class KernelCreationContext: buf_shape += [convert_size(1)] buf_strides += [convert_size(1)] + from ...codegen.properties import FieldShape, FieldStride + for i, size in enumerate(buf_shape): if isinstance(size, PsSymbol): size.add_property(FieldShape(field, i)) @@ -410,6 +411,8 @@ class KernelCreationContext: buf_shape: list[PsSymbol | PsConstant] if isinstance(buffer_len, TypedSymbol): + from ...codegen.properties import FieldShape + idx_type = self._normalize_type(buffer_len) len_symb = self.get_symbol(buffer_len.name, idx_type) len_symb.add_property(FieldShape(field, 0)) diff --git a/src/pystencils/backend/kernelcreation/iteration_space.py b/src/pystencils/backend/kernelcreation/iteration_space.py index 9df9883ce67d6d856335a7a7a9537f829b7df11e..a7802c931a5c4e9da3d008b48caa10a954130ea9 100644 --- a/src/pystencils/backend/kernelcreation/iteration_space.py +++ b/src/pystencils/backend/kernelcreation/iteration_space.py @@ -6,7 +6,6 @@ from functools import reduce from operator import mul from ...defaults import DEFAULTS -from ...config import _AUTO_TYPE, AUTO from ...simp import AssignmentCollection from ...field import Field, FieldType @@ -18,6 +17,7 @@ from ...types import PsStructType from ..exceptions import PsInputError, KernelConstraintsError if TYPE_CHECKING: + from ...codegen.config import _AUTO_TYPE from .context import KernelCreationContext @@ -457,6 +457,8 @@ def create_full_iteration_space( # Otherwise, if 
an iteration slice was specified, use that # Otherwise, use the inferred ghost layers + from ...codegen.config import AUTO + if ghost_layers is AUTO: if len(domain_field_accesses) > 0: inferred_gls = max( diff --git a/src/pystencils/backend/kernelfunction.py b/src/pystencils/backend/kernelfunction.py index e2161590e7023728d55b35f7355d25ea94e21438..3c7e103b34da067c82bfb85493fb5eb8059f6ff2 100644 --- a/src/pystencils/backend/kernelfunction.py +++ b/src/pystencils/backend/kernelfunction.py @@ -9,7 +9,7 @@ from .._deprecation import _deprecated from .ast.structural import PsBlock from .ast.analysis import collect_required_headers, collect_undefined_symbols from .memory import PsSymbol -from .properties import ( +from ..codegen.properties import ( PsSymbolProperty, _FieldProperty, FieldShape, @@ -22,7 +22,7 @@ from .platforms import Platform, GpuThreadsRange from .constraints import KernelParamsConstraint from ..types import PsType -from ..target import Target +from ..codegen.target import Target from ..field import Field from ..sympyextensions import TypedSymbol @@ -30,212 +30,6 @@ if TYPE_CHECKING: from .jit import JitBase -class KernelParameter: - """Parameter to a `KernelFunction`.""" - - __match_args__ = ("name", "dtype", "properties") - - def __init__( - self, name: str, dtype: PsType, properties: Iterable[PsSymbolProperty] = () - ): - self._name = name - self._dtype = dtype - self._properties: frozenset[PsSymbolProperty] = ( - frozenset(properties) if properties is not None else frozenset() - ) - self._fields: tuple[Field, ...] = tuple( - sorted( - set( - p.field # type: ignore - for p in filter( - lambda p: isinstance(p, _FieldProperty), self._properties - ) - ), - key=lambda f: f.name - ) - ) - - @property - def name(self): - return self._name - - @property - def dtype(self): - return self._dtype - - def _hashable_contents(self): - return (self._name, self._dtype, self._properties) - - # TODO: Need? 
- def __hash__(self) -> int: - return hash(self._hashable_contents()) - - def __eq__(self, other: object) -> bool: - if not isinstance(other, KernelParameter): - return False - - return ( - type(self) is type(other) - and self._hashable_contents() == other._hashable_contents() - ) - - def __str__(self) -> str: - return self._name - - def __repr__(self) -> str: - return f"{type(self).__name__}(name = {self._name}, dtype = {self._dtype})" - - @property - def symbol(self) -> TypedSymbol: - return TypedSymbol(self.name, self.dtype) - - @property - def fields(self) -> Sequence[Field]: - """Set of fields associated with this parameter.""" - return self._fields - - def get_properties( - self, prop_type: type[PsSymbolProperty] | tuple[type[PsSymbolProperty], ...] - ) -> set[PsSymbolProperty]: - """Retrieve all properties of the given type(s) attached to this parameter""" - return set(filter(lambda p: isinstance(p, prop_type), self._properties)) - - @property - def properties(self) -> frozenset[PsSymbolProperty]: - return self._properties - - @property - def is_field_parameter(self) -> bool: - return bool(self._fields) - - # Deprecated legacy properties - # These are kept mostly for the legacy waLBerla code generation system - - @property - def is_field_pointer(self) -> bool: - warn( - "`is_field_pointer` is deprecated and will be removed in a future version of pystencils. " - "Use `param.get_properties(FieldBasePtr)` instead.", - DeprecationWarning, - ) - return bool(self.get_properties(FieldBasePtr)) - - @property - def is_field_stride(self) -> bool: - warn( - "`is_field_stride` is deprecated and will be removed in a future version of pystencils. " - "Use `param.get_properties(FieldStride)` instead.", - DeprecationWarning, - ) - return bool(self.get_properties(FieldStride)) - - @property - def is_field_shape(self) -> bool: - warn( - "`is_field_shape` is deprecated and will be removed in a future version of pystencils. 
" - "Use `param.get_properties(FieldShape)` instead.", - DeprecationWarning, - ) - return bool(self.get_properties(FieldShape)) - - @property - def field_name(self) -> str: - warn( - "`field_name` is deprecated and will be removed in a future version of pystencils. " - "Use `param.fields[0].name` instead.", - DeprecationWarning, - ) - return self._fields[0].name - - -class KernelFunction: - """A pystencils kernel function. - - The kernel function is the final result of the translation process. - It is immutable, and its AST should not be altered any more, either, as this - might invalidate information about the kernel already stored in the `KernelFunction` object. - """ - - def __init__( - self, - body: PsBlock, - target: Target, - name: str, - parameters: Sequence[KernelParameter], - required_headers: set[str], - constraints: Sequence[KernelParamsConstraint], - jit: JitBase, - ): - self._body: PsBlock = body - self._target = target - self._name = name - self._params = tuple(parameters) - self._required_headers = required_headers - self._constraints = tuple(constraints) - self._jit = jit - self._metadata: dict[str, Any] = dict() - - @property - def metadata(self) -> dict[str, Any]: - return self._metadata - - @property - def body(self) -> PsBlock: - return self._body - - @property - def target(self) -> Target: - return self._target - - @property - def name(self) -> str: - return self._name - - @name.setter - def name(self, n: str): - self._name = n - - @property - def function_name(self) -> str: - _deprecated("function_name", "name") - return self._name - - @function_name.setter - def function_name(self, n: str): - _deprecated("function_name", "name") - self._name = n - - @property - def parameters(self) -> tuple[KernelParameter, ...]: - return self._params - - def get_parameters(self) -> tuple[KernelParameter, ...]: - _deprecated("KernelFunction.get_parameters", "KernelFunction.parameters") - return self.parameters - - def get_fields(self) -> set[Field]: - return 
set(chain.from_iterable(p.fields for p in self._params)) - - @property - def fields_accessed(self) -> set[Field]: - warn( - "`fields_accessed` is deprecated and will be removed in a future version of pystencils. " - "Use `get_fields` instead.", - DeprecationWarning, - ) - return self.get_fields() - - @property - def required_headers(self) -> set[str]: - return self._required_headers - - @property - def constraints(self) -> tuple[KernelParamsConstraint, ...]: - return self._constraints - - def compile(self) -> Callable[..., None]: - """Invoke the underlying just-in-time compiler to obtain the kernel as an executable Python function.""" - return self._jit.compile(self) def create_cpu_kernel_function( @@ -258,29 +52,6 @@ def create_cpu_kernel_function( return kfunc -class GpuKernelFunction(KernelFunction): - """Internal representation of a kernel function targeted at CUDA GPUs.""" - - def __init__( - self, - body: PsBlock, - threads_range: GpuThreadsRange | None, - target: Target, - name: str, - parameters: Sequence[KernelParameter], - required_headers: set[str], - constraints: Sequence[KernelParamsConstraint], - jit: JitBase, - ): - super().__init__( - body, target, name, parameters, required_headers, constraints, jit - ) - self._threads_range = threads_range - - @property - def threads_range(self) -> GpuThreadsRange | None: - """Object exposing the total size of the launch grid this kernel expects to be executed with.""" - return self._threads_range def create_gpu_kernel_function( diff --git a/src/pystencils/backend/memory.py b/src/pystencils/backend/memory.py index fcfae9f908da2da9cc01f422d285979c02a7a26e..7a5d62f691d81a0f251329c47216f65a981ef291 100644 --- a/src/pystencils/backend/memory.py +++ b/src/pystencils/backend/memory.py @@ -6,7 +6,7 @@ from dataclasses import dataclass from ..types import PsType, PsTypeError, deconstify, PsIntegerType, PsPointerType from .exceptions import PsInternalCompilerError from .constants import PsConstant -from .properties import 
PsSymbolProperty, UniqueSymbolProperty +from ..codegen.properties import PsSymbolProperty, UniqueSymbolProperty class PsSymbol: diff --git a/src/pystencils/backend/platforms/cuda.py b/src/pystencils/backend/platforms/cuda.py index dbade47d1b8fafcb55c12be0b07d674f6edb3ce3..048bcb0d55743eb46344a5e255c778cba7f40854 100644 --- a/src/pystencils/backend/platforms/cuda.py +++ b/src/pystencils/backend/platforms/cuda.py @@ -1,4 +1,5 @@ from warnings import warn +from typing import TYPE_CHECKING from ...types import constify from ..exceptions import MaterializationError @@ -26,7 +27,9 @@ from ..ast.expressions import PsLt, PsAnd from ...types import PsSignedIntegerType, PsIeeeFloatType from ..literals import PsLiteral from ..functions import PsMathFunction, MathFunctions, CFunction -from ...config import GpuIndexingConfig + +if TYPE_CHECKING: + from ...codegen.config import GpuIndexingConfig int32 = PsSignedIntegerType(width=32, const=False) diff --git a/src/pystencils/backend/platforms/sycl.py b/src/pystencils/backend/platforms/sycl.py index ec5e7eda05d0417a764d26294206c6c0dcf7d02d..56615af24e29ea0e65c18b612a6e7652d1b69735 100644 --- a/src/pystencils/backend/platforms/sycl.py +++ b/src/pystencils/backend/platforms/sycl.py @@ -1,3 +1,5 @@ +from typing import TYPE_CHECKING + from ..functions import CFunction, PsMathFunction, MathFunctions from ..kernelcreation.iteration_space import ( IterationSpace, @@ -25,7 +27,9 @@ from ..constants import PsConstant from .generic_gpu import GenericGpu, GpuThreadsRange from ..exceptions import MaterializationError from ...types import PsCustomType, PsIeeeFloatType, constify, PsIntegerType -from ...config import GpuIndexingConfig + +if TYPE_CHECKING: + from ...codegen.config import GpuIndexingConfig class SyclPlatform(GenericGpu): diff --git a/src/pystencils/backend/transformations/add_pragmas.py b/src/pystencils/backend/transformations/add_pragmas.py index d01f428744eb0c151f473c9fcdcd8e9bdb0cc271..47c00881982b994d6f1ed2b650a6de119dd5e24c 
100644 --- a/src/pystencils/backend/transformations/add_pragmas.py +++ b/src/pystencils/backend/transformations/add_pragmas.py @@ -1,4 +1,5 @@ from dataclasses import dataclass +from typing import TYPE_CHECKING from typing import Sequence from collections import defaultdict @@ -8,7 +9,8 @@ from ..ast import PsAstNode from ..ast.structural import PsBlock, PsLoop, PsPragma from ..ast.expressions import PsExpression -from ...config import OpenMpConfig +if TYPE_CHECKING: + from ...codegen.config import OpenMpConfig __all__ = ["InsertPragmasAtLoops", "LoopPragma", "AddOpenMP"] diff --git a/src/pystencils/boundaries/boundaryhandling.py b/src/pystencils/boundaries/boundaryhandling.py index fe8dd7d0059940841277f954cc322a42d2d744b6..1f6e3d126365de0af08ee98ddd26d1600af15027 100644 --- a/src/pystencils/boundaries/boundaryhandling.py +++ b/src/pystencils/boundaries/boundaryhandling.py @@ -12,7 +12,7 @@ from pystencils.types import PsIntegerType from pystencils.types.quick import Arr, SInt from pystencils.gpu.gpu_array_handler import GPUArrayHandler from pystencils.field import Field, FieldType -from pystencils.backend.properties import FieldBasePtr +from pystencils.codegen.properties import FieldBasePtr try: # noinspection PyPep8Naming diff --git a/src/pystencils/codegen/__init__.py b/src/pystencils/codegen/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..be9fd9510877bb4096c89944f7067b6f342e6fa9 --- /dev/null +++ b/src/pystencils/codegen/__init__.py @@ -0,0 +1,23 @@ +from .target import Target +from .config import ( + CreateKernelConfig, + CpuOptimConfig, + VectorizationConfig, + OpenMpConfig, + GpuIndexingConfig, +) + +from .kernel import Kernel +from .driver import create_kernel, get_driver + +__all__ = [ + "Target", + "CreateKernelConfig", + "CpuOptimConfig", + "VectorizationConfig", + "OpenMpConfig", + "GpuIndexingConfig", + "Kernel", + "create_kernel", + "get_driver", +] \ No newline at end of file diff --git a/src/pystencils/config.py 
b/src/pystencils/codegen/config.py similarity index 98% rename from src/pystencils/config.py rename to src/pystencils/codegen/config.py index c08ddc16198627adba97169e02724371eca62158..05e3ec3de90e33565e4a3b05e71e06c95f575f2e 100644 --- a/src/pystencils/config.py +++ b/src/pystencils/codegen/config.py @@ -8,9 +8,9 @@ from typing import Sequence from dataclasses import dataclass, InitVar, replace from .target import Target -from .field import Field, FieldType +from ..field import Field, FieldType -from .types import ( +from ..types import ( PsIntegerType, UserTypeSpec, PsIeeeFloatType, @@ -18,10 +18,10 @@ from .types import ( create_type, ) -from .defaults import DEFAULTS +from ..defaults import DEFAULTS if TYPE_CHECKING: - from .backend.jit import JitBase + from ..backend.jit import JitBase class PsOptionsError(Exception): @@ -336,12 +336,12 @@ class CreateKernelConfig: """Returns either the user-specified JIT compiler, or infers one from the target if none is given.""" if self.jit is None: if self.target.is_cpu(): - from .backend.jit import LegacyCpuJit + from ..backend.jit import LegacyCpuJit return LegacyCpuJit() elif self.target == Target.CUDA: try: - from .backend.jit.gpu_cupy import CupyJit + from ..backend.jit.gpu_cupy import CupyJit if ( self.gpu_indexing is not None @@ -352,12 +352,12 @@ class CreateKernelConfig: return CupyJit() except ImportError: - from .backend.jit import no_jit + from ..backend.jit import no_jit return no_jit elif self.target == Target.SYCL: - from .backend.jit import no_jit + from ..backend.jit import no_jit return no_jit else: diff --git a/src/pystencils/codegen/driver.py b/src/pystencils/codegen/driver.py new file mode 100644 index 0000000000000000000000000000000000000000..bc690a598000ca2227fb9ca8ed49f8c0a40f29de --- /dev/null +++ b/src/pystencils/codegen/driver.py @@ -0,0 +1,432 @@ +from __future__ import annotations + +from typing import cast, Sequence +from dataclasses import dataclass, replace + +from .target import Target +from 
.config import CreateKernelConfig, OpenMpConfig, VectorizationConfig, AUTO +from .kernel import Kernel + +from ..types import create_numeric_type, PsIntegerType, PsScalarType +from ..backend.ast import PsAstNode +from ..backend.ast.structural import PsBlock, PsLoop +from ..backend.kernelcreation import ( + KernelCreationContext, + KernelAnalysis, + FreezeExpressions, + Typifier, +) +from ..backend.constants import PsConstant +from ..backend.kernelcreation.iteration_space import ( + create_sparse_iteration_space, + create_full_iteration_space, + FullIterationSpace, +) +from ..backend.platforms import Platform, GenericCpu, GenericVectorCpu, GenericGpu +from ..backend.exceptions import VectorizationError + +from ..backend.transformations import ( + EliminateConstants, + LowerToC, + SelectFunctions, + CanonicalizeSymbols, + HoistLoopInvariantDeclarations, +) + +from ..simp import AssignmentCollection +from sympy.codegen.ast import AssignmentBase + + +__all__ = ["create_kernel"] + + +def create_kernel( + assignments: AssignmentCollection | Sequence[AssignmentBase] | AssignmentBase, + config: CreateKernelConfig | None = None, + **kwargs, +) -> Kernel: + """Create a kernel function from a set of assignments. + + Args: + assignments: The kernel's sequence of assignments, expressed using SymPy + config: The configuration for the kernel translator + kwargs: If ``config`` is not set, it is created from the keyword arguments; + if it is set, its option will be overridden by any keyword arguments. 
+ + Returns: + The numerical kernel in pystencil's internal representation, ready to be + exported or compiled + """ + + if not config: + config = CreateKernelConfig() + + if kwargs: + config = replace(config, **kwargs) + + driver = DefaultKernelCreationDriver(config) + return driver(assignments) + + +def get_driver(cfg: CreateKernelConfig, *, retain_intermediates: bool = False): + return DefaultKernelCreationDriver(cfg, retain_intermediates) + + +class DefaultKernelCreationDriver: + def __init__(self, cfg: CreateKernelConfig, retain_intermediates: bool = False): + self._cfg = cfg + + idx_dtype = create_numeric_type(self._cfg.index_dtype) + assert isinstance(idx_dtype, PsIntegerType) + + self._ctx = KernelCreationContext( + default_dtype=create_numeric_type(self._cfg.default_dtype), + index_dtype=idx_dtype, + ) + + self._target = self._cfg.get_target() + self._platform = self._get_platform() + + if retain_intermediates: + self._intermediates = CodegenIntermediates() + else: + self._intermediates = None + + @property + def intermediates(self) -> CodegenIntermediates | None: + return self._intermediates + + def __call__( + self, + assignments: AssignmentCollection | Sequence[AssignmentBase] | AssignmentBase, + ): + kernel_body = self.parse_kernel_body(assignments) + + match self._platform: + case GenericCpu(): + kernel_ast = self._platform.materialize_iteration_space( + kernel_body, self._ctx.get_iteration_space() + ) + case GenericGpu(): + kernel_ast, gpu_threads = self._platform.materialize_iteration_space( + kernel_body, self._ctx.get_iteration_space() + ) + + if self._intermediates is not None: + self._intermediates.materialized_ispace = kernel_ast.clone() + + # Fold and extract constants + elim_constants = EliminateConstants(self._ctx, extract_constant_exprs=True) + kernel_ast = cast(PsBlock, elim_constants(kernel_ast)) + + if self._intermediates is not None: + self._intermediates.constants_eliminated = kernel_ast.clone() + + # Target-Specific optimizations + if 
self._cfg.target.is_cpu(): + kernel_ast = self._transform_for_cpu(kernel_ast) + + # Note: After this point, the AST may contain intrinsics, so type-dependent + # transformations cannot be run any more + + # Lowering + lower_to_c = LowerToC(self._ctx) + kernel_ast = cast(PsBlock, lower_to_c(kernel_ast)) + + select_functions = SelectFunctions(self._platform) + kernel_ast = cast(PsBlock, select_functions(kernel_ast)) + + if self._intermediates is not None: + self._intermediates.lowered = kernel_ast.clone() + + # Late canonicalization pass: Canonicalize new symbols introduced by LowerToC + + canonicalize = CanonicalizeSymbols(self._ctx, True) + kernel_ast = cast(PsBlock, canonicalize(kernel_ast)) + + if self._cfg.target.is_cpu(): + return create_cpu_kernel_function( + self._ctx, + self._platform, + kernel_ast, + self._cfg.function_name, + self._cfg.target, + self._cfg.get_jit(), + ) + else: + return create_gpu_kernel_function( + self._ctx, + self._platform, + kernel_ast, + gpu_threads, + self._cfg.function_name, + self._cfg.target, + self._cfg.get_jit(), + ) + + def parse_kernel_body( + self, + assignments: AssignmentCollection | Sequence[AssignmentBase] | AssignmentBase, + ) -> PsBlock: + if isinstance(assignments, AssignmentBase): + assignments = [assignments] + + if not isinstance(assignments, AssignmentCollection): + assignments = AssignmentCollection(assignments) # type: ignore + + _ = _parse_simplification_hints(assignments) + + analysis = KernelAnalysis( + self._ctx, + not self._cfg.skip_independence_check, + not self._cfg.allow_double_writes, + ) + analysis(assignments) + + if self._cfg.index_field is not None: + ispace = create_sparse_iteration_space( + self._ctx, assignments, index_field=self._cfg.index_field + ) + else: + gls = self._cfg.ghost_layers + islice = self._cfg.iteration_slice + + if gls is None and islice is None: + gls = AUTO + + ispace = create_full_iteration_space( + self._ctx, + assignments, + ghost_layers=gls, + iteration_slice=islice, + ) + 
+ self._ctx.set_iteration_space(ispace) + + freeze = FreezeExpressions(self._ctx) + kernel_body = freeze(assignments) + + typify = Typifier(self._ctx) + kernel_body = typify(kernel_body) + + if self._intermediates is not None: + self._intermediates.parsed_body = kernel_body.clone() + + return kernel_body + + def _transform_for_cpu(self, kernel_ast: PsBlock): + canonicalize = CanonicalizeSymbols(self._ctx, True) + kernel_ast = cast(PsBlock, canonicalize(kernel_ast)) + + if self._intermediates is not None: + self._intermediates.cpu_canonicalize = kernel_ast.clone() + + hoist_invariants = HoistLoopInvariantDeclarations(self._ctx) + kernel_ast = cast(PsBlock, hoist_invariants(kernel_ast)) + + if self._intermediates is not None: + self._intermediates.cpu_hoist_invariants = kernel_ast.clone() + + cpu_cfg = self._cfg.cpu_optim + + if cpu_cfg is None: + return kernel_ast + + if cpu_cfg.loop_blocking: + raise NotImplementedError("Loop blocking not implemented yet.") + + kernel_ast = self._vectorize(kernel_ast) + + if cpu_cfg.openmp is not False: + from .backend.transformations import AddOpenMP + + params = ( + cpu_cfg.openmp + if isinstance(cpu_cfg.openmp, OpenMpConfig) + else OpenMpConfig() + ) + add_omp = AddOpenMP(self._ctx, params) + kernel_ast = cast(PsBlock, add_omp(kernel_ast)) + + if self._intermediates is not None: + self._intermediates.cpu_openmp = kernel_ast.clone() + + if cpu_cfg.use_cacheline_zeroing: + raise NotImplementedError("CL-zeroing not implemented yet") + + return kernel_ast + + def _vectorize(self, kernel_ast: PsBlock) -> PsBlock: + assert self._cfg.cpu_optim is not None + vec_config = self._cfg.cpu_optim.get_vectorization_config() + if vec_config is None: + return kernel_ast + + from .backend.transformations import LoopVectorizer, SelectIntrinsics + + assert isinstance(self._platform, GenericVectorCpu) + + ispace = self._ctx.get_iteration_space() + if not isinstance(ispace, FullIterationSpace): + raise VectorizationError( + "Unable to vectorize 
kernel: The kernel is not using a dense iteration space." + ) + + inner_loop_coord = ispace.loop_order[-1] + inner_loop_dim = ispace.dimensions[inner_loop_coord] + + # Apply stride (TODO: and alignment) assumptions + if vec_config.assume_inner_stride_one: + for field in self._ctx.fields: + buf = self._ctx.get_buffer(field) + inner_stride = buf.strides[inner_loop_coord] + if isinstance(inner_stride, PsConstant): + if inner_stride.value != 1: + raise VectorizationError( + f"Unable to apply assumption 'assume_inner_stride_one': " + f"Field {field} has fixed stride {inner_stride} " + f"set in the inner coordinate {inner_loop_coord}." + ) + else: + buf.strides[inner_loop_coord] = PsConstant(1, buf.index_type) + # TODO: Communicate assumption to runtime system via a precondition + + # Call loop vectorizer + if vec_config.lanes is None: + lanes = VectorizationConfig.default_lanes( + self._target, cast(PsScalarType, self._ctx.default_dtype) + ) + else: + lanes = vec_config.lanes + + vectorizer = LoopVectorizer(self._ctx, lanes) + + def loop_predicate(loop: PsLoop): + return loop.counter.symbol == inner_loop_dim.counter + + kernel_ast = vectorizer.vectorize_select_loops(kernel_ast, loop_predicate) + + if self._intermediates is not None: + self._intermediates.cpu_vectorize = kernel_ast.clone() + + select_intrin = SelectIntrinsics(self._ctx, self._platform) + kernel_ast = cast(PsBlock, select_intrin(kernel_ast)) + + if self._intermediates is not None: + self._intermediates.cpu_select_intrins = kernel_ast.clone() + + return kernel_ast + + def _get_platform(self) -> Platform: + if Target._CPU in self._target: + if Target._X86 in self._target: + from ..backend.platforms.x86 import X86VectorArch, X86VectorCpu + + arch: X86VectorArch + + if Target._SSE in self._target: + arch = X86VectorArch.SSE + elif Target._AVX in self._target: + arch = X86VectorArch.AVX + elif Target._AVX512 in self._target: + if Target._FP16 in self._target: + arch = X86VectorArch.AVX512_FP16 + else: + arch = 
X86VectorArch.AVX512 + else: + assert False, "unreachable code" + + return X86VectorCpu(self._ctx, arch) + elif self._target == Target.GenericCPU: + return GenericCpu(self._ctx) + else: + raise NotImplementedError( + f"No platform is currently available for CPU target {self._target}" + ) + + elif Target._GPU in self._target: + match self._target: + case Target.SYCL: + from ..backend.platforms import SyclPlatform + + return SyclPlatform(self._ctx, self._cfg.gpu_indexing) + case Target.CUDA: + from ..backend.platforms import CudaPlatform + + return CudaPlatform(self._ctx, self._cfg.gpu_indexing) + + raise NotImplementedError( + f"Code generation for target {self._target} not implemented" + ) + + +@dataclass +class StageResult: + ast: PsAstNode + label: str + + +class StageResultSlot: + def __init__(self, description: str | None = None): + self._description = description + self._name: str + self._lookup: str + + def __set_name__(self, owner, name: str): + self._name = name + self._lookup = f"_{name}" + + def __get__(self, obj, objtype=None) -> StageResult | None: + if obj is None: + return None + + ast = getattr(obj, self._lookup, None) + if ast is not None: + descr = self._name if self._description is None else self._description + return StageResult(ast, descr) + else: + return None + + def __set__(self, obj, val: PsAstNode | None): + setattr(obj, self._lookup, val) + + +class CodegenIntermediates: + """Intermediate results produced by the code generator.""" + + parsed_body = StageResultSlot("Freeze & Type Deduction") + materialized_ispace = StageResultSlot("Iteration Space Materialization") + constants_eliminated = StageResultSlot("Constant Elimination") + cpu_canonicalize = StageResultSlot("CPU: Symbol Canonicalization") + cpu_hoist_invariants = StageResultSlot("CPU: Hoisting of Loop Invariants") + cpu_vectorize = StageResultSlot("CPU: Vectorization") + cpu_select_intrins = StageResultSlot("CPU: Intrinsics Selection") + cpu_openmp = StageResultSlot("CPU: OpenMP 
Instrumentation") + lowered = StageResultSlot("C Language Lowering") + + @property + def available_stages(self) -> Sequence[StageResult]: + all_results: list[StageResult | None] = [ + getattr(self, name) + for name, slot in CodegenIntermediates.__dict__.items() + if isinstance(slot, StageResultSlot) + ] + return tuple(filter(lambda r: r is not None, all_results)) # type: ignore + + +def create_staggered_kernel( + assignments, target: Target = Target.CPU, gpu_exclusive_conditions=False, **kwargs +): + raise NotImplementedError( + "Staggered kernels are not yet implemented for pystencils 2.0" + ) + + +# Internals + + +def _parse_simplification_hints(ac: AssignmentCollection): + if "split_groups" in ac.simplification_hints: + raise NotImplementedError( + "Loop splitting was requested, but is not implemented yet" + ) diff --git a/src/pystencils/codegen/gpu.py b/src/pystencils/codegen/gpu.py new file mode 100644 index 0000000000000000000000000000000000000000..9cce9b55bbc838e98bc739eb2533e9626894c3a2 --- /dev/null +++ b/src/pystencils/codegen/gpu.py @@ -0,0 +1,28 @@ + + +from .kernel import Kernel + + +class GpuKernel(Kernel): + """Internal representation of a kernel function targeted at CUDA GPUs.""" + + def __init__( + self, + body: PsBlock, + threads_range: GpuThreadsRange | None, + target: Target, + name: str, + parameters: Sequence[KernelParameter], + required_headers: set[str], + constraints: Sequence[KernelParamsConstraint], + jit: JitBase, + ): + super().__init__( + body, target, name, parameters, required_headers, constraints, jit + ) + self._threads_range = threads_range + + @property + def threads_range(self) -> GpuThreadsRange | None: + """Object exposing the total size of the launch grid this kernel expects to be executed with.""" + return self._threads_range diff --git a/src/pystencils/codegen/kernel.py b/src/pystencils/codegen/kernel.py new file mode 100644 index 0000000000000000000000000000000000000000..6a0a6d57678d8d3cd7d53ffbf123d1ae032ced79 --- 
/dev/null +++ b/src/pystencils/codegen/kernel.py @@ -0,0 +1,102 @@ +from __future__ import annotations + +from warnings import warn +from typing import Callable, Sequence, Iterable, Any, TYPE_CHECKING +from itertools import chain + +from .._deprecation import _deprecated + +from ..backend.ast.structural import PsBlock +from ..backend.ast.analysis import collect_required_headers, collect_undefined_symbols +from ..backend.memory import PsSymbol + +from ..types import PsType + +from .target import Target +from .parameters import Parameter +from ..field import Field +from ..sympyextensions import TypedSymbol + + +class Kernel: + """A pystencils kernel. + + The kernel object is the final result of the translation process. + It is immutable, and its AST should not be altered any more, either, as this + might invalidate information about the kernel already stored in the `Kernel` object. + """ + + def __init__( + self, + body: PsBlock, + target: Target, + name: str, + parameters: Sequence[Parameter], + required_headers: set[str], + jit: JitBase, + ): + self._body: PsBlock = body + self._target = target + self._name = name + self._params = tuple(parameters) + self._required_headers = required_headers + self._jit = jit + self._metadata: dict[str, Any] = dict() + + @property + def metadata(self) -> dict[str, Any]: + return self._metadata + + @property + def body(self) -> PsBlock: + return self._body + + @property + def target(self) -> Target: + return self._target + + @property + def name(self) -> str: + return self._name + + @name.setter + def name(self, n: str): + self._name = n + + @property + def function_name(self) -> str: + _deprecated("function_name", "name") + return self._name + + @function_name.setter + def function_name(self, n: str): + _deprecated("function_name", "name") + self._name = n + + @property + def parameters(self) -> tuple[Parameter, ...]: + return self._params + + def get_parameters(self) -> tuple[Parameter, ...]: +
_deprecated("KernelFunction.get_parameters", "KernelFunction.parameters") + return self.parameters + + def get_fields(self) -> set[Field]: + return set(chain.from_iterable(p.fields for p in self._params)) + + @property + def fields_accessed(self) -> set[Field]: + warn( + "`fields_accessed` is deprecated and will be removed in a future version of pystencils. " + "Use `get_fields` instead.", + DeprecationWarning, + ) + return self.get_fields() + + @property + def required_headers(self) -> set[str]: + return self._required_headers + + def compile(self) -> Callable[..., None]: + """Invoke the underlying just-in-time compiler to obtain the kernel as an executable Python function.""" + return self._jit.compile(self) diff --git a/src/pystencils/codegen/parameters.py b/src/pystencils/codegen/parameters.py new file mode 100644 index 0000000000000000000000000000000000000000..1e01e07aa5c3c1f9c675b8b30baa09f75290984e --- /dev/null +++ b/src/pystencils/codegen/parameters.py @@ -0,0 +1,142 @@ +from __future__ import annotations + +from warnings import warn +from typing import Callable, Sequence, Iterable, Any, TYPE_CHECKING +from itertools import chain + +from .._deprecation import _deprecated + +from ..backend.ast.structural import PsBlock +from ..backend.ast.analysis import collect_required_headers, collect_undefined_symbols +from ..backend.memory import PsSymbol +from .properties import ( + PsSymbolProperty, + _FieldProperty, + FieldShape, + FieldStride, + FieldBasePtr, +) + +from ..types import PsType + +from .target import Target +from ..field import Field +from ..sympyextensions import TypedSymbol + + +class Parameter: + """Parameter to a `Kernel`.""" + + __match_args__ = ("name", "dtype", "properties") + + def __init__( + self, name: str, dtype: PsType, properties: Iterable[PsSymbolProperty] = () + ): + self._name = name + self._dtype = dtype + self._properties: frozenset[PsSymbolProperty] = ( + frozenset(properties) if properties is not None else frozenset() + )
+ self._fields: tuple[Field, ...] = tuple( + sorted( + set( + p.field # type: ignore + for p in filter( + lambda p: isinstance(p, _FieldProperty), self._properties + ) + ), + key=lambda f: f.name + ) + ) + + @property + def name(self): + return self._name + + @property + def dtype(self): + return self._dtype + + def _hashable_contents(self): + return (self._name, self._dtype, self._properties) + + # TODO: Need? + def __hash__(self) -> int: + return hash(self._hashable_contents()) + + def __eq__(self, other: object) -> bool: + if not isinstance(other, Parameter): + return False + + return ( + type(self) is type(other) + and self._hashable_contents() == other._hashable_contents() + ) + + def __str__(self) -> str: + return self._name + + def __repr__(self) -> str: + return f"{type(self).__name__}(name = {self._name}, dtype = {self._dtype})" + + @property + def symbol(self) -> TypedSymbol: + return TypedSymbol(self.name, self.dtype) + + @property + def fields(self) -> Sequence[Field]: + """Fields associated with this parameter, without duplicates and sorted by name.""" + return self._fields + + def get_properties( + self, prop_type: type[PsSymbolProperty] | tuple[type[PsSymbolProperty], ...] + ) -> set[PsSymbolProperty]: + """Retrieve all properties of the given type(s) attached to this parameter""" + return set(filter(lambda p: isinstance(p, prop_type), self._properties)) + + @property + def properties(self) -> frozenset[PsSymbolProperty]: + return self._properties + + @property + def is_field_parameter(self) -> bool: + return bool(self._fields) + + # Deprecated legacy properties + # These are kept mostly for the legacy waLBerla code generation system + + @property + def is_field_pointer(self) -> bool: + warn( + "`is_field_pointer` is deprecated and will be removed in a future version of pystencils.
" + "Use `param.get_properties(FieldBasePtr)` instead.", + DeprecationWarning, + ) + return bool(self.get_properties(FieldBasePtr)) + + @property + def is_field_stride(self) -> bool: + warn( + "`is_field_stride` is deprecated and will be removed in a future version of pystencils. " + "Use `param.get_properties(FieldStride)` instead.", + DeprecationWarning, + ) + return bool(self.get_properties(FieldStride)) + + @property + def is_field_shape(self) -> bool: + warn( + "`is_field_shape` is deprecated and will be removed in a future version of pystencils. " + "Use `param.get_properties(FieldShape)` instead.", + DeprecationWarning, + ) + return bool(self.get_properties(FieldShape)) + + @property + def field_name(self) -> str: + warn( + "`field_name` is deprecated and will be removed in a future version of pystencils. " + "Use `param.fields[0].name` instead.", + DeprecationWarning, + ) + return self._fields[0].name \ No newline at end of file diff --git a/src/pystencils/backend/properties.py b/src/pystencils/codegen/properties.py similarity index 100% rename from src/pystencils/backend/properties.py rename to src/pystencils/codegen/properties.py diff --git a/src/pystencils/target.py b/src/pystencils/codegen/target.py similarity index 100% rename from src/pystencils/target.py rename to src/pystencils/codegen/target.py diff --git a/src/pystencils/datahandling/__init__.py b/src/pystencils/datahandling/__init__.py index 76a494255a69c7cb880d362ff6eb1835a8f1e33a..ff1a12c96d749b9d5405a62f606ab1ca77e06de5 100644 --- a/src/pystencils/datahandling/__init__.py +++ b/src/pystencils/datahandling/__init__.py @@ -3,7 +3,7 @@ import warnings from typing import Tuple, Union from .datahandling_interface import DataHandling -from ..target import Target +from ..codegen.target import Target from .serial_datahandling import SerialDataHandling try: diff --git a/src/pystencils/datahandling/datahandling_interface.py b/src/pystencils/datahandling/datahandling_interface.py index 
f42c4ef138e04eede2719cbce69f9975b656cb30..867bbf062d7307187c6a72f465fe54177bbfacc1 100644 --- a/src/pystencils/datahandling/datahandling_interface.py +++ b/src/pystencils/datahandling/datahandling_interface.py @@ -3,7 +3,7 @@ from typing import Callable, Dict, Iterable, Optional, Sequence, Tuple, Union import numpy as np -from pystencils.target import Target +from ..codegen import Target from pystencils.field import Field, FieldType diff --git a/src/pystencils/datahandling/parallel_datahandling.py b/src/pystencils/datahandling/parallel_datahandling.py index f3f7305228f6ba2c68f04c39c05ddb6c8ff7610c..8c7ce6e628f44b0a40cbe6c91e2605eb08176c23 100644 --- a/src/pystencils/datahandling/parallel_datahandling.py +++ b/src/pystencils/datahandling/parallel_datahandling.py @@ -9,7 +9,7 @@ from pystencils.datahandling.blockiteration import block_iteration, sliced_block from pystencils.datahandling.datahandling_interface import DataHandling from pystencils.field import Field, FieldType from pystencils.utils import DotDict -from pystencils.backend.properties import FieldBasePtr +from pystencils.codegen.properties import FieldBasePtr from pystencils import Target diff --git a/src/pystencils/datahandling/serial_datahandling.py b/src/pystencils/datahandling/serial_datahandling.py index 6a5ce573085b0380196e208c7d19ec16cf5fbb37..73b749ca46171c86f6183789bff2731efb8a1a5d 100644 --- a/src/pystencils/datahandling/serial_datahandling.py +++ b/src/pystencils/datahandling/serial_datahandling.py @@ -6,7 +6,7 @@ import numpy as np from pystencils.datahandling.blockiteration import SerialBlock from pystencils.datahandling.datahandling_interface import DataHandling -from pystencils.target import Target +from ..codegen import Target from pystencils.field import (Field, FieldType, create_numpy_array_with_layout, layout_string_to_tuple, spatial_layout_string_to_tuple) from pystencils.gpu.gpu_array_handler import GPUArrayHandler, GPUNotAvailableHandler diff --git a/src/pystencils/enums.py 
b/src/pystencils/enums.py index 86048059d67e6132223825e3b94588b35e30796e..bcea50e84cfba9190a2353245c7c29168443ca13 100644 --- a/src/pystencils/enums.py +++ b/src/pystencils/enums.py @@ -1,4 +1,4 @@ -from .target import Target as _Target +from .codegen import Target as _Target from warnings import warn diff --git a/src/pystencils/backend/jit/__init__.py b/src/pystencils/jit/__init__.py similarity index 100% rename from src/pystencils/backend/jit/__init__.py rename to src/pystencils/jit/__init__.py diff --git a/src/pystencils/backend/jit/cpu_extension_module.py b/src/pystencils/jit/cpu_extension_module.py similarity index 99% rename from src/pystencils/backend/jit/cpu_extension_module.py rename to src/pystencils/jit/cpu_extension_module.py index 4412f8879a346d5c3635271e9d3700fed041435f..444167f9d06f281533b2b51c61b736c2924c6118 100644 --- a/src/pystencils/backend/jit/cpu_extension_module.py +++ b/src/pystencils/jit/cpu_extension_module.py @@ -14,7 +14,7 @@ from ..kernelfunction import ( KernelFunction, KernelParameter, ) -from ..properties import FieldBasePtr, FieldShape, FieldStride +from ...codegen.properties import FieldBasePtr, FieldShape, FieldStride from ..constraints import KernelParamsConstraint from ...types import ( PsType, diff --git a/src/pystencils/backend/jit/gpu_cupy.py b/src/pystencils/jit/gpu_cupy.py similarity index 99% rename from src/pystencils/backend/jit/gpu_cupy.py rename to src/pystencils/jit/gpu_cupy.py index 1dd18767160a626ff7972ebb78f83bb3e64a1efc..2f5753e0528f75c2a01e34a44d06b2d10f96de3d 100644 --- a/src/pystencils/backend/jit/gpu_cupy.py +++ b/src/pystencils/jit/gpu_cupy.py @@ -8,7 +8,7 @@ try: except ImportError: HAVE_CUPY = False -from ...target import Target +from ...codegen import Target from ...field import FieldType from ...types import PsType @@ -18,7 +18,7 @@ from ..kernelfunction import ( GpuKernelFunction, KernelParameter, ) -from ..properties import FieldShape, FieldStride, FieldBasePtr +from ...codegen.properties import 
FieldShape, FieldStride, FieldBasePtr from ..emission import emit_code from ...types import PsStructType diff --git a/src/pystencils/backend/jit/jit.py b/src/pystencils/jit/jit.py similarity index 97% rename from src/pystencils/backend/jit/jit.py rename to src/pystencils/jit/jit.py index 2d091c4a009f27ba1d1efb2e7bab37021ff001dd..250bba2401ff94e97e8d94f91478d3ad28ec9395 100644 --- a/src/pystencils/backend/jit/jit.py +++ b/src/pystencils/jit/jit.py @@ -4,7 +4,7 @@ from abc import ABC, abstractmethod if TYPE_CHECKING: from ..kernelfunction import KernelFunction, KernelParameter - from ...target import Target + from ...codegen.target import Target class JitError(Exception): diff --git a/src/pystencils/backend/jit/legacy_cpu.py b/src/pystencils/jit/legacy_cpu.py similarity index 100% rename from src/pystencils/backend/jit/legacy_cpu.py rename to src/pystencils/jit/legacy_cpu.py diff --git a/src/pystencils/backend/jit/msvc_detection.py b/src/pystencils/jit/msvc_detection.py similarity index 100% rename from src/pystencils/backend/jit/msvc_detection.py rename to src/pystencils/jit/msvc_detection.py diff --git a/src/pystencils/kernel_decorator.py b/src/pystencils/kernel_decorator.py index a3590d3a4bdcddb43aba31ddf943206ba7e47f84..4e18d7245ba89c3d891679e68f39208b7dae032d 100644 --- a/src/pystencils/kernel_decorator.py +++ b/src/pystencils/kernel_decorator.py @@ -7,7 +7,7 @@ import sympy as sp from .assignment import Assignment from .sympyextensions import SymbolCreator -from pystencils.config import CreateKernelConfig +from .codegen import CreateKernelConfig __all__ = ['kernel', 'kernel_config'] diff --git a/src/pystencils/kernelcreation.py b/src/pystencils/kernelcreation.py index e718f16178415eb102e6ffc01e1a35d2356aa137..9bf3eaf6756831e0afcaa6650e518ff36101bd65 100644 --- a/src/pystencils/kernelcreation.py +++ b/src/pystencils/kernelcreation.py @@ -1,420 +1,16 @@ -from __future__ import annotations +from .codegen import Target +from .codegen import create_kernel as 
_create_kernel -from typing import cast, Sequence -from dataclasses import dataclass, replace +from warnings import warn -from .target import Target -from .config import CreateKernelConfig, OpenMpConfig, VectorizationConfig, AUTO -from .backend import KernelFunction -from .types import create_numeric_type, PsIntegerType, PsScalarType -from .backend.ast import PsAstNode -from .backend.ast.structural import PsBlock, PsLoop -from .backend.kernelcreation import ( - KernelCreationContext, - KernelAnalysis, - FreezeExpressions, - Typifier, +warn( + "Importing anything from `pystencils.kernelcreation` is deprecated and the module will be removed in pystencils 2.1. " + "Import from `pystencils` instead.", + FutureWarning ) -from .backend.constants import PsConstant -from .backend.kernelcreation.iteration_space import ( - create_sparse_iteration_space, - create_full_iteration_space, - FullIterationSpace, -) -from .backend.platforms import Platform, GenericCpu, GenericVectorCpu, GenericGpu -from .backend.exceptions import VectorizationError - -from .backend.transformations import ( - EliminateConstants, - LowerToC, - SelectFunctions, - CanonicalizeSymbols, - HoistLoopInvariantDeclarations, -) -from .backend.kernelfunction import ( - create_cpu_kernel_function, - create_gpu_kernel_function, -) - -from .simp import AssignmentCollection -from sympy.codegen.ast import AssignmentBase - - -__all__ = ["create_kernel"] - - -def create_kernel( - assignments: AssignmentCollection | Sequence[AssignmentBase] | AssignmentBase, - config: CreateKernelConfig | None = None, - **kwargs, -) -> KernelFunction: - """Create a kernel function from a set of assignments. - - Args: - assignments: The kernel's sequence of assignments, expressed using SymPy - config: The configuration for the kernel translator - kwargs: If ``config`` is not set, it is created from the keyword arguments; - if it is set, its option will be overridden by any keyword arguments. 
- - Returns: - The numerical kernel in pystencil's internal representation, ready to be - exported or compiled - """ - - if not config: - config = CreateKernelConfig() - - if kwargs: - config = replace(config, **kwargs) - - driver = DefaultKernelCreationDriver(config) - return driver(assignments) - - -def get_driver(cfg: CreateKernelConfig, *, retain_intermediates: bool = False): - return DefaultKernelCreationDriver(cfg, retain_intermediates) - - -class DefaultKernelCreationDriver: - def __init__(self, cfg: CreateKernelConfig, retain_intermediates: bool = False): - self._cfg = cfg - - idx_dtype = create_numeric_type(self._cfg.index_dtype) - assert isinstance(idx_dtype, PsIntegerType) - - self._ctx = KernelCreationContext( - default_dtype=create_numeric_type(self._cfg.default_dtype), - index_dtype=idx_dtype, - ) - - self._target = self._cfg.get_target() - self._platform = self._get_platform() - - if retain_intermediates: - self._intermediates = CodegenIntermediates() - else: - self._intermediates = None - - @property - def intermediates(self) -> CodegenIntermediates | None: - return self._intermediates - - def __call__( - self, - assignments: AssignmentCollection | Sequence[AssignmentBase] | AssignmentBase, - ): - kernel_body = self.parse_kernel_body(assignments) - - match self._platform: - case GenericCpu(): - kernel_ast = self._platform.materialize_iteration_space( - kernel_body, self._ctx.get_iteration_space() - ) - case GenericGpu(): - kernel_ast, gpu_threads = self._platform.materialize_iteration_space( - kernel_body, self._ctx.get_iteration_space() - ) - - if self._intermediates is not None: - self._intermediates.materialized_ispace = kernel_ast.clone() - - # Fold and extract constants - elim_constants = EliminateConstants(self._ctx, extract_constant_exprs=True) - kernel_ast = cast(PsBlock, elim_constants(kernel_ast)) - - if self._intermediates is not None: - self._intermediates.constants_eliminated = kernel_ast.clone() - - # Target-Specific optimizations - if 
self._cfg.target.is_cpu(): - kernel_ast = self._transform_for_cpu(kernel_ast) - - # Note: After this point, the AST may contain intrinsics, so type-dependent - # transformations cannot be run any more - - # Lowering - lower_to_c = LowerToC(self._ctx) - kernel_ast = cast(PsBlock, lower_to_c(kernel_ast)) - - select_functions = SelectFunctions(self._platform) - kernel_ast = cast(PsBlock, select_functions(kernel_ast)) - - if self._intermediates is not None: - self._intermediates.lowered = kernel_ast.clone() - - # Late canonicalization pass: Canonicalize new symbols introduced by LowerToC - - canonicalize = CanonicalizeSymbols(self._ctx, True) - kernel_ast = cast(PsBlock, canonicalize(kernel_ast)) - - if self._cfg.target.is_cpu(): - return create_cpu_kernel_function( - self._ctx, - self._platform, - kernel_ast, - self._cfg.function_name, - self._cfg.target, - self._cfg.get_jit(), - ) - else: - return create_gpu_kernel_function( - self._ctx, - self._platform, - kernel_ast, - gpu_threads, - self._cfg.function_name, - self._cfg.target, - self._cfg.get_jit(), - ) - - def parse_kernel_body( - self, - assignments: AssignmentCollection | Sequence[AssignmentBase] | AssignmentBase, - ) -> PsBlock: - if isinstance(assignments, AssignmentBase): - assignments = [assignments] - - if not isinstance(assignments, AssignmentCollection): - assignments = AssignmentCollection(assignments) # type: ignore - - _ = _parse_simplification_hints(assignments) - - analysis = KernelAnalysis( - self._ctx, - not self._cfg.skip_independence_check, - not self._cfg.allow_double_writes, - ) - analysis(assignments) - - if self._cfg.index_field is not None: - ispace = create_sparse_iteration_space( - self._ctx, assignments, index_field=self._cfg.index_field - ) - else: - gls = self._cfg.ghost_layers - islice = self._cfg.iteration_slice - - if gls is None and islice is None: - gls = AUTO - - ispace = create_full_iteration_space( - self._ctx, - assignments, - ghost_layers=gls, - iteration_slice=islice, - ) - 
- self._ctx.set_iteration_space(ispace) - - freeze = FreezeExpressions(self._ctx) - kernel_body = freeze(assignments) - - typify = Typifier(self._ctx) - kernel_body = typify(kernel_body) - - if self._intermediates is not None: - self._intermediates.parsed_body = kernel_body.clone() - - return kernel_body - def _transform_for_cpu(self, kernel_ast: PsBlock): - canonicalize = CanonicalizeSymbols(self._ctx, True) - kernel_ast = cast(PsBlock, canonicalize(kernel_ast)) - if self._intermediates is not None: - self._intermediates.cpu_canonicalize = kernel_ast.clone() - - hoist_invariants = HoistLoopInvariantDeclarations(self._ctx) - kernel_ast = cast(PsBlock, hoist_invariants(kernel_ast)) - - if self._intermediates is not None: - self._intermediates.cpu_hoist_invariants = kernel_ast.clone() - - cpu_cfg = self._cfg.cpu_optim - - if cpu_cfg is None: - return kernel_ast - - if cpu_cfg.loop_blocking: - raise NotImplementedError("Loop blocking not implemented yet.") - - kernel_ast = self._vectorize(kernel_ast) - - if cpu_cfg.openmp is not False: - from .backend.transformations import AddOpenMP - - params = ( - cpu_cfg.openmp - if isinstance(cpu_cfg.openmp, OpenMpConfig) - else OpenMpConfig() - ) - add_omp = AddOpenMP(self._ctx, params) - kernel_ast = cast(PsBlock, add_omp(kernel_ast)) - - if self._intermediates is not None: - self._intermediates.cpu_openmp = kernel_ast.clone() - - if cpu_cfg.use_cacheline_zeroing: - raise NotImplementedError("CL-zeroing not implemented yet") - - return kernel_ast - - def _vectorize(self, kernel_ast: PsBlock) -> PsBlock: - assert self._cfg.cpu_optim is not None - vec_config = self._cfg.cpu_optim.get_vectorization_config() - if vec_config is None: - return kernel_ast - - from .backend.transformations import LoopVectorizer, SelectIntrinsics - - assert isinstance(self._platform, GenericVectorCpu) - - ispace = self._ctx.get_iteration_space() - if not isinstance(ispace, FullIterationSpace): - raise VectorizationError( - "Unable to vectorize kernel: 
The kernel is not using a dense iteration space." - ) - - inner_loop_coord = ispace.loop_order[-1] - inner_loop_dim = ispace.dimensions[inner_loop_coord] - - # Apply stride (TODO: and alignment) assumptions - if vec_config.assume_inner_stride_one: - for field in self._ctx.fields: - buf = self._ctx.get_buffer(field) - inner_stride = buf.strides[inner_loop_coord] - if isinstance(inner_stride, PsConstant): - if inner_stride.value != 1: - raise VectorizationError( - f"Unable to apply assumption 'assume_inner_stride_one': " - f"Field {field} has fixed stride {inner_stride} " - f"set in the inner coordinate {inner_loop_coord}." - ) - else: - buf.strides[inner_loop_coord] = PsConstant(1, buf.index_type) - # TODO: Communicate assumption to runtime system via a precondition - - # Call loop vectorizer - if vec_config.lanes is None: - lanes = VectorizationConfig.default_lanes( - self._target, cast(PsScalarType, self._ctx.default_dtype) - ) - else: - lanes = vec_config.lanes - - vectorizer = LoopVectorizer(self._ctx, lanes) - - def loop_predicate(loop: PsLoop): - return loop.counter.symbol == inner_loop_dim.counter - - kernel_ast = vectorizer.vectorize_select_loops(kernel_ast, loop_predicate) - - if self._intermediates is not None: - self._intermediates.cpu_vectorize = kernel_ast.clone() - - select_intrin = SelectIntrinsics(self._ctx, self._platform) - kernel_ast = cast(PsBlock, select_intrin(kernel_ast)) - - if self._intermediates is not None: - self._intermediates.cpu_select_intrins = kernel_ast.clone() - - return kernel_ast - - def _get_platform(self) -> Platform: - if Target._CPU in self._target: - if Target._X86 in self._target: - from .backend.platforms.x86 import X86VectorArch, X86VectorCpu - - arch: X86VectorArch - - if Target._SSE in self._target: - arch = X86VectorArch.SSE - elif Target._AVX in self._target: - arch = X86VectorArch.AVX - elif Target._AVX512 in self._target: - if Target._FP16 in self._target: - arch = X86VectorArch.AVX512_FP16 - else: - arch = 
X86VectorArch.AVX512 - else: - assert False, "unreachable code" - - return X86VectorCpu(self._ctx, arch) - elif self._target == Target.GenericCPU: - return GenericCpu(self._ctx) - else: - raise NotImplementedError( - f"No platform is currently available for CPU target {self._target}" - ) - - elif Target._GPU in self._target: - match self._target: - case Target.SYCL: - from .backend.platforms import SyclPlatform - - return SyclPlatform(self._ctx, self._cfg.gpu_indexing) - case Target.CUDA: - from .backend.platforms import CudaPlatform - - return CudaPlatform(self._ctx, self._cfg.gpu_indexing) - - raise NotImplementedError( - f"Code generation for target {self._target} not implemented" - ) - - -@dataclass -class StageResult: - ast: PsAstNode - label: str - - -class StageResultSlot: - def __init__(self, description: str | None = None): - self._description = description - self._name: str - self._lookup: str - - def __set_name__(self, owner, name: str): - self._name = name - self._lookup = f"_{name}" - - def __get__(self, obj, objtype=None) -> StageResult | None: - if obj is None: - return None - - ast = getattr(obj, self._lookup, None) - if ast is not None: - descr = self._name if self._description is None else self._description - return StageResult(ast, descr) - else: - return None - - def __set__(self, obj, val: PsAstNode | None): - setattr(obj, self._lookup, val) - - -class CodegenIntermediates: - """Intermediate results produced by the code generator.""" - - parsed_body = StageResultSlot("Freeze & Type Deduction") - materialized_ispace = StageResultSlot("Iteration Space Materialization") - constants_eliminated = StageResultSlot("Constant Elimination") - cpu_canonicalize = StageResultSlot("CPU: Symbol Canonicalization") - cpu_hoist_invariants = StageResultSlot("CPU: Hoisting of Loop Invariants") - cpu_vectorize = StageResultSlot("CPU: Vectorization") - cpu_select_intrins = StageResultSlot("CPU: Intrinsics Selection") - cpu_openmp = StageResultSlot("CPU: OpenMP 
Instrumentation") - lowered = StageResultSlot("C Language Lowering") - - @property - def available_stages(self) -> Sequence[StageResult]: - all_results: list[StageResult | None] = [ - getattr(self, name) - for name, slot in CodegenIntermediates.__dict__.items() - if isinstance(slot, StageResultSlot) - ] - return tuple(filter(lambda r: r is not None, all_results)) # type: ignore +create_kernel = _create_kernel def create_staggered_kernel( @@ -424,12 +20,3 @@ def create_staggered_kernel( "Staggered kernels are not yet implemented for pystencils 2.0" ) - -# Internals - - -def _parse_simplification_hints(ac: AssignmentCollection): - if "split_groups" in ac.simplification_hints: - raise NotImplementedError( - "Loop splitting was requested, but is not implemented yet" - ) diff --git a/tests/_todo/test_vectorization.py b/tests/_todo/test_vectorization.py index fd416ab4cd05c7b8891aae4da91cc9aeae425698..de71209ff28215ce28d3b625342d68b420e94f09 100644 --- a/tests/_todo/test_vectorization.py +++ b/tests/_todo/test_vectorization.py @@ -2,7 +2,6 @@ import numpy as np import pytest -import pystencils.config import sympy as sp import pystencils as ps @@ -141,7 +140,7 @@ def test_aligned_and_nt_stores(openmp, instruction_set=instruction_set): 'assume_inner_stride_one': True} update_rule = [ps.Assignment(f.center(), 0.25 * (g[-1, 0] + g[1, 0] + g[0, -1] + g[0, 1]))] # Without the base pointer spec, the inner store is not aligned - config = pystencils.config.CreateKernelConfig(target=dh.default_target, cpu_vectorize_info=opt, cpu_openmp=openmp) + config = ps.CreateKernelConfig(target=dh.default_target, cpu_vectorize_info=opt, cpu_openmp=openmp) ast = ps.create_kernel(update_rule, config=config) if instruction_set in ['sse'] or instruction_set.startswith('avx'): assert 'stream' in ast.instruction_set @@ -166,7 +165,7 @@ def test_nt_stores_symbolic_size(instruction_set=instruction_set): update_rule = [ps.Assignment(f.center(), 0.0), ps.Assignment(g.center(), 0.0)] opt = 
{'instruction_set': instruction_set, 'assume_aligned': True, 'nontemporal': True, 'assume_inner_stride_one': True} - config = pystencils.config.CreateKernelConfig(target=Target.CPU, cpu_vectorize_info=opt) + config = ps.CreateKernelConfig(target=Target.CPU, cpu_vectorize_info=opt) ast = ps.create_kernel(update_rule, config=config) # ps.show_code(ast) ast.compile() @@ -187,7 +186,7 @@ def test_inplace_update(instruction_set=instruction_set): f1 @= 2 * s.tmp0 f2 @= 2 * s.tmp0 - config = pystencils.config.CreateKernelConfig(cpu_vectorize_info={'instruction_set': instruction_set}) + config = ps.CreateKernelConfig(cpu_vectorize_info={'instruction_set': instruction_set}) ast = ps.create_kernel(update_rule, config=config) kernel = ast.compile() kernel(f=arr) @@ -379,7 +378,7 @@ def test_issue40(*_): eq = [ps.Assignment(sp.Symbol('rho'), 1.0), ps.Assignment(src[0, 0](0), sp.Rational(4, 9) * sp.Symbol('rho'))] - config = pystencils.config.CreateKernelConfig(target=Target.CPU, cpu_vectorize_info=opt, data_type='float64') + config = ps.CreateKernelConfig(target=Target.CPU, cpu_vectorize_info=opt, data_type='float64') ast = ps.create_kernel(eq, config=config) code = ps.get_code_str(ast) diff --git a/tests/_todo/test_vectorization_specific.py b/tests/_todo/test_vectorization_specific.py index d1930a07aaf990997766d19e9263a4a637a86bef..0f2c68a4de3711a3f7920fb0b964c35705b45e4e 100644 --- a/tests/_todo/test_vectorization_specific.py +++ b/tests/_todo/test_vectorization_specific.py @@ -2,7 +2,6 @@ import pytest import numpy as np -import pystencils.config import sympy as sp import pystencils as ps @@ -30,7 +29,7 @@ def test_vectorisation_varying_arch(instruction_set): f1 @= 2 * s.tmp0 f2 @= 2 * s.tmp0 - config = pystencils.config.CreateKernelConfig(cpu_vectorize_info={'instruction_set': instruction_set}) + config = ps.CreateKernelConfig(cpu_vectorize_info={'instruction_set': instruction_set}) ast = ps.create_kernel(update_rule, config=config) kernel = ast.compile() kernel(f=arr) @@ 
-49,7 +48,7 @@ def test_vectorized_abs(instruction_set, dtype): f, g = ps.fields(f=arr, g=arr) update_rule = [ps.Assignment(g.center(), sp.Abs(f.center()))] - config = pystencils.config.CreateKernelConfig(cpu_vectorize_info={'instruction_set': instruction_set}) + config = ps.CreateKernelConfig(cpu_vectorize_info={'instruction_set': instruction_set}) ast = ps.create_kernel(update_rule, config=config) func = ast.compile() @@ -66,20 +65,20 @@ def test_strided(instruction_set, dtype): if 'storeS' not in get_vector_instruction_set(dtype, instruction_set) \ and instruction_set not in ['avx512', 'avx512vl', 'rvv'] and not instruction_set.startswith('sve'): with pytest.warns(UserWarning) as warn: - config = pystencils.config.CreateKernelConfig(cpu_vectorize_info={'instruction_set': instruction_set}, + config = ps.CreateKernelConfig(cpu_vectorize_info={'instruction_set': instruction_set}, default_number_float=dtype) ast = ps.create_kernel(update_rule, config=config) assert 'Could not vectorize loop' in warn[0].message.args[0] else: with pytest.warns(None) as warn: - config = pystencils.config.CreateKernelConfig(cpu_vectorize_info={'instruction_set': instruction_set}, + config = ps.CreateKernelConfig(cpu_vectorize_info={'instruction_set': instruction_set}, default_number_float=dtype) ast = ps.create_kernel(update_rule, config=config) assert len(warn) == 0 # ps.show_code(ast) func = ast.compile() - ref_config = pystencils.config.CreateKernelConfig(default_number_float=dtype) + ref_config = ps.CreateKernelConfig(default_number_float=dtype) ref_func = ps.create_kernel(update_rule, config=ref_config).compile() # For some reason other array creations fail on the emulated ppc pipeline @@ -115,7 +114,7 @@ def test_alignment_and_correct_ghost_layers(gl_field, gl_kernel, instruction_set update_rule = ps.Assignment(dst[0, 0], src[0, 0]) opt = {'instruction_set': instruction_set, 'assume_aligned': True, 'nontemporal': True, 'assume_inner_stride_one': True} - config = 
pystencils.config.CreateKernelConfig(target=dh.default_target, + config = ps.CreateKernelConfig(target=dh.default_target, cpu_vectorize_info=opt, ghost_layers=gl_kernel) ast = ps.create_kernel(update_rule, config=config) kernel = ast.compile() @@ -152,7 +151,7 @@ def test_vectorization_other(instruction_set, function): @pytest.mark.parametrize('instruction_set', supported_instruction_sets) @pytest.mark.parametrize('field_layout', ('fzyx', 'zyxf')) def test_square_root(dtype, instruction_set, field_layout): - config = pystencils.config.CreateKernelConfig(data_type=dtype, + config = ps.CreateKernelConfig(data_type=dtype, default_number_float=dtype, cpu_vectorize_info={'instruction_set': instruction_set, 'assume_inner_stride_one': True, @@ -195,7 +194,7 @@ def test_square_root_2(dtype, instruction_set, padding): @pytest.mark.parametrize('instruction_set', supported_instruction_sets) @pytest.mark.parametrize('padding', (True, False)) def test_pow(dtype, instruction_set, padding): - config = pystencils.config.CreateKernelConfig(data_type=dtype, + config = ps.CreateKernelConfig(data_type=dtype, default_number_float=dtype, cpu_vectorize_info={'instruction_set': instruction_set, 'assume_inner_stride_one': True, diff --git a/tests/frontend/test_simplifications.py b/tests/frontend/test_simplifications.py index 5e1bcb8ed06145f61ba31bcb6dea85a2e7bdbf58..45cde724108fe7578d8ff2dc9b8a2509a9add728 100644 --- a/tests/frontend/test_simplifications.py +++ b/tests/frontend/test_simplifications.py @@ -1,7 +1,6 @@ from sys import version_info as vs import pytest -import pystencils.config import sympy as sp import pystencils as ps @@ -188,7 +187,7 @@ def test_evaluate_constant_terms(target): src[0, 0]: -sp.cos(1) + dst[0, 0] }) - config = pystencils.config.CreateKernelConfig(target=target) + config = ps.CreateKernelConfig(target=target) ast = ps.create_kernel(assignments, config=config) code = ps.get_code_str(ast) assert 'cos(' not in code and 'cosf(' not in code diff --git 
a/tests/kernelcreation/test_sum_prod.py b/tests/kernelcreation/test_sum_prod.py index 9cd638c00300b9e3ff93c81f9ac56f5a6e5890f6..9d61d3bc4bf56c92569b4dbece1446a9395b8222 100644 --- a/tests/kernelcreation/test_sum_prod.py +++ b/tests/kernelcreation/test_sum_prod.py @@ -10,7 +10,6 @@ import pytest import numpy as np -import pystencils.config import sympy as sp import sympy.abc @@ -60,7 +59,7 @@ def test_product(dtype): assignments = ps.AssignmentCollection({x.center(): sum}) - config = pystencils.config.CreateKernelConfig() + config = ps.CreateKernelConfig() ast = ps.create_kernel(assignments, config=config) code = ps.get_code_str(ast) diff --git a/tests/nbackend/kernelcreation/test_context.py b/tests/nbackend/kernelcreation/test_context.py index 384fc93158a9f7aa7ff9911b20382c0b79ed36ee..200c1e34e8ab3ac04fa119491805ef61111062c6 100644 --- a/tests/nbackend/kernelcreation/test_context.py +++ b/tests/nbackend/kernelcreation/test_context.py @@ -6,7 +6,7 @@ from pystencils import Field, TypedSymbol, FieldType, DynamicType from pystencils.backend.kernelcreation import KernelCreationContext from pystencils.backend.constants import PsConstant from pystencils.backend.memory import PsSymbol -from pystencils.backend.properties import FieldShape, FieldStride +from pystencils.codegen.properties import FieldShape, FieldStride from pystencils.backend.exceptions import KernelConstraintsError from pystencils.types.quick import SInt, Fp from pystencils.types import deconstify diff --git a/tests/nbackend/kernelcreation/test_options.py b/tests/nbackend/kernelcreation/test_options.py index 7fa7fc5131c657da7b9faa5fb983330e6fde8964..fefcc98fe62e956aeeba47543667e82bff758ec1 100644 --- a/tests/nbackend/kernelcreation/test_options.py +++ b/tests/nbackend/kernelcreation/test_options.py @@ -2,7 +2,7 @@ import pytest from pystencils.field import Field, FieldType from pystencils.types.quick import * -from pystencils.config import ( +from pystencils.codegen.config import ( CreateKernelConfig, 
PsOptionsError, )