Skip to content
Snippets Groups Projects
Commit 103553f4 authored by Frederik Hennig's avatar Frederik Hennig
Browse files

add CPU optimizer config. Extend various doc comments.

parent ab47a5a4
Branches
Tags
No related merge requests found
Pipeline #64102 failed
......@@ -31,10 +31,15 @@ class PsAstNode(ABC):
@abstractmethod
def clone(self) -> PsAstNode:
    """Perform a deep copy of the AST.

    Implementations must return a new tree that is structurally equal to
    ``self`` but shares no mutable nodes with it.
    """
def structurally_equal(self, other: PsAstNode) -> bool:
"""Check two ASTs for structural equality."""
"""Check two ASTs for structural equality.
By default this method checks the node's type and children.
If an AST node has additional internal state, it MUST override this method.
"""
return (
(type(self) is type(other))
and len(self.children) == len(other.children)
......
......@@ -41,25 +41,20 @@ FieldArrayPair = namedtuple("FieldArrayPair", ("field", "array"))
class KernelCreationContext:
"""Manages the translation process from the SymPy frontend to the backend AST, and collects
all necessary information for the translation.
all necessary information for the translation:
- *Data Types*: The kernel creation context manages the default data types for loop limits
and counters, index calculations, and the typifier.
- *Symbols*: The context maintains a symbol table, keeping track of all symbols encountered
during kernel translation together with their types.
- *Fields and Arrays*: The context collects all fields encountered during code generation,
applies a few consistency checks to them, and manages their associated arrays.
- *Iteration Space*: The context manages the iteration space of the kernel currently being
translated.
- *Constraints*: The context collects all kernel parameter constraints introduced during the
translation process.
- *Required Headers*: The context collects all header files required for the kernel to run.
Data Types
----------
The kernel creation context manages the default data types for loop limits and counters, index calculations,
and the typifier.
Fields and Arrays
------------------
The kernel creation context acts as a factory for mapping fields to arrays.
Iteration Space
---------------
The context manages the iteration space within which the current translation takes place. It may be a sparse
or full iteration space.
"""
def __init__(
......
......@@ -54,6 +54,7 @@ class FreezeExpressions:
- Augmented Assignments
- AddressOf
- Conditionals (+ frontend class)
- Relations (sp.Relational)
- pystencils.integer_functions
- pystencils.sympyextensions.bit_masks
- GPU fast approximations (pystencils.fast_approximation)
......
......@@ -132,17 +132,21 @@ class KernelFunction:
@property
def target(self) -> Target:
    """The hardware target this kernel was generated for; see `pystencils.Target`."""
    return self._target
@property
def name(self) -> str:
    """Name of this kernel function."""
    return self._name

@name.setter
def name(self, n: str) -> None:
    """Rename this kernel function."""
    self._name = n
@property
def function_name(self) -> str:
    """Alias of `name`, retained for backward compatibility."""
    return self._name
@property
def parameters(self) -> tuple[KernelParameter, ...]:
    """The parameters of this kernel function."""
    return self._params
......
......@@ -5,7 +5,7 @@ from .exceptions import PsInternalCompilerError
class PsSymbol:
"""A mutable symbol with name and data type.
Be advised to not create objects of this class directly unless you know what you are doing;
Do not create objects of this class directly unless you know what you are doing;
instead obtain them from a `KernelCreationContext` through `KernelCreationContext.get_symbol`.
This way, the context can keep track of all symbols used in the translation run,
and uniqueness of symbols is ensured.
......@@ -50,4 +50,4 @@ class PsSymbol:
return f"{self._name}: {dtype_str}"
def __repr__(self) -> str:
    """Unambiguous developer representation showing the symbol's name and data type."""
    # The stale `return str(self)` left over from the previous revision made
    # the line below unreachable; only the detailed form is kept.
    return f"PsSymbol({self._name}, {self._dtype})"
from __future__ import annotations
from collections.abc import Collection
from typing import Sequence
from dataclasses import dataclass
......@@ -11,6 +15,90 @@ from .types import PsIntegerType, PsNumericType, PsIeeeFloatType
from .defaults import DEFAULTS
@dataclass
class CpuOptimConfig:
    """Settings controlling the CPU-specific kernel optimizer.

    An error is raised if any option set here requests a capability that the
    CPU selected via `CreateKernelConfig.target` does not provide.
    """

    openmp: bool = False
    """Parallelize the kernel using OpenMP.

    When `True`, OpenMP parallelization is applied to the kernel according to
    the OpenMP-related settings of this configuration.
    """

    vectorize: bool | VectorizationConfig = False
    """Control automatic vectorization.

    Passing a `VectorizationConfig` makes pystencils vectorize the kernel with
    the given options, provided a CPU target with vector capabilities is
    selected. Passing `True` lets pystencils derive suitable vectorization
    options from the chosen CPU target, while `False` disables vectorization
    entirely.
    """

    loop_blocking: None | tuple[int, ...] = None
    """Tile sizes for loop blocking.

    When given, the kernel's loops are tiled using these block sizes.
    """

    use_cacheline_zeroing: bool = False
    """Emit cache-line zeroing instructions.

    When `True` and the selected CPU supports cache-line zeroing, the
    optimizer tries to generate such instructions wherever possible.
    """
@dataclass
class VectorizationConfig:
    """Settings controlling the auto-vectorizer.

    An error is raised if any option set here requests a capability that the
    CPU selected via `CreateKernelConfig.target` does not provide.
    """

    vector_width: int | None = None
    """Target vector register width, in bits.

    An integer value is taken as the desired register width; `None` lets the
    vectorizer pick the widest register width available. An error is raised
    if the selected CPU cannot handle the requested width.
    """

    use_nontemporal_stores: bool | Collection[str | Field] = False
    """Generate nontemporal (streaming) store instructions.

    With `True`, and a CPU that supports streaming stores, every store is
    emitted as a nontemporal store. With a collection of fields (or field
    names), only stores to those fields are emitted as nontemporal.
    """

    assume_aligned: bool = False
    """Assume aligned field base pointers.

    When `True`, the vectorizer assumes that, for each field, the address of
    the first inner entry (past the ghost layers) sits on the required byte
    boundary.
    """

    assume_inner_stride_one: bool = False
    """Assume a unit stride in every field's innermost spatial coordinate.

    When `True`, the vectorizer substitutes one for the innermost spatial
    stride of each field, which enables vectorization. An error is raised if
    a field already carries a fixed innermost stride different from one.
    """
@dataclass
class CreateKernelConfig:
"""Options for create_kernel."""
......@@ -67,6 +155,12 @@ class CreateKernelConfig:
This data type will be applied to all untyped symbols.
"""
cpu_optim: None | CpuOptimConfig = None
"""Configuration of the CPU kernel optimizer.
If this parameter is set while `target` is a non-CPU target, an error will be raised.
"""
def __post_init__(self):
# Check iteration space argument consistency
if (
......@@ -88,6 +182,14 @@ class CreateKernelConfig:
raise PsOptionsError(
"Only fields with `field_type == FieldType.INDEXED` can be specified as `index_field`"
)
# Check optim
if self.cpu_optim is not None:
if not self.target.is_cpu():
raise PsOptionsError(f"`cpu_optim` cannot be set for non-CPU target {self.target}")
if self.cpu_optim.vectorize is not False and not self.target.is_vector_cpu():
raise PsOptionsError(f"Cannot enable auto-vectorization for non-vector CPU target {self.target}")
# Infer JIT
if self.jit is None:
......
......@@ -37,7 +37,8 @@ def create_kernel(
assignments: AssignmentCollection | list[Assignment] | Assignment,
config: CreateKernelConfig = CreateKernelConfig(),
):
"""Create a kernel AST from an assignment collection."""
"""Create a kernel function from an assignment collection."""
ctx = KernelCreationContext(
default_dtype=config.default_dtype, index_dtype=config.index_dtype
)
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment