Compare revisions

Changes are shown as if the source revision was being merged into the target revision.
Showing 261 additions and 123 deletions
......@@ -4,14 +4,6 @@
Symbolic Language
*****************
.. toctree::
:maxdepth: 2
:hidden:
field
sympyextensions
Pystencils allows you to define near-arbitrarily complex numerical kernels in its symbolic
language, which is based on the computer algebra system `SymPy <https://www.sympy.org>`_.
The pystencils code generator is able to parse and translate a large portion of SymPy's
......@@ -64,7 +56,7 @@ An assignment collection contains two separate lists of assignments:
into fields.
.. autosummary::
:toctree: autoapi
:toctree: generated
:nosignatures:
:template: autosummary/recursive_class.rst
......
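The symbolic-language page excerpted above builds on SymPy; for reference, a minimal kernel definition in that language looks roughly like this (a sketch using the public `ps.fields` / `ps.Assignment` / `ps.create_kernel` entry points, not part of this diff):

import pystencils as ps

# Two 2D double-precision fields; the assignment averages the four neighbours of src into dst.
src, dst = ps.fields("src, dst: float64[2D]")
update = ps.Assignment(dst.center(), (src[1, 0] + src[-1, 0] + src[0, 1] + src[0, -1]) / 4)

kernel = ps.create_kernel(update).compile()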
......@@ -11,7 +11,7 @@ Type Creation and Conversion
----------------------------
.. autosummary::
:toctree: autoapi
:toctree: generated
:nosignatures:
create_type
......@@ -34,7 +34,7 @@ unless you have very particular needs.
:parts: 1
.. autosummary::
:toctree: autoapi
:toctree: generated
:nosignatures:
:template: autosummary/entire_class.rst
......@@ -82,10 +82,10 @@ Exceptions
.. currentmodule:: pystencils.types
.. autosummary::
:toctree: autoapi
:toctree: generated
:nosignatures:
pystencils.types.PsTypeError
PsTypeError
Implementation Details
......
......@@ -46,12 +46,13 @@ use_cython = [
]
doc = [
'sphinx',
'furo',
'nbsphinx',
'pydata-sphinx-theme==0.15.4',
'sphinx-book-theme==1.1.3', # workaround for https://github.com/executablebooks/sphinx-book-theme/issues/865
'sphinxcontrib-bibtex',
'sphinx_autodoc_typehints',
'pandoc',
'sphinx_design',
'myst-nb'
]
tests = [
'pytest',
......
......@@ -5,6 +5,7 @@ from .defaults import DEFAULTS
from . import fd
from . import stencil as stencil
from .display_utils import get_code_obj, get_code_str, show_code, to_dot
from .inspection import inspect
from .field import Field, FieldType, fields
from .types import create_type, create_numeric_type
from .cache import clear_cache
......@@ -37,7 +38,6 @@ from .sympyextensions.typed_sympy import TypedSymbol, DynamicType
from .sympyextensions import SymbolCreator
from .datahandling import create_data_handling
__all__ = [
"Field",
"FieldType",
......@@ -63,6 +63,7 @@ __all__ = [
"to_dot",
"get_code_obj",
"get_code_str",
"inspect",
"AssignmentCollection",
"Assignment",
"AddAugmentedAssignment",
......
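The new top-level export makes the inspection utility reachable as `ps.inspect`; a hedged sketch of its use (the exact argument it accepts is an assumption here):

import pystencils as ps

src, dst = ps.fields("src, dst: float64[2D]")
kernel = ps.create_kernel(ps.Assignment(dst.center(), 2 * src.center()))
ps.inspect(kernel)  # assumption: renders the kernel's intermediate representation / generated code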
from .base_printer import EmissionError
from .c_printer import emit_code, CAstPrinter
from .ir_printer import emit_ir, IRAstPrinter
__all__ = ["emit_code", "CAstPrinter", "emit_ir", "IRAstPrinter"]
__all__ = ["emit_code", "CAstPrinter", "emit_ir", "IRAstPrinter", "EmissionError"]
......@@ -189,7 +189,7 @@ class BasePrinter(ABC):
pc.indent_level += self._indent_width
interior = "\n".join(self.visit(stmt, pc) for stmt in statements) + "\n"
pc.indent_level -= self._indent_width
return pc.indent("{\n") + interior + pc.indent("}\n")
return pc.indent("{\n") + interior + pc.indent("}")
case PsStatement(expr):
return pc.indent(f"{self.visit(expr, pc)};")
......
......@@ -5,7 +5,7 @@ from pystencils.backend.memory import PsSymbol
from .base_printer import BasePrinter
from ..kernelfunction import KernelFunction
from ...types import PsType, PsArrayType, PsScalarType
from ...types import PsType, PsArrayType, PsScalarType, PsTypeError
from ..ast.expressions import PsBufferAcc
from ..ast.vector import PsVecMemAcc
......@@ -23,7 +23,10 @@ class CAstPrinter(BasePrinter):
def visit(self, node: PsAstNode, pc: PrinterCtx) -> str:
match node:
case PsVecMemAcc():
raise EmissionError("Cannot print vectorized array accesses to C code.")
raise EmissionError(
f"Unable to print C code for vector memory access {node}.\n"
f"Vectorized memory accesses must be mapped to intrinsics before emission."
)
case PsBufferAcc():
raise EmissionError(
......@@ -33,7 +36,7 @@ class CAstPrinter(BasePrinter):
case _:
return super().visit(node, pc)
def _symbol_decl(self, symb: PsSymbol):
dtype = symb.get_dtype()
......@@ -52,11 +55,12 @@ class CAstPrinter(BasePrinter):
def _constant_literal(self, constant: PsConstant):
dtype = constant.get_dtype()
if not isinstance(dtype, PsScalarType):
raise EmissionError(
"Cannot print literals for non-scalar constants."
)
raise EmissionError("Cannot print literals for non-scalar constants.")
return dtype.create_literal(constant.value)
def _type_str(self, dtype: PsType):
return dtype.c_string()
try:
return dtype.c_string()
except PsTypeError:
raise EmissionError(f"Unable to print type {dtype} as a C data type.")
......@@ -59,7 +59,7 @@ class IRAstPrinter(BasePrinter):
stride_code = "" if stride is None else f", stride={stride}"
code = f"vec_load< {lanes}{stride_code} >({ptr_code}, {offset_code})"
code = f"vec_memacc< {lanes}{stride_code} >({ptr_code}, {offset_code})"
return pc.parenthesize(code, Ops.Subscript)
case PsVecBroadcast(lanes, operand):
......
......@@ -41,6 +41,7 @@ class CupyKernelWrapper(KernelWrapper):
self._kfunc: GpuKernelFunction = kfunc
self._raw_kernel = raw_kernel
self._block_size = block_size
self._num_blocks: tuple[int, int, int] | None = None
self._args_cache: dict[Any, tuple] = dict()
@property
......@@ -59,6 +60,14 @@ class CupyKernelWrapper(KernelWrapper):
def block_size(self, bs: tuple[int, int, int]):
self._block_size = bs
@property
def num_blocks(self) -> tuple[int, int, int] | None:
return self._num_blocks
@num_blocks.setter
def num_blocks(self, nb: tuple[int, int, int] | None):
self._num_blocks = nb
def __call__(self, **kwargs: Any):
kernel_args, launch_grid = self._get_cached_args(**kwargs)
device = self._get_device(kernel_args)
......@@ -72,7 +81,7 @@ class CupyKernelWrapper(KernelWrapper):
return devices.pop()
def _get_cached_args(self, **kwargs):
key = (self._block_size,) + tuple((k, id(v)) for k, v in kwargs.items())
key = (self._block_size, self._num_blocks) + tuple((k, id(v)) for k, v in kwargs.items())
if key not in self._args_cache:
args = self._get_args(**kwargs)
......@@ -185,25 +194,36 @@ class CupyKernelWrapper(KernelWrapper):
symbolic_threads_range = self._kfunc.threads_range
threads_range: list[int] = [
evaluate_expression(expr, valuation)
for expr in symbolic_threads_range.num_work_items
]
if self._num_blocks is not None:
launch_grid = LaunchGrid(self._num_blocks, self._block_size)
if symbolic_threads_range.dim < 3:
threads_range += [1] * (3 - symbolic_threads_range.dim)
elif symbolic_threads_range is not None:
threads_range: list[int] = [
evaluate_expression(expr, valuation)
for expr in symbolic_threads_range.num_work_items
]
def div_ceil(a, b):
return a // b if a % b == 0 else a // b + 1
if symbolic_threads_range.dim < 3:
threads_range += [1] * (3 - symbolic_threads_range.dim)
# TODO: Refine this?
grid_size = tuple(
div_ceil(threads, tpb)
for threads, tpb in zip(threads_range, self._block_size)
)
assert len(grid_size) == 3
def div_ceil(a, b):
return a // b if a % b == 0 else a // b + 1
# TODO: Refine this?
num_blocks = tuple(
div_ceil(threads, tpb)
for threads, tpb in zip(threads_range, self._block_size)
)
assert len(num_blocks) == 3
launch_grid = LaunchGrid(num_blocks, self._block_size)
launch_grid = LaunchGrid(grid_size, self._block_size)
else:
raise JitError(
"Unable to determine launch grid for GPU kernel invocation: "
"No manual grid size was specified, and the number of threads could not "
"be determined automatically."
)
return tuple(args), launch_grid
......
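With the `num_blocks` property added above, the launch grid of a compiled GPU kernel can be pinned manually instead of being derived from the iteration space; a rough usage sketch (only `block_size` and `num_blocks` are taken from this diff, the surrounding calls are illustrative):

kernel = ps.create_kernel(update, config=gpu_cfg).compile()  # assumed: a CUDA-targeted config
kernel.block_size = (128, 2, 1)   # threads per block
kernel.num_blocks = (16, 8, 1)    # blocks per grid; bypasses the automatic grid-size computation
kernel(src=src_arr, dst=dst_arr)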
......@@ -139,6 +139,13 @@ class AstFactory:
self._typify(self.parse_index(iter_slice) + self.parse_index(1))
)
step = self.parse_index(1)
if normalize_to is not None:
upper_limit = self.parse_index(normalize_to)
if isinstance(start, PsConstantExpr) and start.constant.value < 0:
start = fold(self._typify(upper_limit.clone() + start))
stop = fold(self._typify(upper_limit.clone() + stop))
else:
start = self._parse_any_index(
iter_slice.start if iter_slice.start is not None else 0
......@@ -157,21 +164,21 @@ class AstFactory:
f"Invalid value for `slice.step`: {step.constant.value}"
)
if normalize_to is not None:
upper_limit = self.parse_index(normalize_to)
if isinstance(start, PsConstantExpr) and start.constant.value < 0:
start = fold(self._typify(upper_limit.clone() + start))
if normalize_to is not None:
upper_limit = self.parse_index(normalize_to)
if isinstance(start, PsConstantExpr) and start.constant.value < 0:
start = fold(self._typify(upper_limit.clone() + start))
if stop is None:
stop = upper_limit
elif isinstance(stop, PsConstantExpr) and stop.constant.value < 0:
stop = fold(self._typify(upper_limit.clone() + stop))
if stop is None:
stop = upper_limit
elif isinstance(stop, PsConstantExpr) and stop.constant.value < 0:
stop = fold(self._typify(upper_limit.clone() + stop))
elif stop is None:
raise ValueError(
"Cannot parse a slice with `stop == None` if no normalization limit is given"
)
elif stop is None:
raise ValueError(
"Cannot parse a slice with `stop == None` if no normalization limit is given"
)
assert stop is not None # for mypy
return start, stop, step
......
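The reordered branches above normalise negative (and missing) slice bounds against the given upper limit before the step is validated; in plain-Python terms, the intended arithmetic for constant bounds is roughly:

def normalize_slice(slc: slice, upper_limit: int) -> tuple[int, int, int]:
    # Illustrative only: mirrors what the AstFactory does for constant start/stop values.
    start = slc.start if slc.start is not None else 0
    stop = slc.stop if slc.stop is not None else upper_limit
    step = slc.step if slc.step is not None else 1
    if start < 0:
        start += upper_limit
    if stop < 0:
        stop += upper_limit
    return start, stop, step

assert normalize_slice(slice(2, -2), 64) == (2, 62, 1)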
......@@ -6,6 +6,7 @@ from functools import reduce
from operator import mul
from ...defaults import DEFAULTS
from ...config import _AUTO_TYPE, AUTO
from ...simp import AssignmentCollection
from ...field import Field, FieldType
......@@ -195,21 +196,25 @@ class FullIterationSpace(IterationSpace):
def dimensions(self):
"""The dimensions of this iteration space"""
return self._dimensions
@property
def counters(self) -> tuple[PsSymbol, ...]:
return tuple(dim.counter for dim in self._dimensions)
@property
def lower(self):
def lower(self) -> tuple[PsExpression, ...]:
"""Lower limits of each dimension"""
return (dim.start for dim in self._dimensions)
return tuple(dim.start for dim in self._dimensions)
@property
def upper(self):
def upper(self) -> tuple[PsExpression, ...]:
"""Upper limits of each dimension"""
return (dim.stop for dim in self._dimensions)
return tuple(dim.stop for dim in self._dimensions)
@property
def steps(self):
def steps(self) -> tuple[PsExpression, ...]:
"""Iteration steps of each dimension"""
return (dim.step for dim in self._dimensions)
return tuple(dim.step for dim in self._dimensions)
@property
def archetype_field(self) -> Field | None:
......@@ -412,7 +417,7 @@ def create_sparse_iteration_space(
def create_full_iteration_space(
ctx: KernelCreationContext,
assignments: AssignmentCollection,
ghost_layers: None | int | Sequence[int | tuple[int, int]] = None,
ghost_layers: None | _AUTO_TYPE | int | Sequence[int | tuple[int, int]] = None,
iteration_slice: None | int | slice | tuple[int | slice, ...] = None,
) -> IterationSpace:
assert not ctx.fields.index_fields
......@@ -452,16 +457,7 @@ def create_full_iteration_space(
# Otherwise, if an iteration slice was specified, use that
# Otherwise, use the inferred ghost layers
if ghost_layers is not None:
ctx.metadata["ghost_layers"] = ghost_layers
return FullIterationSpace.create_with_ghost_layers(
ctx, ghost_layers, archetype_field
)
elif iteration_slice is not None:
return FullIterationSpace.create_from_slice(
ctx, iteration_slice, archetype_field
)
else:
if ghost_layers is AUTO:
if len(domain_field_accesses) > 0:
inferred_gls = max(
[fa.required_ghost_layers for fa in domain_field_accesses]
......@@ -473,3 +469,15 @@ def create_full_iteration_space(
return FullIterationSpace.create_with_ghost_layers(
ctx, inferred_gls, archetype_field
)
elif ghost_layers is not None:
assert not isinstance(ghost_layers, _AUTO_TYPE)
ctx.metadata["ghost_layers"] = ghost_layers
return FullIterationSpace.create_with_ghost_layers(
ctx, ghost_layers, archetype_field
)
elif iteration_slice is not None:
return FullIterationSpace.create_from_slice(
ctx, iteration_slice, archetype_field
)
else:
assert False, "unreachable code"
......@@ -259,10 +259,12 @@ def create_cpu_kernel_function(
class GpuKernelFunction(KernelFunction):
"""Internal representation of a kernel function targeted at CUDA GPUs."""
def __init__(
self,
body: PsBlock,
threads_range: GpuThreadsRange,
threads_range: GpuThreadsRange | None,
target: Target,
name: str,
parameters: Sequence[KernelParameter],
......@@ -276,7 +278,8 @@ class GpuKernelFunction(KernelFunction):
self._threads_range = threads_range
@property
def threads_range(self) -> GpuThreadsRange:
def threads_range(self) -> GpuThreadsRange | None:
"""Object exposing the total size of the launch grid this kernel expects to be executed with."""
return self._threads_range
......@@ -284,14 +287,16 @@ def create_gpu_kernel_function(
ctx: KernelCreationContext,
platform: Platform,
body: PsBlock,
threads_range: GpuThreadsRange,
threads_range: GpuThreadsRange | None,
function_name: str,
target_spec: Target,
jit: JitBase,
):
undef_symbols = collect_undefined_symbols(body)
for threads in threads_range.num_work_items:
undef_symbols |= collect_undefined_symbols(threads)
if threads_range is not None:
for threads in threads_range.num_work_items:
undef_symbols |= collect_undefined_symbols(threads)
params = _get_function_params(ctx, undef_symbols)
req_headers = _get_headers(ctx, platform, body)
......
from warnings import warn
from ...types import constify
from ..exceptions import MaterializationError
from .generic_gpu import GenericGpu, GpuThreadsRange
......@@ -7,7 +9,7 @@ from ..kernelcreation import (
IterationSpace,
FullIterationSpace,
SparseIterationSpace,
AstFactory
AstFactory,
)
from ..kernelcreation.context import KernelCreationContext
......@@ -43,6 +45,7 @@ GRID_DIM = [
class CudaPlatform(GenericGpu):
"""Platform for CUDA-based GPUs."""
def __init__(
self, ctx: KernelCreationContext, indexing_cfg: GpuIndexingConfig | None = None
......@@ -57,7 +60,7 @@ class CudaPlatform(GenericGpu):
def materialize_iteration_space(
self, body: PsBlock, ispace: IterationSpace
) -> tuple[PsBlock, GpuThreadsRange]:
) -> tuple[PsBlock, GpuThreadsRange | None]:
if isinstance(ispace, FullIterationSpace):
return self._prepend_dense_translation(body, ispace)
elif isinstance(ispace, SparseIterationSpace):
......@@ -112,6 +115,11 @@ class CudaPlatform(GenericGpu):
case MathFunctions.Abs if dtype.width == 16:
cfunc = CFunction(" __habs", arg_types, dtype)
case _:
raise MaterializationError(
f"Cannot materialize call to function {func}"
)
call.function = cfunc
return call
......@@ -123,9 +131,21 @@ class CudaPlatform(GenericGpu):
def _prepend_dense_translation(
self, body: PsBlock, ispace: FullIterationSpace
) -> tuple[PsBlock, GpuThreadsRange]:
) -> tuple[PsBlock, GpuThreadsRange | None]:
dimensions = ispace.dimensions_in_loop_order()
launch_config = GpuThreadsRange.from_ispace(ispace)
if not self._cfg.manual_launch_grid:
try:
threads_range = GpuThreadsRange.from_ispace(ispace)
except MaterializationError as e:
warn(
str(e.args[0])
+ "\nIf this is intended, set `manual_launch_grid=True` in the code generator configuration.",
UserWarning,
)
threads_range = None
else:
threads_range = None
indexing_decls = []
conds = []
......@@ -146,6 +166,8 @@ class CudaPlatform(GenericGpu):
if not self._cfg.omit_range_check:
conds.append(PsLt(ctr, dim.stop))
indexing_decls = indexing_decls[::-1]
if conds:
condition: PsExpression = conds[0]
for cond in conds[1:]:
......@@ -155,7 +177,7 @@ class CudaPlatform(GenericGpu):
body.statements = indexing_decls + body.statements
ast = body
return ast, launch_config
return ast, threads_range
def _prepend_sparse_translation(
self, body: PsBlock, ispace: SparseIterationSpace
......
......@@ -10,6 +10,7 @@ from ..kernelcreation.iteration_space import (
SparseIterationSpace,
)
from .platform import Platform
from ..exceptions import MaterializationError
class GpuThreadsRange:
......@@ -48,6 +49,15 @@ class GpuThreadsRange:
@property
def dim(self) -> int:
return self._dim
def __str__(self) -> str:
rep = "GpuThreadsRange { "
rep += "; ".join(f"{x}: {w}" for x, w in zip("xyz", self._num_work_items))
rep += " }"
return rep
def _repr_html_(self) -> str:
return str(self)
@staticmethod
def _from_full_ispace(ispace: FullIterationSpace) -> GpuThreadsRange:
......@@ -56,6 +66,19 @@ class GpuThreadsRange:
raise NotImplementedError(
f"Cannot create a GPU threads range for an {len(dimensions)}-dimensional iteration space"
)
from ..ast.analysis import collect_undefined_symbols as collect
for dim in dimensions:
symbs = collect(dim.start) | collect(dim.stop) | collect(dim.step)
for ctr in ispace.counters:
if ctr in symbs:
raise MaterializationError(
"Unable to construct GPU threads range for iteration space: "
f"Limits of dimension counter {dim.counter.name} "
f"depend on another dimension's counter {ctr.name}"
)
work_items = [ispace.actual_iterations(dim) for dim in dimensions]
return GpuThreadsRange(work_items)
......@@ -63,6 +86,6 @@ class GpuThreadsRange:
class GenericGpu(Platform):
@abstractmethod
def materialize_iteration_space(
self, block: PsBlock, ispace: IterationSpace
) -> tuple[PsBlock, GpuThreadsRange]:
self, body: PsBlock, ispace: IterationSpace
) -> tuple[PsBlock, GpuThreadsRange | None]:
pass
......@@ -27,7 +27,7 @@ class Platform(ABC):
@abstractmethod
def materialize_iteration_space(
self, block: PsBlock, ispace: IterationSpace
self, body: PsBlock, ispace: IterationSpace
) -> PsBlock | tuple[PsBlock, Any]:
pass
......
......@@ -8,7 +8,7 @@ from ..kernelcreation import KernelCreationContext
from ..constants import PsConstant
from ..ast import PsAstNode
from ..ast.structural import PsLoop, PsBlock, PsDeclaration
from ..ast.expressions import PsExpression
from ..ast.expressions import PsExpression, PsTernary, PsGt
from ..ast.vector import PsVecBroadcast
from ..ast.analysis import collect_undefined_symbols
......@@ -18,7 +18,7 @@ from .rewrite import substitute_symbols
class LoopVectorizer:
"""Vectorize loops.
The loop vectorizer provides methods to vectorize single loops inside an AST
using a given number of vector lanes.
During vectorization, the loop body is transformed using the `AstVectorizer`,
......@@ -64,29 +64,26 @@ class LoopVectorizer:
@overload
def vectorize_select_loops(
self, node: PsBlock, predicate: Callable[[PsLoop], bool]
) -> PsBlock:
...
) -> PsBlock: ...
@overload
def vectorize_select_loops(
self, node: PsLoop, predicate: Callable[[PsLoop], bool]
) -> PsLoop | PsBlock:
...
) -> PsLoop | PsBlock: ...
@overload
def vectorize_select_loops(
self, node: PsAstNode, predicate: Callable[[PsLoop], bool]
) -> PsAstNode:
...
) -> PsAstNode: ...
def vectorize_select_loops(
self, node: PsAstNode, predicate: Callable[[PsLoop], bool]
) -> PsAstNode:
"""Select and vectorize loops from a syntax tree according to a predicate.
Finds each loop inside the given subtree and evaluates ``predicate`` on it.
If ``predicate(loop)`` evaluates to `True`, the loop is vectorized.
Loops nested inside a vectorized loop will not be processed.
Args:
......@@ -139,7 +136,7 @@ class LoopVectorizer:
# Generate vectorized loop body
simd_body = self._vectorize_ast(loop.body, vc)
if vector_ctr in collect_undefined_symbols(simd_body):
simd_body.statements.insert(0, vector_counter_decl)
......@@ -186,20 +183,31 @@ class LoopVectorizer:
trailing_start = self._ctx.get_new_symbol(
f"__{scalar_ctr.name}_trailing_start", scalar_ctr.get_dtype()
)
trailing_start_decl = self._type_fold(
PsDeclaration(
PsExpression.make(trailing_start),
(
PsTernary(
# If at least one vectorized iteration took place...
PsGt(
PsExpression.make(simd_stop),
simd_start.clone(),
),
# start from the smallest non-valid multiple of simd_step, offset from simd_start
(
PsExpression.make(simd_stop)
- simd_start.clone()
- PsExpression.make(PsConstant(1))
(
PsExpression.make(simd_stop)
- simd_start.clone()
- PsExpression.make(PsConstant(1))
)
/ PsExpression.make(simd_step)
+ PsExpression.make(PsConstant(1))
)
/ PsExpression.make(simd_step)
+ PsExpression.make(PsConstant(1))
)
* PsExpression.make(simd_step)
+ simd_start.clone(),
* PsExpression.make(simd_step)
+ simd_start.clone(),
# otherwise start at zero
simd_start.clone(),
),
)
)
......
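The cleaned-up overloads above all funnel into the predicate-based `vectorize_select_loops`; a hedged sketch of driving it (the constructor signature and the attribute path used in the predicate are assumptions, only the predicate-based selection itself appears in this diff):

vectorizer = LoopVectorizer(ctx, 4)  # assumption: kernel creation context and number of SIMD lanes
ast = vectorizer.vectorize_select_loops(
    ast,
    # assumption: the predicate can identify the innermost loop via its counter symbol's name
    lambda loop: loop.counter.symbol.name == "ctr_0",
)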
......@@ -314,7 +314,7 @@ class BoundaryHandling:
def _create_boundary_kernel(self, symbolic_field, symbolic_index_field, boundary_obj):
return create_boundary_kernel(symbolic_field, symbolic_index_field, self.stencil, boundary_obj,
target=self._target,) # cpu_openmp=self._openmp) TODO: replace
target=self._target, cpu_openmp=self._openmp)
def _create_index_fields(self):
dh = self._data_handling
......
......@@ -28,6 +28,19 @@ class PsOptionsError(Exception):
"""Indicates an option clash in the `CreateKernelConfig`."""
class _AUTO_TYPE:
...
AUTO = _AUTO_TYPE()
"""Special value that can be passed to some options for invoking automatic behaviour.
Currently, these options permit `AUTO`:
- `ghost_layers <CreateKernelConfig.ghost_layers>`
"""
@dataclass
class OpenMpConfig:
"""Parameters controlling kernel parallelization using OpenMP."""
......@@ -182,6 +195,14 @@ class GpuIndexingConfig:
block_size: tuple[int, int, int] | None = None
"""Desired block size for the execution of GPU kernels. May be overridden later by the runtime system."""
manual_launch_grid: bool = False
"""Always require a manually specified launch grid when running this kernel.
If set to `True`, the code generator will not attempt to infer the size of
the launch grid from the kernel.
The launch grid will then have to be specified manually at runtime.
"""
sycl_automatic_block_size: bool = True
"""If set to `True` while generating for `Target.SYCL`, let the SYCL runtime decide on the block size.
......@@ -213,32 +234,43 @@ class CreateKernelConfig:
function_name: str = "kernel"
"""Name of the generated function"""
ghost_layers: None | int | Sequence[int | tuple[int, int]] = None
ghost_layers: None | _AUTO_TYPE | int | Sequence[int | tuple[int, int]] = None
"""Specifies the number of ghost layers of the iteration region.
Options:
- `None`: Required ghost layers are inferred from field accesses
- :py:data:`AUTO <pystencils.config.AUTO>`: Required ghost layers are inferred from field accesses
- `int`: A uniform number of ghost layers in each spatial coordinate is applied
- ``Sequence[int, tuple[int, int]]``: Ghost layers are specified for each spatial coordinate.
In each coordinate, a single integer specifies the ghost layers at both the lower and upper iteration limit,
while a pair of integers specifies the lower and upper ghost layers separately.
When manually specifying ghost layers, it is the user's responsibility to avoid out-of-bounds memory accesses.
If ``ghost_layers=None`` is specified, the iteration region may otherwise be set using the `iteration_slice` option.
.. note::
At most one of `ghost_layers`, `iteration_slice`, and `index_field` may be set.
"""
iteration_slice: None | Sequence[slice] = None
iteration_slice: None | int | slice | tuple[int | slice] = None
"""Specifies the kernel's iteration slice.
`iteration_slice` may only be set if ``ghost_layers=None``.
If it is set, a slice must be specified for each spatial coordinate.
TODO: Specification of valid slices and their behaviour
Example:
>>> cfg = CreateKernelConfig(
... iteration_slice=ps.make_slice[3:14, 2:-2]
... )
>>> cfg.iteration_slice
(slice(3, 14, None), slice(2, -2, None))
.. note::
At most one of `ghost_layers`, `iteration_slice`, and `index_field` may be set.
"""
index_field: Field | None = None
"""Index field for a sparse kernel.
If this option is set, a sparse kernel with the given field as index field will be generated.
.. note::
At most one of `ghost_layers`, `iteration_slice`, and `index_field` may be set.
"""
"""Data Types"""
......
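Taken together, the new configuration options shown above can be combined roughly like this (a sketch; `AUTO`, `CreateKernelConfig`, and `GpuIndexingConfig` are taken from this diff, while the import path and the concrete values are assumptions):

import pystencils as ps
from pystencils.config import CreateKernelConfig, GpuIndexingConfig, AUTO

# Infer the required ghost layers from the field accesses:
cfg = CreateKernelConfig(ghost_layers=AUTO)

# ... or restrict the iteration region explicitly instead:
cfg = CreateKernelConfig(ghost_layers=None, iteration_slice=ps.make_slice[3:14, 2:-2])

# Require the launch grid to be specified manually at call time (see the cupy wrapper above):
gpu_cfg = GpuIndexingConfig(block_size=(64, 2, 1), manual_launch_grid=True)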
......@@ -291,7 +291,10 @@ class SerialDataHandling(DataHandling):
def synchronization_function(self, names, stencil=None, target=None, functor=None, **_):
if target is None:
target = self.default_target
assert target in (Target.CPU, Target.GPU)
if not (target.is_cpu() or target == Target.CUDA):
raise ValueError(f"Unsupported target: {target}")
if not hasattr(names, '__len__') or type(names) is str:
names = [names]
......@@ -325,7 +328,7 @@ class SerialDataHandling(DataHandling):
values_per_cell = values_per_cell[0]
if len(filtered_stencil) > 0:
if target == Target.CPU:
if target.is_cpu():
if functor is None:
from pystencils.slicing import get_periodic_boundary_functor
functor = get_periodic_boundary_functor
......
......@@ -988,24 +988,35 @@ def create_numpy_array_with_layout(shape, layout, alignment=False, byte_offset=0
def spatial_layout_string_to_tuple(layout_str: str, dim: int) -> Tuple[int, ...]:
if layout_str in ('fzyx', 'zyxf'):
assert dim <= 3
return tuple(reversed(range(dim)))
if dim <= 0:
raise ValueError("Dimensionality must be positive")
layout_str = layout_str.lower()
if layout_str in ('fzyx', 'f', 'reverse_numpy', 'SoA'):
if layout_str in ('fzyx', 'zyxf', 'soa', 'aos'):
if dim > 3:
raise ValueError(f"Invalid spatial dimensionality for layout descriptor {layout_str}: May be at most 3.")
return tuple(reversed(range(dim)))
if layout_str in ('f', 'reverse_numpy'):
return tuple(reversed(range(dim)))
elif layout_str in ('c', 'numpy', 'AoS'):
elif layout_str in ('c', 'numpy'):
return tuple(range(dim))
raise ValueError("Unknown layout descriptor " + layout_str)
def layout_string_to_tuple(layout_str, dim):
if dim <= 0:
raise ValueError("Dimensionality must be positive")
layout_str = layout_str.lower()
if layout_str == 'fzyx' or layout_str == 'soa':
assert dim <= 4
if dim > 4:
raise ValueError(f"Invalid total dimensionality for layout descriptor {layout_str}: May be at most 4.")
return tuple(reversed(range(dim)))
elif layout_str == 'zyxf' or layout_str == 'aos':
assert dim <= 4
if dim > 4:
raise ValueError(f"Invalid total dimensionality for layout descriptor {layout_str}: May be at most 4.")
return tuple(reversed(range(dim - 1))) + (dim - 1,)
elif layout_str == 'f' or layout_str == 'reverse_numpy':
return tuple(reversed(range(dim)))
......
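The tightened checks above now raise `ValueError` for oversized dimensionalities instead of asserting; the layout mappings themselves are unchanged and follow directly from the code (assuming the functions remain importable from `pystencils.field`):

spatial_layout_string_to_tuple('fzyx', dim=3)  # -> (2, 1, 0)
spatial_layout_string_to_tuple('c', dim=3)     # -> (0, 1, 2)
layout_string_to_tuple('fzyx', dim=4)          # -> (3, 2, 1, 0)
layout_string_to_tuple('zyxf', dim=4)          # -> (2, 1, 0, 3)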