5557c6cc · 5557c6cc · 5557c6cc · 5557c6cc · 5557c6cc · 5557c6cc
--- a/src/pystencils/inspection.py
+++ b/src/pystencils/inspection.py
+from typing import overload
+
+from .backend.ast import PsAstNode
+from .backend.emission import CAstPrinter, IRAstPrinter, EmissionError
+from .backend.kernelfunction import KernelFunction
+from .kernelcreation import StageResult, CodegenIntermediates
+from abc import ABC, abstractmethod
+
+_UNABLE_TO_DISPLAY_CPP = """
+<div>
+    <b>Unable to display C code for this abstract syntax tree</b>
+    <p>
+    This intermediate abstract syntax tree contains nodes that cannot be
+    printed as valid C code.
+    </p>
+</div>
+"""
+
+_GRAPHVIZ_NOT_IMPLEMENTED = """
+<div>
+    <b>AST Visualization Unavailable</b>
+    <p>
+    AST visualization using GraphViz is not implemented yet.
+    </p>
+</div>
+"""
+
+_ERR_MSG = """
+<div style="font-family: monospace; background-color: #EEEEEE; white-space: nowrap; overflow-x: scroll">
+    {}
+</div>
+"""
+
+
+class CodeInspectionBase(ABC):
+    def __init__(self) -> None:
+        self._ir_printer = IRAstPrinter(annotate_constants=False)
+        self._c_printer = CAstPrinter()
+
+    def _ir_tab(self, ir_obj: PsAstNode | KernelFunction):
+        import ipywidgets as widgets
+
+        ir = self._ir_printer(ir_obj)
+        ir_tab = widgets.HTML(self._highlight_as_cpp(ir))
+        self._apply_tab_layout(ir_tab)
+        return ir_tab
+
+    def _cpp_tab(self, ir_obj: PsAstNode | KernelFunction):
+        import ipywidgets as widgets
+
+        try:
+            cpp = self._c_printer(ir_obj)
+            cpp_tab = widgets.HTML(self._highlight_as_cpp(cpp))
+        except EmissionError as e:
+            cpp_tab = widgets.VBox(
+                children=[
+                    widgets.HTML(_UNABLE_TO_DISPLAY_CPP),
+                    widgets.Accordion(
+                        children=[widgets.HTML(_ERR_MSG.format(e.args[0]))],
+                        titles=["Error Details"],
+                    ),
+                ]
+            )
+        self._apply_tab_layout(cpp_tab)
+        return cpp_tab
+
+    def _graphviz_tab(self, ir_obj: PsAstNode | KernelFunction):
+        import ipywidgets as widgets
+
+        graphviz_tab = widgets.HTML(_GRAPHVIZ_NOT_IMPLEMENTED)
+        self._apply_tab_layout(graphviz_tab)
+        return graphviz_tab
+
+    def _apply_tab_layout(self, tab):
+        tab.layout.display = "inline-block"
+        tab.layout.padding = "0 15pt 0 0"
+
+    def _highlight_as_cpp(self, code: str) -> str:
+        from pygments import highlight
+        from pygments.formatters import HtmlFormatter
+        from pygments.lexers import CppLexer
+
+        formatter = HtmlFormatter(
+            prestyles="white-space: pre;",
+        )
+        html_code = highlight(code, CppLexer(), formatter)
+        return html_code
+
+    def _ipython_display_(self):
+        from IPython.display import display
+
+        display(self._widget())
+
+    @abstractmethod
+    def _widget(self): ...
+
+
+class AstInspection(CodeInspectionBase):
+    """Inspect an abstract syntax tree produced by the code generation backend.
+
+    **Interactive:** This class can be used in Jupyter notebooks to interactively
+    explore an abstract syntax tree.
+    """
+
+    def __init__(
+        self,
+        ast: PsAstNode,
+        show_ir: bool = True,
+        show_cpp: bool = True,
+        show_graph: bool = True,
+    ):
+        super().__init__()
+        self._ast = ast
+        self._show_ir = show_ir
+        self._show_cpp = show_cpp
+        self._show_graph = show_graph
+
+    def _widget(self):
+        import ipywidgets as widgets
+
+        tabs = []
+        if self._show_ir:
+            tabs.append(self._ir_tab(self._ast))
+        if self._show_cpp:
+            tabs.append(self._cpp_tab(self._ast))
+        if self._show_graph:
+            tabs.append(self._graphviz_tab(self._ast))
+
+        tabs = widgets.Tab(children=tabs)
+        tabs.titles = ["IR Code", "C Code", "AST Visualization"]
+
+        tabs.layout.height = "250pt"
+
+        return tabs
+
+
+class KernelInspection(CodeInspectionBase):
+    def __init__(
+        self,
+        kernel: KernelFunction,
+        show_ir: bool = True,
+        show_cpp: bool = True,
+        show_graph: bool = True,
+    ) -> None:
+        super().__init__()
+        self._kernel = kernel
+        self._show_ir = show_ir
+        self._show_cpp = show_cpp
+        self._show_graph = show_graph
+
+    def _widget(self):
+        import ipywidgets as widgets
+
+        tabs = []
+        if self._show_ir:
+            tabs.append(self._ir_tab(self._kernel))
+        if self._show_cpp:
+            tabs.append(self._cpp_tab(self._kernel))
+        if self._show_graph:
+            tabs.append(self._graphviz_tab(self._kernel))
+
+        tabs = widgets.Tab(children=tabs)
+        tabs.titles = ["IR Code", "C Code", "AST Visualization"]
+
+        tabs.layout.height = "250pt"
+
+        return tabs
+
+
+class IntermediatesInspection:
+    def __init__(
+        self,
+        intermediates: CodegenIntermediates,
+        show_ir: bool = True,
+        show_cpp: bool = True,
+        show_graph: bool = True,
+    ):
+        self._intermediates = intermediates
+        self._show_ir = show_ir
+        self._show_cpp = show_cpp
+        self._show_graph = show_graph
+
+    def _ipython_display_(self):
+        from IPython.display import display
+        import ipywidgets as widgets
+
+        stages = self._intermediates.available_stages
+
+        previews: list[AstInspection] = [
+            AstInspection(
+                stage.ast,
+                show_ir=self._show_ir,
+                show_cpp=self._show_cpp,
+                show_graph=self._show_graph,
+            )
+            for stage in stages
+        ]
+        labels: list[str] = [stage.label for stage in stages]
+
+        code_views = [p._widget() for p in previews]
+        for v in code_views:
+            v.layout.width = "100%"
+
+        select_label = widgets.HTML("<div><b>Code Generator Stages</b></div>")
+        select = widgets.Select(options=labels)
+        select.layout.height = "250pt"
+
+        selection_box = widgets.VBox([select_label, select])
+        selection_box.layout.overflow = "visible"
+
+        preview_label = widgets.HTML("<div><b>Preview</b></div>")
+        preview_stack = widgets.Stack(children=code_views)
+        preview_stack.layout.overflow = "hidden"
+
+        preview_box = widgets.VBox([preview_label, preview_stack])
+
+        widgets.jslink((select, "index"), (preview_stack, "selected_index"))
+
+        grid = widgets.GridBox(
+            [selection_box, preview_box],
+            layout=widgets.Layout(grid_template_columns="max-content auto"),
+        )
+
+        display(grid)
+
+
+@overload
+def inspect(obj: PsAstNode): ...
+
+
+@overload
+def inspect(obj: KernelFunction): ...
+
+
+@overload
+def inspect(obj: StageResult): ...
+
+
+@overload
+def inspect(obj: CodegenIntermediates): ...
+
+
+def inspect(obj, show_ir: bool = True, show_cpp: bool = True, show_graph: bool = True):
+    """Interactively inspect various products of the code generator.
+
+    When run inside a Jupyter notebook, this function displays an inspection widget
+    for the following types of objects:
+    - `PsAstNode`
+    - `KernelFunction`
+    - `StageResult`
+    - `CodegenIntermediates`
+    """
+
+    from IPython.display import display
+
+    match obj:
+        case PsAstNode():
+            preview = AstInspection(
+                obj, show_ir=show_ir, show_cpp=show_cpp, show_graph=show_cpp
+            )
+        case KernelFunction():
+            preview = KernelInspection(
+                obj, show_ir=show_ir, show_cpp=show_cpp, show_graph=show_cpp
+            )
+        case StageResult(ast, _):
+            preview = AstInspection(
+                ast, show_ir=show_ir, show_cpp=show_cpp, show_graph=show_cpp
+            )
+        case CodegenIntermediates():
+            preview = IntermediatesInspection(
+                obj, show_ir=show_ir, show_cpp=show_cpp, show_graph=show_cpp
+            )
+        case _:
+            raise ValueError(f"Cannot inspect object of type {type(obj)}")
+
+    display(preview)
--- a/src/pystencils/kernelcreation.py
+++ b/src/pystencils/kernelcreation.py
+from __future__ import annotations
+
 from typing import cast, Sequence
-from dataclasses import replace
+from dataclasses import dataclass, replace

 from .target import Target
 from .config import (
    CreateKernelConfig,
    OpenMpConfig,
    VectorizationConfig,
+    AUTO
 )
 from .backend import KernelFunction
 from .types import create_numeric_type, PsIntegerType, PsScalarType
+from .backend.ast import PsAstNode
 from .backend.ast.structural import PsBlock, PsLoop
 from .backend.kernelcreation import (
    KernelCreationContext,
@@ -72,8 +76,12 @@ def create_kernel(
    return driver(assignments)


+def get_driver(cfg: CreateKernelConfig, *, retain_intermediates: bool = False):
+    return DefaultKernelCreationDriver(cfg, retain_intermediates)
+
+
 class DefaultKernelCreationDriver:
-    def __init__(self, cfg: CreateKernelConfig):
+    def __init__(self, cfg: CreateKernelConfig, retain_intermediates: bool = False):
        self._cfg = cfg

        idx_dtype = create_numeric_type(self._cfg.index_dtype)
@@ -87,59 +95,43 @@ class DefaultKernelCreationDriver:
        self._target = self._cfg.get_target()
        self._platform = self._get_platform()

+        if retain_intermediates:
+            self._intermediates = CodegenIntermediates()
+        else:
+            self._intermediates = None
+
+    @property
+    def intermediates(self) -> CodegenIntermediates | None:
+        return self._intermediates
+
    def __call__(
        self,
        assignments: AssignmentCollection | Sequence[AssignmentBase] | AssignmentBase,
    ):
-        if isinstance(assignments, AssignmentBase):
-            assignments = [assignments]
-
-        if not isinstance(assignments, AssignmentCollection):
-            assignments = AssignmentCollection(assignments)  # type: ignore
-
-        _ = _parse_simplification_hints(assignments)
-
-        analysis = KernelAnalysis(
-            self._ctx,
-            not self._cfg.skip_independence_check,
-            not self._cfg.allow_double_writes,
+        kernel_body = self.parse_kernel_body(
+            assignments
        )
-        analysis(assignments)
-
-        if len(self._ctx.fields.index_fields) > 0 or self._cfg.index_field is not None:
-            ispace = create_sparse_iteration_space(
-                self._ctx, assignments, index_field=self._cfg.index_field
-            )
-        else:
-            ispace = create_full_iteration_space(
-                self._ctx,
-                assignments,
-                ghost_layers=self._cfg.ghost_layers,
-                iteration_slice=self._cfg.iteration_slice,
-            )
-
-        self._ctx.set_iteration_space(ispace)
-
-        freeze = FreezeExpressions(self._ctx)
-        kernel_body = freeze(assignments)
-
-        typify = Typifier(self._ctx)
-        kernel_body = typify(kernel_body)

        match self._platform:
            case GenericCpu():
                kernel_ast = self._platform.materialize_iteration_space(
-                    kernel_body, ispace
+                    kernel_body, self._ctx.get_iteration_space()
                )
            case GenericGpu():
                kernel_ast, gpu_threads = self._platform.materialize_iteration_space(
-                    kernel_body, ispace
+                    kernel_body, self._ctx.get_iteration_space()
                )

+        if self._intermediates is not None:
+            self._intermediates.materialized_ispace = kernel_ast.clone()
+
        #   Fold and extract constants
        elim_constants = EliminateConstants(self._ctx, extract_constant_exprs=True)
        kernel_ast = cast(PsBlock, elim_constants(kernel_ast))

+        if self._intermediates is not None:
+            self._intermediates.constants_eliminated = kernel_ast.clone()
+
        #   Target-Specific optimizations
        if self._cfg.target.is_cpu():
            kernel_ast = self._transform_for_cpu(kernel_ast)
@@ -154,6 +146,9 @@ class DefaultKernelCreationDriver:
        select_functions = SelectFunctions(self._platform)
        kernel_ast = cast(PsBlock, select_functions(kernel_ast))

+        if self._intermediates is not None:
+            self._intermediates.lowered = kernel_ast.clone()
+
        #   Late canonicalization pass: Canonicalize new symbols introduced by LowerToC

        canonicalize = CanonicalizeSymbols(self._ctx, True)
@@ -179,13 +174,69 @@ class DefaultKernelCreationDriver:
                self._cfg.get_jit(),
            )

+    def parse_kernel_body(
+        self,
+        assignments: AssignmentCollection | Sequence[AssignmentBase] | AssignmentBase,
+    ) -> PsBlock:
+        if isinstance(assignments, AssignmentBase):
+            assignments = [assignments]
+
+        if not isinstance(assignments, AssignmentCollection):
+            assignments = AssignmentCollection(assignments)  # type: ignore
+
+        _ = _parse_simplification_hints(assignments)
+
+        analysis = KernelAnalysis(
+            self._ctx,
+            not self._cfg.skip_independence_check,
+            not self._cfg.allow_double_writes,
+        )
+        analysis(assignments)
+
+        if self._cfg.index_field is not None:
+            ispace = create_sparse_iteration_space(
+                self._ctx, assignments, index_field=self._cfg.index_field
+            )
+        else:
+            gls = self._cfg.ghost_layers
+            islice = self._cfg.iteration_slice
+
+            if gls is None and islice is None:
+                gls = AUTO
+
+            ispace = create_full_iteration_space(
+                self._ctx,
+                assignments,
+                ghost_layers=gls,
+                iteration_slice=islice,
+            )
+
+        self._ctx.set_iteration_space(ispace)
+
+        freeze = FreezeExpressions(self._ctx)
+        kernel_body = freeze(assignments)
+
+        typify = Typifier(self._ctx)
+        kernel_body = typify(kernel_body)
+
+        if self._intermediates is not None:
+            self._intermediates.parsed_body = kernel_body.clone()
+
+        return kernel_body
+
    def _transform_for_cpu(self, kernel_ast: PsBlock):
        canonicalize = CanonicalizeSymbols(self._ctx, True)
        kernel_ast = cast(PsBlock, canonicalize(kernel_ast))

+        if self._intermediates is not None:
+            self._intermediates.cpu_canonicalize = kernel_ast.clone()
+
        hoist_invariants = HoistLoopInvariantDeclarations(self._ctx)
        kernel_ast = cast(PsBlock, hoist_invariants(kernel_ast))

+        if self._intermediates is not None:
+            self._intermediates.cpu_hoist_invariants = kernel_ast.clone()
+
        cpu_cfg = self._cfg.cpu_optim

        if cpu_cfg is None:
@@ -207,6 +258,9 @@ class DefaultKernelCreationDriver:
            add_omp = AddOpenMP(self._ctx, params)
            kernel_ast = cast(PsBlock, add_omp(kernel_ast))

+            if self._intermediates is not None:
+                self._intermediates.cpu_openmp = kernel_ast.clone()
+
        if cpu_cfg.use_cacheline_zeroing:
            raise NotImplementedError("CL-zeroing not implemented yet")

@@ -262,9 +316,15 @@ class DefaultKernelCreationDriver:

        kernel_ast = vectorizer.vectorize_select_loops(kernel_ast, loop_predicate)

+        if self._intermediates is not None:
+            self._intermediates.cpu_vectorize = kernel_ast.clone()
+
        select_intrin = SelectIntrinsics(self._ctx, self._platform)
        kernel_ast = cast(PsBlock, select_intrin(kernel_ast))

+        if self._intermediates is not None:
+            self._intermediates.cpu_select_intrins = kernel_ast.clone()
+
        return kernel_ast

    def _get_platform(self) -> Platform:
@@ -310,6 +370,60 @@ class DefaultKernelCreationDriver:
        )


+@dataclass
+class StageResult:
+    ast: PsAstNode
+    label: str
+
+
+class StageResultSlot:
+    def __init__(self, description: str | None = None):
+        self._description = description
+        self._name: str
+        self._lookup: str
+
+    def __set_name__(self, owner, name: str):
+        self._name = name
+        self._lookup = f"_{name}"
+
+    def __get__(self, obj, objtype=None) -> StageResult | None:
+        if obj is None:
+            return None
+
+        ast = getattr(obj, self._lookup, None)
+        if ast is not None:
+            descr = self._name if self._description is None else self._description
+            return StageResult(ast, descr)
+        else:
+            return None
+
+    def __set__(self, obj, val: PsAstNode | None):
+        setattr(obj, self._lookup, val)
+
+
+class CodegenIntermediates:
+    """Intermediate results produced by the code generator."""
+
+    parsed_body = StageResultSlot("Freeze & Type Deduction")
+    materialized_ispace = StageResultSlot("Iteration Space Materialization")
+    constants_eliminated = StageResultSlot("Constant Elimination")
+    cpu_canonicalize = StageResultSlot("CPU: Symbol Canonicalization")
+    cpu_hoist_invariants = StageResultSlot("CPU: Hoisting of Loop Invariants")
+    cpu_vectorize = StageResultSlot("CPU: Vectorization")
+    cpu_select_intrins = StageResultSlot("CPU: Intrinsics Selection")
+    cpu_openmp = StageResultSlot("CPU: OpenMP Instrumentation")
+    lowered = StageResultSlot("C Language Lowering")
+
+    @property
+    def available_stages(self) -> Sequence[StageResult]:
+        all_results: list[StageResult | None] = [
+            getattr(self, name)
+            for name, slot in CodegenIntermediates.__dict__.items()
+            if isinstance(slot, StageResultSlot)
+        ]
+        return tuple(filter(lambda r: r is not None, all_results))  # type: ignore
+
+
 def create_staggered_kernel(
    assignments, target: Target = Target.CPU, gpu_exclusive_conditions=False, **kwargs
 ):

--- a/src/pystencils/simp/simplificationstrategy.py
+++ b/src/pystencils/simp/simplificationstrategy.py
@@ -57,7 +57,7 @@ class SimplificationStrategy:

            def __str__(self):
                try:
-                    import tabulate
+                    from tabulate import tabulate
                    return tabulate(self.elements, headers=['Name', 'Runtime', 'Adds', 'Muls', 'Divs', 'Total'])
                except ImportError:
                    result = "Name, Adds, Muls, Divs, Runtime\n"

--- a/src/pystencils/target.py
+++ b/src/pystencils/target.py
@@ -87,7 +87,7 @@ class Target(Flag):
    """

    GPU = CUDA
-    """Alias for backward compatibility."""
+    """Alias for `Target.CUDA`, for backward compatibility."""

    SYCL = _GPU | _SYCL
    """SYCL kernel target.

--- a/src/pystencils/types/types.py
+++ b/src/pystencils/types/types.py
@@ -683,7 +683,7 @@ class PsIeeeFloatType(PsScalarType):
    def create_constant(self, value: Any) -> Any:
        np_type = self.NUMPY_TYPES[self._width]

-        if isinstance(value, (int, float, np.floating)):
+        if isinstance(value, (int, float, np.integer, np.floating)):
            finfo = np.finfo(np_type)  # type: ignore
            if value < finfo.min or value > finfo.max:
                raise PsTypeError(

--- a/tests/fixtures.py
+++ b/tests/fixtures.py
+"""Fixtures for the pystencils test suite
+
+This module provides a number of fixtures used by the pystencils test suite.
+Use these fixtures wherever applicable to extend the code surface area covered
+by your tests:
+
+- All tests that should work for every target should use the `target` fixture
+- All tests that should work with the highest optimization level for every target
+  should use the `gen_config` fixture
+- Use the `xp` fixture to access the correct array module (numpy or cupy) depending
+  on the target
+
+"""
+
+import pytest
+
+from types import ModuleType
+from dataclasses import replace
+
+import pystencils as ps
+
+AVAILABLE_TARGETS = [ps.Target.GenericCPU]
+
+try:
+    import cupy
+
+    AVAILABLE_TARGETS += [ps.Target.CUDA]
+except ImportError:
+    pass
+
+AVAILABLE_TARGETS += ps.Target.available_vector_cpu_targets()
+TARGET_IDS = [t.name for t in AVAILABLE_TARGETS]
+
+@pytest.fixture(params=AVAILABLE_TARGETS, ids=TARGET_IDS)
+def target(request) -> ps.Target:
+    """Provides all code generation targets available on the current hardware"""
+    return request.param
+
+@pytest.fixture
+def gen_config(target: ps.Target):
+    """Default codegen configuration for the current target.
+    
+    For GPU targets, set default indexing options.
+    For vector-CPU targets, set default vectorization config.
+    """
+
+    gen_config = ps.CreateKernelConfig(target=target)
+
+    if target.is_vector_cpu():
+        gen_config = replace(
+            gen_config,
+            cpu_optim=ps.CpuOptimConfig(
+                vectorize=ps.VectorizationConfig(assume_inner_stride_one=True)
+            ),
+        )
+
+    return gen_config
+
+@pytest.fixture()
+def xp(target: ps.Target) -> ModuleType:
+    """Primary array module for the current target.
+    
+    Returns:
+        `cupy` if `target == Target.CUDA`, and `numpy` otherwise
+    """
+    if target == ps.Target.CUDA:
+        import cupy as xp
+        return xp
+    else:
+        import numpy as np
+        return np
--- a/tests/frontend/test_field.py
+++ b/tests/frontend/test_field.py
@@ -3,23 +3,39 @@ import pytest
 import sympy as sp

 import pystencils as ps
-from pystencils import TypedSymbol, DEFAULTS
-from pystencils.types import create_type
-from pystencils.field import Field, FieldType, layout_string_to_tuple
+from pystencils import DEFAULTS
+from pystencils.field import (
+    Field,
+    FieldType,
+    layout_string_to_tuple,
+    spatial_layout_string_to_tuple,
+)


 def test_field_basic():
-    f = Field.create_generic('f', spatial_dimensions=2)
+    f = Field.create_generic("f", spatial_dimensions=2)
    assert FieldType.is_generic(f)
-    assert f['E'] == f[1, 0]
-    assert f['N'] == f[0, 1]
-    assert '_' in f.center._latex('dummy')
-
-    assert f.index_to_physical(index_coordinates=sp.Matrix([0, 0]), staggered=False)[0] == 0
-    assert f.index_to_physical(index_coordinates=sp.Matrix([0, 0]), staggered=False)[1] == 0
-
-    assert f.physical_to_index(physical_coordinates=sp.Matrix([0, 0]), staggered=False)[0] == 0
-    assert f.physical_to_index(physical_coordinates=sp.Matrix([0, 0]), staggered=False)[1] == 0
+    assert f["E"] == f[1, 0]
+    assert f["N"] == f[0, 1]
+    assert "_" in f.center._latex("dummy")
+
+    assert (
+        f.index_to_physical(index_coordinates=sp.Matrix([0, 0]), staggered=False)[0]
+        == 0
+    )
+    assert (
+        f.index_to_physical(index_coordinates=sp.Matrix([0, 0]), staggered=False)[1]
+        == 0
+    )
+
+    assert (
+        f.physical_to_index(physical_coordinates=sp.Matrix([0, 0]), staggered=False)[0]
+        == 0
+    )
+    assert (
+        f.physical_to_index(physical_coordinates=sp.Matrix([0, 0]), staggered=False)[1]
+        == 0
+    )

    f1 = f.new_field_with_different_name("f1")
    assert f1.ndim == f.ndim
@@ -28,7 +44,7 @@ def test_field_basic():
    fixed = ps.fields("f(5, 5) : double[20, 20]")
    assert fixed.neighbor_vector((1, 1)).shape == (5, 5)

-    f = Field.create_fixed_size('f', (10, 10), strides=(80, 8), dtype=np.float64)
+    f = Field.create_fixed_size("f", (10, 10), strides=(80, 8), dtype=np.float64)
    assert f.spatial_strides == (10, 1)
    assert f.index_strides == ()
    assert f.center_vector == sp.Matrix([f.center])
@@ -37,84 +53,99 @@ def test_field_basic():
    assert f1.ndim == f.ndim
    assert f1.values_per_cell() == f.values_per_cell()

-    f = Field.create_fixed_size('f', (8, 8, 2, 2), index_dimensions=2)
-    assert f.center_vector == sp.Matrix([[f(0, 0), f(0, 1)],
-                                         [f(1, 0), f(1, 1)]])
+    f = Field.create_fixed_size("f", (8, 8, 2, 2), index_dimensions=2)
+    assert f.center_vector == sp.Matrix([[f(0, 0), f(0, 1)], [f(1, 0), f(1, 1)]])
    field_access = f[1, 1]
    assert field_access.nr_of_coordinates == 2
-    assert field_access.offset_name == 'NE'
+    assert field_access.offset_name == "NE"
    neighbor = field_access.neighbor(coord_id=0, offset=-2)
    assert neighbor.offsets == (-1, 1)
-    assert '_' in neighbor._latex('dummy')
+    assert "_" in neighbor._latex("dummy")

-    f = Field.create_fixed_size('f', (8, 8, 2, 2, 2), index_dimensions=3)
-    assert f.center_vector == sp.Array([[[f(i, j, k) for k in range(2)] for j in range(2)] for i in range(2)])
+    f = Field.create_fixed_size("f", (8, 8, 2, 2, 2), index_dimensions=3)
+    assert f.center_vector == sp.Array(
+        [[[f(i, j, k) for k in range(2)] for j in range(2)] for i in range(2)]
+    )

-    f = Field.create_generic('f', spatial_dimensions=5, index_dimensions=2)
+    f = Field.create_generic("f", spatial_dimensions=5, index_dimensions=2)
    field_access = f[1, -1, 2, -3, 0](1, 0)
    assert field_access.offsets == (1, -1, 2, -3, 0)
    assert field_access.index == (1, 0)


 def test_error_handling():
-    struct_dtype = np.dtype([('a', np.int32), ('b', np.float64), ('c', np.uint32)], align=True)
-    Field.create_generic('f', spatial_dimensions=2, index_dimensions=0, dtype=struct_dtype)
+    struct_dtype = np.dtype(
+        [("a", np.int32), ("b", np.float64), ("c", np.uint32)], align=True
+    )
+    Field.create_generic(
+        "f", spatial_dimensions=2, index_dimensions=0, dtype=struct_dtype
+    )
    with pytest.raises(ValueError) as e:
-        Field.create_generic('f', spatial_dimensions=2, index_dimensions=1, dtype=struct_dtype)
-    assert 'index dimension' in str(e.value)
+        Field.create_generic(
+            "f", spatial_dimensions=2, index_dimensions=1, dtype=struct_dtype
+        )
+    assert "index dimension" in str(e.value)

-    arr = np.array([[[(1,)*3, (2,)*3, (3,)*3]]*2], dtype=struct_dtype)
-    Field.create_from_numpy_array('f', arr, index_dimensions=0)
+    arr = np.array([[[(1,) * 3, (2,) * 3, (3,) * 3]] * 2], dtype=struct_dtype)
+    Field.create_from_numpy_array("f", arr, index_dimensions=0)
    with pytest.raises(ValueError) as e:
-        Field.create_from_numpy_array('f', arr, index_dimensions=1)
-    assert 'Structured arrays' in str(e.value)
+        Field.create_from_numpy_array("f", arr, index_dimensions=1)
+    assert "Structured arrays" in str(e.value)

    arr = np.zeros([3, 3, 3])
-    Field.create_from_numpy_array('f', arr, index_dimensions=2)
+    Field.create_from_numpy_array("f", arr, index_dimensions=2)
    with pytest.raises(ValueError) as e:
-        Field.create_from_numpy_array('f', arr, index_dimensions=3)
-    assert 'Too many' in str(e.value)
+        Field.create_from_numpy_array("f", arr, index_dimensions=3)
+    assert "Too many" in str(e.value)

-    Field.create_fixed_size('f', (3, 2, 4), index_dimensions=0, dtype=struct_dtype, layout='reverse_numpy')
+    Field.create_fixed_size(
+        "f", (3, 2, 4), index_dimensions=0, dtype=struct_dtype, layout="reverse_numpy"
+    )
    with pytest.raises(ValueError) as e:
-        Field.create_fixed_size('f', (3, 2, 4), index_dimensions=1, dtype=struct_dtype, layout='reverse_numpy')
-    assert 'Structured arrays' in str(e.value)
-
-    f = Field.create_fixed_size('f', (10, 10))
+        Field.create_fixed_size(
+            "f",
+            (3, 2, 4),
+            index_dimensions=1,
+            dtype=struct_dtype,
+            layout="reverse_numpy",
+        )
+    assert "Structured arrays" in str(e.value)
+
+    f = Field.create_fixed_size("f", (10, 10))
    with pytest.raises(ValueError) as e:
        f[1]
-    assert 'Wrong number of spatial indices' in str(e.value)
+    assert "Wrong number of spatial indices" in str(e.value)

-    f = Field.create_generic('f', spatial_dimensions=2, index_shape=(3,))
+    f = Field.create_generic("f", spatial_dimensions=2, index_shape=(3,))
    with pytest.raises(ValueError) as e:
        f(3)
-    assert 'out of bounds' in str(e.value)
+    assert "out of bounds" in str(e.value)

-    f = Field.create_fixed_size('f', (10, 10, 3, 4), index_dimensions=2)
+    f = Field.create_fixed_size("f", (10, 10, 3, 4), index_dimensions=2)
    with pytest.raises(ValueError) as e:
        f(3, 0)
-    assert 'out of bounds' in str(e.value)
+    assert "out of bounds" in str(e.value)

    with pytest.raises(ValueError) as e:
        f(1, 0)(1, 0)
-    assert 'Indexing an already indexed' in str(e.value)
+    assert "Indexing an already indexed" in str(e.value)

    with pytest.raises(ValueError) as e:
        f(1)
-    assert 'Wrong number of indices' in str(e.value)
+    assert "Wrong number of indices" in str(e.value)

    with pytest.raises(ValueError) as e:
-        Field.create_generic('f', spatial_dimensions=2, layout='wrong')
-    assert 'Unknown layout descriptor' in str(e.value)
+        Field.create_generic("f", spatial_dimensions=2, layout="wrong")
+    assert "Unknown layout descriptor" in str(e.value)

-    assert layout_string_to_tuple('fzyx', dim=4) == (3, 2, 1, 0)
+    assert layout_string_to_tuple("fzyx", dim=4) == (3, 2, 1, 0)
    with pytest.raises(ValueError) as e:
-        layout_string_to_tuple('wrong', dim=4)
-    assert 'Unknown layout descriptor' in str(e.value)
+        layout_string_to_tuple("wrong", dim=4)
+    assert "Unknown layout descriptor" in str(e.value)


 def test_decorator_scoping():
-    dst = ps.fields('dst : double[2D]')
+    dst = ps.fields("dst : double[2D]")

    def f1():
        a = sp.Symbol("a")
@@ -134,7 +165,7 @@ def test_decorator_scoping():


 def test_string_creation():
-    x, y, z = ps.fields('  x(4),    y(3,5) z : double[  3,  47]')
+    x, y, z = ps.fields("  x(4),    y(3,5) z : double[  3,  47]")
    assert x.index_shape == (4,)
    assert y.index_shape == (3, 5)
    assert z.spatial_shape == (3, 47)
@@ -142,19 +173,85 @@ def test_string_creation():

 def test_itemsize():

-    x = ps.fields('x: float32[1d]')
-    y = ps.fields('y:  float64[2d]')
-    i = ps.fields('i:  int16[1d]')
+    x = ps.fields("x: float32[1d]")
+    y = ps.fields("y:  float64[2d]")
+    i = ps.fields("i:  int16[1d]")

    assert x.itemsize == 4
    assert y.itemsize == 8
    assert i.itemsize == 2


+def test_spatial_memory_layout_descriptors():
+    assert (
+        spatial_layout_string_to_tuple("AoS", 3)
+        == spatial_layout_string_to_tuple("aos", 3)
+        == spatial_layout_string_to_tuple("ZYXF", 3)
+        == spatial_layout_string_to_tuple("zyxf", 3)
+        == (2, 1, 0)
+    )
+    assert (
+        spatial_layout_string_to_tuple("SoA", 3)
+        == spatial_layout_string_to_tuple("soa", 3)
+        == spatial_layout_string_to_tuple("FZYX", 3)
+        == spatial_layout_string_to_tuple("fzyx", 3)
+        == spatial_layout_string_to_tuple("f", 3)
+        == spatial_layout_string_to_tuple("F", 3)
+        == (2, 1, 0)
+    )
+    assert (
+        spatial_layout_string_to_tuple("c", 3)
+        == spatial_layout_string_to_tuple("C", 3)
+        == (0, 1, 2)
+    )
+
+    assert spatial_layout_string_to_tuple("C", 5) == (0, 1, 2, 3, 4)
+
+    with pytest.raises(ValueError):
+        spatial_layout_string_to_tuple("aos", -1)
+
+    with pytest.raises(ValueError):
+        spatial_layout_string_to_tuple("aos", 4)
+
+
+def test_memory_layout_descriptors():
+    assert (
+        layout_string_to_tuple("AoS", 4)
+        == layout_string_to_tuple("aos", 4)
+        == layout_string_to_tuple("ZYXF", 4)
+        == layout_string_to_tuple("zyxf", 4)
+        == (2, 1, 0, 3)
+    )
+    assert (
+        layout_string_to_tuple("SoA", 4)
+        == layout_string_to_tuple("soa", 4)
+        == layout_string_to_tuple("FZYX", 4)
+        == layout_string_to_tuple("fzyx", 4)
+        == layout_string_to_tuple("f", 4)
+        == layout_string_to_tuple("F", 4)
+        == (3, 2, 1, 0)
+    )
+    assert (
+        layout_string_to_tuple("c", 4)
+        == layout_string_to_tuple("C", 4)
+        == (0, 1, 2, 3)
+    )
+
+    assert layout_string_to_tuple("C", 5) == (0, 1, 2, 3, 4)
+
+    with pytest.raises(ValueError):
+        layout_string_to_tuple("aos", -1)
+
+    with pytest.raises(ValueError):
+        layout_string_to_tuple("aos", 5)
+
+
 def test_staggered():

    # D2Q5
-    j1, j2, j3 = ps.fields('j1(2), j2(2,2), j3(2,2,2) : double[2D]', field_type=FieldType.STAGGERED)
+    j1, j2, j3 = ps.fields(
+        "j1(2), j2(2,2), j3(2,2,2) : double[2D]", field_type=FieldType.STAGGERED
+    )

    assert j1[0, 1](1) == j1.staggered_access((0, sp.Rational(1, 2)))
    assert j1[0, 1](1) == j1.staggered_access(np.array((0, sp.Rational(1, 2))))
@@ -163,44 +260,68 @@ def test_staggered():
    assert j1[0, 1](1) == j1.staggered_access("N")
    assert j1[0, 0](1) == j1.staggered_access("S")
    assert j1.staggered_vector_access("N") == sp.Matrix([j1.staggered_access("N")])
-    assert j1.staggered_stencil_name == 'D2Q5'
+    assert j1.staggered_stencil_name == "D2Q5"

    assert j1.physical_coordinates[0] == DEFAULTS.spatial_counters[0]
    assert j1.physical_coordinates[1] == DEFAULTS.spatial_counters[1]
    assert j1.physical_coordinates_staggered[0] == DEFAULTS.spatial_counters[0] + 0.5
    assert j1.physical_coordinates_staggered[1] == DEFAULTS.spatial_counters[1] + 0.5
-    assert j1.index_to_physical(index_coordinates=sp.Matrix([0, 0]), staggered=True)[0] == 0.5
-    assert j1.index_to_physical(index_coordinates=sp.Matrix([0, 0]), staggered=True)[1] == 0.5
-    assert j1.physical_to_index(physical_coordinates=sp.Matrix([0, 0]), staggered=True)[0] == -0.5
-    assert j1.physical_to_index(physical_coordinates=sp.Matrix([0, 0]), staggered=True)[1] == -0.5
+    assert (
+        j1.index_to_physical(index_coordinates=sp.Matrix([0, 0]), staggered=True)[0]
+        == 0.5
+    )
+    assert (
+        j1.index_to_physical(index_coordinates=sp.Matrix([0, 0]), staggered=True)[1]
+        == 0.5
+    )
+    assert (
+        j1.physical_to_index(physical_coordinates=sp.Matrix([0, 0]), staggered=True)[0]
+        == -0.5
+    )
+    assert (
+        j1.physical_to_index(physical_coordinates=sp.Matrix([0, 0]), staggered=True)[1]
+        == -0.5
+    )

    assert j2[0, 1](1, 1) == j2.staggered_access((0, sp.Rational(1, 2)), 1)
    assert j2[0, 1](1, 1) == j2.staggered_access("N", 1)
-    assert j2.staggered_vector_access("N") == sp.Matrix([j2.staggered_access("N", 0), j2.staggered_access("N", 1)])
+    assert j2.staggered_vector_access("N") == sp.Matrix(
+        [j2.staggered_access("N", 0), j2.staggered_access("N", 1)]
+    )

    assert j3[0, 1](1, 1, 1) == j3.staggered_access((0, sp.Rational(1, 2)), (1, 1))
    assert j3[0, 1](1, 1, 1) == j3.staggered_access("N", (1, 1))
-    assert j3.staggered_vector_access("N") == sp.Matrix([[j3.staggered_access("N", (i, j))
-                                                        for j in range(2)] for i in range(2)])
+    assert j3.staggered_vector_access("N") == sp.Matrix(
+        [[j3.staggered_access("N", (i, j)) for j in range(2)] for i in range(2)]
+    )

    # D2Q9
-    k1, k2 = ps.fields('k1(4), k2(2) : double[2D]', field_type=FieldType.STAGGERED)
+    k1, k2 = ps.fields("k1(4), k2(2) : double[2D]", field_type=FieldType.STAGGERED)

    assert k1[1, 1](2) == k1.staggered_access("NE")
    assert k1[0, 0](2) == k1.staggered_access("SW")
    assert k1[0, 0](3) == k1.staggered_access("NW")
-    
+
    a = k1.staggered_access("NE")
-    assert a._staggered_offset(a.offsets, a.index[0]) == [sp.Rational(1, 2), sp.Rational(1, 2)]
+    assert a._staggered_offset(a.offsets, a.index[0]) == [
+        sp.Rational(1, 2),
+        sp.Rational(1, 2),
+    ]
    a = k1.staggered_access("SW")
-    assert a._staggered_offset(a.offsets, a.index[0]) == [sp.Rational(-1, 2), sp.Rational(-1, 2)]
+    assert a._staggered_offset(a.offsets, a.index[0]) == [
+        sp.Rational(-1, 2),
+        sp.Rational(-1, 2),
+    ]
    a = k1.staggered_access("NW")
-    assert a._staggered_offset(a.offsets, a.index[0]) == [sp.Rational(-1, 2), sp.Rational(1, 2)]
+    assert a._staggered_offset(a.offsets, a.index[0]) == [
+        sp.Rational(-1, 2),
+        sp.Rational(1, 2),
+    ]

    # sign reversed when using as flux field
-    r = ps.fields('r(2) : double[2D]', field_type=FieldType.STAGGERED_FLUX)
+    r = ps.fields("r(2) : double[2D]", field_type=FieldType.STAGGERED_FLUX)
    assert r[0, 0](0) == r.staggered_access("W")
    assert -r[1, 0](0) == r.staggered_access("E")


-# test_staggered()
\ No newline at end of file
+# test_staggered()
--- a/tests/kernelcreation/test_domain_kernels.py
+++ b/tests/kernelcreation/test_domain_kernels.py
@@ -18,35 +18,6 @@ from pystencils.assignment import assignment_from_stencil
 from pystencils.kernelcreation import create_kernel, KernelFunction
 from pystencils.backend.emission import emit_code

-AVAILABLE_TARGETS = [Target.GenericCPU]
-
-try:
-    import cupy
-
-    AVAILABLE_TARGETS += [Target.CUDA]
-except ImportError:
-    pass
-
-AVAILABLE_TARGETS += Target.available_vector_cpu_targets()
-TEST_IDS = [t.name for t in AVAILABLE_TARGETS]
-
-
-@pytest.fixture(params=AVAILABLE_TARGETS, ids=TEST_IDS)
-def gen_config(request):
-    target: Target = request.param
-
-    gen_config = CreateKernelConfig(target=target)
-
-    if Target._VECTOR in target:
-        gen_config = replace(
-            gen_config,
-            cpu_optim=CpuOptimConfig(
-                vectorize=VectorizationConfig(assume_inner_stride_one=True)
-            ),
-        )
-
-    return gen_config
-

 def inspect_dp_kernel(kernel: KernelFunction, gen_config: CreateKernelConfig):
    code = emit_code(kernel)

--- a/tests/kernelcreation/test_functions.py
+++ b/tests/kernelcreation/test_functions.py
@@ -7,7 +7,6 @@ from pystencils.backend.ast import dfs_preorder
 from pystencils.backend.ast.expressions import PsCall


-
 def unary_function(name, xp):
    return {
        "exp": (sp.exp, xp.exp),

--- a/tests/kernelcreation/test_iteration_slices.py
+++ b/tests/kernelcreation/test_iteration_slices.py
+import numpy as np
+import sympy as sp
+import pytest
+
+from dataclasses import replace
+
+from pystencils import (
+    DEFAULTS,
+    Assignment,
+    Field,
+    TypedSymbol,
+    create_kernel,
+    make_slice,
+    Target,
+    CreateKernelConfig,
+    GpuIndexingConfig,
+    DynamicType,
+)
+from pystencils.sympyextensions.integer_functions import int_rem
+from pystencils.simp import sympy_cse_on_assignment_list
+from pystencils.slicing import normalize_slice
+from pystencils.backend.jit.gpu_cupy import CupyKernelWrapper
+
+
+def test_sliced_iteration():
+    size = (4, 4)
+    src_arr = np.ones(size)
+    dst_arr = np.zeros_like(src_arr)
+    src_field = Field.create_from_numpy_array("src", src_arr)
+    dst_field = Field.create_from_numpy_array("dst", dst_arr)
+
+    a, b = sp.symbols("a b")
+    update_rule = Assignment(
+        dst_field[0, 0],
+        (
+            a * src_field[0, 1]
+            + a * src_field[0, -1]
+            + b * src_field[1, 0]
+            + b * src_field[-1, 0]
+        )
+        / 4,
+    )
+
+    x_end = TypedSymbol("x_end", "int")
+    s = make_slice[1:x_end, 1]
+    x_end_value = size[1] - 1
+    kernel = create_kernel(
+        sympy_cse_on_assignment_list([update_rule]), iteration_slice=s
+    ).compile()
+
+    kernel(src=src_arr, dst=dst_arr, a=1.0, b=1.0, x_end=x_end_value)
+
+    expected_result = np.zeros(size)
+    expected_result[1:x_end_value, 1] = 1
+    np.testing.assert_almost_equal(expected_result, dst_arr)
+
+
+@pytest.mark.parametrize(
+    "islice",
+    [
+        make_slice[1:-1, 1:-1],
+        make_slice[3, 2:-2],
+        make_slice[2:-2:2, ::3],
+        make_slice[10:, :-5:2],
+        make_slice[-5:-1, -1],
+        make_slice[-3, -1]
+    ],
+)
+def test_numerical_slices(gen_config: CreateKernelConfig, xp, islice):
+    shape = (16, 16)
+
+    f_arr = xp.zeros(shape)
+    expected = xp.zeros_like(f_arr)
+    expected[islice] = 1.0
+
+    f = Field.create_from_numpy_array("f", f_arr)
+
+    update = Assignment(f.center(), 1)
+    gen_config = replace(gen_config, iteration_slice=islice)
+
+    try:
+        kernel = create_kernel(update, gen_config).compile()
+    except NotImplementedError:
+        if gen_config.target.is_vector_cpu():
+            #   TODO Gather/Scatter not implemented yet
+            pytest.xfail("Gather/Scatter not available yet")
+
+    kernel(f=f_arr)
+
+    xp.testing.assert_array_equal(f_arr, expected)
+
+
+def test_symbolic_slice(gen_config: CreateKernelConfig, xp):
+    shape = (16, 16)
+
+    sx, sy, ex, ey = [
+        TypedSymbol(n, DynamicType.INDEX_TYPE) for n in ("sx", "sy", "ex", "ey")
+    ]
+
+    f_arr = xp.zeros(shape)
+
+    f = Field.create_from_numpy_array("f", f_arr)
+
+    update = Assignment(f.center(), 1)
+    islice = make_slice[sy:ey, sx:ex]
+    gen_config = replace(gen_config, iteration_slice=islice)
+    kernel = create_kernel(update, gen_config).compile()
+
+    for slic in [make_slice[:, :], make_slice[1:-1, 2:-2], make_slice[8:14, 7:11]]:
+        slic = normalize_slice(slic, shape)
+        expected = xp.zeros_like(f_arr)
+        expected[slic] = 1.0
+
+        f_arr[:] = 0.0
+
+        kernel(
+            f=f_arr,
+            sy=slic[0].start,
+            ey=slic[0].stop,
+            sx=slic[1].start,
+            ex=slic[1].stop,
+        )
+
+        xp.testing.assert_array_equal(f_arr, expected)
+
+
+def test_triangle_pattern(gen_config: CreateKernelConfig, xp):
+    shape = (16, 16)
+
+    f_arr = xp.zeros(shape)
+    f = Field.create_from_numpy_array("f", f_arr)
+
+    expected = xp.zeros_like(f_arr)
+    for r in range(shape[0]):
+        expected[r, r:] = 1.0
+
+    update = Assignment(f.center(), 1)
+    outer_counter = DEFAULTS.spatial_counters[0]
+    islice = make_slice[:, outer_counter:]
+    gen_config = replace(gen_config, iteration_slice=islice)
+
+    if gen_config.target == Target.CUDA:
+        gen_config = replace(
+            gen_config, gpu_indexing=GpuIndexingConfig(manual_launch_grid=True)
+        )
+
+    kernel = create_kernel(update, gen_config).compile()
+
+    if isinstance(kernel, CupyKernelWrapper):
+        kernel.block_size = shape + (1,)
+        kernel.num_blocks = (1, 1, 1)
+
+    kernel(f=f_arr)
+
+    xp.testing.assert_array_equal(f_arr, expected)
+
+
+def test_red_black_pattern(gen_config: CreateKernelConfig, xp):
+    shape = (16, 16)
+
+    f_arr = xp.zeros(shape)
+    f = Field.create_from_numpy_array("f", f_arr)
+
+    expected = xp.zeros_like(f_arr)
+    for r in range(shape[0]):
+        start = 0 if r % 2 == 0 else 1
+        expected[r, start::2] = 1.0
+
+    update = Assignment(f.center(), 1)
+    outer_counter = DEFAULTS.spatial_counters[0]
+    start = sp.Piecewise((0, sp.Eq(int_rem(outer_counter, 2), 0)), (1, True))
+    islice = make_slice[:, start::2]
+    gen_config = replace(gen_config, iteration_slice=islice)
+
+    if gen_config.target == Target.CUDA:
+        gen_config = replace(
+            gen_config, gpu_indexing=GpuIndexingConfig(manual_launch_grid=True)
+        )
+
+    try:
+        kernel = create_kernel(update, gen_config).compile()
+    except NotImplementedError:
+        if gen_config.target.is_vector_cpu():
+            pytest.xfail("Gather/Scatter not implemented yet")
+
+    if isinstance(kernel, CupyKernelWrapper):
+        kernel.block_size = (8, 16, 1)
+        kernel.num_blocks = (1, 1, 1)
+
+    kernel(f=f_arr)
+
+    xp.testing.assert_array_equal(f_arr, expected)
--- a/tests/kernelcreation/test_sliced_iteration.py
+++ b/tests/kernelcreation/test_sliced_iteration.py
-import numpy as np
-import sympy as sp
-
-from pystencils import Assignment, Field, TypedSymbol, create_kernel, make_slice
-from pystencils.simp import sympy_cse_on_assignment_list
-
-
-def test_sliced_iteration():
-    size = (4, 4)
-    src_arr = np.ones(size)
-    dst_arr = np.zeros_like(src_arr)
-    src_field = Field.create_from_numpy_array('src', src_arr)
-    dst_field = Field.create_from_numpy_array('dst', dst_arr)
-
-    a, b = sp.symbols("a b")
-    update_rule = Assignment(dst_field[0, 0],
-                             (a * src_field[0, 1] + a * src_field[0, -1] +
-                              b * src_field[1, 0] + b * src_field[-1, 0]) / 4)
-
-    x_end = TypedSymbol("x_end", "int")
-    s = make_slice[1:x_end, 1]
-    x_end_value = size[1] - 1
-    kernel = create_kernel(sympy_cse_on_assignment_list([update_rule]), iteration_slice=s).compile()
-
-    kernel(src=src_arr, dst=dst_arr, a=1.0, b=1.0, x_end=x_end_value)
-
-    expected_result = np.zeros(size)
-    expected_result[1:x_end_value, 1] = 1
-    np.testing.assert_almost_equal(expected_result, dst_arr)
--- a/tests/nbackend/kernelcreation/test_iteration_space.py
+++ b/tests/nbackend/kernelcreation/test_iteration_space.py
@@ -129,6 +129,36 @@ def test_slices_with_negative_start():
    )


+def test_negative_singular_slices():
+    ctx = KernelCreationContext()
+    factory = AstFactory(ctx)
+
+    archetype_field = Field.create_generic("f", spatial_dimensions=2, layout="fzyx")
+    ctx.add_field(archetype_field)
+    archetype_arr = ctx.get_buffer(archetype_field)
+
+    islice = (-2, -1)
+    ispace = FullIterationSpace.create_from_slice(ctx, islice, archetype_field)
+
+    dims = ispace.dimensions
+
+    assert dims[0].start.structurally_equal(
+        PsExpression.make(archetype_arr.shape[0]) + factory.parse_index(-2)
+    )
+
+    assert dims[0].stop.structurally_equal(
+        PsExpression.make(archetype_arr.shape[0]) + factory.parse_index(-1)
+    )
+
+    assert dims[1].start.structurally_equal(
+        PsExpression.make(archetype_arr.shape[1]) + factory.parse_index(-1)
+    )
+
+    assert dims[1].stop.structurally_equal(
+        PsExpression.make(archetype_arr.shape[1])
+    )
+
+
 def test_field_independent_slices():
    ctx = KernelCreationContext()


--- a/tests/nbackend/test_vectorization.py
+++ b/tests/nbackend/test_vectorization.py
@@ -89,6 +89,11 @@ TEST_SETUPS: list[VectorTestSetup] = list(
 TEST_IDS = [t.name for t in TEST_SETUPS]


+@pytest.fixture(params=TEST_SETUPS, ids=TEST_IDS)
+def vectorization_setup(request) -> VectorTestSetup:
+    return request.param
+
+
 def create_vector_kernel(
    assignments: list[Assignment],
    field: Field,
@@ -139,9 +144,9 @@ def create_vector_kernel(
    return kernel


-@pytest.mark.parametrize("setup", TEST_SETUPS, ids=TEST_IDS)
 @pytest.mark.parametrize("ghost_layers", [0, 2])
-def test_update_kernel(setup: VectorTestSetup, ghost_layers: int):
+def test_update_kernel(vectorization_setup: VectorTestSetup, ghost_layers: int):
+    setup = vectorization_setup
    src, dst = fields(f"src(2), dst(4): {setup.numeric_dtype}[2D]", layout="fzyx")

    x = sp.symbols("x_:4")
@@ -197,8 +202,8 @@ def test_update_kernel(setup: VectorTestSetup, ghost_layers: int):
            np.testing.assert_equal(dst_arr[:, -i, :], 0.0)


-@pytest.mark.parametrize("setup", TEST_SETUPS, ids=TEST_IDS)
-def test_trailing_iterations(setup: VectorTestSetup):
+def test_trailing_iterations(vectorization_setup: VectorTestSetup):
+    setup = vectorization_setup
    f = fields(f"f(1): {setup.numeric_dtype}[1D]", layout="fzyx")

    update = [Assignment(f(0), 2 * f(0))]
@@ -216,3 +221,24 @@ def test_trailing_iterations(setup: VectorTestSetup):
        kernel(f=f_arr)

        np.testing.assert_equal(f_arr, 2.0)
+
+
+def test_only_trailing_iterations(vectorization_setup: VectorTestSetup):
+    setup = vectorization_setup
+    f = fields(f"f(1): {setup.numeric_dtype}[1D]", layout="fzyx")
+
+    update = [Assignment(f(0), 2 * f(0))]
+
+    kernel = create_vector_kernel(update, f, setup)
+
+    for trailing_iters in range(1, setup.lanes):
+        shape = (trailing_iters, 1)
+        f_arr = create_numpy_array_with_layout(
+            shape, layout=(1, 0), dtype=setup.numeric_dtype.numpy_dtype
+        )
+
+        f_arr[:] = 1.0
+
+        kernel(f=f_arr)
+
+        np.testing.assert_equal(f_arr, 2.0)
No results found