Skip to content
Snippets Groups Projects

Compare revisions

Changes are shown as if the source revision was being merged into the target revision. Learn more about comparing revisions.

Source

Select target project
No results found

Target

Select target project
No results found
Show changes
from typing import overload
from .backend.ast import PsAstNode
from .backend.emission import CAstPrinter, IRAstPrinter, EmissionError
from .backend.kernelfunction import KernelFunction
from .kernelcreation import StageResult, CodegenIntermediates
from abc import ABC, abstractmethod
_UNABLE_TO_DISPLAY_CPP = """
<div>
<b>Unable to display C code for this abstract syntax tree</b>
<p>
This intermediate abstract syntax tree contains nodes that cannot be
printed as valid C code.
</p>
</div>
"""
_GRAPHVIZ_NOT_IMPLEMENTED = """
<div>
<b>AST Visualization Unavailable</b>
<p>
AST visualization using GraphViz is not implemented yet.
</p>
</div>
"""
_ERR_MSG = """
<div style="font-family: monospace; background-color: #EEEEEE; white-space: nowrap; overflow-x: scroll">
{}
</div>
"""
class CodeInspectionBase(ABC):
def __init__(self) -> None:
self._ir_printer = IRAstPrinter(annotate_constants=False)
self._c_printer = CAstPrinter()
def _ir_tab(self, ir_obj: PsAstNode | KernelFunction):
import ipywidgets as widgets
ir = self._ir_printer(ir_obj)
ir_tab = widgets.HTML(self._highlight_as_cpp(ir))
self._apply_tab_layout(ir_tab)
return ir_tab
def _cpp_tab(self, ir_obj: PsAstNode | KernelFunction):
import ipywidgets as widgets
try:
cpp = self._c_printer(ir_obj)
cpp_tab = widgets.HTML(self._highlight_as_cpp(cpp))
except EmissionError as e:
cpp_tab = widgets.VBox(
children=[
widgets.HTML(_UNABLE_TO_DISPLAY_CPP),
widgets.Accordion(
children=[widgets.HTML(_ERR_MSG.format(e.args[0]))],
titles=["Error Details"],
),
]
)
self._apply_tab_layout(cpp_tab)
return cpp_tab
def _graphviz_tab(self, ir_obj: PsAstNode | KernelFunction):
import ipywidgets as widgets
graphviz_tab = widgets.HTML(_GRAPHVIZ_NOT_IMPLEMENTED)
self._apply_tab_layout(graphviz_tab)
return graphviz_tab
def _apply_tab_layout(self, tab):
tab.layout.display = "inline-block"
tab.layout.padding = "0 15pt 0 0"
def _highlight_as_cpp(self, code: str) -> str:
from pygments import highlight
from pygments.formatters import HtmlFormatter
from pygments.lexers import CppLexer
formatter = HtmlFormatter(
prestyles="white-space: pre;",
)
html_code = highlight(code, CppLexer(), formatter)
return html_code
def _ipython_display_(self):
from IPython.display import display
display(self._widget())
@abstractmethod
def _widget(self): ...
class AstInspection(CodeInspectionBase):
"""Inspect an abstract syntax tree produced by the code generation backend.
**Interactive:** This class can be used in Jupyter notebooks to interactively
explore an abstract syntax tree.
"""
def __init__(
self,
ast: PsAstNode,
show_ir: bool = True,
show_cpp: bool = True,
show_graph: bool = True,
):
super().__init__()
self._ast = ast
self._show_ir = show_ir
self._show_cpp = show_cpp
self._show_graph = show_graph
def _widget(self):
import ipywidgets as widgets
tabs = []
if self._show_ir:
tabs.append(self._ir_tab(self._ast))
if self._show_cpp:
tabs.append(self._cpp_tab(self._ast))
if self._show_graph:
tabs.append(self._graphviz_tab(self._ast))
tabs = widgets.Tab(children=tabs)
tabs.titles = ["IR Code", "C Code", "AST Visualization"]
tabs.layout.height = "250pt"
return tabs
class KernelInspection(CodeInspectionBase):
def __init__(
self,
kernel: KernelFunction,
show_ir: bool = True,
show_cpp: bool = True,
show_graph: bool = True,
) -> None:
super().__init__()
self._kernel = kernel
self._show_ir = show_ir
self._show_cpp = show_cpp
self._show_graph = show_graph
def _widget(self):
import ipywidgets as widgets
tabs = []
if self._show_ir:
tabs.append(self._ir_tab(self._kernel))
if self._show_cpp:
tabs.append(self._cpp_tab(self._kernel))
if self._show_graph:
tabs.append(self._graphviz_tab(self._kernel))
tabs = widgets.Tab(children=tabs)
tabs.titles = ["IR Code", "C Code", "AST Visualization"]
tabs.layout.height = "250pt"
return tabs
class IntermediatesInspection:
def __init__(
self,
intermediates: CodegenIntermediates,
show_ir: bool = True,
show_cpp: bool = True,
show_graph: bool = True,
):
self._intermediates = intermediates
self._show_ir = show_ir
self._show_cpp = show_cpp
self._show_graph = show_graph
def _ipython_display_(self):
from IPython.display import display
import ipywidgets as widgets
stages = self._intermediates.available_stages
previews: list[AstInspection] = [
AstInspection(
stage.ast,
show_ir=self._show_ir,
show_cpp=self._show_cpp,
show_graph=self._show_graph,
)
for stage in stages
]
labels: list[str] = [stage.label for stage in stages]
code_views = [p._widget() for p in previews]
for v in code_views:
v.layout.width = "100%"
select_label = widgets.HTML("<div><b>Code Generator Stages</b></div>")
select = widgets.Select(options=labels)
select.layout.height = "250pt"
selection_box = widgets.VBox([select_label, select])
selection_box.layout.overflow = "visible"
preview_label = widgets.HTML("<div><b>Preview</b></div>")
preview_stack = widgets.Stack(children=code_views)
preview_stack.layout.overflow = "hidden"
preview_box = widgets.VBox([preview_label, preview_stack])
widgets.jslink((select, "index"), (preview_stack, "selected_index"))
grid = widgets.GridBox(
[selection_box, preview_box],
layout=widgets.Layout(grid_template_columns="max-content auto"),
)
display(grid)
@overload
def inspect(obj: PsAstNode): ...
@overload
def inspect(obj: KernelFunction): ...
@overload
def inspect(obj: StageResult): ...
@overload
def inspect(obj: CodegenIntermediates): ...
def inspect(obj, show_ir: bool = True, show_cpp: bool = True, show_graph: bool = True):
"""Interactively inspect various products of the code generator.
When run inside a Jupyter notebook, this function displays an inspection widget
for the following types of objects:
- `PsAstNode`
- `KernelFunction`
- `StageResult`
- `CodegenIntermediates`
"""
from IPython.display import display
match obj:
case PsAstNode():
preview = AstInspection(
obj, show_ir=show_ir, show_cpp=show_cpp, show_graph=show_cpp
)
case KernelFunction():
preview = KernelInspection(
obj, show_ir=show_ir, show_cpp=show_cpp, show_graph=show_cpp
)
case StageResult(ast, _):
preview = AstInspection(
ast, show_ir=show_ir, show_cpp=show_cpp, show_graph=show_cpp
)
case CodegenIntermediates():
preview = IntermediatesInspection(
obj, show_ir=show_ir, show_cpp=show_cpp, show_graph=show_cpp
)
case _:
raise ValueError(f"Cannot inspect object of type {type(obj)}")
display(preview)
from __future__ import annotations
from typing import cast, Sequence
from dataclasses import replace
from dataclasses import dataclass, replace
from .target import Target
from .config import (
CreateKernelConfig,
OpenMpConfig,
VectorizationConfig,
AUTO
)
from .backend import KernelFunction
from .types import create_numeric_type, PsIntegerType, PsScalarType
from .backend.ast import PsAstNode
from .backend.ast.structural import PsBlock, PsLoop
from .backend.kernelcreation import (
KernelCreationContext,
......@@ -72,8 +76,12 @@ def create_kernel(
return driver(assignments)
def get_driver(cfg: CreateKernelConfig, *, retain_intermediates: bool = False):
return DefaultKernelCreationDriver(cfg, retain_intermediates)
class DefaultKernelCreationDriver:
def __init__(self, cfg: CreateKernelConfig):
def __init__(self, cfg: CreateKernelConfig, retain_intermediates: bool = False):
self._cfg = cfg
idx_dtype = create_numeric_type(self._cfg.index_dtype)
......@@ -87,59 +95,43 @@ class DefaultKernelCreationDriver:
self._target = self._cfg.get_target()
self._platform = self._get_platform()
if retain_intermediates:
self._intermediates = CodegenIntermediates()
else:
self._intermediates = None
@property
def intermediates(self) -> CodegenIntermediates | None:
return self._intermediates
def __call__(
self,
assignments: AssignmentCollection | Sequence[AssignmentBase] | AssignmentBase,
):
if isinstance(assignments, AssignmentBase):
assignments = [assignments]
if not isinstance(assignments, AssignmentCollection):
assignments = AssignmentCollection(assignments) # type: ignore
_ = _parse_simplification_hints(assignments)
analysis = KernelAnalysis(
self._ctx,
not self._cfg.skip_independence_check,
not self._cfg.allow_double_writes,
kernel_body = self.parse_kernel_body(
assignments
)
analysis(assignments)
if len(self._ctx.fields.index_fields) > 0 or self._cfg.index_field is not None:
ispace = create_sparse_iteration_space(
self._ctx, assignments, index_field=self._cfg.index_field
)
else:
ispace = create_full_iteration_space(
self._ctx,
assignments,
ghost_layers=self._cfg.ghost_layers,
iteration_slice=self._cfg.iteration_slice,
)
self._ctx.set_iteration_space(ispace)
freeze = FreezeExpressions(self._ctx)
kernel_body = freeze(assignments)
typify = Typifier(self._ctx)
kernel_body = typify(kernel_body)
match self._platform:
case GenericCpu():
kernel_ast = self._platform.materialize_iteration_space(
kernel_body, ispace
kernel_body, self._ctx.get_iteration_space()
)
case GenericGpu():
kernel_ast, gpu_threads = self._platform.materialize_iteration_space(
kernel_body, ispace
kernel_body, self._ctx.get_iteration_space()
)
if self._intermediates is not None:
self._intermediates.materialized_ispace = kernel_ast.clone()
# Fold and extract constants
elim_constants = EliminateConstants(self._ctx, extract_constant_exprs=True)
kernel_ast = cast(PsBlock, elim_constants(kernel_ast))
if self._intermediates is not None:
self._intermediates.constants_eliminated = kernel_ast.clone()
# Target-Specific optimizations
if self._cfg.target.is_cpu():
kernel_ast = self._transform_for_cpu(kernel_ast)
......@@ -154,6 +146,9 @@ class DefaultKernelCreationDriver:
select_functions = SelectFunctions(self._platform)
kernel_ast = cast(PsBlock, select_functions(kernel_ast))
if self._intermediates is not None:
self._intermediates.lowered = kernel_ast.clone()
# Late canonicalization pass: Canonicalize new symbols introduced by LowerToC
canonicalize = CanonicalizeSymbols(self._ctx, True)
......@@ -179,13 +174,69 @@ class DefaultKernelCreationDriver:
self._cfg.get_jit(),
)
def parse_kernel_body(
self,
assignments: AssignmentCollection | Sequence[AssignmentBase] | AssignmentBase,
) -> PsBlock:
if isinstance(assignments, AssignmentBase):
assignments = [assignments]
if not isinstance(assignments, AssignmentCollection):
assignments = AssignmentCollection(assignments) # type: ignore
_ = _parse_simplification_hints(assignments)
analysis = KernelAnalysis(
self._ctx,
not self._cfg.skip_independence_check,
not self._cfg.allow_double_writes,
)
analysis(assignments)
if self._cfg.index_field is not None:
ispace = create_sparse_iteration_space(
self._ctx, assignments, index_field=self._cfg.index_field
)
else:
gls = self._cfg.ghost_layers
islice = self._cfg.iteration_slice
if gls is None and islice is None:
gls = AUTO
ispace = create_full_iteration_space(
self._ctx,
assignments,
ghost_layers=gls,
iteration_slice=islice,
)
self._ctx.set_iteration_space(ispace)
freeze = FreezeExpressions(self._ctx)
kernel_body = freeze(assignments)
typify = Typifier(self._ctx)
kernel_body = typify(kernel_body)
if self._intermediates is not None:
self._intermediates.parsed_body = kernel_body.clone()
return kernel_body
def _transform_for_cpu(self, kernel_ast: PsBlock):
canonicalize = CanonicalizeSymbols(self._ctx, True)
kernel_ast = cast(PsBlock, canonicalize(kernel_ast))
if self._intermediates is not None:
self._intermediates.cpu_canonicalize = kernel_ast.clone()
hoist_invariants = HoistLoopInvariantDeclarations(self._ctx)
kernel_ast = cast(PsBlock, hoist_invariants(kernel_ast))
if self._intermediates is not None:
self._intermediates.cpu_hoist_invariants = kernel_ast.clone()
cpu_cfg = self._cfg.cpu_optim
if cpu_cfg is None:
......@@ -207,6 +258,9 @@ class DefaultKernelCreationDriver:
add_omp = AddOpenMP(self._ctx, params)
kernel_ast = cast(PsBlock, add_omp(kernel_ast))
if self._intermediates is not None:
self._intermediates.cpu_openmp = kernel_ast.clone()
if cpu_cfg.use_cacheline_zeroing:
raise NotImplementedError("CL-zeroing not implemented yet")
......@@ -262,9 +316,15 @@ class DefaultKernelCreationDriver:
kernel_ast = vectorizer.vectorize_select_loops(kernel_ast, loop_predicate)
if self._intermediates is not None:
self._intermediates.cpu_vectorize = kernel_ast.clone()
select_intrin = SelectIntrinsics(self._ctx, self._platform)
kernel_ast = cast(PsBlock, select_intrin(kernel_ast))
if self._intermediates is not None:
self._intermediates.cpu_select_intrins = kernel_ast.clone()
return kernel_ast
def _get_platform(self) -> Platform:
......@@ -310,6 +370,60 @@ class DefaultKernelCreationDriver:
)
@dataclass
class StageResult:
ast: PsAstNode
label: str
class StageResultSlot:
def __init__(self, description: str | None = None):
self._description = description
self._name: str
self._lookup: str
def __set_name__(self, owner, name: str):
self._name = name
self._lookup = f"_{name}"
def __get__(self, obj, objtype=None) -> StageResult | None:
if obj is None:
return None
ast = getattr(obj, self._lookup, None)
if ast is not None:
descr = self._name if self._description is None else self._description
return StageResult(ast, descr)
else:
return None
def __set__(self, obj, val: PsAstNode | None):
setattr(obj, self._lookup, val)
class CodegenIntermediates:
"""Intermediate results produced by the code generator."""
parsed_body = StageResultSlot("Freeze & Type Deduction")
materialized_ispace = StageResultSlot("Iteration Space Materialization")
constants_eliminated = StageResultSlot("Constant Elimination")
cpu_canonicalize = StageResultSlot("CPU: Symbol Canonicalization")
cpu_hoist_invariants = StageResultSlot("CPU: Hoisting of Loop Invariants")
cpu_vectorize = StageResultSlot("CPU: Vectorization")
cpu_select_intrins = StageResultSlot("CPU: Intrinsics Selection")
cpu_openmp = StageResultSlot("CPU: OpenMP Instrumentation")
lowered = StageResultSlot("C Language Lowering")
@property
def available_stages(self) -> Sequence[StageResult]:
all_results: list[StageResult | None] = [
getattr(self, name)
for name, slot in CodegenIntermediates.__dict__.items()
if isinstance(slot, StageResultSlot)
]
return tuple(filter(lambda r: r is not None, all_results)) # type: ignore
def create_staggered_kernel(
assignments, target: Target = Target.CPU, gpu_exclusive_conditions=False, **kwargs
):
......
......@@ -57,7 +57,7 @@ class SimplificationStrategy:
def __str__(self):
try:
import tabulate
from tabulate import tabulate
return tabulate(self.elements, headers=['Name', 'Runtime', 'Adds', 'Muls', 'Divs', 'Total'])
except ImportError:
result = "Name, Adds, Muls, Divs, Runtime\n"
......
......@@ -87,7 +87,7 @@ class Target(Flag):
"""
GPU = CUDA
"""Alias for backward compatibility."""
"""Alias for `Target.CUDA`, for backward compatibility."""
SYCL = _GPU | _SYCL
"""SYCL kernel target.
......
......@@ -683,7 +683,7 @@ class PsIeeeFloatType(PsScalarType):
def create_constant(self, value: Any) -> Any:
np_type = self.NUMPY_TYPES[self._width]
if isinstance(value, (int, float, np.floating)):
if isinstance(value, (int, float, np.integer, np.floating)):
finfo = np.finfo(np_type) # type: ignore
if value < finfo.min or value > finfo.max:
raise PsTypeError(
......
"""Fixtures for the pystencils test suite
This module provides a number of fixtures used by the pystencils test suite.
Use these fixtures wherever applicable to extend the code surface area covered
by your tests:
- All tests that should work for every target should use the `target` fixture
- All tests that should work with the highest optimization level for every target
should use the `gen_config` fixture
- Use the `xp` fixture to access the correct array module (numpy or cupy) depending
on the target
"""
import pytest
from types import ModuleType
from dataclasses import replace
import pystencils as ps
AVAILABLE_TARGETS = [ps.Target.GenericCPU]
try:
import cupy
AVAILABLE_TARGETS += [ps.Target.CUDA]
except ImportError:
pass
AVAILABLE_TARGETS += ps.Target.available_vector_cpu_targets()
TARGET_IDS = [t.name for t in AVAILABLE_TARGETS]
@pytest.fixture(params=AVAILABLE_TARGETS, ids=TARGET_IDS)
def target(request) -> ps.Target:
"""Provides all code generation targets available on the current hardware"""
return request.param
@pytest.fixture
def gen_config(target: ps.Target):
"""Default codegen configuration for the current target.
For GPU targets, set default indexing options.
For vector-CPU targets, set default vectorization config.
"""
gen_config = ps.CreateKernelConfig(target=target)
if target.is_vector_cpu():
gen_config = replace(
gen_config,
cpu_optim=ps.CpuOptimConfig(
vectorize=ps.VectorizationConfig(assume_inner_stride_one=True)
),
)
return gen_config
@pytest.fixture()
def xp(target: ps.Target) -> ModuleType:
"""Primary array module for the current target.
Returns:
`cupy` if `target == Target.CUDA`, and `numpy` otherwise
"""
if target == ps.Target.CUDA:
import cupy as xp
return xp
else:
import numpy as np
return np
......@@ -3,23 +3,39 @@ import pytest
import sympy as sp
import pystencils as ps
from pystencils import TypedSymbol, DEFAULTS
from pystencils.types import create_type
from pystencils.field import Field, FieldType, layout_string_to_tuple
from pystencils import DEFAULTS
from pystencils.field import (
Field,
FieldType,
layout_string_to_tuple,
spatial_layout_string_to_tuple,
)
def test_field_basic():
f = Field.create_generic('f', spatial_dimensions=2)
f = Field.create_generic("f", spatial_dimensions=2)
assert FieldType.is_generic(f)
assert f['E'] == f[1, 0]
assert f['N'] == f[0, 1]
assert '_' in f.center._latex('dummy')
assert f.index_to_physical(index_coordinates=sp.Matrix([0, 0]), staggered=False)[0] == 0
assert f.index_to_physical(index_coordinates=sp.Matrix([0, 0]), staggered=False)[1] == 0
assert f.physical_to_index(physical_coordinates=sp.Matrix([0, 0]), staggered=False)[0] == 0
assert f.physical_to_index(physical_coordinates=sp.Matrix([0, 0]), staggered=False)[1] == 0
assert f["E"] == f[1, 0]
assert f["N"] == f[0, 1]
assert "_" in f.center._latex("dummy")
assert (
f.index_to_physical(index_coordinates=sp.Matrix([0, 0]), staggered=False)[0]
== 0
)
assert (
f.index_to_physical(index_coordinates=sp.Matrix([0, 0]), staggered=False)[1]
== 0
)
assert (
f.physical_to_index(physical_coordinates=sp.Matrix([0, 0]), staggered=False)[0]
== 0
)
assert (
f.physical_to_index(physical_coordinates=sp.Matrix([0, 0]), staggered=False)[1]
== 0
)
f1 = f.new_field_with_different_name("f1")
assert f1.ndim == f.ndim
......@@ -28,7 +44,7 @@ def test_field_basic():
fixed = ps.fields("f(5, 5) : double[20, 20]")
assert fixed.neighbor_vector((1, 1)).shape == (5, 5)
f = Field.create_fixed_size('f', (10, 10), strides=(80, 8), dtype=np.float64)
f = Field.create_fixed_size("f", (10, 10), strides=(80, 8), dtype=np.float64)
assert f.spatial_strides == (10, 1)
assert f.index_strides == ()
assert f.center_vector == sp.Matrix([f.center])
......@@ -37,84 +53,99 @@ def test_field_basic():
assert f1.ndim == f.ndim
assert f1.values_per_cell() == f.values_per_cell()
f = Field.create_fixed_size('f', (8, 8, 2, 2), index_dimensions=2)
assert f.center_vector == sp.Matrix([[f(0, 0), f(0, 1)],
[f(1, 0), f(1, 1)]])
f = Field.create_fixed_size("f", (8, 8, 2, 2), index_dimensions=2)
assert f.center_vector == sp.Matrix([[f(0, 0), f(0, 1)], [f(1, 0), f(1, 1)]])
field_access = f[1, 1]
assert field_access.nr_of_coordinates == 2
assert field_access.offset_name == 'NE'
assert field_access.offset_name == "NE"
neighbor = field_access.neighbor(coord_id=0, offset=-2)
assert neighbor.offsets == (-1, 1)
assert '_' in neighbor._latex('dummy')
assert "_" in neighbor._latex("dummy")
f = Field.create_fixed_size('f', (8, 8, 2, 2, 2), index_dimensions=3)
assert f.center_vector == sp.Array([[[f(i, j, k) for k in range(2)] for j in range(2)] for i in range(2)])
f = Field.create_fixed_size("f", (8, 8, 2, 2, 2), index_dimensions=3)
assert f.center_vector == sp.Array(
[[[f(i, j, k) for k in range(2)] for j in range(2)] for i in range(2)]
)
f = Field.create_generic('f', spatial_dimensions=5, index_dimensions=2)
f = Field.create_generic("f", spatial_dimensions=5, index_dimensions=2)
field_access = f[1, -1, 2, -3, 0](1, 0)
assert field_access.offsets == (1, -1, 2, -3, 0)
assert field_access.index == (1, 0)
def test_error_handling():
struct_dtype = np.dtype([('a', np.int32), ('b', np.float64), ('c', np.uint32)], align=True)
Field.create_generic('f', spatial_dimensions=2, index_dimensions=0, dtype=struct_dtype)
struct_dtype = np.dtype(
[("a", np.int32), ("b", np.float64), ("c", np.uint32)], align=True
)
Field.create_generic(
"f", spatial_dimensions=2, index_dimensions=0, dtype=struct_dtype
)
with pytest.raises(ValueError) as e:
Field.create_generic('f', spatial_dimensions=2, index_dimensions=1, dtype=struct_dtype)
assert 'index dimension' in str(e.value)
Field.create_generic(
"f", spatial_dimensions=2, index_dimensions=1, dtype=struct_dtype
)
assert "index dimension" in str(e.value)
arr = np.array([[[(1,)*3, (2,)*3, (3,)*3]]*2], dtype=struct_dtype)
Field.create_from_numpy_array('f', arr, index_dimensions=0)
arr = np.array([[[(1,) * 3, (2,) * 3, (3,) * 3]] * 2], dtype=struct_dtype)
Field.create_from_numpy_array("f", arr, index_dimensions=0)
with pytest.raises(ValueError) as e:
Field.create_from_numpy_array('f', arr, index_dimensions=1)
assert 'Structured arrays' in str(e.value)
Field.create_from_numpy_array("f", arr, index_dimensions=1)
assert "Structured arrays" in str(e.value)
arr = np.zeros([3, 3, 3])
Field.create_from_numpy_array('f', arr, index_dimensions=2)
Field.create_from_numpy_array("f", arr, index_dimensions=2)
with pytest.raises(ValueError) as e:
Field.create_from_numpy_array('f', arr, index_dimensions=3)
assert 'Too many' in str(e.value)
Field.create_from_numpy_array("f", arr, index_dimensions=3)
assert "Too many" in str(e.value)
Field.create_fixed_size('f', (3, 2, 4), index_dimensions=0, dtype=struct_dtype, layout='reverse_numpy')
Field.create_fixed_size(
"f", (3, 2, 4), index_dimensions=0, dtype=struct_dtype, layout="reverse_numpy"
)
with pytest.raises(ValueError) as e:
Field.create_fixed_size('f', (3, 2, 4), index_dimensions=1, dtype=struct_dtype, layout='reverse_numpy')
assert 'Structured arrays' in str(e.value)
f = Field.create_fixed_size('f', (10, 10))
Field.create_fixed_size(
"f",
(3, 2, 4),
index_dimensions=1,
dtype=struct_dtype,
layout="reverse_numpy",
)
assert "Structured arrays" in str(e.value)
f = Field.create_fixed_size("f", (10, 10))
with pytest.raises(ValueError) as e:
f[1]
assert 'Wrong number of spatial indices' in str(e.value)
assert "Wrong number of spatial indices" in str(e.value)
f = Field.create_generic('f', spatial_dimensions=2, index_shape=(3,))
f = Field.create_generic("f", spatial_dimensions=2, index_shape=(3,))
with pytest.raises(ValueError) as e:
f(3)
assert 'out of bounds' in str(e.value)
assert "out of bounds" in str(e.value)
f = Field.create_fixed_size('f', (10, 10, 3, 4), index_dimensions=2)
f = Field.create_fixed_size("f", (10, 10, 3, 4), index_dimensions=2)
with pytest.raises(ValueError) as e:
f(3, 0)
assert 'out of bounds' in str(e.value)
assert "out of bounds" in str(e.value)
with pytest.raises(ValueError) as e:
f(1, 0)(1, 0)
assert 'Indexing an already indexed' in str(e.value)
assert "Indexing an already indexed" in str(e.value)
with pytest.raises(ValueError) as e:
f(1)
assert 'Wrong number of indices' in str(e.value)
assert "Wrong number of indices" in str(e.value)
with pytest.raises(ValueError) as e:
Field.create_generic('f', spatial_dimensions=2, layout='wrong')
assert 'Unknown layout descriptor' in str(e.value)
Field.create_generic("f", spatial_dimensions=2, layout="wrong")
assert "Unknown layout descriptor" in str(e.value)
assert layout_string_to_tuple('fzyx', dim=4) == (3, 2, 1, 0)
assert layout_string_to_tuple("fzyx", dim=4) == (3, 2, 1, 0)
with pytest.raises(ValueError) as e:
layout_string_to_tuple('wrong', dim=4)
assert 'Unknown layout descriptor' in str(e.value)
layout_string_to_tuple("wrong", dim=4)
assert "Unknown layout descriptor" in str(e.value)
def test_decorator_scoping():
dst = ps.fields('dst : double[2D]')
dst = ps.fields("dst : double[2D]")
def f1():
a = sp.Symbol("a")
......@@ -134,7 +165,7 @@ def test_decorator_scoping():
def test_string_creation():
x, y, z = ps.fields(' x(4), y(3,5) z : double[ 3, 47]')
x, y, z = ps.fields(" x(4), y(3,5) z : double[ 3, 47]")
assert x.index_shape == (4,)
assert y.index_shape == (3, 5)
assert z.spatial_shape == (3, 47)
......@@ -142,19 +173,85 @@ def test_string_creation():
def test_itemsize():
x = ps.fields('x: float32[1d]')
y = ps.fields('y: float64[2d]')
i = ps.fields('i: int16[1d]')
x = ps.fields("x: float32[1d]")
y = ps.fields("y: float64[2d]")
i = ps.fields("i: int16[1d]")
assert x.itemsize == 4
assert y.itemsize == 8
assert i.itemsize == 2
def test_spatial_memory_layout_descriptors():
assert (
spatial_layout_string_to_tuple("AoS", 3)
== spatial_layout_string_to_tuple("aos", 3)
== spatial_layout_string_to_tuple("ZYXF", 3)
== spatial_layout_string_to_tuple("zyxf", 3)
== (2, 1, 0)
)
assert (
spatial_layout_string_to_tuple("SoA", 3)
== spatial_layout_string_to_tuple("soa", 3)
== spatial_layout_string_to_tuple("FZYX", 3)
== spatial_layout_string_to_tuple("fzyx", 3)
== spatial_layout_string_to_tuple("f", 3)
== spatial_layout_string_to_tuple("F", 3)
== (2, 1, 0)
)
assert (
spatial_layout_string_to_tuple("c", 3)
== spatial_layout_string_to_tuple("C", 3)
== (0, 1, 2)
)
assert spatial_layout_string_to_tuple("C", 5) == (0, 1, 2, 3, 4)
with pytest.raises(ValueError):
spatial_layout_string_to_tuple("aos", -1)
with pytest.raises(ValueError):
spatial_layout_string_to_tuple("aos", 4)
def test_memory_layout_descriptors():
assert (
layout_string_to_tuple("AoS", 4)
== layout_string_to_tuple("aos", 4)
== layout_string_to_tuple("ZYXF", 4)
== layout_string_to_tuple("zyxf", 4)
== (2, 1, 0, 3)
)
assert (
layout_string_to_tuple("SoA", 4)
== layout_string_to_tuple("soa", 4)
== layout_string_to_tuple("FZYX", 4)
== layout_string_to_tuple("fzyx", 4)
== layout_string_to_tuple("f", 4)
== layout_string_to_tuple("F", 4)
== (3, 2, 1, 0)
)
assert (
layout_string_to_tuple("c", 4)
== layout_string_to_tuple("C", 4)
== (0, 1, 2, 3)
)
assert layout_string_to_tuple("C", 5) == (0, 1, 2, 3, 4)
with pytest.raises(ValueError):
layout_string_to_tuple("aos", -1)
with pytest.raises(ValueError):
layout_string_to_tuple("aos", 5)
def test_staggered():
# D2Q5
j1, j2, j3 = ps.fields('j1(2), j2(2,2), j3(2,2,2) : double[2D]', field_type=FieldType.STAGGERED)
j1, j2, j3 = ps.fields(
"j1(2), j2(2,2), j3(2,2,2) : double[2D]", field_type=FieldType.STAGGERED
)
assert j1[0, 1](1) == j1.staggered_access((0, sp.Rational(1, 2)))
assert j1[0, 1](1) == j1.staggered_access(np.array((0, sp.Rational(1, 2))))
......@@ -163,44 +260,68 @@ def test_staggered():
assert j1[0, 1](1) == j1.staggered_access("N")
assert j1[0, 0](1) == j1.staggered_access("S")
assert j1.staggered_vector_access("N") == sp.Matrix([j1.staggered_access("N")])
assert j1.staggered_stencil_name == 'D2Q5'
assert j1.staggered_stencil_name == "D2Q5"
assert j1.physical_coordinates[0] == DEFAULTS.spatial_counters[0]
assert j1.physical_coordinates[1] == DEFAULTS.spatial_counters[1]
assert j1.physical_coordinates_staggered[0] == DEFAULTS.spatial_counters[0] + 0.5
assert j1.physical_coordinates_staggered[1] == DEFAULTS.spatial_counters[1] + 0.5
assert j1.index_to_physical(index_coordinates=sp.Matrix([0, 0]), staggered=True)[0] == 0.5
assert j1.index_to_physical(index_coordinates=sp.Matrix([0, 0]), staggered=True)[1] == 0.5
assert j1.physical_to_index(physical_coordinates=sp.Matrix([0, 0]), staggered=True)[0] == -0.5
assert j1.physical_to_index(physical_coordinates=sp.Matrix([0, 0]), staggered=True)[1] == -0.5
assert (
j1.index_to_physical(index_coordinates=sp.Matrix([0, 0]), staggered=True)[0]
== 0.5
)
assert (
j1.index_to_physical(index_coordinates=sp.Matrix([0, 0]), staggered=True)[1]
== 0.5
)
assert (
j1.physical_to_index(physical_coordinates=sp.Matrix([0, 0]), staggered=True)[0]
== -0.5
)
assert (
j1.physical_to_index(physical_coordinates=sp.Matrix([0, 0]), staggered=True)[1]
== -0.5
)
assert j2[0, 1](1, 1) == j2.staggered_access((0, sp.Rational(1, 2)), 1)
assert j2[0, 1](1, 1) == j2.staggered_access("N", 1)
assert j2.staggered_vector_access("N") == sp.Matrix([j2.staggered_access("N", 0), j2.staggered_access("N", 1)])
assert j2.staggered_vector_access("N") == sp.Matrix(
[j2.staggered_access("N", 0), j2.staggered_access("N", 1)]
)
assert j3[0, 1](1, 1, 1) == j3.staggered_access((0, sp.Rational(1, 2)), (1, 1))
assert j3[0, 1](1, 1, 1) == j3.staggered_access("N", (1, 1))
assert j3.staggered_vector_access("N") == sp.Matrix([[j3.staggered_access("N", (i, j))
for j in range(2)] for i in range(2)])
assert j3.staggered_vector_access("N") == sp.Matrix(
[[j3.staggered_access("N", (i, j)) for j in range(2)] for i in range(2)]
)
# D2Q9
k1, k2 = ps.fields('k1(4), k2(2) : double[2D]', field_type=FieldType.STAGGERED)
k1, k2 = ps.fields("k1(4), k2(2) : double[2D]", field_type=FieldType.STAGGERED)
assert k1[1, 1](2) == k1.staggered_access("NE")
assert k1[0, 0](2) == k1.staggered_access("SW")
assert k1[0, 0](3) == k1.staggered_access("NW")
a = k1.staggered_access("NE")
assert a._staggered_offset(a.offsets, a.index[0]) == [sp.Rational(1, 2), sp.Rational(1, 2)]
assert a._staggered_offset(a.offsets, a.index[0]) == [
sp.Rational(1, 2),
sp.Rational(1, 2),
]
a = k1.staggered_access("SW")
assert a._staggered_offset(a.offsets, a.index[0]) == [sp.Rational(-1, 2), sp.Rational(-1, 2)]
assert a._staggered_offset(a.offsets, a.index[0]) == [
sp.Rational(-1, 2),
sp.Rational(-1, 2),
]
a = k1.staggered_access("NW")
assert a._staggered_offset(a.offsets, a.index[0]) == [sp.Rational(-1, 2), sp.Rational(1, 2)]
assert a._staggered_offset(a.offsets, a.index[0]) == [
sp.Rational(-1, 2),
sp.Rational(1, 2),
]
# sign reversed when using as flux field
r = ps.fields('r(2) : double[2D]', field_type=FieldType.STAGGERED_FLUX)
r = ps.fields("r(2) : double[2D]", field_type=FieldType.STAGGERED_FLUX)
assert r[0, 0](0) == r.staggered_access("W")
assert -r[1, 0](0) == r.staggered_access("E")
# test_staggered()
\ No newline at end of file
# test_staggered()
......@@ -18,35 +18,6 @@ from pystencils.assignment import assignment_from_stencil
from pystencils.kernelcreation import create_kernel, KernelFunction
from pystencils.backend.emission import emit_code
AVAILABLE_TARGETS = [Target.GenericCPU]
try:
import cupy
AVAILABLE_TARGETS += [Target.CUDA]
except ImportError:
pass
AVAILABLE_TARGETS += Target.available_vector_cpu_targets()
TEST_IDS = [t.name for t in AVAILABLE_TARGETS]
@pytest.fixture(params=AVAILABLE_TARGETS, ids=TEST_IDS)
def gen_config(request):
target: Target = request.param
gen_config = CreateKernelConfig(target=target)
if Target._VECTOR in target:
gen_config = replace(
gen_config,
cpu_optim=CpuOptimConfig(
vectorize=VectorizationConfig(assume_inner_stride_one=True)
),
)
return gen_config
def inspect_dp_kernel(kernel: KernelFunction, gen_config: CreateKernelConfig):
code = emit_code(kernel)
......
......@@ -7,7 +7,6 @@ from pystencils.backend.ast import dfs_preorder
from pystencils.backend.ast.expressions import PsCall
def unary_function(name, xp):
return {
"exp": (sp.exp, xp.exp),
......
import numpy as np
import sympy as sp
import pytest
from dataclasses import replace
from pystencils import (
DEFAULTS,
Assignment,
Field,
TypedSymbol,
create_kernel,
make_slice,
Target,
CreateKernelConfig,
GpuIndexingConfig,
DynamicType,
)
from pystencils.sympyextensions.integer_functions import int_rem
from pystencils.simp import sympy_cse_on_assignment_list
from pystencils.slicing import normalize_slice
from pystencils.backend.jit.gpu_cupy import CupyKernelWrapper
def test_sliced_iteration():
size = (4, 4)
src_arr = np.ones(size)
dst_arr = np.zeros_like(src_arr)
src_field = Field.create_from_numpy_array("src", src_arr)
dst_field = Field.create_from_numpy_array("dst", dst_arr)
a, b = sp.symbols("a b")
update_rule = Assignment(
dst_field[0, 0],
(
a * src_field[0, 1]
+ a * src_field[0, -1]
+ b * src_field[1, 0]
+ b * src_field[-1, 0]
)
/ 4,
)
x_end = TypedSymbol("x_end", "int")
s = make_slice[1:x_end, 1]
x_end_value = size[1] - 1
kernel = create_kernel(
sympy_cse_on_assignment_list([update_rule]), iteration_slice=s
).compile()
kernel(src=src_arr, dst=dst_arr, a=1.0, b=1.0, x_end=x_end_value)
expected_result = np.zeros(size)
expected_result[1:x_end_value, 1] = 1
np.testing.assert_almost_equal(expected_result, dst_arr)
@pytest.mark.parametrize(
"islice",
[
make_slice[1:-1, 1:-1],
make_slice[3, 2:-2],
make_slice[2:-2:2, ::3],
make_slice[10:, :-5:2],
make_slice[-5:-1, -1],
make_slice[-3, -1]
],
)
def test_numerical_slices(gen_config: CreateKernelConfig, xp, islice):
shape = (16, 16)
f_arr = xp.zeros(shape)
expected = xp.zeros_like(f_arr)
expected[islice] = 1.0
f = Field.create_from_numpy_array("f", f_arr)
update = Assignment(f.center(), 1)
gen_config = replace(gen_config, iteration_slice=islice)
try:
kernel = create_kernel(update, gen_config).compile()
except NotImplementedError:
if gen_config.target.is_vector_cpu():
# TODO Gather/Scatter not implemented yet
pytest.xfail("Gather/Scatter not available yet")
kernel(f=f_arr)
xp.testing.assert_array_equal(f_arr, expected)
def test_symbolic_slice(gen_config: CreateKernelConfig, xp):
shape = (16, 16)
sx, sy, ex, ey = [
TypedSymbol(n, DynamicType.INDEX_TYPE) for n in ("sx", "sy", "ex", "ey")
]
f_arr = xp.zeros(shape)
f = Field.create_from_numpy_array("f", f_arr)
update = Assignment(f.center(), 1)
islice = make_slice[sy:ey, sx:ex]
gen_config = replace(gen_config, iteration_slice=islice)
kernel = create_kernel(update, gen_config).compile()
for slic in [make_slice[:, :], make_slice[1:-1, 2:-2], make_slice[8:14, 7:11]]:
slic = normalize_slice(slic, shape)
expected = xp.zeros_like(f_arr)
expected[slic] = 1.0
f_arr[:] = 0.0
kernel(
f=f_arr,
sy=slic[0].start,
ey=slic[0].stop,
sx=slic[1].start,
ex=slic[1].stop,
)
xp.testing.assert_array_equal(f_arr, expected)
def test_triangle_pattern(gen_config: CreateKernelConfig, xp):
shape = (16, 16)
f_arr = xp.zeros(shape)
f = Field.create_from_numpy_array("f", f_arr)
expected = xp.zeros_like(f_arr)
for r in range(shape[0]):
expected[r, r:] = 1.0
update = Assignment(f.center(), 1)
outer_counter = DEFAULTS.spatial_counters[0]
islice = make_slice[:, outer_counter:]
gen_config = replace(gen_config, iteration_slice=islice)
if gen_config.target == Target.CUDA:
gen_config = replace(
gen_config, gpu_indexing=GpuIndexingConfig(manual_launch_grid=True)
)
kernel = create_kernel(update, gen_config).compile()
if isinstance(kernel, CupyKernelWrapper):
kernel.block_size = shape + (1,)
kernel.num_blocks = (1, 1, 1)
kernel(f=f_arr)
xp.testing.assert_array_equal(f_arr, expected)
def test_red_black_pattern(gen_config: CreateKernelConfig, xp):
shape = (16, 16)
f_arr = xp.zeros(shape)
f = Field.create_from_numpy_array("f", f_arr)
expected = xp.zeros_like(f_arr)
for r in range(shape[0]):
start = 0 if r % 2 == 0 else 1
expected[r, start::2] = 1.0
update = Assignment(f.center(), 1)
outer_counter = DEFAULTS.spatial_counters[0]
start = sp.Piecewise((0, sp.Eq(int_rem(outer_counter, 2), 0)), (1, True))
islice = make_slice[:, start::2]
gen_config = replace(gen_config, iteration_slice=islice)
if gen_config.target == Target.CUDA:
gen_config = replace(
gen_config, gpu_indexing=GpuIndexingConfig(manual_launch_grid=True)
)
try:
kernel = create_kernel(update, gen_config).compile()
except NotImplementedError:
if gen_config.target.is_vector_cpu():
pytest.xfail("Gather/Scatter not implemented yet")
if isinstance(kernel, CupyKernelWrapper):
kernel.block_size = (8, 16, 1)
kernel.num_blocks = (1, 1, 1)
kernel(f=f_arr)
xp.testing.assert_array_equal(f_arr, expected)
import numpy as np
import sympy as sp
from pystencils import Assignment, Field, TypedSymbol, create_kernel, make_slice
from pystencils.simp import sympy_cse_on_assignment_list
def test_sliced_iteration():
size = (4, 4)
src_arr = np.ones(size)
dst_arr = np.zeros_like(src_arr)
src_field = Field.create_from_numpy_array('src', src_arr)
dst_field = Field.create_from_numpy_array('dst', dst_arr)
a, b = sp.symbols("a b")
update_rule = Assignment(dst_field[0, 0],
(a * src_field[0, 1] + a * src_field[0, -1] +
b * src_field[1, 0] + b * src_field[-1, 0]) / 4)
x_end = TypedSymbol("x_end", "int")
s = make_slice[1:x_end, 1]
x_end_value = size[1] - 1
kernel = create_kernel(sympy_cse_on_assignment_list([update_rule]), iteration_slice=s).compile()
kernel(src=src_arr, dst=dst_arr, a=1.0, b=1.0, x_end=x_end_value)
expected_result = np.zeros(size)
expected_result[1:x_end_value, 1] = 1
np.testing.assert_almost_equal(expected_result, dst_arr)
......@@ -129,6 +129,36 @@ def test_slices_with_negative_start():
)
def test_negative_singular_slices():
ctx = KernelCreationContext()
factory = AstFactory(ctx)
archetype_field = Field.create_generic("f", spatial_dimensions=2, layout="fzyx")
ctx.add_field(archetype_field)
archetype_arr = ctx.get_buffer(archetype_field)
islice = (-2, -1)
ispace = FullIterationSpace.create_from_slice(ctx, islice, archetype_field)
dims = ispace.dimensions
assert dims[0].start.structurally_equal(
PsExpression.make(archetype_arr.shape[0]) + factory.parse_index(-2)
)
assert dims[0].stop.structurally_equal(
PsExpression.make(archetype_arr.shape[0]) + factory.parse_index(-1)
)
assert dims[1].start.structurally_equal(
PsExpression.make(archetype_arr.shape[1]) + factory.parse_index(-1)
)
assert dims[1].stop.structurally_equal(
PsExpression.make(archetype_arr.shape[1])
)
def test_field_independent_slices():
ctx = KernelCreationContext()
......
......@@ -89,6 +89,11 @@ TEST_SETUPS: list[VectorTestSetup] = list(
TEST_IDS = [t.name for t in TEST_SETUPS]
@pytest.fixture(params=TEST_SETUPS, ids=TEST_IDS)
def vectorization_setup(request) -> VectorTestSetup:
return request.param
def create_vector_kernel(
assignments: list[Assignment],
field: Field,
......@@ -139,9 +144,9 @@ def create_vector_kernel(
return kernel
@pytest.mark.parametrize("setup", TEST_SETUPS, ids=TEST_IDS)
@pytest.mark.parametrize("ghost_layers", [0, 2])
def test_update_kernel(setup: VectorTestSetup, ghost_layers: int):
def test_update_kernel(vectorization_setup: VectorTestSetup, ghost_layers: int):
setup = vectorization_setup
src, dst = fields(f"src(2), dst(4): {setup.numeric_dtype}[2D]", layout="fzyx")
x = sp.symbols("x_:4")
......@@ -197,8 +202,8 @@ def test_update_kernel(setup: VectorTestSetup, ghost_layers: int):
np.testing.assert_equal(dst_arr[:, -i, :], 0.0)
@pytest.mark.parametrize("setup", TEST_SETUPS, ids=TEST_IDS)
def test_trailing_iterations(setup: VectorTestSetup):
def test_trailing_iterations(vectorization_setup: VectorTestSetup):
setup = vectorization_setup
f = fields(f"f(1): {setup.numeric_dtype}[1D]", layout="fzyx")
update = [Assignment(f(0), 2 * f(0))]
......@@ -216,3 +221,24 @@ def test_trailing_iterations(setup: VectorTestSetup):
kernel(f=f_arr)
np.testing.assert_equal(f_arr, 2.0)
def test_only_trailing_iterations(vectorization_setup: VectorTestSetup):
setup = vectorization_setup
f = fields(f"f(1): {setup.numeric_dtype}[1D]", layout="fzyx")
update = [Assignment(f(0), 2 * f(0))]
kernel = create_vector_kernel(update, f, setup)
for trailing_iters in range(1, setup.lanes):
shape = (trailing_iters, 1)
f_arr = create_numpy_array_with_layout(
shape, layout=(1, 0), dtype=setup.numeric_dtype.numpy_dtype
)
f_arr[:] = 1.0
kernel(f=f_arr)
np.testing.assert_equal(f_arr, 2.0)