Compare revisions

Changes are shown as if the source revision was being merged into the target revision.
Showing 652 additions and 337 deletions
from __future__ import annotations

from dataclasses import dataclass
-from typing import TYPE_CHECKING
from typing import Sequence
from collections import defaultdict

@@ -10,8 +9,6 @@ from ..ast import PsAstNode
from ..ast.structural import PsBlock, PsLoop, PsPragma
from ..ast.expressions import PsExpression

-if TYPE_CHECKING:
-    from ...codegen.config import OpenMpConfig

__all__ = ["InsertPragmasAtLoops", "LoopPragma", "AddOpenMP"]
@@ -101,23 +98,40 @@ class InsertPragmasAtLoops:
class AddOpenMP:
    """Apply OpenMP directives to loop nests.

-    This transformation augments the AST with OpenMP pragmas according to the given
-    `OpenMpConfig` configuration.
+    This transformation augments the AST with OpenMP pragmas according to the given configuration.
    """

-    def __init__(self, ctx: KernelCreationContext, omp_params: OpenMpConfig) -> None:
+    def __init__(
+        self,
+        ctx: KernelCreationContext,
+        nesting_depth: int = 0,
+        num_threads: int | None = None,
+        schedule: str | None = None,
+        collapse: int | None = None,
+        omit_parallel: bool = False,
+    ) -> None:
        pragma_text = "omp"

-        pragma_text += " parallel" if not omp_params.omit_parallel_construct else ""
-        pragma_text += f" for schedule({omp_params.schedule})"
-        if omp_params.num_threads is not None:
-            pragma_text += f" num_threads({str(omp_params.num_threads)})"
+        if not omit_parallel:
+            pragma_text += " parallel"
+        pragma_text += " for"
+        if schedule is not None:
+            pragma_text += f" schedule({schedule})"
+        if num_threads is not None:
+            pragma_text += f" num_threads({str(num_threads)})"

-        if omp_params.collapse > 0:
-            pragma_text += f" collapse({str(omp_params.collapse)})"
+        if collapse is not None:
+            if collapse <= 0:
+                raise ValueError(
+                    f"Invalid value for OpenMP `collapse` clause: {collapse}"
+                )
+            pragma_text += f" collapse({str(collapse)})"

        self._insert_pragmas = InsertPragmasAtLoops(
-            ctx, [LoopPragma(pragma_text, omp_params.nesting_depth)]
+            ctx, [LoopPragma(pragma_text, nesting_depth)]
        )

    def __call__(self, node: PsAstNode) -> PsAstNode:
......
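For orientation: the constructor above assembles a single pragma string from its clause options. The following standalone sketch re-traces that assembly outside of pystencils (the helper name omp_pragma_text is hypothetical, not part of the diff):

def omp_pragma_text(
    num_threads: int | None = None,
    schedule: str | None = None,
    collapse: int | None = None,
    omit_parallel: bool = False,
) -> str:
    # Re-traces AddOpenMP.__init__'s clause assembly (hypothetical helper)
    text = "omp"
    if not omit_parallel:
        text += " parallel"
    text += " for"
    if schedule is not None:
        text += f" schedule({schedule})"
    if num_threads is not None:
        text += f" num_threads({num_threads})"
    if collapse is not None:
        if collapse <= 0:
            raise ValueError(f"Invalid OpenMP `collapse` value: {collapse}")
        text += f" collapse({collapse})"
    return text

assert omp_pragma_text(schedule="static", num_threads=8, collapse=2) == \
    "omp parallel for schedule(static) num_threads(8) collapse(2)"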
from .target import Target
from .config import (
    CreateKernelConfig,
-    CpuOptimConfig,
-    VectorizationConfig,
-    OpenMpConfig,
-    GpuIndexingConfig,
    AUTO,
)
from .parameters import Parameter
@@ -14,10 +10,6 @@ from .driver import create_kernel, get_driver
__all__ = [
    "Target",
    "CreateKernelConfig",
-    "CpuOptimConfig",
-    "VectorizationConfig",
-    "OpenMpConfig",
-    "GpuIndexingConfig",
    "AUTO",
    "Parameter",
    "Kernel",
......
This diff is collapsed.
@@ -3,12 +3,20 @@ from typing import cast, Sequence, Iterable, TYPE_CHECKING
from dataclasses import dataclass, replace
from .target import Target
-from .config import CreateKernelConfig, OpenMpConfig, VectorizationConfig, AUTO
+from .config import (
+    CreateKernelConfig,
+    VectorizationOptions,
+    AUTO,
+    _AUTO_TYPE,
+    GhostLayerSpec,
+    IterationSliceSpec,
+)
from .kernel import Kernel, GpuKernel, GpuThreadsRange
from .properties import PsSymbolProperty, FieldShape, FieldStride, FieldBasePtr
from .parameters import Parameter
-from ..types import create_numeric_type, PsIntegerType, PsScalarType
+from ..field import Field
+from ..types import PsIntegerType, PsScalarType
from ..backend.memory import PsSymbol
from ..backend.ast import PsAstNode
@@ -105,15 +113,38 @@ class DefaultKernelCreationDriver:
    def __init__(self, cfg: CreateKernelConfig, retain_intermediates: bool = False):
        self._cfg = cfg

-        idx_dtype = create_numeric_type(self._cfg.index_dtype)
-        assert isinstance(idx_dtype, PsIntegerType)
+        # Data Type Options
+        idx_dtype: PsIntegerType = cfg.get_option("index_dtype")
+        default_dtype: PsScalarType = cfg.get_option("default_dtype")
+
+        # Iteration Space Options
+        num_ispace_options_set = (
+            int(cfg.is_option_set("ghost_layers"))
+            + int(cfg.is_option_set("iteration_slice"))
+            + int(cfg.is_option_set("index_field"))
+        )
+        if num_ispace_options_set > 1:
+            raise ValueError(
+                "At most one of the options 'ghost_layers' 'iteration_slice' and 'index_field' may be set."
+            )
+
+        self._ghost_layers: GhostLayerSpec | None = cfg.get_option("ghost_layers")
+        self._iteration_slice: IterationSliceSpec | None = cfg.get_option(
+            "iteration_slice"
+        )
+        self._index_field: Field | None = cfg.get_option("index_field")
+
+        if num_ispace_options_set == 0:
+            self._ghost_layers = AUTO
+
+        # Create the context
        self._ctx = KernelCreationContext(
-            default_dtype=create_numeric_type(self._cfg.default_dtype),
+            default_dtype=default_dtype,
            index_dtype=idx_dtype,
        )

-        self._target = self._cfg.get_target()
+        self._target = cfg.get_target()
        self._platform = self._get_platform()

        self._intermediates: CodegenIntermediates | None
@@ -153,7 +184,7 @@ class DefaultKernelCreationDriver:
            self._intermediates.constants_eliminated = kernel_ast.clone()

        # Target-Specific optimizations
-        if self._cfg.target.is_cpu():
+        if self._target.is_cpu():
            kernel_ast = self._transform_for_cpu(kernel_ast)

        # Note: After this point, the AST may contain intrinsics, so type-dependent
@@ -174,13 +205,13 @@ class DefaultKernelCreationDriver:
        canonicalize = CanonicalizeSymbols(self._ctx, True)
        kernel_ast = cast(PsBlock, canonicalize(kernel_ast))

-        if self._cfg.target.is_cpu():
+        if self._target.is_cpu():
            return create_cpu_kernel_function(
                self._ctx,
                self._platform,
                kernel_ast,
-                self._cfg.function_name,
-                self._cfg.target,
+                self._cfg.get_option("function_name"),
+                self._target,
                self._cfg.get_jit(),
            )
        else:
@@ -189,8 +220,8 @@ class DefaultKernelCreationDriver:
                self._platform,
                kernel_ast,
                gpu_threads,
-                self._cfg.function_name,
-                self._cfg.target,
+                self._cfg.get_option("function_name"),
+                self._target,
                self._cfg.get_jit(),
            )
@@ -213,22 +244,26 @@ class DefaultKernelCreationDriver:
        )
        analysis(assignments)

-        if self._cfg.index_field is not None:
+        if self._index_field is not None:
            ispace = create_sparse_iteration_space(
                self._ctx, assignments, index_field=self._cfg.index_field
            )
        else:
-            gls = self._cfg.ghost_layers
-            islice = self._cfg.iteration_slice
-            if gls is None and islice is None:
-                gls = AUTO
+            gls: GhostLayerSpec | None
+            if self._ghost_layers == AUTO:
+                infer_gls = True
+                gls = None
+            else:
+                assert not isinstance(self._ghost_layers, _AUTO_TYPE)
+                infer_gls = False
+                gls = self._ghost_layers

            ispace = create_full_iteration_space(
                self._ctx,
                assignments,
                ghost_layers=gls,
-                iteration_slice=islice,
+                iteration_slice=self._iteration_slice,
+                infer_ghost_layers=infer_gls,
            )

        self._ctx.set_iteration_space(ispace)
@@ -257,7 +292,7 @@ class DefaultKernelCreationDriver:
        if self._intermediates is not None:
            self._intermediates.cpu_hoist_invariants = kernel_ast.clone()

-        cpu_cfg = self._cfg.cpu_optim
+        cpu_cfg = self._cfg.cpu

        if cpu_cfg is None:
            return kernel_ast
@@ -266,30 +301,41 @@ class DefaultKernelCreationDriver:
            raise NotImplementedError("Loop blocking not implemented yet.")

        kernel_ast = self._vectorize(kernel_ast)
+        kernel_ast = self._add_openmp(kernel_ast)
+
+        if cpu_cfg.use_cacheline_zeroing:
+            raise NotImplementedError("CL-zeroing not implemented yet")
+
+        return kernel_ast
+
+    def _add_openmp(self, kernel_ast: PsBlock) -> PsBlock:
+        omp_options = self._cfg.cpu.openmp
+        enable_omp: bool = omp_options.get_option("enable")

-        if cpu_cfg.openmp is not False:
+        if enable_omp:
            from ..backend.transformations import AddOpenMP

-            params = (
-                cpu_cfg.openmp
-                if isinstance(cpu_cfg.openmp, OpenMpConfig)
-                else OpenMpConfig()
+            add_omp = AddOpenMP(
+                self._ctx,
+                nesting_depth=omp_options.get_option("nesting_depth"),
+                num_threads=omp_options.get_option("num_threads"),
+                schedule=omp_options.get_option("schedule"),
+                collapse=omp_options.get_option("collapse"),
+                omit_parallel=omp_options.get_option("omit_parallel_construct"),
            )
-            add_omp = AddOpenMP(self._ctx, params)
            kernel_ast = cast(PsBlock, add_omp(kernel_ast))

            if self._intermediates is not None:
                self._intermediates.cpu_openmp = kernel_ast.clone()

-        if cpu_cfg.use_cacheline_zeroing:
-            raise NotImplementedError("CL-zeroing not implemented yet")
-
        return kernel_ast
    def _vectorize(self, kernel_ast: PsBlock) -> PsBlock:
-        assert self._cfg.cpu_optim is not None
-        vec_config = self._cfg.cpu_optim.get_vectorization_config()
-        if vec_config is None:
+        vec_options = self._cfg.cpu.vectorize
+        enable_vec = vec_options.get_option("enable")
+        if not enable_vec:
            return kernel_ast

        from ..backend.transformations import LoopVectorizer, SelectIntrinsics
@@ -306,7 +352,9 @@ class DefaultKernelCreationDriver:
        inner_loop_dim = ispace.dimensions[inner_loop_coord]

        # Apply stride (TODO: and alignment) assumptions
-        if vec_config.assume_inner_stride_one:
+        assume_unit_stride: bool = vec_options.get_option("assume_inner_stride_one")
+
+        if assume_unit_stride:
            for field in self._ctx.fields:
                buf = self._ctx.get_buffer(field)
                inner_stride = buf.strides[inner_loop_coord]
@@ -322,14 +370,14 @@
        # TODO: Communicate assumption to runtime system via a precondition

        # Call loop vectorizer
-        if vec_config.lanes is None:
-            lanes = VectorizationConfig.default_lanes(
+        num_lanes: int | None = vec_options.get_option("lanes")
+        if num_lanes is None:
+            num_lanes = VectorizationOptions.default_lanes(
                self._target, cast(PsScalarType, self._ctx.default_dtype)
            )
-        else:
-            lanes = vec_config.lanes
-        vectorizer = LoopVectorizer(self._ctx, lanes)
+        vectorizer = LoopVectorizer(self._ctx, num_lanes)

        def loop_predicate(loop: PsLoop):
            return loop.counter.symbol == inner_loop_dim.counter
@@ -375,15 +423,30 @@
            )
        elif Target._GPU in self._target:
+            gpu_opts = self._cfg.gpu
+            omit_range_check: bool = gpu_opts.get_option("omit_range_check")
+
            match self._target:
                case Target.SYCL:
                    from ..backend.platforms import SyclPlatform

-                    return SyclPlatform(self._ctx, self._cfg.gpu_indexing)
+                    auto_block_size: bool = self._cfg.sycl.get_option("automatic_block_size")
+
+                    return SyclPlatform(
+                        self._ctx,
+                        omit_range_check=omit_range_check,
+                        automatic_block_size=auto_block_size,
+                    )
                case Target.CUDA:
                    from ..backend.platforms import CudaPlatform

-                    return CudaPlatform(self._ctx, self._cfg.gpu_indexing)
+                    manual_grid = gpu_opts.get_option("manual_launch_grid")
+
+                    return CudaPlatform(
+                        self._ctx,
+                        omit_range_check=omit_range_check,
+                        manual_launch_grid=manual_grid,
+                    )

        raise NotImplementedError(
            f"Code generation for target {self._target} not implemented"
......
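The driver now validates the iteration-space options itself: at most one of ghost_layers, iteration_slice and index_field may be set, and when none is set, ghost layers default to AUTO. A minimal self-contained sketch of that rule (the AUTO sentinel here is a stand-in, not the pystencils object):

AUTO = object()  # stand-in sentinel; pystencils defines its own

def resolve_ispace_options(ghost_layers=None, iteration_slice=None, index_field=None):
    # Count how many of the mutually exclusive options were set
    num_set = sum(opt is not None for opt in (ghost_layers, iteration_slice, index_field))
    if num_set > 1:
        raise ValueError(
            "At most one of the options 'ghost_layers', 'iteration_slice' "
            "and 'index_field' may be set."
        )
    if num_set == 0:
        ghost_layers = AUTO  # infer ghost layers from the field accesses later
    return ghost_layers, iteration_slice, index_field

assert resolve_ispace_options()[0] is AUTO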
@@ -35,7 +35,7 @@ class PsCustomType(PsType):
        return self._name

    def c_string(self) -> str:
-        return f"{self._const_string()} {self._name}"
+        return f"{self._const_string()}{self._name}"

    def __repr__(self) -> str:
        return f"CustomType( {self.name}, const={self.const} )"
......
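The one-character change to c_string drops a hard-coded space. Assuming _const_string() returns either "const " (with its own trailing blank) or the empty string, the old version rendered non-const custom types with a stray leading space. A sketch of the corrected behavior:

def c_string(name: str, const: bool) -> str:
    const_str = "const " if const else ""  # assumed behavior of _const_string()
    return f"{const_str}{name}"

assert c_string("MyType", True) == "const MyType"
assert c_string("MyType", False) == "MyType"  # old version yielded " MyType"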
import pytest
from dataclasses import dataclass
import numpy as np
from pystencils.codegen.config import (
    BasicOption,
    Option,
    Category,
    ConfigBase,
    CreateKernelConfig,
    CpuOptions,
)
from pystencils.field import Field, FieldType
from pystencils.types.quick import Int, UInt, Fp, Ptr
from pystencils.types import PsVectorType
def test_descriptors():
    @dataclass
    class SampleCategory(ConfigBase):
        val1: BasicOption[int] = BasicOption(2)
        val2: Option[bool, str | bool] = Option(False)

        @val2.validate
        def validate_val2(self, v: str | bool):
            if isinstance(v, str):
                if v.lower() in ("off", "false", "no"):
                    return False
                elif v.lower() in ("on", "true", "yes"):
                    return True
                raise ValueError()
            else:
                return v

    @dataclass
    class SampleConfig(ConfigBase):
        cat: Category[SampleCategory] = Category(SampleCategory())
        val: BasicOption[str] = BasicOption("fallback")

    cfg = SampleConfig()

    # Check unset and default values
    assert cfg.val is None
    assert cfg.get_option("val") == "fallback"

    # Check setting
    cfg.val = "test"
    assert cfg.val == "test"
    assert cfg.get_option("val") == "test"
    assert cfg.is_option_set("val")

    # Check unsetting
    cfg.val = None
    assert not cfg.is_option_set("val")
    assert cfg.val is None

    # Check category
    assert cfg.cat.val1 is None
    assert cfg.cat.get_option("val1") == 2
    assert cfg.cat.val2 is None
    assert cfg.cat.get_option("val2") is False

    # Check copy on category setting
    c = SampleCategory(32, "on")
    cfg.cat = c
    assert cfg.cat.val1 == 32
    assert cfg.cat.val2 is True
    assert cfg.cat is not c

    c.val1 = 13
    assert cfg.cat.val1 == 32

    # Check that category objects on two config objects are not the same
    cfg1 = SampleConfig()
    cfg2 = SampleConfig()
    assert cfg1.cat is not cfg2.cat
def test_category_init():
    cfg1 = CreateKernelConfig()
    cfg2 = CreateKernelConfig()

    assert cfg1.cpu is not cfg2.cpu
    assert cfg1.cpu.openmp is not cfg2.cpu.openmp
    assert cfg1.cpu.vectorize is not cfg2.cpu.vectorize
    assert cfg1.gpu is not cfg2.gpu


def test_category_copy():
    cfg = CreateKernelConfig()

    cpu_repl = CpuOptions()
    cpu_repl.openmp.num_threads = 42

    cfg.cpu = cpu_repl
    assert cfg.cpu.openmp.num_threads == 42
    assert cfg.cpu is not cpu_repl
    assert cfg.cpu.openmp is not cpu_repl.openmp
def test_config_validation():
    # Check index dtype validation
    cfg = CreateKernelConfig(index_dtype="int32")
    assert cfg.index_dtype == Int(32)

    cfg.index_dtype = np.uint64
    assert cfg.index_dtype == UInt(64)

    with pytest.raises(ValueError):
        _ = CreateKernelConfig(index_dtype=np.float32)

    with pytest.raises(ValueError):
        cfg.index_dtype = "double"

    # Check default dtype validation
    cfg = CreateKernelConfig(default_dtype="float32")
    assert cfg.default_dtype == Fp(32)

    cfg.default_dtype = np.int64
    assert cfg.default_dtype == Int(64)

    with pytest.raises(ValueError):
        cfg.default_dtype = PsVectorType(Fp(64), 4)

    with pytest.raises(ValueError):
        _ = CreateKernelConfig(default_dtype=Ptr(Fp(32)))

    # Check index field validation
    idx_field = Field.create_generic(
        "idx", spatial_dimensions=1, field_type=FieldType.INDEXED
    )
    cfg.index_field = idx_field
    assert cfg.index_field == idx_field

    with pytest.raises(ValueError):
        cfg.index_field = Field.create_generic(
            "idx", spatial_dimensions=1, field_type=FieldType.GENERIC
        )
def test_override():
    cfg1 = CreateKernelConfig()
    cfg1.function_name = "test"
    cfg1.cpu.openmp.schedule = "dynamic"
    cfg1.gpu.manual_launch_grid = False
    cfg1.allow_double_writes = True

    cfg2 = CreateKernelConfig()
    cfg2.function_name = "func"
    cfg2.cpu.openmp.schedule = "static(5)"
    cfg2.cpu.vectorize.lanes = 12
    cfg2.allow_double_writes = False

    cfg1.override(cfg2)

    assert cfg1.function_name == "func"
    assert cfg1.cpu.openmp.schedule == "static(5)"
    assert cfg1.cpu.openmp.enable is None
    assert cfg1.cpu.vectorize.lanes == 12
    assert cfg1.cpu.vectorize.assume_aligned is None
    assert cfg1.allow_double_writes is False
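test_descriptors above exercises the new descriptor-based option system: unset options read as None, while get_option falls back to the declared default. A simplified, self-contained sketch of how such a BasicOption descriptor can work (illustrative only, not the actual pystencils implementation):

class BasicOption:
    def __init__(self, default=None):
        self._default = default

    def __set_name__(self, owner, name):
        self._attr = f"_{name}"

    def __get__(self, obj, objtype=None):
        if obj is None:
            return self
        return getattr(obj, self._attr, None)  # unset options read as None

    def __set__(self, obj, value):
        setattr(obj, self._attr, value)  # assigning None unsets the option


class DemoConfig:
    val = BasicOption("fallback")

    def get_option(self, name):
        value = getattr(self, name)
        return value if value is not None else type(self).__dict__[name]._default


cfg = DemoConfig()
assert cfg.val is None and cfg.get_option("val") == "fallback"
cfg.val = "test"
assert cfg.get_option("val") == "test"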
@@ -41,7 +41,7 @@ def target(request) -> ps.Target:
@pytest.fixture
def gen_config(target: ps.Target):
    """Default codegen configuration for the current target.

    For GPU targets, set default indexing options.
    For vector-CPU targets, set default vectorization config.
    """
@@ -49,12 +49,8 @@ def gen_config(target: ps.Target):
    gen_config = ps.CreateKernelConfig(target=target)

    if target.is_vector_cpu():
-        gen_config = replace(
-            gen_config,
-            cpu_optim=ps.CpuOptimConfig(
-                vectorize=ps.VectorizationConfig(assume_inner_stride_one=True)
-            ),
-        )
+        gen_config.cpu.vectorize.enable = True
+        gen_config.cpu.vectorize.assume_inner_stride_one = True

    return gen_config
@@ -62,13 +58,15 @@ def gen_config(target: ps.Target):
@pytest.fixture()
def xp(target: ps.Target) -> ModuleType:
    """Primary array module for the current target.

    Returns:
        `cupy` if `target == Target.CUDA`, and `numpy` otherwise
    """
    if target == ps.Target.CUDA:
        import cupy as xp

        return xp
    else:
        import numpy as np

        return np
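A hypothetical test using the xp fixture might look as follows (illustrative only; the test name and body are not part of the diff):

def test_array_roundtrip(xp):
    # xp resolves to numpy on CPU targets and cupy on CUDA (per the fixture above)
    arr = xp.zeros((8, 8), dtype="float64")
    arr[:] = 1.0
    assert float(xp.sum(arr)) == 64.0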
@@ -299,8 +299,7 @@ def test_iteration_slices(gpu_indexing):
    gpu_src_arr.set(src_arr)
    gpu_dst_arr.fill(0)

-    config = CreateKernelConfig(target=Target.GPU, iteration_slice=pack_slice,
-                                gpu_indexing=gpu_indexing)
+    config = CreateKernelConfig(target=Target.GPU, iteration_slice=pack_slice)

    pack_code = create_kernel(pack_eqs, config=config)
    pack_kernel = pack_code.compile()
@@ -312,8 +311,7 @@
        eq = Assignment(dst_field(idx), buffer(idx))
        unpack_eqs.append(eq)

-    config = CreateKernelConfig(target=Target.GPU, iteration_slice=pack_slice,
-                                gpu_indexing=gpu_indexing)
+    config = CreateKernelConfig(target=Target.GPU, iteration_slice=pack_slice)

    unpack_code = create_kernel(unpack_eqs, config=config)
    unpack_kernel = unpack_code.compile()
......
@@ -112,7 +112,7 @@ def test_ghost_layer():
    update_rule = Assignment(dst_field[0, 0], src_field[0, 0])
    ghost_layers = [(1, 2), (2, 1)]

-    config = CreateKernelConfig(target=Target.GPU, ghost_layers=ghost_layers, gpu_indexing="line")
+    config = CreateKernelConfig(target=Target.GPU, ghost_layers=ghost_layers, gpu="line")

    ast = create_kernel(sympy_cse_on_assignment_list([update_rule]), config=config)
    kernel = ast.compile()
@@ -135,7 +135,7 @@ def test_setting_value():
    f = Field.create_generic("f", 2)
    update_rule = [Assignment(f(0), sp.Symbol("value"))]

-    config = CreateKernelConfig(target=Target.GPU, gpu_indexing="line", iteration_slice=iteration_slice)
+    config = CreateKernelConfig(target=Target.GPU, gpu="line", iteration_slice=iteration_slice)

    ast = create_kernel(sympy_cse_on_assignment_list(update_rule), config=config)
    kernel = ast.compile()
@@ -207,7 +207,7 @@ def test_four_dimensional_kernel(gpu_indexing, layout, shape):
    f = Field.create_from_numpy_array("f", arr_cpu)
    update_rule = [Assignment(f.center, sp.Symbol("value"))]

-    config = CreateKernelConfig(target=Target.GPU, gpu_indexing=gpu_indexing, iteration_slice=iteration_slice)
+    config = CreateKernelConfig(target=Target.GPU, gpu=gpu_indexing, iteration_slice=iteration_slice)

    ast = create_kernel(update_rule, config=config)
    kernel = ast.compile()
......
@@ -13,7 +13,6 @@ from pystencils import (
    make_slice,
    Target,
    CreateKernelConfig,
-    GpuIndexingConfig,
    DynamicType,
)
from pystencils.sympyextensions.integer_functions import int_rem
@@ -81,7 +80,7 @@ def test_numerical_slices(gen_config: CreateKernelConfig, xp, islice):
    try:
        kernel = create_kernel(update, gen_config).compile()
    except NotImplementedError:
-        if gen_config.target.is_vector_cpu():
+        if gen_config.get_target().is_vector_cpu():
            # TODO Gather/Scatter not implemented yet
            pytest.xfail("Gather/Scatter not available yet")
@@ -104,6 +103,9 @@ def test_symbolic_slice(gen_config: CreateKernelConfig, xp):
    update = Assignment(f.center(), 1)
    islice = make_slice[sy:ey, sx:ex]
    gen_config = replace(gen_config, iteration_slice=islice)

+    print(repr(gen_config))

    kernel = create_kernel(update, gen_config).compile()

    for slic in [make_slice[:, :], make_slice[1:-1, 2:-2], make_slice[8:14, 7:11]]:
@@ -140,9 +142,7 @@ def test_triangle_pattern(gen_config: CreateKernelConfig, xp):
    gen_config = replace(gen_config, iteration_slice=islice)

    if gen_config.target == Target.CUDA:
-        gen_config = replace(
-            gen_config, gpu_indexing=GpuIndexingConfig(manual_launch_grid=True)
-        )
+        gen_config.gpu.manual_launch_grid = True

    kernel = create_kernel(update, gen_config).compile()
@@ -170,17 +170,15 @@ def test_red_black_pattern(gen_config: CreateKernelConfig, xp):
    outer_counter = DEFAULTS.spatial_counters[0]
    start = sp.Piecewise((0, sp.Eq(int_rem(outer_counter, 2), 0)), (1, True))
    islice = make_slice[:, start::2]
-    gen_config = replace(gen_config, iteration_slice=islice)
+    gen_config.iteration_slice = islice

    if gen_config.target == Target.CUDA:
-        gen_config = replace(
-            gen_config, gpu_indexing=GpuIndexingConfig(manual_launch_grid=True)
-        )
+        gen_config.gpu.manual_launch_grid = True

    try:
        kernel = create_kernel(update, gen_config).compile()
    except NotImplementedError:
-        if gen_config.target.is_vector_cpu():
+        if gen_config.get_target().is_vector_cpu():
            pytest.xfail("Gather/Scatter not implemented yet")

    if isinstance(kernel, CupyKernelWrapper):
......
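A pattern visible throughout this hunk: since the rewritten config object is mutable, tests assign options in place instead of copying with dataclasses.replace. A short sketch of the new style (assuming pystencils is importable; the option names are taken from the diff):

import pystencils as ps

cfg = ps.CreateKernelConfig(target=ps.Target.CPU)
cfg.iteration_slice = ps.make_slice[1:-1, 1:-1]  # top-level option, set directly
cfg.cpu.openmp.enable = True                     # nested option categories are mutable too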
@@ -4,8 +4,6 @@ from pystencils import (
    Assignment,
    create_kernel,
    CreateKernelConfig,
-    CpuOptimConfig,
-    OpenMpConfig,
    Target,
)
@@ -15,21 +13,18 @@ from pystencils.backend.ast.structural import PsLoop, PsPragma
@pytest.mark.parametrize("nesting_depth", range(3))
@pytest.mark.parametrize("schedule", ["static", "static,16", "dynamic", "auto"])
-@pytest.mark.parametrize("collapse", range(3))
+@pytest.mark.parametrize("collapse", [None, 1, 2])
@pytest.mark.parametrize("omit_parallel_construct", range(3))
def test_openmp(nesting_depth, schedule, collapse, omit_parallel_construct):
    f, g = fields("f, g: [3D]")
    asm = Assignment(f.center(0), g.center(0))

-    omp = OpenMpConfig(
-        nesting_depth=nesting_depth,
-        schedule=schedule,
-        collapse=collapse,
-        omit_parallel_construct=omit_parallel_construct,
-    )
-    gen_config = CreateKernelConfig(
-        target=Target.CPU, cpu_optim=CpuOptimConfig(openmp=omp)
-    )
+    gen_config = CreateKernelConfig(target=Target.CPU)
+    gen_config.cpu.openmp.enable = True
+    gen_config.cpu.openmp.nesting_depth = nesting_depth
+    gen_config.cpu.openmp.schedule = schedule
+    gen_config.cpu.openmp.collapse = collapse
+    gen_config.cpu.openmp.omit_parallel_construct = omit_parallel_construct

    kernel = create_kernel(asm, gen_config)
    ast = kernel.body
@@ -52,10 +47,10 @@ def test_openmp(nesting_depth, schedule, collapse, omit_parallel_construct):
    pragma = find_omp_pragma(ast)
    tokens = set(pragma.text.split())

-    expected_tokens = {"omp", "for", f"schedule({omp.schedule})"}
-    if not omp.omit_parallel_construct:
+    expected_tokens = {"omp", "for", f"schedule({schedule})"}
+    if not omit_parallel_construct:
        expected_tokens.add("parallel")
-    if omp.collapse > 0:
-        expected_tokens.add(f"collapse({omp.collapse})")
+    if collapse is not None:
+        expected_tokens.add(f"collapse({collapse})")

    assert tokens == expected_tokens
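As a concrete instance of the token check above: for, say, schedule="static,16", collapse=2 and the parallel construct not omitted, the pragma should carry exactly these tokens (a worked example, not an additional test in the diff):

# Expected pragma text (token order as assembled by AddOpenMP; the test compares sets)
expected = "omp parallel for schedule(static,16) collapse(2)"
assert set(expected.split()) == {"omp", "parallel", "for", "schedule(static,16)", "collapse(2)"}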
import pytest

from pystencils.field import Field, FieldType
from pystencils.types.quick import *
from pystencils.codegen.config import (
    CreateKernelConfig,
    PsOptionsError,
)


def test_invalid_iteration_region_options():
    idx_field = Field.create_generic(
        "idx", spatial_dimensions=1, field_type=FieldType.INDEXED
    )

    with pytest.raises(PsOptionsError):
        CreateKernelConfig(
            ghost_layers=2, iteration_slice=(slice(1, -1), slice(1, -1))
        )
    with pytest.raises(PsOptionsError):
        CreateKernelConfig(ghost_layers=2, index_field=idx_field)


def test_index_field_options():
    with pytest.raises(PsOptionsError):
        idx_field = Field.create_generic(
            "idx", spatial_dimensions=1, field_type=FieldType.GENERIC
        )
        CreateKernelConfig(index_field=idx_field)
@@ -12,6 +12,7 @@ from pystencils.backend.ast import dfs_preorder
from pystencils.backend.ast.structural import PsBlock, PsPragma, PsLoop
from pystencils.backend.transformations import InsertPragmasAtLoops, LoopPragma
def test_insert_pragmas():
    ctx = KernelCreationContext()
    factory = AstFactory(ctx)
......
No preview for this file type
@@ -71,13 +71,10 @@ def test_basic_vectorization():
    update_rule = [
        ps.Assignment(g[0, 0], f[0, 0] + f[-1, 0] + f[1, 0] + f[0, 1] + f[0, -1] + 42.0)
    ]

-    ast = ps.create_kernel(
-        update_rule,
-        target=target,
-        cpu_optim=ps.CpuOptimConfig(
-            vectorize=ps.VectorizationConfig(assume_inner_stride_one=True)
-        ),
-    )
+    cfg = ps.CreateKernelConfig(target=target)
+    cfg.cpu.vectorize.enable = True
+    cfg.cpu.vectorize.assume_inner_stride_one = True
+    ast = ps.create_kernel(update_rule, cfg)

    func = ast.compile()
......