Compare revisions

Changes are shown as if the source revision was being merged into the target revision.

Showing 652 additions and 337 deletions
 from __future__ import annotations
 from dataclasses import dataclass
-from typing import TYPE_CHECKING
 from typing import Sequence
 from collections import defaultdict
@@ -10,8 +9,6 @@ from ..ast import PsAstNode
 from ..ast.structural import PsBlock, PsLoop, PsPragma
 from ..ast.expressions import PsExpression
 
-if TYPE_CHECKING:
-    from ...codegen.config import OpenMpConfig
-
 __all__ = ["InsertPragmasAtLoops", "LoopPragma", "AddOpenMP"]
@@ -101,23 +98,40 @@ class InsertPragmasAtLoops:
 class AddOpenMP:
     """Apply OpenMP directives to loop nests.
 
-    This transformation augments the AST with OpenMP pragmas according to the given
-    `OpenMpConfig` configuration.
+    This transformation augments the AST with OpenMP pragmas according to the given configuration.
     """
 
-    def __init__(self, ctx: KernelCreationContext, omp_params: OpenMpConfig) -> None:
+    def __init__(
+        self,
+        ctx: KernelCreationContext,
+        nesting_depth: int = 0,
+        num_threads: int | None = None,
+        schedule: str | None = None,
+        collapse: int | None = None,
+        omit_parallel: bool = False,
+    ) -> None:
         pragma_text = "omp"
-        pragma_text += " parallel" if not omp_params.omit_parallel_construct else ""
-        pragma_text += f" for schedule({omp_params.schedule})"
 
-        if omp_params.num_threads is not None:
-            pragma_text += f" num_threads({str(omp_params.num_threads)})"
+        if not omit_parallel:
+            pragma_text += " parallel"
+
+        pragma_text += " for"
+
+        if schedule is not None:
+            pragma_text += f" schedule({schedule})"
+
+        if num_threads is not None:
+            pragma_text += f" num_threads({str(num_threads)})"
 
-        if omp_params.collapse > 0:
-            pragma_text += f" collapse({str(omp_params.collapse)})"
+        if collapse is not None:
+            if collapse <= 0:
+                raise ValueError(
+                    f"Invalid value for OpenMP `collapse` clause: {collapse}"
+                )
+            pragma_text += f" collapse({str(collapse)})"
 
         self._insert_pragmas = InsertPragmasAtLoops(
-            ctx, [LoopPragma(pragma_text, omp_params.nesting_depth)]
+            ctx, [LoopPragma(pragma_text, nesting_depth)]
         )
 
     def __call__(self, node: PsAstNode) -> PsAstNode:
...
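
Note: a minimal usage sketch of the rewritten transformation. Options are now plain keyword arguments instead of an `OpenMpConfig` object; the context/body names and the `KernelCreationContext` import path are illustrative assumptions, not taken from this diff:

    from pystencils.backend.kernelcreation import KernelCreationContext  # import path assumed
    from pystencils.backend.transformations import AddOpenMP

    ctx = KernelCreationContext()
    # Clauses that are left unset simply drop out of the generated pragma text.
    add_omp = AddOpenMP(ctx, nesting_depth=0, schedule="static", collapse=2)
    # body = add_omp(body)  # would emit: "omp parallel for schedule(static) collapse(2)"
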
 from .target import Target
 from .config import (
     CreateKernelConfig,
-    CpuOptimConfig,
-    VectorizationConfig,
-    OpenMpConfig,
-    GpuIndexingConfig,
     AUTO,
 )
 from .parameters import Parameter
@@ -14,10 +10,6 @@ from .driver import create_kernel, get_driver
 __all__ = [
     "Target",
     "CreateKernelConfig",
-    "CpuOptimConfig",
-    "VectorizationConfig",
-    "OpenMpConfig",
-    "GpuIndexingConfig",
     "AUTO",
     "Parameter",
     "Kernel",
...
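
Note: the removed names reflect the switch from nested config dataclasses to option categories on `CreateKernelConfig`. A before/after sketch, mirroring the test updates further down in this diff:

    import pystencils as ps

    # Previously (removed API):
    #   cfg = ps.CreateKernelConfig(
    #       target=ps.Target.CPU,
    #       cpu_optim=ps.CpuOptimConfig(openmp=ps.OpenMpConfig(schedule="static")),
    #   )

    # Now options are set on nested categories:
    cfg = ps.CreateKernelConfig(target=ps.Target.CPU)
    cfg.cpu.openmp.enable = True
    cfg.cpu.openmp.schedule = "static"
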
This diff is collapsed.
@@ -3,12 +3,20 @@ from typing import cast, Sequence, Iterable, TYPE_CHECKING
 from dataclasses import dataclass, replace
 
 from .target import Target
-from .config import CreateKernelConfig, OpenMpConfig, VectorizationConfig, AUTO
+from .config import (
+    CreateKernelConfig,
+    VectorizationOptions,
+    AUTO,
+    _AUTO_TYPE,
+    GhostLayerSpec,
+    IterationSliceSpec,
+)
 from .kernel import Kernel, GpuKernel, GpuThreadsRange
 from .properties import PsSymbolProperty, FieldShape, FieldStride, FieldBasePtr
 from .parameters import Parameter
 
-from ..types import create_numeric_type, PsIntegerType, PsScalarType
+from ..field import Field
+from ..types import PsIntegerType, PsScalarType
 from ..backend.memory import PsSymbol
 from ..backend.ast import PsAstNode
@@ -105,15 +113,38 @@ class DefaultKernelCreationDriver:
     def __init__(self, cfg: CreateKernelConfig, retain_intermediates: bool = False):
         self._cfg = cfg
 
-        idx_dtype = create_numeric_type(self._cfg.index_dtype)
-        assert isinstance(idx_dtype, PsIntegerType)
+        #   Data Type Options
+        idx_dtype: PsIntegerType = cfg.get_option("index_dtype")
+        default_dtype: PsScalarType = cfg.get_option("default_dtype")
+
+        #   Iteration Space Options
+        num_ispace_options_set = (
+            int(cfg.is_option_set("ghost_layers"))
+            + int(cfg.is_option_set("iteration_slice"))
+            + int(cfg.is_option_set("index_field"))
+        )
+
+        if num_ispace_options_set > 1:
+            raise ValueError(
+                "At most one of the options 'ghost_layers' 'iteration_slice' and 'index_field' may be set."
+            )
+
+        self._ghost_layers: GhostLayerSpec | None = cfg.get_option("ghost_layers")
+        self._iteration_slice: IterationSliceSpec | None = cfg.get_option(
+            "iteration_slice"
+        )
+        self._index_field: Field | None = cfg.get_option("index_field")
+
+        if num_ispace_options_set == 0:
+            self._ghost_layers = AUTO
+
+        #   Create the context
         self._ctx = KernelCreationContext(
-            default_dtype=create_numeric_type(self._cfg.default_dtype),
+            default_dtype=default_dtype,
             index_dtype=idx_dtype,
         )
 
-        self._target = self._cfg.get_target()
+        self._target = cfg.get_target()
         self._platform = self._get_platform()
 
         self._intermediates: CodegenIntermediates | None
@@ -153,7 +184,7 @@ class DefaultKernelCreationDriver:
             self._intermediates.constants_eliminated = kernel_ast.clone()
 
         #   Target-Specific optimizations
-        if self._cfg.target.is_cpu():
+        if self._target.is_cpu():
            kernel_ast = self._transform_for_cpu(kernel_ast)
 
         #   Note: After this point, the AST may contain intrinsics, so type-dependent
@@ -174,13 +205,13 @@ class DefaultKernelCreationDriver:
         canonicalize = CanonicalizeSymbols(self._ctx, True)
         kernel_ast = cast(PsBlock, canonicalize(kernel_ast))
 
-        if self._cfg.target.is_cpu():
+        if self._target.is_cpu():
             return create_cpu_kernel_function(
                 self._ctx,
                 self._platform,
                 kernel_ast,
-                self._cfg.function_name,
-                self._cfg.target,
+                self._cfg.get_option("function_name"),
+                self._target,
                 self._cfg.get_jit(),
             )
         else:
@@ -189,8 +220,8 @@ class DefaultKernelCreationDriver:
                 self._platform,
                 kernel_ast,
                 gpu_threads,
-                self._cfg.function_name,
-                self._cfg.target,
+                self._cfg.get_option("function_name"),
+                self._target,
                 self._cfg.get_jit(),
             )
@@ -213,22 +244,26 @@ class DefaultKernelCreationDriver:
         )
         analysis(assignments)
 
-        if self._cfg.index_field is not None:
+        if self._index_field is not None:
             ispace = create_sparse_iteration_space(
                 self._ctx, assignments, index_field=self._cfg.index_field
             )
         else:
-            gls = self._cfg.ghost_layers
-            islice = self._cfg.iteration_slice
-
-            if gls is None and islice is None:
-                gls = AUTO
+            gls: GhostLayerSpec | None
+            if self._ghost_layers == AUTO:
+                infer_gls = True
+                gls = None
+            else:
+                assert not isinstance(self._ghost_layers, _AUTO_TYPE)
+                infer_gls = False
+                gls = self._ghost_layers
 
             ispace = create_full_iteration_space(
                 self._ctx,
                 assignments,
                 ghost_layers=gls,
-                iteration_slice=islice,
+                iteration_slice=self._iteration_slice,
+                infer_ghost_layers=infer_gls,
             )
 
         self._ctx.set_iteration_space(ispace)
@@ -257,7 +292,7 @@ class DefaultKernelCreationDriver:
         if self._intermediates is not None:
             self._intermediates.cpu_hoist_invariants = kernel_ast.clone()
 
-        cpu_cfg = self._cfg.cpu_optim
+        cpu_cfg = self._cfg.cpu
 
         if cpu_cfg is None:
             return kernel_ast
@@ -266,30 +301,41 @@ class DefaultKernelCreationDriver:
             raise NotImplementedError("Loop blocking not implemented yet.")
 
         kernel_ast = self._vectorize(kernel_ast)
+        kernel_ast = self._add_openmp(kernel_ast)
+
+        if cpu_cfg.use_cacheline_zeroing:
+            raise NotImplementedError("CL-zeroing not implemented yet")
+
+        return kernel_ast
+
+    def _add_openmp(self, kernel_ast: PsBlock) -> PsBlock:
+        omp_options = self._cfg.cpu.openmp
+        enable_omp: bool = omp_options.get_option("enable")
 
-        if cpu_cfg.openmp is not False:
+        if enable_omp:
             from ..backend.transformations import AddOpenMP
 
-            params = (
-                cpu_cfg.openmp
-                if isinstance(cpu_cfg.openmp, OpenMpConfig)
-                else OpenMpConfig()
+            add_omp = AddOpenMP(
+                self._ctx,
+                nesting_depth=omp_options.get_option("nesting_depth"),
+                num_threads=omp_options.get_option("num_threads"),
+                schedule=omp_options.get_option("schedule"),
+                collapse=omp_options.get_option("collapse"),
+                omit_parallel=omp_options.get_option("omit_parallel_construct"),
             )
-            add_omp = AddOpenMP(self._ctx, params)
             kernel_ast = cast(PsBlock, add_omp(kernel_ast))
 
             if self._intermediates is not None:
                 self._intermediates.cpu_openmp = kernel_ast.clone()
 
-        if cpu_cfg.use_cacheline_zeroing:
-            raise NotImplementedError("CL-zeroing not implemented yet")
-
         return kernel_ast
 
     def _vectorize(self, kernel_ast: PsBlock) -> PsBlock:
-        assert self._cfg.cpu_optim is not None
-        vec_config = self._cfg.cpu_optim.get_vectorization_config()
-        if vec_config is None:
+        vec_options = self._cfg.cpu.vectorize
+
+        enable_vec = vec_options.get_option("enable")
+
+        if not enable_vec:
             return kernel_ast
 
         from ..backend.transformations import LoopVectorizer, SelectIntrinsics
@@ -306,7 +352,9 @@ class DefaultKernelCreationDriver:
         inner_loop_dim = ispace.dimensions[inner_loop_coord]
 
         #   Apply stride (TODO: and alignment) assumptions
-        if vec_config.assume_inner_stride_one:
+        assume_unit_stride: bool = vec_options.get_option("assume_inner_stride_one")
+
+        if assume_unit_stride:
             for field in self._ctx.fields:
                 buf = self._ctx.get_buffer(field)
                 inner_stride = buf.strides[inner_loop_coord]
@@ -322,14 +370,14 @@ class DefaultKernelCreationDriver:
         #   TODO: Communicate assumption to runtime system via a precondition
 
         #   Call loop vectorizer
-        if vec_config.lanes is None:
-            lanes = VectorizationConfig.default_lanes(
+        num_lanes: int | None = vec_options.get_option("lanes")
+
+        if num_lanes is None:
+            num_lanes = VectorizationOptions.default_lanes(
                 self._target, cast(PsScalarType, self._ctx.default_dtype)
             )
-        else:
-            lanes = vec_config.lanes
 
-        vectorizer = LoopVectorizer(self._ctx, lanes)
+        vectorizer = LoopVectorizer(self._ctx, num_lanes)
 
         def loop_predicate(loop: PsLoop):
             return loop.counter.symbol == inner_loop_dim.counter
@@ -375,15 +423,30 @@ class DefaultKernelCreationDriver:
             )
 
         elif Target._GPU in self._target:
+            gpu_opts = self._cfg.gpu
+            omit_range_check: bool = gpu_opts.get_option("omit_range_check")
+
             match self._target:
                 case Target.SYCL:
                     from ..backend.platforms import SyclPlatform
 
-                    return SyclPlatform(self._ctx, self._cfg.gpu_indexing)
+                    auto_block_size: bool = self._cfg.sycl.get_option("automatic_block_size")
+
+                    return SyclPlatform(
+                        self._ctx,
+                        omit_range_check=omit_range_check,
+                        automatic_block_size=auto_block_size,
+                    )
                 case Target.CUDA:
                     from ..backend.platforms import CudaPlatform
 
-                    return CudaPlatform(self._ctx, self._cfg.gpu_indexing)
+                    manual_grid = gpu_opts.get_option("manual_launch_grid")
+
+                    return CudaPlatform(
+                        self._ctx,
+                        omit_range_check=omit_range_check,
+                        manual_launch_grid=manual_grid,
+                    )
 
         raise NotImplementedError(
             f"Code generation for target {self._target} not implemented"
...
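
Note: the driver now reads every setting through `get_option`, which substitutes an option's declared default when it is unset, while plain attribute access returns `None`. A minimal sketch of that contract, consistent with the descriptor tests further down:

    import pystencils as ps

    cfg = ps.CreateKernelConfig()

    # Unset options read as None via attribute access...
    assert cfg.cpu.openmp.enable is None

    # ...while get_option() falls back to the option's declared default.
    enable_omp = cfg.cpu.openmp.get_option("enable")
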
@@ -35,7 +35,7 @@ class PsCustomType(PsType):
         return self._name
 
     def c_string(self) -> str:
-        return f"{self._const_string()} {self._name}"
+        return f"{self._const_string()}{self._name}"
 
     def __repr__(self) -> str:
         return f"CustomType( {self.name}, const={self.const} )"
...
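
Note: the one-character `c_string` fix above removes a duplicated separator. A sketch of the before/after output, assuming `_const_string()` returns "const " (with a trailing space) for const-qualified types and "" otherwise; that return value is an assumption, not shown in this diff:

    const_str = "const "  # assumed return value of _const_string()
    name = "MyType"

    old = f"{const_str} {name}"   # "const  MyType" -- doubled space
    new = f"{const_str}{name}"    # "const MyType"
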
+import pytest
+from dataclasses import dataclass
+
+import numpy as np
+
+from pystencils.codegen.config import (
+    BasicOption,
+    Option,
+    Category,
+    ConfigBase,
+    CreateKernelConfig,
+    CpuOptions
+)
+
+from pystencils.field import Field, FieldType
+from pystencils.types.quick import Int, UInt, Fp, Ptr
+from pystencils.types import PsVectorType
+
+
+def test_descriptors():
+    @dataclass
+    class SampleCategory(ConfigBase):
+        val1: BasicOption[int] = BasicOption(2)
+        val2: Option[bool, str | bool] = Option(False)
+
+        @val2.validate
+        def validate_val2(self, v: str | bool):
+            if isinstance(v, str):
+                if v.lower() in ("off", "false", "no"):
+                    return False
+                elif v.lower() in ("on", "true", "yes"):
+                    return True
+                raise ValueError()
+            else:
+                return v
+
+    @dataclass
+    class SampleConfig(ConfigBase):
+        cat: Category[SampleCategory] = Category(SampleCategory())
+        val: BasicOption[str] = BasicOption("fallback")
+
+    cfg = SampleConfig()
+
+    #   Check unset and default values
+    assert cfg.val is None
+    assert cfg.get_option("val") == "fallback"
+
+    #   Check setting
+    cfg.val = "test"
+    assert cfg.val == "test"
+    assert cfg.get_option("val") == "test"
+    assert cfg.is_option_set("val")
+
+    #   Check unsetting
+    cfg.val = None
+    assert not cfg.is_option_set("val")
+    assert cfg.val is None
+
+    #   Check category
+    assert cfg.cat.val1 is None
+    assert cfg.cat.get_option("val1") == 2
+    assert cfg.cat.val2 is None
+    assert cfg.cat.get_option("val2") is False
+
+    #   Check copy on category setting
+    c = SampleCategory(32, "on")
+    cfg.cat = c
+
+    assert cfg.cat.val1 == 32
+    assert cfg.cat.val2 is True
+    assert cfg.cat is not c
+
+    c.val1 = 13
+    assert cfg.cat.val1 == 32
+
+    #   Check that category objects on two config objects are not the same
+    cfg1 = SampleConfig()
+    cfg2 = SampleConfig()
+    assert cfg1.cat is not cfg2.cat
+
+
+def test_category_init():
+    cfg1 = CreateKernelConfig()
+    cfg2 = CreateKernelConfig()
+
+    assert cfg1.cpu is not cfg2.cpu
+    assert cfg1.cpu.openmp is not cfg2.cpu.openmp
+    assert cfg1.cpu.vectorize is not cfg2.cpu.vectorize
+    assert cfg1.gpu is not cfg2.gpu
+
+
+def test_category_copy():
+    cfg = CreateKernelConfig()
+
+    cpu_repl = CpuOptions()
+    cpu_repl.openmp.num_threads = 42
+
+    cfg.cpu = cpu_repl
+    assert cfg.cpu.openmp.num_threads == 42
+    assert cfg.cpu is not cpu_repl
+    assert cfg.cpu.openmp is not cpu_repl.openmp
+
+
+def test_config_validation():
+    #   Check index dtype validation
+    cfg = CreateKernelConfig(index_dtype="int32")
+    assert cfg.index_dtype == Int(32)
+
+    cfg.index_dtype = np.uint64
+    assert cfg.index_dtype == UInt(64)
+
+    with pytest.raises(ValueError):
+        _ = CreateKernelConfig(index_dtype=np.float32)
+
+    with pytest.raises(ValueError):
+        cfg.index_dtype = "double"
+
+    #   Check default dtype validation
+    cfg = CreateKernelConfig(default_dtype="float32")
+    assert cfg.default_dtype == Fp(32)
+
+    cfg.default_dtype = np.int64
+    assert cfg.default_dtype == Int(64)
+
+    with pytest.raises(ValueError):
+        cfg.default_dtype = PsVectorType(Fp(64), 4)
+
+    with pytest.raises(ValueError):
+        _ = CreateKernelConfig(default_dtype=Ptr(Fp(32)))
+
+    #   Check index field validation
+    idx_field = Field.create_generic(
+        "idx", spatial_dimensions=1, field_type=FieldType.INDEXED
+    )
+    cfg.index_field = idx_field
+    assert cfg.index_field == idx_field
+
+    with pytest.raises(ValueError):
+        cfg.index_field = Field.create_generic(
+            "idx", spatial_dimensions=1, field_type=FieldType.GENERIC
+        )
+
+
+def test_override():
+    cfg1 = CreateKernelConfig()
+    cfg1.function_name = "test"
+    cfg1.cpu.openmp.schedule = "dynamic"
+    cfg1.gpu.manual_launch_grid = False
+    cfg1.allow_double_writes = True
+
+    cfg2 = CreateKernelConfig()
+    cfg2.function_name = "func"
+    cfg2.cpu.openmp.schedule = "static(5)"
+    cfg2.cpu.vectorize.lanes = 12
+    cfg2.allow_double_writes = False
+
+    cfg1.override(cfg2)
+
+    assert cfg1.function_name == "func"
+    assert cfg1.cpu.openmp.schedule == "static(5)"
+    assert cfg1.cpu.openmp.enable is None
+    assert cfg1.cpu.vectorize.lanes == 12
+    assert cfg1.cpu.vectorize.assume_aligned is None
+    assert cfg1.allow_double_writes is False
@@ -41,7 +41,7 @@ def target(request) -> ps.Target:
 @pytest.fixture
 def gen_config(target: ps.Target):
     """Default codegen configuration for the current target.
 
     For GPU targets, set default indexing options.
     For vector-CPU targets, set default vectorization config.
     """
@@ -49,12 +49,8 @@ def gen_config(target: ps.Target):
     gen_config = ps.CreateKernelConfig(target=target)
 
     if target.is_vector_cpu():
-        gen_config = replace(
-            gen_config,
-            cpu_optim=ps.CpuOptimConfig(
-                vectorize=ps.VectorizationConfig(assume_inner_stride_one=True)
-            ),
-        )
+        gen_config.cpu.vectorize.enable = True
+        gen_config.cpu.vectorize.assume_inner_stride_one = True
 
     return gen_config
@@ -62,13 +58,15 @@ def gen_config(target: ps.Target):
 @pytest.fixture()
 def xp(target: ps.Target) -> ModuleType:
     """Primary array module for the current target.
 
     Returns:
         `cupy` if `target == Target.CUDA`, and `numpy` otherwise
     """
     if target == ps.Target.CUDA:
         import cupy as xp
 
         return xp
     else:
         import numpy as np
 
         return np
@@ -299,8 +299,7 @@ def test_iteration_slices(gpu_indexing):
     gpu_src_arr.set(src_arr)
     gpu_dst_arr.fill(0)
 
-    config = CreateKernelConfig(target=Target.GPU, iteration_slice=pack_slice,
-                                gpu_indexing=gpu_indexing)
+    config = CreateKernelConfig(target=Target.GPU, iteration_slice=pack_slice)
 
     pack_code = create_kernel(pack_eqs, config=config)
     pack_kernel = pack_code.compile()
@@ -312,8 +311,7 @@ def test_iteration_slices(gpu_indexing):
         eq = Assignment(dst_field(idx), buffer(idx))
         unpack_eqs.append(eq)
 
-    config = CreateKernelConfig(target=Target.GPU, iteration_slice=pack_slice,
-                                gpu_indexing=gpu_indexing)
+    config = CreateKernelConfig(target=Target.GPU, iteration_slice=pack_slice)
 
     unpack_code = create_kernel(unpack_eqs, config=config)
     unpack_kernel = unpack_code.compile()
...
@@ -112,7 +112,7 @@ def test_ghost_layer():
     update_rule = Assignment(dst_field[0, 0], src_field[0, 0])
     ghost_layers = [(1, 2), (2, 1)]
 
-    config = CreateKernelConfig(target=Target.GPU, ghost_layers=ghost_layers, gpu_indexing="line")
+    config = CreateKernelConfig(target=Target.GPU, ghost_layers=ghost_layers, gpu="line")
     ast = create_kernel(sympy_cse_on_assignment_list([update_rule]), config=config)
     kernel = ast.compile()
@@ -135,7 +135,7 @@ def test_setting_value():
     f = Field.create_generic("f", 2)
     update_rule = [Assignment(f(0), sp.Symbol("value"))]
 
-    config = CreateKernelConfig(target=Target.GPU, gpu_indexing="line", iteration_slice=iteration_slice)
+    config = CreateKernelConfig(target=Target.GPU, gpu="line", iteration_slice=iteration_slice)
     ast = create_kernel(sympy_cse_on_assignment_list(update_rule), config=config)
     kernel = ast.compile()
@@ -207,7 +207,7 @@ def test_four_dimensional_kernel(gpu_indexing, layout, shape):
     f = Field.create_from_numpy_array("f", arr_cpu)
     update_rule = [Assignment(f.center, sp.Symbol("value"))]
 
-    config = CreateKernelConfig(target=Target.GPU, gpu_indexing=gpu_indexing, iteration_slice=iteration_slice)
+    config = CreateKernelConfig(target=Target.GPU, gpu=gpu_indexing, iteration_slice=iteration_slice)
     ast = create_kernel(update_rule, config=config)
     kernel = ast.compile()
...
@@ -13,7 +13,6 @@ from pystencils import (
     make_slice,
     Target,
     CreateKernelConfig,
-    GpuIndexingConfig,
     DynamicType,
 )
 from pystencils.sympyextensions.integer_functions import int_rem
@@ -81,7 +80,7 @@ def test_numerical_slices(gen_config: CreateKernelConfig, xp, islice):
     try:
         kernel = create_kernel(update, gen_config).compile()
     except NotImplementedError:
-        if gen_config.target.is_vector_cpu():
+        if gen_config.get_target().is_vector_cpu():
             #   TODO Gather/Scatter not implemented yet
             pytest.xfail("Gather/Scatter not available yet")
@@ -104,6 +103,9 @@ def test_symbolic_slice(gen_config: CreateKernelConfig, xp):
     update = Assignment(f.center(), 1)
     islice = make_slice[sy:ey, sx:ex]
     gen_config = replace(gen_config, iteration_slice=islice)
+
+    print(repr(gen_config))
+
     kernel = create_kernel(update, gen_config).compile()
 
     for slic in [make_slice[:, :], make_slice[1:-1, 2:-2], make_slice[8:14, 7:11]]:
@@ -140,9 +142,7 @@ def test_triangle_pattern(gen_config: CreateKernelConfig, xp):
     gen_config = replace(gen_config, iteration_slice=islice)
 
     if gen_config.target == Target.CUDA:
-        gen_config = replace(
-            gen_config, gpu_indexing=GpuIndexingConfig(manual_launch_grid=True)
-        )
+        gen_config.gpu.manual_launch_grid = True
 
     kernel = create_kernel(update, gen_config).compile()
@@ -170,17 +170,15 @@ def test_red_black_pattern(gen_config: CreateKernelConfig, xp):
     outer_counter = DEFAULTS.spatial_counters[0]
     start = sp.Piecewise((0, sp.Eq(int_rem(outer_counter, 2), 0)), (1, True))
     islice = make_slice[:, start::2]
-    gen_config = replace(gen_config, iteration_slice=islice)
+    gen_config.iteration_slice = islice
 
     if gen_config.target == Target.CUDA:
-        gen_config = replace(
-            gen_config, gpu_indexing=GpuIndexingConfig(manual_launch_grid=True)
-        )
+        gen_config.gpu.manual_launch_grid = True
 
     try:
         kernel = create_kernel(update, gen_config).compile()
     except NotImplementedError:
-        if gen_config.target.is_vector_cpu():
+        if gen_config.get_target().is_vector_cpu():
             pytest.xfail("Gather/Scatter not implemented yet")
 
     if isinstance(kernel, CupyKernelWrapper):
...
@@ -4,8 +4,6 @@ from pystencils import (
     Assignment,
     create_kernel,
     CreateKernelConfig,
-    CpuOptimConfig,
-    OpenMpConfig,
     Target,
 )
@@ -15,21 +13,18 @@ from pystencils.backend.ast.structural import PsLoop, PsPragma
 @pytest.mark.parametrize("nesting_depth", range(3))
 @pytest.mark.parametrize("schedule", ["static", "static,16", "dynamic", "auto"])
-@pytest.mark.parametrize("collapse", range(3))
+@pytest.mark.parametrize("collapse", [None, 1, 2])
 @pytest.mark.parametrize("omit_parallel_construct", range(3))
 def test_openmp(nesting_depth, schedule, collapse, omit_parallel_construct):
     f, g = fields("f, g: [3D]")
     asm = Assignment(f.center(0), g.center(0))
 
-    omp = OpenMpConfig(
-        nesting_depth=nesting_depth,
-        schedule=schedule,
-        collapse=collapse,
-        omit_parallel_construct=omit_parallel_construct,
-    )
-    gen_config = CreateKernelConfig(
-        target=Target.CPU, cpu_optim=CpuOptimConfig(openmp=omp)
-    )
+    gen_config = CreateKernelConfig(target=Target.CPU)
+    gen_config.cpu.openmp.enable = True
+    gen_config.cpu.openmp.nesting_depth = nesting_depth
+    gen_config.cpu.openmp.schedule = schedule
+    gen_config.cpu.openmp.collapse = collapse
+    gen_config.cpu.openmp.omit_parallel_construct = omit_parallel_construct
 
     kernel = create_kernel(asm, gen_config)
     ast = kernel.body
@@ -52,10 +47,10 @@ def test_openmp(nesting_depth, schedule, collapse, omit_parallel_construct):
     pragma = find_omp_pragma(ast)
     tokens = set(pragma.text.split())
 
-    expected_tokens = {"omp", "for", f"schedule({omp.schedule})"}
-    if not omp.omit_parallel_construct:
+    expected_tokens = {"omp", "for", f"schedule({schedule})"}
+    if not omit_parallel_construct:
         expected_tokens.add("parallel")
-    if omp.collapse > 0:
-        expected_tokens.add(f"collapse({omp.collapse})")
+    if collapse is not None:
+        expected_tokens.add(f"collapse({collapse})")
 
     assert tokens == expected_tokens
+import pytest
+
+from pystencils.field import Field, FieldType
+from pystencils.types.quick import *
+from pystencils.codegen.config import (
+    CreateKernelConfig,
+    PsOptionsError,
+)
+
+
+def test_invalid_iteration_region_options():
+    idx_field = Field.create_generic(
+        "idx", spatial_dimensions=1, field_type=FieldType.INDEXED
+    )
+
+    with pytest.raises(PsOptionsError):
+        CreateKernelConfig(
+            ghost_layers=2, iteration_slice=(slice(1, -1), slice(1, -1))
+        )
+
+    with pytest.raises(PsOptionsError):
+        CreateKernelConfig(ghost_layers=2, index_field=idx_field)
+
+
+def test_index_field_options():
+    with pytest.raises(PsOptionsError):
+        idx_field = Field.create_generic(
+            "idx", spatial_dimensions=1, field_type=FieldType.GENERIC
+        )
+        CreateKernelConfig(index_field=idx_field)
@@ -12,6 +12,7 @@ from pystencils.backend.ast import dfs_preorder
 from pystencils.backend.ast.structural import PsBlock, PsPragma, PsLoop
 from pystencils.backend.transformations import InsertPragmasAtLoops, LoopPragma
 
+
 def test_insert_pragmas():
     ctx = KernelCreationContext()
     factory = AstFactory(ctx)
...
No preview for this file type
@@ -71,13 +71,10 @@ def test_basic_vectorization():
     update_rule = [
         ps.Assignment(g[0, 0], f[0, 0] + f[-1, 0] + f[1, 0] + f[0, 1] + f[0, -1] + 42.0)
     ]
-    ast = ps.create_kernel(
-        update_rule,
-        target=target,
-        cpu_optim=ps.CpuOptimConfig(
-            vectorize=ps.VectorizationConfig(assume_inner_stride_one=True)
-        ),
-    )
+    cfg = ps.CreateKernelConfig(target=target)
+    cfg.cpu.vectorize.enable = True
+    cfg.cpu.vectorize.assume_inner_stride_one = True
+    ast = ps.create_kernel(update_rule, cfg)
 
     func = ast.compile()
...