From a898a3baec6386e83a775b1bec73a5f18cd60110 Mon Sep 17 00:00:00 2001
From: Frederik Hennig <frederik.hennig@fau.de>
Date: Fri, 17 Jan 2025 19:27:01 +0100
Subject: [PATCH 1/7] config descriptors

---
 src/pystencils/codegen/config.py | 131 ++++++++++++++++++++++++++++++-
 tests/codegen/test_config.py     |  57 ++++++++++++++
 2 files changed, 186 insertions(+), 2 deletions(-)
 create mode 100644 tests/codegen/test_config.py

diff --git a/src/pystencils/codegen/config.py b/src/pystencils/codegen/config.py
index 3a7647907..83602dbbc 100644
--- a/src/pystencils/codegen/config.py
+++ b/src/pystencils/codegen/config.py
@@ -2,10 +2,12 @@ from __future__ import annotations
 from typing import TYPE_CHECKING
 
 from warnings import warn
+from abc import ABC
 from collections.abc import Collection
+from copy import copy
 
-from typing import Sequence
-from dataclasses import dataclass, InitVar, replace
+from typing import Sequence, Generic, TypeVar, Callable, Any, cast
+from dataclasses import dataclass, InitVar, replace, fields
 
 from .target import Target
 from ..field import Field, FieldType
@@ -28,6 +30,131 @@ class PsOptionsError(Exception):
     """Indicates an option clash in the `CreateKernelConfig`."""
 
 
+Option_T = TypeVar("Option_T")
+Arg_T = TypeVar("Arg_T")
+
+
+class Option(Generic[Option_T, Arg_T]):
+    """Option descriptor.
+
+    This descriptor is used to model configuration options.
+    It maintains a default value for the option that is used when no value
+    was specified by the user.
+
+    In configuration options, the value `None` stands for `unset`.
+    It can therefore not be used to set an option to the meaning "not any", or "empty"
+    - for these, special values need to be used.
+
+    The Option allows a validator function to be specified,
+    which will be called to perform sanity checks on user-provided values.
+
+    Through the validator, options may also be set from arguments of a different type (`Arg_T`)
+    than their value type (`Option_T`). If `Arg_T` is different from `Option_T`,
+    the validator must perform the conversion from the former to the latter.
+    """
+
+    def __init__(
+        self,
+        default: Option_T | None = None,
+        validator: Callable[[Any, Arg_T | None], Option_T | None] | None = None,
+    ) -> None:
+        self._default = default
+        self._validator = validator
+        self._name: str
+        self._lookup: str
+
+    def validate(self, validator: Callable[[Any, Any], Any] | None):
+        self._validator = validator
+        return validator
+
+    @property
+    def default(self) -> Option_T | None:
+        return self._default
+
+    def get(self, obj) -> Option_T | None:
+        val = getattr(obj, self._lookup, None)
+        if val is None:
+            return self._default
+        else:
+            return val
+        
+    def is_set(self, obj) -> bool:
+        return getattr(obj, self._lookup, None) is not None
+
+    def __set_name__(self, owner, name: str):
+        self._name = name
+        self._lookup = f"_{name}"
+
+    def __get__(self, obj, objtype=None) -> Option_T | None:
+        if obj is None:
+            return None
+
+        return getattr(obj, self._lookup, None)
+
+    def __set__(self, obj, arg: Arg_T | None):
+        if arg is not None and self._validator is not None:
+            value = self._validator(obj, arg)
+        else:
+            value = cast(Option_T, arg)
+        setattr(obj, self._lookup, value)
+
+    def __delete__(self, obj):
+        delattr(obj, self._lookup)
+
+
+class SimpleOption(Option[Option_T, Option_T]):
+    ...
+
+
+class ConfigBase(ABC):
+    def get_option(self, name: str) -> Any:
+        """Get the value set for the specified option, or the option's default value if none has been set."""
+        descr: Option = type(self).__dict__[name]
+        return descr.get(self)
+    
+    def is_option_set(self, name: str) -> bool:
+        descr: Option = type(self).__dict__[name]
+        return descr.is_set(self)
+
+    def override(self, other: ConfigBase):
+        for f in fields(self):  # type: ignore
+            fvalue = getattr(self, f.name)
+            if isinstance(fvalue, ConfigBase):  # type: ignore
+                fvalue.override(getattr(other, f.name))
+            else:
+                new_val = getattr(other, f.name)
+                if new_val is not None:
+                    setattr(self, f.name, new_val)
+
+
+Category_T = TypeVar("Category_T", bound=ConfigBase)
+
+
+class Category(Generic[Category_T]):
+    """Descriptor for a category of options.
+    
+    This descriptor makes sure that when an entire category is set to an object,
+    that object is copied immediately such that later changes to the original
+    do not affect this configuration.
+    """
+
+    def __init__(self, default: Category_T):
+        self._default = default
+
+    def __set_name__(self, owner, name: str):
+        self._name = name
+        self._lookup = f"_{name}"
+
+    def __get__(self, obj, objtype=None) -> Category_T:
+        if obj is None:
+            return self._default
+
+        return cast(Category_T, getattr(obj, self._lookup, None))
+
+    def __set__(self, obj, cat: Category_T):
+        setattr(obj, self._lookup, copy(cat))
+
+
 class _AUTO_TYPE: ...  # noqa: E701
 
 
diff --git a/tests/codegen/test_config.py b/tests/codegen/test_config.py
new file mode 100644
index 000000000..613c279c3
--- /dev/null
+++ b/tests/codegen/test_config.py
@@ -0,0 +1,57 @@
+from dataclasses import dataclass
+from pystencils.codegen.config import SimpleOption, Option, Category, ConfigBase
+
+
+def test_descriptors():
+
+    @dataclass
+    class SampleCategory(ConfigBase):
+        val1: SimpleOption[int] = SimpleOption(2)
+        val2: Option[bool, str] = Option(False)
+
+        @val2.validate
+        def _val2(self, v: str):
+            if v.lower() in ("off", "false", "no"):
+                return False
+            elif v.lower() in ("on", "true", "yes"):
+                return True
+            
+            raise ValueError()
+
+    @dataclass
+    class SampleConfig(ConfigBase):
+        cat: Category[SampleCategory] = Category(SampleCategory())
+        val: SimpleOption[str] = SimpleOption("fallback")
+
+    cfg = SampleConfig()
+    
+    #   Check unset and default values
+    assert cfg.val is None
+    assert cfg.get_option("val") == "fallback"
+
+    #   Check setting
+    cfg.val = "test"
+    assert cfg.val == "test"
+    assert cfg.get_option("val") == "test"
+    assert cfg.is_option_set("val")
+
+    #   Check unsetting
+    cfg.val = None
+    assert not cfg.is_option_set("val")
+    assert cfg.val is None
+
+    #   Check category
+    assert cfg.cat.val1 is None
+    assert cfg.cat.get_option("val1") == 2
+    assert cfg.cat.val2 is None
+    assert cfg.cat.get_option("val2") is False
+
+    #   Check copy on category setting
+    c = SampleCategory(32, "on")
+    cfg.cat = c
+    assert cfg.cat.val1 == 32
+    assert cfg.cat.val2 is True
+    
+    assert cfg.cat is not c
+    c.val1 = 13
+    assert cfg.cat.val1 == 32
-- 
GitLab


From 11c30f08cb2a00d8caa6ffb6f7ca47b351b3bda2 Mon Sep 17 00:00:00 2001
From: Frederik Hennig <frederik.hennig@fau.de>
Date: Fri, 17 Jan 2025 20:20:29 +0100
Subject: [PATCH 2/7] Update config classes to use descriptors. Update parts of
 the codegen driver

---
 src/pystencils/__init__.py                    |  12 +-
 src/pystencils/backend/platforms/cuda.py      |   8 +-
 src/pystencils/backend/platforms/sycl.py      |   8 +-
 .../backend/transformations/add_pragmas.py    |   4 +-
 src/pystencils/codegen/__init__.py            |  12 +-
 src/pystencils/codegen/config.py              | 282 ++++++++----------
 src/pystencils/codegen/driver.py              |  50 +++-
 tests/codegen/test_config.py                  |  25 +-
 tests/fixtures.py                             |   2 +-
 tests/kernelcreation/test_iteration_slices.py |   6 +-
 tests/nbackend/kernelcreation/test_openmp.py  |   8 +-
 tests/test_quicktests.py                      |   2 +-
 12 files changed, 215 insertions(+), 204 deletions(-)

diff --git a/src/pystencils/__init__.py b/src/pystencils/__init__.py
index 6cb375b61..2bb4aac3d 100644
--- a/src/pystencils/__init__.py
+++ b/src/pystencils/__init__.py
@@ -3,10 +3,10 @@
 from .codegen import (
     Target,
     CreateKernelConfig,
-    CpuOptimConfig,
+    CpuOptions,
     VectorizationConfig,
-    OpenMpConfig,
-    GpuIndexingConfig,
+    OpenMpOptions,
+    GpuOptions,
     AUTO
 )
 from .defaults import DEFAULTS
@@ -50,10 +50,10 @@ __all__ = [
     "create_numeric_type",
     "make_slice",
     "CreateKernelConfig",
-    "CpuOptimConfig",
+    "CpuOptions",
     "VectorizationConfig",
-    "GpuIndexingConfig",
-    "OpenMpConfig",
+    "GpuOptions",
+    "OpenMpOptions",
     "AUTO",
     "create_kernel",
     "create_staggered_kernel",
diff --git a/src/pystencils/backend/platforms/cuda.py b/src/pystencils/backend/platforms/cuda.py
index f146cfbfd..31686cb18 100644
--- a/src/pystencils/backend/platforms/cuda.py
+++ b/src/pystencils/backend/platforms/cuda.py
@@ -30,7 +30,7 @@ from ..literals import PsLiteral
 from ..functions import PsMathFunction, MathFunctions, CFunction
 
 if TYPE_CHECKING:
-    from ...codegen import GpuIndexingConfig, GpuThreadsRange
+    from ...codegen import GpuOptions, GpuThreadsRange
 
 int32 = PsSignedIntegerType(width=32, const=False)
 
@@ -52,13 +52,13 @@ class CudaPlatform(GenericGpu):
     """Platform for CUDA-based GPUs."""
 
     def __init__(
-        self, ctx: KernelCreationContext, indexing_cfg: GpuIndexingConfig | None = None
+        self, ctx: KernelCreationContext, indexing_cfg: GpuOptions | None = None
     ) -> None:
         super().__init__(ctx)
 
-        from ...codegen.config import GpuIndexingConfig
+        from ...codegen.config import GpuOptions
 
-        self._cfg = indexing_cfg if indexing_cfg is not None else GpuIndexingConfig()
+        self._cfg = indexing_cfg if indexing_cfg is not None else GpuOptions()
         self._typify = Typifier(ctx)
 
     @property
diff --git a/src/pystencils/backend/platforms/sycl.py b/src/pystencils/backend/platforms/sycl.py
index 9c04d6074..b5ba7b6c4 100644
--- a/src/pystencils/backend/platforms/sycl.py
+++ b/src/pystencils/backend/platforms/sycl.py
@@ -30,19 +30,19 @@ from ..exceptions import MaterializationError
 from ...types import PsCustomType, PsIeeeFloatType, constify, PsIntegerType
 
 if TYPE_CHECKING:
-    from ...codegen import GpuIndexingConfig, GpuThreadsRange
+    from ...codegen import GpuOptions, GpuThreadsRange
 
 
 class SyclPlatform(GenericGpu):
 
     def __init__(
-        self, ctx: KernelCreationContext, indexing_cfg: GpuIndexingConfig | None = None
+        self, ctx: KernelCreationContext, indexing_cfg: GpuOptions | None = None
     ):
         super().__init__(ctx)
 
-        from ...codegen.config import GpuIndexingConfig
+        from ...codegen.config import GpuOptions
 
-        self._cfg = indexing_cfg if indexing_cfg is not None else GpuIndexingConfig()
+        self._cfg = indexing_cfg if indexing_cfg is not None else GpuOptions()
 
     @property
     def required_headers(self) -> set[str]:
diff --git a/src/pystencils/backend/transformations/add_pragmas.py b/src/pystencils/backend/transformations/add_pragmas.py
index 78e721f38..b033e4d58 100644
--- a/src/pystencils/backend/transformations/add_pragmas.py
+++ b/src/pystencils/backend/transformations/add_pragmas.py
@@ -11,7 +11,7 @@ from ..ast.structural import PsBlock, PsLoop, PsPragma
 from ..ast.expressions import PsExpression
 
 if TYPE_CHECKING:
-    from ...codegen.config import OpenMpConfig
+    from ...codegen.config import OpenMpOptions
 
 __all__ = ["InsertPragmasAtLoops", "LoopPragma", "AddOpenMP"]
 
@@ -105,7 +105,7 @@ class AddOpenMP:
     `OpenMpConfig` configuration.
     """
 
-    def __init__(self, ctx: KernelCreationContext, omp_params: OpenMpConfig) -> None:
+    def __init__(self, ctx: KernelCreationContext, omp_params: OpenMpOptions) -> None:
         pragma_text = "omp"
         pragma_text += " parallel" if not omp_params.omit_parallel_construct else ""
         pragma_text += f" for schedule({omp_params.schedule})"
diff --git a/src/pystencils/codegen/__init__.py b/src/pystencils/codegen/__init__.py
index e27b94b9e..da33f9ee2 100644
--- a/src/pystencils/codegen/__init__.py
+++ b/src/pystencils/codegen/__init__.py
@@ -1,10 +1,10 @@
 from .target import Target
 from .config import (
     CreateKernelConfig,
-    CpuOptimConfig,
+    CpuOptions,
     VectorizationConfig,
-    OpenMpConfig,
-    GpuIndexingConfig,
+    OpenMpOptions,
+    GpuOptions,
     AUTO,
 )
 from .parameters import Parameter
@@ -14,10 +14,10 @@ from .driver import create_kernel, get_driver
 __all__ = [
     "Target",
     "CreateKernelConfig",
-    "CpuOptimConfig",
+    "CpuOptions",
     "VectorizationConfig",
-    "OpenMpConfig",
-    "GpuIndexingConfig",
+    "OpenMpOptions",
+    "GpuOptions",
     "AUTO",
     "Parameter",
     "Kernel",
diff --git a/src/pystencils/codegen/config.py b/src/pystencils/codegen/config.py
index 83602dbbc..4e17ccc48 100644
--- a/src/pystencils/codegen/config.py
+++ b/src/pystencils/codegen/config.py
@@ -77,7 +77,7 @@ class Option(Generic[Option_T, Arg_T]):
             return self._default
         else:
             return val
-        
+
     def is_set(self, obj) -> bool:
         return getattr(obj, self._lookup, None) is not None
 
@@ -102,8 +102,7 @@ class Option(Generic[Option_T, Arg_T]):
         delattr(obj, self._lookup)
 
 
-class SimpleOption(Option[Option_T, Option_T]):
-    ...
+class BasicOption(Option[Option_T, Option_T]): ...
 
 
 class ConfigBase(ABC):
@@ -111,7 +110,7 @@ class ConfigBase(ABC):
         """Get the value set for the specified option, or the option's default value if none has been set."""
         descr: Option = type(self).__dict__[name]
         return descr.get(self)
-    
+
     def is_option_set(self, name: str) -> bool:
         descr: Option = type(self).__dict__[name]
         return descr.is_set(self)
@@ -132,7 +131,7 @@ Category_T = TypeVar("Category_T", bound=ConfigBase)
 
 class Category(Generic[Category_T]):
     """Descriptor for a category of options.
-    
+
     This descriptor makes sure that when an entire category is set to an object,
     that object is copied immediately such that later changes to the original
     do not affect this configuration.
@@ -168,22 +167,25 @@ Currently, these options permit `AUTO`:
 
 
 @dataclass
-class OpenMpConfig:
+class OpenMpOptions(ConfigBase):
     """Parameters controlling kernel parallelization using OpenMP."""
 
-    nesting_depth: int = 0
+    enable: BasicOption[bool] = BasicOption(False)
+    """Enable OpenMP instrumentation"""
+
+    nesting_depth: BasicOption[int] = BasicOption(0)
     """Nesting depth of the loop that should be parallelized. Must be a nonnegative number."""
 
-    collapse: int = 0
+    collapse: BasicOption[int] = BasicOption()
     """Argument to the OpenMP ``collapse`` clause"""
 
-    schedule: str = "static"
+    schedule: BasicOption[str] = BasicOption("static")
     """Argument to the OpenMP ``schedule`` clause"""
 
-    num_threads: int | None = None
+    num_threads: BasicOption[int] = BasicOption()
     """Set the number of OpenMP threads to execute the parallel region."""
 
-    omit_parallel_construct: bool = False
+    omit_parallel_construct: BasicOption[bool] = BasicOption(False)
     """If set to ``True``, the OpenMP ``parallel`` construct is omitted, producing just a ``#pragma omp for``.
     
     Use this option only if you intend to wrap the kernel into an external ``#pragma omp parallel`` region.
@@ -197,62 +199,17 @@ class OpenMpConfig:
 
 
 @dataclass
-class CpuOptimConfig:
-    """Configuration for the CPU optimizer.
-
-    If any flag in this configuration is set to a value not supported by the CPU specified
-    in `CreateKernelConfig.target`, an error will be raised.
-    """
-
-    openmp: bool | OpenMpConfig = False
-    """Enable OpenMP parallelization.
-    
-    If set to `True`, the kernel will be parallelized using OpenMP according to the default settings in `OpenMpConfig`.
-    To customize OpenMP parallelization, pass an instance of `OpenMpConfig` instead.
-    """
-
-    vectorize: bool | VectorizationConfig = False
-    """Enable and configure auto-vectorization.
-    
-    If set to an instance of `VectorizationConfig` and a CPU target with vector capabilities is selected,
-    pystencils will attempt to vectorize the kernel according to the given vectorization options.
-
-    If set to `True`, pystencils will infer vectorization options from the given CPU target.
-
-    If set to `False`, no vectorization takes place.
-    """
-
-    loop_blocking: None | tuple[int, ...] = None
-    """Block sizes for loop blocking.
-    
-    If set, the kernel's loops will be tiled according to the given block sizes.
-    """
-
-    use_cacheline_zeroing: bool = False
-    """Enable cache-line zeroing.
-    
-    If set to `True` and the selected CPU supports cacheline zeroing, the CPU optimizer will attempt
-    to produce cacheline zeroing instructions where possible.
-    """
-
-    def get_vectorization_config(self) -> VectorizationConfig | None:
-        if self.vectorize is True:
-            return VectorizationConfig()
-        elif isinstance(self.vectorize, VectorizationConfig):
-            return self.vectorize
-        else:
-            return None
-
-
-@dataclass
-class VectorizationConfig:
+class VectorizationConfig(ConfigBase):
     """Configuration for the auto-vectorizer.
 
     If any flag in this configuration is set to a value not supported by the CPU specified
     in `CreateKernelConfig.target`, an error will be raised.
     """
 
-    lanes: int | None = None
+    enable: BasicOption[bool] = BasicOption(False)
+    """Enable intrinsic vectorization."""
+
+    lanes: BasicOption[int] = BasicOption()
     """Number of SIMD lanes to be used in vectorization.
 
     If set to `None` (the default), the vector register width will be automatically set to the broadest possible.
@@ -261,7 +218,9 @@ class VectorizationConfig:
     operation contained in the kernel with the given number of lanes, an error will be raised.
     """
 
-    use_nontemporal_stores: bool | Collection[str | Field] = False
+    use_nontemporal_stores: BasicOption[bool | Collection[str | Field]] = BasicOption(
+        False
+    )
     """Enable nontemporal (streaming) stores.
     
     If set to `True` and the selected CPU supports streaming stores, the vectorizer will generate
@@ -271,14 +230,14 @@ class VectorizationConfig:
     the given fields.
     """
 
-    assume_aligned: bool = False
+    assume_aligned: BasicOption[bool] = BasicOption(False)
     """Assume field pointer alignment.
     
     If set to `True`, the vectorizer will assume that the address of the first inner entry
     (after ghost layers) of each field is aligned at the necessary byte boundary.
     """
 
-    assume_inner_stride_one: bool = False
+    assume_inner_stride_one: BasicOption[bool] = BasicOption(False)
     """Assume stride associated with the innermost spatial coordinate of all fields is one.
     
     If set to `True`, the vectorizer will replace the stride of the innermost spatial coordinate
@@ -307,10 +266,48 @@ class VectorizationConfig:
 
 
 @dataclass
-class GpuIndexingConfig:
+class CpuOptions(ConfigBase):
+    """Configuration for the CPU optimizer.
+
+    If any flag in this configuration is set to a value not supported by the CPU specified
+    in `CreateKernelConfig.target`, an error will be raised.
+    """
+
+    openmp: Category[OpenMpOptions] = Category(OpenMpOptions())
+    """Options governing OpenMP-instrumentation.
+    """
+
+    vectorize: Category[VectorizationConfig] = Category(VectorizationConfig())
+    """Options governing intrinsic vectorization.
+    """
+
+    loop_blocking: BasicOption[tuple[int, ...]] = BasicOption()
+    """Block sizes for loop blocking.
+
+    If set, the kernel's loops will be tiled according to the given block sizes.
+    """
+
+    use_cacheline_zeroing: BasicOption[bool] = BasicOption(False)
+    """Enable cache-line zeroing.
+
+    If set to `True` and the selected CPU supports cacheline zeroing, the CPU optimizer will attempt
+    to produce cacheline zeroing instructions where possible.
+    """
+
+    def get_vectorization_config(self) -> VectorizationConfig | None:
+        """Return the vectorization options if `vectorize.enable` is in effect, else `None`."""
+        vec_opts = self.vectorize
+        if vec_opts is not None and vec_opts.get_option("enable"):
+            return vec_opts
+        else:
+            return None
+
+
+@dataclass
+class GpuOptions(ConfigBase):
     """Configure index translation behaviour for kernels generated for GPU targets."""
 
-    omit_range_check: bool = False
+    omit_range_check: BasicOption[bool] = BasicOption(False)
     """If set to `True`, omit the iteration counter range check.
     
     By default, the code generator introduces a check if the iteration counters computed from GPU block and thread
@@ -318,10 +315,10 @@ class GpuIndexingConfig:
     This check can be discarded through this option, at your own peril.
     """
 
-    block_size: tuple[int, int, int] | None = None
+    block_size: BasicOption[tuple[int, int, int]] = BasicOption()
     """Desired block size for the execution of GPU kernels. May be overridden later by the runtime system."""
 
-    manual_launch_grid: bool = False
+    manual_launch_grid: BasicOption[bool] = BasicOption(False)
     """Always require a manually specified launch grid when running this kernel.
     
     If set to `True`, the code generator will not attempt to infer the size of
@@ -329,7 +326,7 @@ class GpuIndexingConfig:
     The launch grid will then have to be specified manually at runtime.
     """
 
-    sycl_automatic_block_size: bool = True
+    sycl_automatic_block_size: BasicOption[bool] = BasicOption(True)
     """If set to `True` while generating for `Target.SYCL`, let the SYCL runtime decide on the block size.
 
     If set to `True`, the kernel is generated for execution via
@@ -343,24 +340,30 @@ class GpuIndexingConfig:
     """
 
 
+GhostLayerSpec = _AUTO_TYPE | int | Sequence[int | tuple[int, int]]
+
+
+IterationSliceSpec = int | slice | tuple[int | slice, ...]  # tuple of arbitrary length
+
+
 @dataclass
-class CreateKernelConfig:
+class CreateKernelConfig(ConfigBase):
     """Options for create_kernel."""
 
-    target: Target = Target.GenericCPU
+    target: BasicOption[Target] = BasicOption(Target.GenericCPU)
     """The code generation target."""
 
-    jit: JitBase | None = None
+    jit: BasicOption[JitBase] = BasicOption()
     """Just-in-time compiler used to compile and load the kernel for invocation from the current Python environment.
     
     If left at `None`, a default just-in-time compiler will be inferred from the `target` parameter.
     To explicitly disable JIT compilation, pass `pystencils.no_jit <pystencils.jit.no_jit>`.
     """
 
-    function_name: str = "kernel"
+    function_name: BasicOption[str] = BasicOption("kernel")
     """Name of the generated function"""
 
-    ghost_layers: None | _AUTO_TYPE | int | Sequence[int | tuple[int, int]] = None
+    ghost_layers: BasicOption[GhostLayerSpec] = BasicOption()
     """Specifies the number of ghost layers of the iteration region.
     
     Options:
@@ -376,7 +379,7 @@ class CreateKernelConfig:
         At most one of `ghost_layers`, `iteration_slice`, and `index_field` may be set.
     """
 
-    iteration_slice: None | int | slice | tuple[int | slice] = None
+    iteration_slice: BasicOption[IterationSliceSpec] = BasicOption()
     """Specifies the kernel's iteration slice.
 
     Example:
@@ -390,7 +393,7 @@ class CreateKernelConfig:
         At most one of `ghost_layers`, `iteration_slice`, and `index_field` may be set.
     """
 
-    index_field: Field | None = None
+    index_field: BasicOption[Field] = BasicOption()
     """Index field for a sparse kernel.
     
     If this option is set, a sparse kernel with the given field as index field will be generated.
@@ -401,10 +404,10 @@ class CreateKernelConfig:
 
     """Data Types"""
 
-    index_dtype: UserTypeSpec = DEFAULTS.index_dtype
+    index_dtype: Option[PsIntegerType, UserTypeSpec] = Option(DEFAULTS.index_dtype)
     """Data type used for all index calculations."""
 
-    default_dtype: UserTypeSpec = PsIeeeFloatType(64)
+    default_dtype: Option[PsScalarType, UserTypeSpec] = Option(DEFAULTS.numeric_dtype)
     """Default numeric data type.
     
     This data type will be applied to all untyped symbols.
@@ -412,14 +415,14 @@ class CreateKernelConfig:
 
     """Analysis"""
 
-    allow_double_writes: bool = False
+    allow_double_writes: BasicOption[bool] = BasicOption(False)
     """
     If True, don't check if every field is only written at a single location. This is required
     for example for kernels that are compiled with loop step sizes > 1, that handle multiple
     cells at once. Use with care!
     """
 
-    skip_independence_check: bool = False
+    skip_independence_check: BasicOption[bool] = BasicOption(False)
     """
     By default the assignment list is checked for read/write independence. This means fields are only written at
     locations where they are read. Doing so guarantees thread safety. In some cases e.g. for
@@ -428,17 +431,33 @@ class CreateKernelConfig:
 
     """Target-Specific Options"""
 
-    cpu_optim: None | CpuOptimConfig = None
-    """Configuration of the CPU kernel optimizer.
-    
-    If this parameter is set while `target` is a non-CPU target, an error will be raised.
-    """
-
-    gpu_indexing: None | GpuIndexingConfig = None
-    """Configure index translation for GPU kernels.
-    
-    It this parameter is set while `target` is not a GPU target, an error will be raised.
-    """
+    cpu_optim: Category[CpuOptions] = Category(CpuOptions())
+    """Options for CPU kernels."""
+
+    gpu_indexing: Category[GpuOptions] = Category(GpuOptions())
+    """Options for GPU Kernels."""
+
+    @index_dtype.validate
+    def validate_index_type(self, spec: UserTypeSpec):
+        dtype = create_type(spec)
+        if not isinstance(dtype, PsIntegerType):
+            raise ValueError("index_dtype must be an integer type")
+        return dtype
+
+    @default_dtype.validate
+    def validate_default_dtype(self, spec: UserTypeSpec):
+        dtype = create_type(spec)
+        if not isinstance(dtype, PsScalarType):
+            raise ValueError("default_dtype must be a scalar numeric type")
+        return dtype
+
+    @index_field.validate
+    def validate_index_field(self, idx_field: Field):
+        if idx_field.field_type != FieldType.INDEXED:
+            raise ValueError(
+                "Only fields of type FieldType.INDEXED can be used as index fields"
+            )
+        return idx_field
 
     #   Deprecated Options
 
@@ -457,20 +476,23 @@ class CreateKernelConfig:
     #   Getters
 
     def get_target(self) -> Target:
-        match self.target:
+        t: Target = self.get_option("target")
+        match t:
             case Target.CurrentCPU:
                 return Target.auto_cpu()
             case _:
-                return self.target
+                return t
 
     def get_jit(self) -> JitBase:
         """Returns either the user-specified JIT compiler, or infers one from the target if none is given."""
-        if self.jit is None:
-            if self.target.is_cpu():
+        jit: JitBase | None = self.get_option("jit")
+
+        if jit is None:
+            if self.get_target().is_cpu():
                 from ..jit import LegacyCpuJit
 
                 return LegacyCpuJit()
-            elif self.target == Target.CUDA:
+            elif self.get_target() == Target.CUDA:
                 try:
                     from ..jit.gpu_cupy import CupyJit
 
@@ -487,7 +509,7 @@ class CreateKernelConfig:
 
                     return no_jit
 
-            elif self.target == Target.SYCL:
+            elif self.get_target() == Target.SYCL:
                 from ..jit import no_jit
 
                 return no_jit
@@ -496,64 +518,14 @@ class CreateKernelConfig:
                     f"No default JIT compiler implemented yet for target {self.target}"
                 )
         else:
-            return self.jit
+            return jit
 
     #   Postprocessing
 
     def __post_init__(self, *args):
-
         #   Check deprecated options
         self._check_deprecations(*args)
 
-        #   Check index data type
-        if not isinstance(create_type(self.index_dtype), PsIntegerType):
-            raise PsOptionsError("`index_dtype` was not an integer type.")
-
-        #   Check iteration space argument consistency
-        if (
-            int(self.iteration_slice is not None)
-            + int(self.ghost_layers is not None)
-            + int(self.index_field is not None)
-            > 1
-        ):
-            raise PsOptionsError(
-                "Parameters `iteration_slice`, `ghost_layers` and 'index_field` are mutually exclusive; "
-                "at most one of them may be set."
-            )
-
-        #   Check index field
-        if (
-            self.index_field is not None
-            and self.index_field.field_type != FieldType.INDEXED
-        ):
-            raise PsOptionsError(
-                "Only fields with `field_type == FieldType.INDEXED` can be specified as `index_field`"
-            )
-
-        #   Check optim
-        if self.cpu_optim is not None:
-            if (
-                self.cpu_optim.vectorize is not False
-                and not self.target.is_vector_cpu()
-            ):
-                raise PsOptionsError(
-                    f"Cannot enable auto-vectorization for non-vector CPU target {self.target}"
-                )
-
-        if self.gpu_indexing is not None:
-            if isinstance(self.gpu_indexing, str):
-                match self.gpu_indexing:
-                    case "block":
-                        self.gpu_indexing = GpuIndexingConfig()
-                    case "line":
-                        raise NotImplementedError(
-                            "GPU line indexing is currently unavailable."
-                        )
-                    case other:
-                        raise PsOptionsError(
-                            f"Invalid value for option gpu_indexing: {other}"
-                        )
-
     def _check_deprecations(
         self,
         data_type: UserTypeSpec | None,
@@ -561,7 +533,7 @@ class CreateKernelConfig:
         cpu_vectorize_info: dict | None,
         gpu_indexing_params: dict | None,
     ):  # pragma: no cover
-        optim: CpuOptimConfig | None = None
+        optim: CpuOptions | None = None
 
         if data_type is not None:
             _deprecated_option("data_type", "default_dtype")
@@ -575,20 +547,20 @@ class CreateKernelConfig:
         if cpu_openmp is not None:
             _deprecated_option("cpu_openmp", "cpu_optim.openmp")
 
-            deprecated_omp: OpenMpConfig | bool
+            deprecated_omp: OpenMpOptions | bool
             match cpu_openmp:
                 case True:
-                    deprecated_omp = OpenMpConfig()
+                    deprecated_omp = OpenMpOptions()
                 case False:
                     deprecated_omp = False
                 case int():
-                    deprecated_omp = OpenMpConfig(num_threads=cpu_openmp)
+                    deprecated_omp = OpenMpOptions(num_threads=cpu_openmp)
                 case _:
                     raise PsOptionsError(
                         f"Invalid option for `cpu_openmp`: {cpu_openmp}"
                     )
 
-            optim = CpuOptimConfig(openmp=deprecated_omp)
+            optim = CpuOptions(openmp=deprecated_omp)
 
         if cpu_vectorize_info is not None:
             _deprecated_option("cpu_vectorize_info", "cpu_optim.vectorize")
@@ -637,7 +609,7 @@ class CreateKernelConfig:
             if optim is not None:
                 optim = replace(optim, vectorize=deprecated_vec_opts)
             else:
-                optim = CpuOptimConfig(vectorize=deprecated_vec_opts)
+                optim = CpuOptions(vectorize=deprecated_vec_opts)
 
         if optim is not None:
             if self.cpu_optim is not None:
@@ -655,7 +627,7 @@ class CreateKernelConfig:
                     "Cannot specify both `gpu_indexing` and the deprecated `gpu_indexing_params` at the same time."
                 )
 
-            self.gpu_indexing = GpuIndexingConfig(
+            self.gpu_indexing = GpuOptions(
                 block_size=gpu_indexing_params.get("block_size", None)
             )
 
diff --git a/src/pystencils/codegen/driver.py b/src/pystencils/codegen/driver.py
index 28b685b55..47bcb905d 100644
--- a/src/pystencils/codegen/driver.py
+++ b/src/pystencils/codegen/driver.py
@@ -3,12 +3,13 @@ from typing import cast, Sequence, Iterable, TYPE_CHECKING
 from dataclasses import dataclass, replace
 
 from .target import Target
-from .config import CreateKernelConfig, OpenMpConfig, VectorizationConfig, AUTO
+from .config import CreateKernelConfig, OpenMpOptions, VectorizationConfig, AUTO, GhostLayerSpec, IterationSliceSpec
 from .kernel import Kernel, GpuKernel, GpuThreadsRange
 from .properties import PsSymbolProperty, FieldShape, FieldStride, FieldBasePtr
 from .parameters import Parameter
 
-from ..types import create_numeric_type, PsIntegerType, PsScalarType
+from ..field import Field
+from ..types import PsIntegerType, PsScalarType
 
 from ..backend.memory import PsSymbol
 from ..backend.ast import PsAstNode
@@ -105,15 +106,36 @@ class DefaultKernelCreationDriver:
     def __init__(self, cfg: CreateKernelConfig, retain_intermediates: bool = False):
         self._cfg = cfg
 
-        idx_dtype = create_numeric_type(self._cfg.index_dtype)
-        assert isinstance(idx_dtype, PsIntegerType)
+        #   Data Type Options
+        idx_dtype: PsIntegerType = cfg.get_option("index_dtype")
+        default_dtype: PsScalarType = cfg.get_option("default_dtype")
+
+        #   Iteration Space Options
+        num_ispace_options_set = (
+            int(cfg.is_option_set("ghost_layers"))
+            + int(cfg.is_option_set("iteration_slice"))
+            + int(cfg.is_option_set("index_field"))
+        ) 
+        
+        if num_ispace_options_set > 1:
+            raise ValueError(
+                "At most one of the options 'ghost_layers' 'iteration_slice' and 'index_field' may be set."
+            )
+        
+        self._ghost_layers: GhostLayerSpec | None = cfg.get_option("ghost_layers")
+        self._iteration_slice: IterationSliceSpec | None = cfg.get_option("iteration_slice")
+        self._index_field: Field | None = cfg.get_option("index_field")
+
+        if num_ispace_options_set == 0:
+            self._ghost_layers = AUTO
 
+        #   Create the context
         self._ctx = KernelCreationContext(
-            default_dtype=create_numeric_type(self._cfg.default_dtype),
+            default_dtype=default_dtype,
             index_dtype=idx_dtype,
         )
 
-        self._target = self._cfg.get_target()
+        self._target = cfg.get_target()
         self._platform = self._get_platform()
 
         self._intermediates: CodegenIntermediates | None
@@ -153,7 +175,7 @@ class DefaultKernelCreationDriver:
             self._intermediates.constants_eliminated = kernel_ast.clone()
 
         #   Target-Specific optimizations
-        if self._cfg.target.is_cpu():
+        if self._target.is_cpu():
             kernel_ast = self._transform_for_cpu(kernel_ast)
 
         #   Note: After this point, the AST may contain intrinsics, so type-dependent
@@ -174,13 +196,13 @@ class DefaultKernelCreationDriver:
         canonicalize = CanonicalizeSymbols(self._ctx, True)
         kernel_ast = cast(PsBlock, canonicalize(kernel_ast))
 
-        if self._cfg.target.is_cpu():
+        if self._target.is_cpu():
             return create_cpu_kernel_function(
                 self._ctx,
                 self._platform,
                 kernel_ast,
-                self._cfg.function_name,
-                self._cfg.target,
+                self._cfg.get_option("function_name"),
+                self._target,
                 self._cfg.get_jit(),
             )
         else:
@@ -189,8 +211,8 @@ class DefaultKernelCreationDriver:
                 self._platform,
                 kernel_ast,
                 gpu_threads,
-                self._cfg.function_name,
-                self._cfg.target,
+                self._cfg.get_option("function_name"),
+                self._target,
                 self._cfg.get_jit(),
             )
 
@@ -272,8 +294,8 @@ class DefaultKernelCreationDriver:
 
             params = (
                 cpu_cfg.openmp
-                if isinstance(cpu_cfg.openmp, OpenMpConfig)
-                else OpenMpConfig()
+                if isinstance(cpu_cfg.openmp, OpenMpOptions)
+                else OpenMpOptions()
             )
             add_omp = AddOpenMP(self._ctx, params)
             kernel_ast = cast(PsBlock, add_omp(kernel_ast))
diff --git a/tests/codegen/test_config.py b/tests/codegen/test_config.py
index 613c279c3..715830e70 100644
--- a/tests/codegen/test_config.py
+++ b/tests/codegen/test_config.py
@@ -1,16 +1,20 @@
+import pytest
+
 from dataclasses import dataclass
-from pystencils.codegen.config import SimpleOption, Option, Category, ConfigBase
+import numpy as np
+from pystencils.codegen.config import BasicOption, Option, Category, ConfigBase, CreateKernelConfig
+from pystencils.types.quick import Int, UInt
 
 
 def test_descriptors():
 
     @dataclass
     class SampleCategory(ConfigBase):
-        val1: SimpleOption[int] = SimpleOption(2)
+        val1: BasicOption[int] = BasicOption(2)
         val2: Option[bool, str] = Option(False)
 
         @val2.validate
-        def _val2(self, v: str):
+        def validate_val2(self, v: str):
             if v.lower() in ("off", "false", "no"):
                 return False
             elif v.lower() in ("on", "true", "yes"):
@@ -21,7 +25,7 @@ def test_descriptors():
     @dataclass
     class SampleConfig(ConfigBase):
         cat: Category[SampleCategory] = Category(SampleCategory())
-        val: SimpleOption[str] = SimpleOption("fallback")
+        val: BasicOption[str] = BasicOption("fallback")
 
     cfg = SampleConfig()
     
@@ -55,3 +59,16 @@ def test_descriptors():
     assert cfg.cat is not c
     c.val1 = 13
     assert cfg.cat.val1 == 32
+
+
+def test_config_validation():
+    cfg = CreateKernelConfig(index_dtype="int32")
+    assert cfg.index_dtype == Int(32)
+    cfg.index_dtype = np.uint64
+    assert cfg.index_dtype == UInt(64)
+
+    with pytest.raises(ValueError):
+        _ = CreateKernelConfig(index_dtype=np.float32)
+
+    with pytest.raises(ValueError):
+        cfg.index_dtype = "double"
diff --git a/tests/fixtures.py b/tests/fixtures.py
index 7c9521614..8c7f12015 100644
--- a/tests/fixtures.py
+++ b/tests/fixtures.py
@@ -49,7 +49,7 @@ def gen_config(target: ps.Target):
     if target.is_vector_cpu():
         gen_config = replace(
             gen_config,
-            cpu_optim=ps.CpuOptimConfig(
+            cpu_optim=ps.CpuOptions(
                 vectorize=ps.VectorizationConfig(assume_inner_stride_one=True)
             ),
         )
diff --git a/tests/kernelcreation/test_iteration_slices.py b/tests/kernelcreation/test_iteration_slices.py
index fee3544f8..5c7b4d8cb 100644
--- a/tests/kernelcreation/test_iteration_slices.py
+++ b/tests/kernelcreation/test_iteration_slices.py
@@ -13,7 +13,7 @@ from pystencils import (
     make_slice,
     Target,
     CreateKernelConfig,
-    GpuIndexingConfig,
+    GpuOptions,
     DynamicType,
 )
 from pystencils.sympyextensions.integer_functions import int_rem
@@ -141,7 +141,7 @@ def test_triangle_pattern(gen_config: CreateKernelConfig, xp):
 
     if gen_config.target == Target.CUDA:
         gen_config = replace(
-            gen_config, gpu_indexing=GpuIndexingConfig(manual_launch_grid=True)
+            gen_config, gpu_indexing=GpuOptions(manual_launch_grid=True)
         )
 
     kernel = create_kernel(update, gen_config).compile()
@@ -174,7 +174,7 @@ def test_red_black_pattern(gen_config: CreateKernelConfig, xp):
 
     if gen_config.target == Target.CUDA:
         gen_config = replace(
-            gen_config, gpu_indexing=GpuIndexingConfig(manual_launch_grid=True)
+            gen_config, gpu_indexing=GpuOptions(manual_launch_grid=True)
         )
 
     try:
diff --git a/tests/nbackend/kernelcreation/test_openmp.py b/tests/nbackend/kernelcreation/test_openmp.py
index d7be8eb98..ae775ca20 100644
--- a/tests/nbackend/kernelcreation/test_openmp.py
+++ b/tests/nbackend/kernelcreation/test_openmp.py
@@ -4,8 +4,8 @@ from pystencils import (
     Assignment,
     create_kernel,
     CreateKernelConfig,
-    CpuOptimConfig,
-    OpenMpConfig,
+    CpuOptions,
+    OpenMpOptions,
     Target,
 )
 
@@ -21,14 +21,14 @@ def test_openmp(nesting_depth, schedule, collapse, omit_parallel_construct):
     f, g = fields("f, g: [3D]")
     asm = Assignment(f.center(0), g.center(0))
 
-    omp = OpenMpConfig(
+    omp = OpenMpOptions(
         nesting_depth=nesting_depth,
         schedule=schedule,
         collapse=collapse,
         omit_parallel_construct=omit_parallel_construct,
     )
     gen_config = CreateKernelConfig(
-        target=Target.CPU, cpu_optim=CpuOptimConfig(openmp=omp)
+        target=Target.CPU, cpu_optim=CpuOptions(openmp=omp)
     )
 
     kernel = create_kernel(asm, gen_config)
diff --git a/tests/test_quicktests.py b/tests/test_quicktests.py
index 5d5dba0ea..3e7f4f071 100644
--- a/tests/test_quicktests.py
+++ b/tests/test_quicktests.py
@@ -74,7 +74,7 @@ def test_basic_vectorization():
     ast = ps.create_kernel(
         update_rule,
         target=target,
-        cpu_optim=ps.CpuOptimConfig(
+        cpu_optim=ps.CpuOptions(
             vectorize=ps.VectorizationConfig(assume_inner_stride_one=True)
         ),
     )
-- 
GitLab


From d8610f6bf4a25fac4dd04aaad1f3cad4b0a484d6 Mon Sep 17 00:00:00 2001
From: Frederik Hennig <frederik.hennig@fau.de>
Date: Mon, 20 Jan 2025 14:09:06 +0100
Subject: [PATCH 3/7] Fix deepcopy of categories. Remove mentions of `config`
 from the backend. Adapt test suite.

---
 .../01_tutorial_getting_started.ipynb         | 298 +++++++++---------
 src/pystencils/__init__.py                    |   4 +-
 .../backend/kernelcreation/iteration_space.py |  64 +++-
 src/pystencils/backend/platforms/cuda.py      |  16 +-
 src/pystencils/backend/platforms/sycl.py      |  24 +-
 .../backend/transformations/add_pragmas.py    |  37 ++-
 src/pystencils/codegen/__init__.py            |   4 +-
 src/pystencils/codegen/config.py              | 160 ++++++----
 src/pystencils/codegen/driver.py              | 107 +++++--
 src/pystencils/types/types.py                 |   2 +-
 tests/codegen/test_config.py                  |  88 +++++-
 tests/fixtures.py                             |  17 +-
 tests/kernelcreation/test_buffer_gpu.py       |   4 +-
 tests/kernelcreation/test_gpu.py              |   6 +-
 tests/kernelcreation/test_iteration_slices.py |  13 +-
 tests/nbackend/kernelcreation/test_openmp.py  |   7 +-
 tests/nbackend/kernelcreation/test_options.py |  28 --
 .../transformations/test_add_pragmas.py       |   1 +
 tests/test_quicktests.py                      |   4 +-
 19 files changed, 524 insertions(+), 360 deletions(-)
 delete mode 100644 tests/nbackend/kernelcreation/test_options.py

diff --git a/docs/source/tutorials/01_tutorial_getting_started.ipynb b/docs/source/tutorials/01_tutorial_getting_started.ipynb
index 5ce765fce..04dc50e51 100644
--- a/docs/source/tutorials/01_tutorial_getting_started.ipynb
+++ b/docs/source/tutorials/01_tutorial_getting_started.ipynb
@@ -2,7 +2,7 @@
  "cells": [
   {
    "cell_type": "code",
-   "execution_count": 37,
+   "execution_count": 1,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -35,7 +35,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 38,
+   "execution_count": 2,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -52,7 +52,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 39,
+   "execution_count": 3,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -63,14 +63,14 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 40,
+   "execution_count": 4,
    "metadata": {},
    "outputs": [
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "4.74 ms ± 1.1 ms per loop (mean ± std. dev. of 7 runs, 100 loops each)\n"
+      "3.91 ms ± 88.9 μs per loop (mean ± std. dev. of 7 runs, 100 loops each)\n"
      ]
     }
    ],
@@ -88,22 +88,19 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 41,
+   "execution_count": 5,
    "metadata": {},
    "outputs": [
     {
      "data": {
-      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAdoAAAAnCAYAAACon4ncAAAACXBIWXMAAA7EAAAOxAGVKw4bAAANI0lEQVR4Ae2d7bXctBaGJ2elgAAdcDvILYHQAZAKEjoIi3/5x+J2AFQQoAO4FYSbDqADwung3PfRWI4sW7bk8Yc8s7WWR19bW9K7JW9vSWM/enh4OJmrE4HXr19/r5b93bTuX/K/Udo9cflP5T3X9VThz3V9qvA3usj/U/Ef5Xec0kJ+0P2htHcdIos4BCKsLsI+4mW4j4yxCKuLcKeaiJ9hP4L9nKwI34vkFfG6KlndzQHXyqyPgAbdb6rljfz/cCn8iy4UpXfPlY5iPcn/Qd7X8r9uMkM68j/V9afy3sr3/P6teIeuKXvznjBaBHvDvWwoLYU7tRr2ZdjPoV5KXrcgK1O0c0bYymUYeKrimfzQ2kSp/o+qm3wUJ+5jXdA6pavwE12xNYvi+FU0v8r37isFSO840XzR8O+kLxUR71dL8VqDT9P3pbA33DOFtDDu1FoN9pkQdMhsnpx2uT91hJAZyZHV40xeRrYDAhIgNwss2d8V/jxoAsr05ybOErK3ZE+ia8PkK461i+L+jrh3Sv/Ih72vtJcK/yX/ryCNuhj0WMxYwZNOdN5SZtn7E8X9Q8BJYSzqH3R12jnJdGMCte8i7OmjmnwJ7kkMU1CozqSslHcTuIPNHtinZDKWbvK6XFZj+KbyxnAfKZOcj+I3ObfuUowtfT8EJDgUHcvFz3Rxw2bP1Qv6pPA7Xfe6yMd5pXuOdX9Rkijq+25yN6Z8btLs9f7ucxRGiaN8yeOadCoD/d/y2U+mD7/JR2mF7heltf0JM/YOq11LYX8J7jkYdqBSu3NkdQu4g8um2HcEkRkxebVAzZZVy6EgkIl7h6PK5MzH0bllirYDaT0RCRdF9UgtwpJF+b1SPFZ2XyodC/Refs819JSJFV2PVgkoPpR661QehY6ybC3cNjMdgE+orAmzFIt155zCPi3ujyfZ1Vf7LsJe5enXbNxVdhLDGCDVOSmra8cdTPbAPpZFTtzktYiscqDu0OTg3ilwjkzOR/EdvaeZoh1Adc8kCex7XX7/9YQAdYXLxmHzsGjDfdcwj7L3TYL343xvEZOOMmwVZIcwM6LyKFMUTKyYqR+LK3RuYIYJe4fV/kWwvwT3QgznQHa1uANG5dibvAIELpFVwGbVYOF8TM4tU7SrimkWc79c3BaWsL9Q5MdgYJ6aAYBim7JWORiF5dtxKs9T2nsSG16DyrhTaDqCkh1y1PNxlPFW8dQDRES6WXRJ7OfiXoLhHGCuHXcwqRV7k1cfgbmy6nNaJ6VkPibn1uN12mZcL0DghcpiXfq9UVixHxsfHkLJsmw8aoVSTheWGorVW8oMnu+Udi8fBy+ndF1s+R+UbDxgqZt6a3KLYb8C7kMYzsHuqnEHkIqxN3lFCKwgq6iG1aJD8zE5t0zRriaHeYw18PhLD9eoEx0Klj+ITzrRtqd+E8QoQQbJpS7FA/7xcvKain1WP5bGfibuJRjO6efV4w4oW2Kvup6qSh5kpxwPvfE8mCpz9fKaKavTyrh7uZTMx6SsRhVt05GfVCOWx8+Kx1aVb4z5x0aAyY8yvMhpfPiDWYyXdxGzOA5N6U0nYnn4aA/3QgznAGC4n1FbDHvJjLG91jaIyet8n+jdn1bG3Y2SwvmYlNXd2EylI7r47ySdnNoLHGSl8k908fcUGmGuQgQkG24UxfJpZBs/yfN/XfY6nRMNYV6WEStV6vNL2WfiG/sdwX0UwwTuuejdPO4AtRP2uTIK6W5eXiOyCnFaJJyYW6PzMag4Kau7gGgwqIr9TXN0L3Cw8DmR8jTg/QiNZe2PANYocmodcV0o0m91ubDir1qCs1xfKo3DWs4pzN+BeEkF+8LQchCLvc/YYQEkT0zHxFNx1eX3tadIa8sfwn0KQ+QU4z4lK99vw90jcT7jEI/5Yuw
/sMsPabyavPLhgrI3T8qKn6kzcB+aW1NjwjclObcee4oRn8J+SXCELJl1afkk4y0zJCAOJ13zS/hRqGwNtPu56jNWaBtXuOOUjyX8kfxW0UKgeLJMk++WgUQXW7lkz3Xw7C0vzWW2Ybke7tQ9hqHyerg3WBruZYJbBPuyKs/UJq9i1AZlVcplCvehuUUdSr9obt1lNBSLdK41C3vKL2a5wHAnhxJaUjHs1I3hajWQkDHL/J0n/GHqD6minzM+/KT5wOhGQ4b7foLfGPs5HbV50qA2V1ZzQF/jntZRtKqA5Qz3Kin5vImI03Rcg/uzynfLgw0t76/1lgrLePChHDduPuVGPFx2VPIxnNqNMrnEqj9ER9VPHiY4GVliGTJm7nM7KFpWBhgLV/vQkouFpxMWhrsHY2N/C+zndMnmSR+1mbLqM5pOWfye1ipadQJlwtdheP2cf08tT1S4nkUrGpToG/m8UJk1bF5+7+gV521G7M25V/opzDt0v9QF3aGc2ozSoV8vDtXwmY1Vf0eXSGK2oucP5yWO0+u98VTC4BppDff9pLoB9nM6Z/NkALVSWQ2wmExSHYvf05yiFWOUCYoSBRtaGoTdC+zD1okGKxWrlb0i77hBu8+4+QT57M+GNEHWOSge1X6WTW3Dmv+vrhcKZ1ttvU4eLGHNvq7J+2Aw95q7JjZr8u515IAJteFTW3tqEmlt2OS059HDw8NJhFieL+XzEvvWKc7fL/hrRsfKURxFSx6WCQoaCzZU0EpyG8iD5V3mOZ9lRJZkWwtHYW9F9z6x5svF/lQZ5Rd/lq3hyVI3CvZ9XGdBnIeXa9ijLujyeqTIUtxZfYndx03CkKx4WOy9hjJmYPE0AoZ7Gpsac0xedUnFK9p/1CxO1GKBOqcwVi7pnU+nnXOdEkUhhnuuLCG3CjmjPPx/El17A1TYvXZQvltils8NFUXVtsvX7/2cMg0f+tG2z5cf8xve7J99pvDqFq3qeBhrzy3kCYPOw15un1WOk8/srczanjDs3Zwuxt5wzx2hy9IJ92JZ0QKT17JyyOX2WMCj8LjiA09fwUT5rbVJ3Dulo7RQgk4ZKszhqfD9uc7qCMsrzKlWr7BQ1G4P1/OUT9pnPk5ZXXzPlBtoz2Ju6CbLNHw4uBXW76tJ+qLnRf5/iIDl46yPnieZZWSorlmTJ4O1kUwgYNhPALRStuG+ErArsTV5zQPW7dE2RWNF1u6vCtx2H1VhFBZLws4pnPqMW1seQtE5i+Ncyv0+o6yPK8xyNAo/bgeKmb3SnissQ11DS449vmGC6mCPmaVnFLo5Q8AQMAQMAUOgCIE7KRAUGUoIReec0lCKKDesORzLrl4Boqw6lmhD3/mMm2jYM3NllI8CxSp1B6PkUxf1hg6aIceem99/i/NLyiQ/YRQzjeNqL6fQOi9liGksbggYAoaAIWAIDCHwuElkn5T9UvZcP9H1RhcWqfufrPxQsb5QHGvU7acqjLtXnL3M0LG0jCUI3Ul+uHeGoh06tAJp7FCyKYUa0/r4UBkUe/sw4QkLfA6FdazwgrJGaggYAoaAIXCjCDhFKwWCEmoPJQVY9NJEi1XqLNOArhcUHdYsynrIoTipM3Rx3OdB661pn+b9kjK5it3z7vjqT9FBqk5hixgChoAhYAjcLAJ3O/UcxYkCbV2jmFGcQ1bnoGIvLAPflMJu23HrAWHKW7zCFYyjQMLYST14HaIPB8X+8LiPDY6DymSsS1crr5pl5ZeOxwSzeJ4A4X+NQwr1O1XGHrBTrKIhzJKtU5Dynyj+rfzQuhwtI3rvqK89xOUTze8hwP+iBx9sepQVJWhMtAfrKmpWaVMOh/2V4D4mp8PJZKwzVy6vamV1NyaUlfN6nz3SIGAfd+wTayjLl6JrDyZllPHdYBnbXhzh0RjwhWX4v+gBCktaCwHDfi1k5/M1mczHbuuStctqF4u2EQJ/l+EAVWidngRYJx4KTHlYWrV8li1s2uHDwvapOnG1y0o
1C8iwr086JpP6ZJJq0RFktZtFK3BY6rPPsqVGz/bpzyWT0pdpb9/K66zRsK9PriaT+mSSalH1stpN0YKYbuxYtPZZttTw2ShdcmDJ+IgHoDZCaL1qDPv1sJ3L2WQyF7ntyx1FVrsqWsQioJJLxUNiE32p1WWfmxoCskkTnux78z9oO5E9gtMaWYb9GqhextNkchl+W5Y+kqx2V7QIRoCxN7iKW5P3Kg3enikrCqUPL9u38jprNOzrk6vJpD6ZpFp0GFlVoWhTKFr6ughIwfLWLlsyXhfmQe6G/SAsuyaaTHaFv6jyo8nKFG2ReK+HWAOVJWMOo9mS8cZiNew3BjyjOpNJBkiVkBxRVu57tJXgZ83YEAENVl4GwmG02PEfZZbyORXOf52L9tBjZhbvI2DY9zHZO8VksrcE8us/oqxM0ebL9yYoNYj/UUf59GHvPdc3AcCOnTTsdwQ/UbXJJAFMhck1y8qWjiscMDs3iddccpnbHgHDfnvMp2o0mUwhVE9+tbIyi7aeQbJrS/Q0yKEo9m1ZUsbxusq3Sg8/b+gy7GdZBAz7ZfFcgpvJZAkUt+FxBFn9HxdoFR2CYF4nAAAAAElFTkSuQmCC",
       "text/latex": [
        "$\\displaystyle {dst}_{(0,0)} \\leftarrow_{} \\frac{{src}_{(1,0)}}{4} + \\frac{{src}_{(0,1)}}{4} + \\frac{{src}_{(0,-1)}}{4} + \\frac{{src}_{(-1,0)}}{4}$"
       ],
       "text/plain": [
-       "         src_E   src_N   src_S   src_W\n",
-       "dst_C := ───── + ───── + ───── + ─────\n",
-       "           4       4       4       4  "
+       "Assignment(dst_C, src_E/4 + src_N/4 + src_S/4 + src_W/4)"
       ]
      },
-     "execution_count": 41,
+     "execution_count": 5,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -118,12 +115,12 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 42,
+   "execution_count": 6,
    "metadata": {},
    "outputs": [
     {
      "data": {
-      "image/png": "iVBORw0KGgoAAAANSUhEUgAAARUAAAEnCAYAAACHXNdEAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/TGe4hAAAACXBIWXMAAA9hAAAPYQGoP6dpAAAPWklEQVR4nO3df5DU9X3H8df+ut37sdzBnhxgFWTEQK6RXzpT0xLwRkioYmYcZrSmYslYtZm20kmmQtXO2Ngpf6C1SfAnYvQmyehojVEz1jhtbceJMhYLoRHPE0GQAPcD7o7jdm93v9/O93v8ENjf9wYO9/mY2bld7u6768f9Pu/z/ez39gKu67oCACNBqw0BAFEBYI6ZCgBTYdvN4XzXn3b0u5Sj4RJW2kKSJkaDaq7hZxNOICrwbRvI6J93DunXBzPKljkmX2kI6c8viunrF9QwmlCAV3/w24GMVmw9rIFs5S8EenOVdTPrde1EwlLtmLdCT+1JjiooHkfSD3YNMZogKtUu67r69560ybZ2DjnqHCz34AlfNMxUqlx/xtURb5phxFvkRXUjKlUubdyANCdoVz2igoIO/Xyjdt22SB1tE9X99FpGC0URFRQUTrQosfJuxRcuY6RQEs5TQUENC671Pw6+8ytGCiVhpgLAFFEBYIqoADBFVACYIiooyM1k5KSScrNZKZs9cR3Ig6igoJ72depcMkX9r7Wrt/3BketvPMeoIS9+S7nKHUg5WvBun9n2Hm2tV1uC31SuZsxUAJgiKgBMcUYt8upYOKHo6Fz2Vi8jiJMQFeRFMFAJDn9QkqFtm9SxKKGeZ9cxYiiIqKAo13HUtf4exWbOZbRQFIc/KKrvlWcUmzVfzuAAo4WimKmgoGxfrw6+8JgSK9cwUigJUUFB3Rse0PjldyoUb2SkUBKigrySHVuV3P6+Gq9bwSihZKypIK+hLW9reHendixv9W87h/ulUFjpzz7RpDXrGTnkRFSQV+OyWxVvu0H9/f3q7upS5F9/pIZLvqQJN69i1JAXUUFewVidfxkYGJIaE0orqECsjvUVFERUUFAqmVQqlRq5sfI+RVpaGDEUxEItCjrUd+jk24dOvg2ciqggL8dx1N/Xf9K/ebMWb/YC5ENUqlykwDNgoL9fbo4/Y3rq7OWk7QUCVg8N5ymiUuXGhQOqD+X+XL5DHW/24s1icpkc5SlV7XgGVLlQIKCrJ0QKL9Cewpu9eLOYU02rDerSfIVC1SAq0G0XxfwZS6mHOLlmMd4TadW0WkYTRAXSrIawfvyVBn1tfFheW3It0J7q2IKtl6I58ZB+8OV6Lb2AN7wG76aPUxzOuPrt/h4t/sYf+2fS5hMIBvX8z36qq+ddrkQNE16cwJ/oQE59fX1KHn3peNu2bbrmmmt000036eGHH/b/LRQKqbm5mdHDaTijFjk1Njb6F8++ffv8j01NTWrhjFoUwbwVgCmiAsAUUQFgiqgAMEVUAJgiKgBMERUApogKAFNEBYApogLAFFEBYIqoADBFVACYIioAiAqAsYuZCgBTRAWAKaICwBRRAWCKqAAwRVQAmCIqAEwRFQCmiAoAU0QFgCmiAsAUUQFgiqgAMEVUAJgiKgBMERUApogKAFNEBYApogLAFFEBYIqoADBFVACYIioATBEVAKaICgBTRAWAKaICwBRRAWCKqAAwRVQAmCIqAEwRFQCmiAoAU0QFgCmiAsAUUQFgiqgAMEVUAJgiKgBMERUApogKAFNEBYApogLAFFEBYIqoADBFVACYIioATBEVAKaICgBTRAWAKaICwBRRAWCKqAAwRVQAmCIqAEwRFQCmiAoAU0QFgCmiAsAUUQFgiqgAMEVUAJgiKgBMERUApogKAFNEBYApogLAFFEBYCpc7Av2JrPaccRRynGLbiwYkBKRoFrjIYUCAavHCOAM+Wgwq9+lHKVL2L8jwYAmR4OaUR+qLCpb+zP6h84j+s3hbNkPNBE
J6E+nRPWdqbVlfy+AM++XB4b18M4h7Uo6ZX/v1FhQd02r1bUTa0qPilevb//msAayxeuVS0/a1b/sSirtyr9zAGPHG13D+u72QZWfkxFeiL63fVCRgLTkgprS1lR+ujdVcVA+78d7kkoZbAeAnQ17khUH5Rjn6HZyyRmV/+gdloUjjvRuX8ZkWwBGr2fY0ZaB8pc0cvG20z3slBaVAym72UVXjjsFcG5Y748lR8WmYyPSNAUYM6xXIzKuwXkqh36+UbtuW6SOtonqfnqt0UMDMBZY7N9lRyWcaFFi5d2KL1xW0R0CGLss9u+iJ7+dqmHBtf7HwXd+VfGdAhibLPZvTtMHYIqoADBFVACYIioAzm1U3ExGTiopN5uVstkT1yu0c+dOvf7663JdTucHRmPTpk3avHnzqLZhsX+XHZWe9nXqXDJF/a+1q7f9wZHrbzxX1jbS6bReeuklLVmyRNOnT9fSpUv15ptvlvtQABzV1dWlq666SvPnz9fcuXP15JNPamBgQOdi/y77JeXmlav9S6Wzkg0bNuiJJ57wByEUCh2foaRSqYq2CUDKerMKZ+T09S1btuj222/XXXfdpVtuuUV33HGH5s2bd8b377O2ppLJZk6alaxdu9YPyrGBAGDr2A/qoaEhbdy40Z+9zJkzx5+9DA4O6kwLuDkWM770XwdNNt7d3aX00/+ogVef9WclhSJyxRVXlFxTnF09PT168cUXNWvWLC1YsIDhH4MOHTqk559/Pu/nA0ffibFmxuWKff9napk0yeR+X5wb1+/Hw5Uf/nQsnFD0ay57q/f49d6eXqm/v6RZyXvvvedfMHZ98MEH/gXnH/fYMkMyqVRfX86olLt/51NWVErZ4OddeOGFap4zR1v/+2UFg8GCYXnooYfU1tZW1vZxdnz44Ye68cYbtXz5ct17770M+xjU1dWlxYsX5/18OBxWJpPRl1tb1ZNnllLu/p33vir9xqFtm7T7L5cq8e01Sqz4Xs6vqW9o0Oq7V+ur312pp556So8//rj279+f81BoxowZmj17dqUPB2dBc3Mz/4/GqH379uU85PFmKPX19VqxYoW/YBue3qob3h8w2b9NF2pdx1HX+nsUmzm3pK+/+OKLdf/992vPnj16+eWX/UVb7z/YiwsAW96sxOO9tOz9MPd+kD/yyCMl/0Aod/8+7f4r+aa+V55RbNZ8OYMDZf/HXn/99f7l008/PT578RYCW1paKnkoACTFYjE1NTX554B5sxLvJWXvFZ+zuX9XPFPJ9vXq4AuPKbFyjUbj87MX7/yVK6+8clTbA6pZU1OTOjs7j89KKg2Kxf5d9kyle8MDGr/8ToXijRXf6UkPIBz2F3QBjE4ikRjlFmz277JmKsmOrUpuf1+N162o+A4BjE1W+3dZM5WhLW9reHendixv9W87h/ulUFjpzz7RpDXrR/VAAJxbVvt3WVFpXHar4m03HL994IdrFJk8VRNuXlXOZgCMQVb7d1lRCcbq/Mvx29FaBWvrzdZXAJw7Vvt3xSe/eTjkAb64JlW4pME7vwEwRVQAnPmoREZ+S9pElGwBY0ZN0HDn9vfv07eXc5e/uNauBBcZbgvA6EyOBhU26oq3nUnREqPy9eYakzttjgQ0b9yo1oIBGGoIB/TVJpt98qqmsOLhYGlR+daUqC4Z5QzD++7V02sVPPqOUwDGhlXTajVulNOVeCigv5lWm/NzOcvRXBNU++VxfWty1J9tlMN7rF8bH9ZjrQ1a1hKt7BEDOGNa42E9e3mDlk2sUX2Z7z7ifb33fe2zG/zt5JJ3HnRBNKi/n1Gn+y6t1UDGVXLkjboLCgXkFzBivBgEwNashrDWzQwr69apP+MqXcL+HQmO7N+hIkcfRQ+uvDdTGhcJaFxZDxnA+cALxHjLl3s5TwWANV7vBWCKqAAwRVQAmCIqAEwRFQCmiAoAU0QFgCmiAsAUUQFgiqgAMEVUAJgiKgBMERUApogKAFNEBYApogLAFFE
BYIqoADBFVACYIioATBEVAKaICgBTRAWAKaICwBRRAWCKqAAwRVQAmCIqAEwRFQCmiAoAU0QFgCmiAsAUUQFgiqgAMEVUAJgiKgBMERUApogKAFNEBYApogLAFFEBYIqoADBFVACYIioATBEVAKaICgBTRAWAKaICwBRRAWCKqAAwRVQAmCIqAEwRFQCmiAoAU0QFgCmiAsAUUQFgiqgAMEVUAJgiKgBMERUApogKAFNEBYApogLAFFEBYIqoADBFVACYIioATBEVAKaICgBTRAWAKaICwBRRAWCKqAAwRVQAmCIqAEwRFQCmiAoAU0QFgCmiAsAUUQFgiqgAMBW23Ry+KDZt2qQDBw741z/66CP/4+bNm/Xqq6/616PRqBYvXnxOHyPGpoDruu65fhAYG7Kuq//py+h/93brL/7qr+VkswW//u9Wr9biK+foD5oiioUCZ+1xYmwjKvC9sj+lf9oxpJ60K7muPv74Y2WLRGX69OkKRyKqDUp/dmFMqy6pZTTBmgqk/+wZ1t9+eGQkKJ5AQI1NTQWHpq6+3g+KZ8iRHt2d1CO7hhhOEBVI7XtTck4ZiKbGxoJD05QjOj/xtsPRdNXj1Z8ql3Zc/fpg5rR/92Yh3mwkl1AopIYcn+tOu/q/w4UPmfDFR1SqXF/GVbaM2YjHPzQK5F6Y7R1m3b/aEZUq5xRogDcb8WYlKuPQKCuiUu2ICvILBBR955fS92+V7vwj6RcbTlqgBXIhKigo/ntTpWW3SfOuLnhIBBxDVFBQ46Jvqu4PvyHVNSgQDORcoAU+j6igqGOzk2gslneBFjiG3/1BUQ0NDf4rPqG6OkYLRTFTAWCKqAAwRVRQkJvJyEkl5Xq/XJjNnrgO5EFUUFBP+zp1Lpmi/tfa1dv+4Mj1N55j1JAXb31Q5Q6kHC14t89se4+21qstUWO2PZx/mKkAMEVUAJjiPBXk1bFwQtHRueytXkYQJyEqyItgoBIc/qAkQ9s2qWNRQj3PrmPEUBBRQVGu46hr/T2KzZzLaKEoDn9QVN8rzyg2a76cwQFGC0UxU0FB2b5eHXzhMSVWrmGkUBKigoK6Nzyg8cvvVChe+N31gWOICvJKdmxVcvv7arxuBaOEkrGmgryGtryt4d2d2rG81b/tHO6XQmGlP/tEk9asZ+SQE1FBXo3LblW87Ybjtw/8cI0ik6dqws2rGDXkRVSQVzBW51+O347WKlhbz/oKCiIqKBmHPCgFC7UATBEVAKaISpWLGT8DokH+hEe1IypVLh4OqDliF4Lptaf/7WVUF6JS5QKBgJY027z94+x4SJOtpz447/AMgL5zcUyX1I7uqRAPBXTfpfyxMfDG1ziqe9jRT/am9G/dw9o95GjYLT403oHOxGhAbRNq9CdToppRz6EPiAoAYxz+ADBFVACYIioATBEVALL0/wS1Td+LmKNVAAAAAElFTkSuQmCC",
+      "image/png": "iVBORw0KGgoAAAANSUhEUgAAARUAAAEnCAYAAACHXNdEAAAAOnRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjEwLjAsIGh0dHBzOi8vbWF0cGxvdGxpYi5vcmcvlHJYcgAAAAlwSFlzAAAPYQAAD2EBqD+naQAAD4NJREFUeJzt3X+Q03V+x/FXfuwm+yPsQlYWsAIy4sFtT37pTL2WA3eEO6p4Mw4zWq9iubFqb9pK524qVO2MPTvlD7T27vAn4unO3Y2O1vPUG+s5bW3HOWUsFo6euK4IghywP2B3WTbZJN9v55vlh2CSTbJvSGKej5nMJkvyzQfN97mf7yffDT7XdV0BgBG/1YYAwENUAJgiKgBMBW03h0o3kHD0u7ijkTxW2gKSJof8aqnlZxNOIypI2zmY1D/vGdavjySVKvCxX2kM6M8vCuvrF9Seo9Ghkvh49we/HUxq9Y5jGkwV/1Lw5iob5zTomsmEpdoxb4We3B8bV1A8jqQf7B02GxMqF1GpcinX1b/3Jky2tWfYUddQoQdP+KIhKlVuIOnquDfNMOIt8qK6EZUqlzBuQIIluqpHVJDT0Z9v0d5bl6qzfbJ6ntpQ6uGgAhAV5BSMtiq65i5Flqws9VBQIThPBTk1Lr4m/XXo7V+VeiioEMxUAJgiKgBMERUApogKAFNEBTm5yaSceExuKiWlUqevA1kQFeTU27FRXcunaeDVDvV1PDB6/fVnSz0slDF+S7nKHY47WvxOv9n2HmlrUHuU31SuZsxUAJgiKgBMcUYtsupcMmnM+1z6Zt95GQsqB1FBVgQDxeDwB3kZ3rlVnUuj6n1mY6mHgjJHVDAm13HUveluhecsKPVQUAE4/MGY+l9+WuG5i+QMDZZ6KKgAzFSQU6q/T0eef1TRNetLPRRUCKKCnHo236+Jq+5QINJU6qGgQhAVZBXr3KHYrvfUdO3qUg8FFYQ1FWQ1vP0tjezr0u5VbenbzrEBKRBU4tOPNWX9plIPD2WKqCCrppW3KNJ+vQYGBtTT3a2af/2RGi/+kibdtLbUQ0MZIyrIyh+uT18GB4elpqgS8ssXrmd9BTkRFeQUj8UUj8dHb6y5VzWtraUeEsocC7XI6Wj/0TNvHz3zNnA2ooKsHMfRQP/AGd/zZi3e7AXIhqhUuZocr4DBgQFl+gyvs2cvZ2zP57MaGioUUalyE4I+NQQy/1m2Qx1v9uLNYjKZGuIlVe14BVS5gM+nqybV5F6gPYs3e/FmMWebWefXJdkKhapBVKBbLwqnZyz5HuJkmsV4L6S1M+vOyfhQWYgKNLcxqB9/pVFfmxiU15ZMC7RnO7lg66VofiSgH3y5QSsu4AOvwafp4yzHkq5+e6hXy77xx+kzabPx+f167mc/1VULL1O0lp9NOI2oIKP+/n7FTrx1vHPnTl199dW68cYb9dBDD6W/FwgE1NLSUuJRohxxRi0yampqSl88Bw8eTH9tbm5WK2fUYgzMWwGYIioATBEVAKaICgBTRAWAKaICwBRRAWCKqAAwRVQAmCIqAEwRFQCmiAoAU0QFgCmiAsAUUQFgiqgAMEVUAJgiKgBMERUApogKAFNEBYApogLAFFEBYIqoADBFVACYIioATBEVAKaICgBTRAWAKaICwBRRAWCKqAAwRVQAmCIqAEwRFQCmiAoAU0QFgCmiAsAUUQFgiqgAMEVUAJgiKgBMERUApogKAFNEBYApogLAFFEBYIqoADBFVACYIioATBEVAKaICgBTRAWAKaICwBRRAWCKqAAwRVQAmCIqAEwRFQCmiAoAU0QFgCmiAsAUUQFgiqgAMEVUAJgiKgBMERUApogKAFNEBYApogLAFFEBYIqoADBFVACYIioATBEVAKaICgBTRAWAKaICwBRRAWCKqAAwRVQAmCIqAEwRFQCmgmPd4UAspd3HHcUdd8yN+X1StMavtkhAAZ/PaowAzpEPh1L6XdxRIo/
9u8bv09SQX7MbAsVFZcdAUv/QdVy/OZYqeKDRGp/+dFpI35lRV/BjAZx7vzw8oof2DGtvzCn4sTPCft05s07XTK7NPypevb79m2MaTI1dr0x6E67+ZW9MCVfpJwdQPl7vHtF3dw2p8JyM8kL0vV1DqvFJyy+ozW9N5acH4kUH5bN+vD+muMF2ANjZvD9WdFBOck5sJ5OMUfmPvhFZOO5I7/QnTbYFYPx6RxxtHyx8SSMTbzs9I05+UTkct5tddGd4UgClYb0/5h0Vm46NStAUoGxYr0YkXYPzVI7+fIv23rpUne2T1fPUBqOhASgHFvt3wVEJRlsVXXOXIktWFvWEAMqXxf495slvZ2tcfE3669Dbvyr6SQGUJ4v9m9P0AZgiKgBMERUApogKgNJGxU0m5cRjclMpKZU6fb1Ie/bs0WuvvSbX5XR+YDy2bt2qbdu2jWsbFvt3wVHp7dioruXTNPBqh/o6Hhi9/vqzBW0jkUjoxRdf1PLlyzVr1iytWLFCb7zxRqFDAXBCd3e3rrzySi1atEgLFizQE088ocHBQZVi/y74LeWWNevSl2JnJZs3b9bjjz+e/o8QCAROzVDi8XhR2wQgpbxZhTN6+vr27dt122236c4779TNN9+s22+/XQsXLjzn+/d5W1NJppJnzEo2bNiQDsrJ/xAAbJ38QT08PKwtW7akZy/z589Pz16GhoZ0rvncDIsZX/qvIyYb7+npVuKpf9TgK8+kZyW5InL55ZfnXVOcX729vXrhhRc0d+5cLV68uNTDQQZHjx7Vc889p2x8Jz6JsXb2ZQp//2dqnTJFFl5YENHvR4LFH/50Lpk05n0ufbPv1PW+3j5pYCCvWcm7776bvqB8vf/+++kLKo97cpkhFlO8vz9jVArdv7MpKCr5bPCzLrzwQrXMn68d//2S/H5/zrA8+OCDam9vL2j7OD8++OAD3XDDDVq1apXuueeeUg8HGXhLCsuWLVM2wWBQyWRSX25rU2+WWUqh+3fW5yr2gcM7t2rfX65Q9NvrFV39vYz3aWhs1Lq71umr312jJ598Uo899pgOHTqU8VBo9uzZmjdvXrHDwXnQ0tLC/6MydfDgwYyHPN4MpaGhQatXr04v2AZnten69wZN9m/ThVrXcdS96W6F5yzI6/7Tp0/Xfffdp/379+ull15KL9p6f2EvLgBsebMSj/fWsvfD3PtB/vDDD+f9A6HQ/ftzz1/Mg/pfflrhuYvkDA0W/Je97rrr0pdPPvnk1OzFWwhsbW0tZigAJIXDYTU3N6fPAfNmJd5byt47Pudz/y56ppLq79OR5x9VdM16jcdnZy/e+StXXHHFuLYHVLPm5mZ1dXWdmpUUGxSL/bvgmUrP5vs1cdUdCkSain7SMwYQDKYXdAGMTzQaHecWbPbvgmYqsc4diu16T03Xri76CQGUJ6v9u6CZyvD2tzSyr0u7V7WlbzvHBqRAUIlPP9aU9ZvGNRAApWW1fxcUlaaVtyjSfv2p24d/uF41U2do0k1rC9kMgDJktX8XFBV/uD59OXU7VCd/XYPZ+gqA0rHav4s++c3DIQ/wxTWlyP2bT34DYIqoADj3UakZ/S1pEyGyBZSNWr/hzp3evz+/vYy7/PQ6uxJcZLgtAOMzNeRX0Kgr3namhPKMytdbak2etKXGp4UTxrUWDMBQY9Cnrzbb7JNXNgcVCfrzi8q3poV08ThnGN6j182qk//EJ04BKA9rZ9ZpwjinK5GAT38zsy7jn2UsR0utXx2XRfStqaH0bKMQ3li/NjGoR9satbI1VNyIAZwzbZGgnrmsUSsn16qhwE8f8e7vPa5jXmN6O5lknQddEPLr72fX695L6jSYdBUb/aDunAI+pQtYY7wYBMDW3MagNs4JKuXWayDpKpHH/l3jH92/A2McfYx5cOV9mNKEGp8mFDRkAJXAC8REy7d7OU8FgDWiAsAUUQFgiqgAMEVUAJgiKgBMERUApogKAFNEBYApogLAFFEBYIq
oADBFVACYIioATBEVAKaICgBTRAWAKaICwBRRAWCKqAAwRVQAmCIqAEwRFQCmiAoAU0QFgCmiAsAUUQFgiqgAMEVUAJgiKgBMERUApogKAFNEBYApogLAFFEBYIqoADBFVACYIioATBEVAKaICgBTRAWAKaICwBRRAWCKqAAwRVQAmCIqAEwRFQCmiAoAU0QFgCmiAsAUUQFgiqgAMEVUAJgiKgBMERUApogKAFNEBYApogLAFFEBYIqoADBFVACYIioATBEVAKaICgBTRAWAKaICwBRRAWCKqAAwRVQAmCIqAEwRFQCmiAoAU0QFgCmiAsAUUQFgiqgAMEVUAJgiKgBMERUApogKAFNEBYApogLAFFEBYCpouzl8UWzdulWHDx9OX//www/TX7dt26ZXXnklfT0UCmnZsmUlHSPKk891XbfUg0B5SLmu/qc/qf890KO/+Ku/lpNK5bz/361bp2VXzNcfNNcoHPCdt3GivBEVpL18KK5/2j2s3oQrua4++ugjpcaIyqxZsxSsqVGdX/qzC8Nae3HdeRsvyhdrKtB/9o7obz84PhoUj8+npubmnI+pb2hIB8Uz7EiP7Ivp4b3D52O4KHNEBeo4EJdz1veam5pyPqY5Q3R+4m2HiW/VIypVLuG4+vWR5Oe+781CvNlIJoFAQI0Z/qwn4er/juU+ZMIXH1Gpcv1JV6kCZiOe9KGRL/PCbN8IM5VqR1SqnJOjAd5sxJuVqIBDo5SISrUjKsjO51Po7V9K379FuuOPpF9sPmOBFsiEqCCnyO/NkFbeKi28KuchEXASUUFOTUu/qfo//IZU3yif35dxgRb4LKKCMZ2cnYTC4awLtMBJ/O4PxtTY2Jh+xydQX1/qoaACMFMBYIqoADBFVJCTm0zKicfker9cmEqdvg5kQVSQU2/HRnUtn6aBVzvU1/HA6PXXny31sFDG+OiDKnc47mjxO/1m23ukrUHt0Vqz7aHyMFMBYIqoADDFeSrIqnPJpDHvc+mbfedlLKgcRAVZEQwUg8Mf5GV451Z1Lo2q95mNpR4KyhxRwZhcx1H3prsVnrOg1ENBBeDwB2Pqf/lphecukjM0WOqhoAIwU0FOqf4+HXn+UUXXrC/1UFAhiApy6tl8vyauukOBSO5P1wdOIirIKta5Q7Fd76np2tWlHgoqCGsqyGp4+1sa2del3ava0redYwNSIKjEpx9ryvpNpR4eyhRRQVZNK29RpP36U7cP/3C9aqbO0KSb1pZ0XChvRAVZ+cP16cup26E6+esaWF9BTkQFeeOQB/lgoRaAKaICwBRRqXJh41dAyM8/4VHtiEqViwR9aqmxC8Gsus//28uoLkSlyvl8Pi1vsfn4x3mRgKZaT31QcXgFQN+ZHtbFdeN7KUQCPt17Cf/YGPjga5zQM+LoJwfi+reeEe0bdjSSx6vCO9CZHPKpfVKt/mRaSLMbOPQBUQFgjMMfAKaICgBTRAWAKaICQJb+HwS1Td+bGZaOAAAAAElFTkSuQmCC",
       "text/plain": [
        "<Figure size 300x300 with 1 Axes>"
       ]
@@ -147,7 +144,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 43,
+   "execution_count": 7,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -165,7 +162,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 44,
+   "execution_count": 8,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -175,14 +172,14 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 45,
+   "execution_count": 9,
    "metadata": {},
    "outputs": [
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "548 μs ± 34.7 μs per loop (mean ± std. dev. of 7 runs, 1,000 loops each)\n"
+      "448 μs ± 72.8 μs per loop (mean ± std. dev. of 7 runs, 1,000 loops each)\n"
      ]
     }
    ],
@@ -212,7 +209,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 46,
+   "execution_count": 10,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -229,7 +226,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 47,
+   "execution_count": 11,
    "metadata": {},
    "outputs": [
     {
@@ -238,7 +235,7 @@
        "sympy.core.symbol.Symbol"
       ]
      },
-     "execution_count": 47,
+     "execution_count": 11,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -258,7 +255,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 48,
+   "execution_count": 12,
    "metadata": {},
    "outputs": [
     {
@@ -272,7 +269,7 @@
        "x â‹…(x + y + 5) + x "
       ]
      },
-     "execution_count": 48,
+     "execution_count": 12,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -291,7 +288,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 49,
+   "execution_count": 13,
    "metadata": {},
    "outputs": [
     {
@@ -305,7 +302,7 @@
        "x  + x â‹…y + 6â‹…x "
       ]
      },
-     "execution_count": 49,
+     "execution_count": 13,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -316,7 +313,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 50,
+   "execution_count": 14,
    "metadata": {},
    "outputs": [
     {
@@ -330,7 +327,7 @@
        "x â‹…(x + y + 6)"
       ]
      },
-     "execution_count": 50,
+     "execution_count": 14,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -341,7 +338,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 51,
+   "execution_count": 15,
    "metadata": {},
    "outputs": [
     {
@@ -355,7 +352,7 @@
        "x â‹…(x + cos(x) + 5) + x "
       ]
      },
-     "execution_count": 51,
+     "execution_count": 15,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -373,7 +370,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 52,
+   "execution_count": 16,
    "metadata": {},
    "outputs": [
     {
@@ -387,7 +384,7 @@
        "x â‹…(x + y + 5) + x  = 1"
       ]
      },
-     "execution_count": 52,
+     "execution_count": 16,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -399,7 +396,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 53,
+   "execution_count": 17,
    "metadata": {},
    "outputs": [
     {
@@ -415,7 +412,7 @@
        "⎣         x ⎦"
       ]
      },
-     "execution_count": 53,
+     "execution_count": 17,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -433,7 +430,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 54,
+   "execution_count": 18,
    "metadata": {},
    "outputs": [
     {
@@ -447,7 +444,7 @@
        "x â‹…(x + y + 5) + x "
       ]
      },
-     "execution_count": 54,
+     "execution_count": 18,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -458,7 +455,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 55,
+   "execution_count": 19,
    "metadata": {},
    "outputs": [
     {
@@ -467,7 +464,7 @@
        "<?xml version=\"1.0\" encoding=\"UTF-8\" standalone=\"no\"?>\n",
        "<!DOCTYPE svg PUBLIC \"-//W3C//DTD SVG 1.1//EN\"\n",
        " \"http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd\">\n",
-       "<!-- Generated by graphviz version 11.0.0 (0)\n",
+       "<!-- Generated by graphviz version 12.1.2 (0)\n",
        " -->\n",
        "<!-- Pages: 1 -->\n",
        "<svg width=\"422pt\" height=\"260pt\"\n",
@@ -616,10 +613,10 @@
        "</svg>\n"
       ],
       "text/plain": [
-       "<graphviz.sources.Source at 0x7e3154f58d30>"
+       "<graphviz.sources.Source at 0x7fd4d0f809d0>"
       ]
      },
-     "execution_count": 55,
+     "execution_count": 19,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -638,7 +635,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 56,
+   "execution_count": 20,
    "metadata": {},
    "outputs": [
     {
@@ -647,7 +644,7 @@
        "sympy.core.add.Add"
       ]
      },
-     "execution_count": 56,
+     "execution_count": 20,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -658,7 +655,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 57,
+   "execution_count": 21,
    "metadata": {},
    "outputs": [
     {
@@ -672,7 +669,7 @@
        "⎝x , x ⋅(x + y + 5)⎠"
       ]
      },
-     "execution_count": 57,
+     "execution_count": 21,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -699,7 +696,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 58,
+   "execution_count": 22,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -715,7 +712,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 59,
+   "execution_count": 23,
    "metadata": {},
    "outputs": [
     {
@@ -728,7 +725,7 @@
        "f_E__1"
       ]
      },
-     "execution_count": 59,
+     "execution_count": 23,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -747,7 +744,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 60,
+   "execution_count": 24,
    "metadata": {},
    "outputs": [
     {
@@ -756,7 +753,7 @@
        "True"
       ]
      },
-     "execution_count": 60,
+     "execution_count": 24,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -776,7 +773,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 61,
+   "execution_count": 25,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -785,7 +782,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 62,
+   "execution_count": 26,
    "metadata": {},
    "outputs": [
     {
@@ -802,7 +799,7 @@
        "_W__2â‹…wâ‚‚) "
       ]
      },
-     "execution_count": 62,
+     "execution_count": 26,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -824,7 +821,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 63,
+   "execution_count": 27,
    "metadata": {},
    "outputs": [
     {
@@ -841,7 +838,7 @@
        " img_W__2â‹…wâ‚‚) "
       ]
      },
-     "execution_count": 63,
+     "execution_count": 27,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -860,7 +857,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 64,
+   "execution_count": 28,
    "metadata": {},
    "outputs": [
     {
@@ -877,7 +874,7 @@
        "g_SW__2 - img_W__2â‹…wâ‚‚) "
       ]
      },
-     "execution_count": 64,
+     "execution_count": 28,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -897,14 +894,16 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 65,
+   "execution_count": 29,
    "metadata": {},
    "outputs": [
     {
      "name": "stderr",
      "output_type": "stream",
      "text": [
-      "/media/data/fhennig/research-hpc/projects/2024_pystencils_nbackend/pystencils/src/pystencils/config.py:327: FutureWarning: The `cpu_openmp` option of CreateKernelConfig is deprecated and will be removed in pystencils 2.1. Use `cpu_optim.openmp` instead.\n",
+      "/media/data/fhennig/research-hpc/projects/2024_pystencils_nbackend/pystencils/src/pystencils/codegen/config.py:633: FutureWarning: The `cpu_openmp` option of CreateKernelConfig is deprecated and will be removed in pystencils 2.1. Use `cpu_optim.openmp` instead.\n",
+      "  warn(\n",
+      "/media/data/fhennig/research-hpc/projects/2024_pystencils_nbackend/pystencils/src/pystencils/codegen/config.py:543: UserWarning: Setting the deprecated `cpu_openmp` option will override any options passed in the `cpu.openmp` category.\n",
       "  warn(\n"
      ]
     }
@@ -925,15 +924,18 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 66,
+   "execution_count": 30,
    "metadata": {},
    "outputs": [
     {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "No requests or imageio installed\n"
-     ]
+     "data": {
+      "image/png": "",
+      "text/plain": [
+       "<Figure size 640x480 with 1 Axes>"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
     }
    ],
    "source": [
@@ -953,12 +955,12 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 67,
+   "execution_count": 31,
    "metadata": {},
    "outputs": [
     {
      "data": {
-      "image/png": "",
+      "image/png": "",
       "text/plain": [
        "<Figure size 640x480 with 1 Axes>"
       ]
@@ -985,7 +987,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 70,
+   "execution_count": 32,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -1002,7 +1004,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 71,
+   "execution_count": 33,
    "metadata": {},
    "outputs": [
     {
@@ -1016,9 +1018,9 @@
        ".highlight .hll { background-color: #ffffcc }\n",
        ".highlight { background: #f8f8f8; }\n",
        ".highlight .c { color: #3D7B7B; font-style: italic } /* Comment */\n",
-       ".highlight .err { border: 1px solid #FF0000 } /* Error */\n",
+       ".highlight .err { border: 1px solid #F00 } /* Error */\n",
        ".highlight .k { color: #008000; font-weight: bold } /* Keyword */\n",
-       ".highlight .o { color: #666666 } /* Operator */\n",
+       ".highlight .o { color: #666 } /* Operator */\n",
        ".highlight .ch { color: #3D7B7B; font-style: italic } /* Comment.Hashbang */\n",
        ".highlight .cm { color: #3D7B7B; font-style: italic } /* Comment.Multiline */\n",
        ".highlight .cp { color: #9C6500 } /* Comment.Preproc */\n",
@@ -1035,34 +1037,34 @@
        ".highlight .gp { color: #000080; font-weight: bold } /* Generic.Prompt */\n",
        ".highlight .gs { font-weight: bold } /* Generic.Strong */\n",
        ".highlight .gu { color: #800080; font-weight: bold } /* Generic.Subheading */\n",
-       ".highlight .gt { color: #0044DD } /* Generic.Traceback */\n",
+       ".highlight .gt { color: #04D } /* Generic.Traceback */\n",
        ".highlight .kc { color: #008000; font-weight: bold } /* Keyword.Constant */\n",
        ".highlight .kd { color: #008000; font-weight: bold } /* Keyword.Declaration */\n",
        ".highlight .kn { color: #008000; font-weight: bold } /* Keyword.Namespace */\n",
        ".highlight .kp { color: #008000 } /* Keyword.Pseudo */\n",
        ".highlight .kr { color: #008000; font-weight: bold } /* Keyword.Reserved */\n",
        ".highlight .kt { color: #B00040 } /* Keyword.Type */\n",
-       ".highlight .m { color: #666666 } /* Literal.Number */\n",
+       ".highlight .m { color: #666 } /* Literal.Number */\n",
        ".highlight .s { color: #BA2121 } /* Literal.String */\n",
        ".highlight .na { color: #687822 } /* Name.Attribute */\n",
        ".highlight .nb { color: #008000 } /* Name.Builtin */\n",
-       ".highlight .nc { color: #0000FF; font-weight: bold } /* Name.Class */\n",
-       ".highlight .no { color: #880000 } /* Name.Constant */\n",
-       ".highlight .nd { color: #AA22FF } /* Name.Decorator */\n",
+       ".highlight .nc { color: #00F; font-weight: bold } /* Name.Class */\n",
+       ".highlight .no { color: #800 } /* Name.Constant */\n",
+       ".highlight .nd { color: #A2F } /* Name.Decorator */\n",
        ".highlight .ni { color: #717171; font-weight: bold } /* Name.Entity */\n",
        ".highlight .ne { color: #CB3F38; font-weight: bold } /* Name.Exception */\n",
-       ".highlight .nf { color: #0000FF } /* Name.Function */\n",
+       ".highlight .nf { color: #00F } /* Name.Function */\n",
        ".highlight .nl { color: #767600 } /* Name.Label */\n",
-       ".highlight .nn { color: #0000FF; font-weight: bold } /* Name.Namespace */\n",
+       ".highlight .nn { color: #00F; font-weight: bold } /* Name.Namespace */\n",
        ".highlight .nt { color: #008000; font-weight: bold } /* Name.Tag */\n",
        ".highlight .nv { color: #19177C } /* Name.Variable */\n",
-       ".highlight .ow { color: #AA22FF; font-weight: bold } /* Operator.Word */\n",
-       ".highlight .w { color: #bbbbbb } /* Text.Whitespace */\n",
-       ".highlight .mb { color: #666666 } /* Literal.Number.Bin */\n",
-       ".highlight .mf { color: #666666 } /* Literal.Number.Float */\n",
-       ".highlight .mh { color: #666666 } /* Literal.Number.Hex */\n",
-       ".highlight .mi { color: #666666 } /* Literal.Number.Integer */\n",
-       ".highlight .mo { color: #666666 } /* Literal.Number.Oct */\n",
+       ".highlight .ow { color: #A2F; font-weight: bold } /* Operator.Word */\n",
+       ".highlight .w { color: #BBB } /* Text.Whitespace */\n",
+       ".highlight .mb { color: #666 } /* Literal.Number.Bin */\n",
+       ".highlight .mf { color: #666 } /* Literal.Number.Float */\n",
+       ".highlight .mh { color: #666 } /* Literal.Number.Hex */\n",
+       ".highlight .mi { color: #666 } /* Literal.Number.Integer */\n",
+       ".highlight .mo { color: #666 } /* Literal.Number.Oct */\n",
        ".highlight .sa { color: #BA2121 } /* Literal.String.Affix */\n",
        ".highlight .sb { color: #BA2121 } /* Literal.String.Backtick */\n",
        ".highlight .sc { color: #BA2121 } /* Literal.String.Char */\n",
@@ -1077,12 +1079,12 @@
        ".highlight .s1 { color: #BA2121 } /* Literal.String.Single */\n",
        ".highlight .ss { color: #19177C } /* Literal.String.Symbol */\n",
        ".highlight .bp { color: #008000 } /* Name.Builtin.Pseudo */\n",
-       ".highlight .fm { color: #0000FF } /* Name.Function.Magic */\n",
+       ".highlight .fm { color: #00F } /* Name.Function.Magic */\n",
        ".highlight .vc { color: #19177C } /* Name.Variable.Class */\n",
        ".highlight .vg { color: #19177C } /* Name.Variable.Global */\n",
        ".highlight .vi { color: #19177C } /* Name.Variable.Instance */\n",
        ".highlight .vm { color: #19177C } /* Name.Variable.Magic */\n",
-       ".highlight .il { color: #666666 } /* Literal.Number.Integer.Long */</style>"
+       ".highlight .il { color: #666 } /* Literal.Number.Integer.Long */</style>"
       ],
       "text/plain": [
        "<IPython.core.display.HTML object>"
@@ -1094,32 +1096,28 @@
     {
      "data": {
       "text/html": [
-       "<div class=\"highlight\"><pre><span></span><span class=\"n\">FUNC_PREFIX</span><span class=\"w\"> </span><span class=\"kt\">void</span><span class=\"w\"> </span><span class=\"n\">kernel</span><span class=\"w\"> </span><span class=\"p\">(</span><span class=\"k\">const</span><span class=\"w\"> </span><span class=\"kt\">int64_t</span><span class=\"w\"> </span><span class=\"n\">_size_dst_0</span><span class=\"p\">,</span><span class=\"w\"> </span><span class=\"k\">const</span><span class=\"w\"> </span><span class=\"kt\">int64_t</span><span class=\"w\"> </span><span class=\"n\">_size_dst_1</span><span class=\"p\">,</span><span class=\"w\"> </span><span class=\"k\">const</span><span class=\"w\"> </span><span class=\"kt\">int64_t</span><span class=\"w\"> </span><span class=\"n\">_stride_dst_0</span><span class=\"p\">,</span><span class=\"w\"> </span><span class=\"k\">const</span><span class=\"w\"> </span><span class=\"kt\">int64_t</span><span class=\"w\"> </span><span class=\"n\">_stride_dst_1</span><span class=\"p\">,</span><span class=\"w\"> </span><span class=\"k\">const</span><span class=\"w\"> </span><span class=\"kt\">int64_t</span><span class=\"w\"> </span><span class=\"n\">_stride_img_0</span><span class=\"p\">,</span><span class=\"w\"> </span><span class=\"k\">const</span><span class=\"w\"> </span><span class=\"kt\">int64_t</span><span class=\"w\"> </span><span class=\"n\">_stride_img_1</span><span class=\"p\">,</span><span class=\"w\"> </span><span class=\"k\">const</span><span class=\"w\"> </span><span class=\"kt\">int64_t</span><span class=\"w\"> </span><span class=\"n\">_stride_img_2</span><span class=\"p\">,</span><span class=\"w\"> </span><span class=\"kt\">double</span><span class=\"w\"> </span><span class=\"o\">*</span><span class=\"w\"> </span><span class=\"k\">const</span><span class=\"w\">  </span><span class=\"n\">dst_data</span><span class=\"p\">,</span><span class=\"w\"> </span><span class=\"kt\">double</span><span class=\"w\"> </span><span 
class=\"o\">*</span><span class=\"w\"> </span><span class=\"k\">const</span><span class=\"w\">  </span><span class=\"n\">img_data</span><span class=\"p\">,</span><span class=\"w\"> </span><span class=\"k\">const</span><span class=\"w\"> </span><span class=\"kt\">double</span><span class=\"w\"> </span><span class=\"n\">w_2</span><span class=\"p\">)</span>\n",
+       "<div class=\"highlight\"><pre><span></span><span class=\"n\">FUNC_PREFIX</span><span class=\"w\"> </span><span class=\"kt\">void</span><span class=\"w\"> </span><span class=\"n\">kernel</span><span class=\"w\"> </span><span class=\"p\">(</span><span class=\"kt\">double</span><span class=\"w\"> </span><span class=\"o\">*</span><span class=\"w\"> </span><span class=\"n\">RESTRICT</span><span class=\"w\"> </span><span class=\"k\">const</span><span class=\"w\"> </span><span class=\"n\">_data_dst</span><span class=\"p\">,</span><span class=\"w\"> </span><span class=\"kt\">double</span><span class=\"w\"> </span><span class=\"o\">*</span><span class=\"w\"> </span><span class=\"n\">RESTRICT</span><span class=\"w\"> </span><span class=\"k\">const</span><span class=\"w\"> </span><span class=\"n\">_data_img</span><span class=\"p\">,</span><span class=\"w\"> </span><span class=\"k\">const</span><span class=\"w\"> </span><span class=\"kt\">int64_t</span><span class=\"w\"> </span><span class=\"n\">_size_dst_0</span><span class=\"p\">,</span><span class=\"w\"> </span><span class=\"k\">const</span><span class=\"w\"> </span><span class=\"kt\">int64_t</span><span class=\"w\"> </span><span class=\"n\">_size_dst_1</span><span class=\"p\">,</span><span class=\"w\"> </span><span class=\"k\">const</span><span class=\"w\"> </span><span class=\"kt\">int64_t</span><span class=\"w\"> </span><span class=\"n\">_stride_dst_0</span><span class=\"p\">,</span><span class=\"w\"> </span><span class=\"k\">const</span><span class=\"w\"> </span><span class=\"kt\">int64_t</span><span class=\"w\"> </span><span class=\"n\">_stride_dst_1</span><span class=\"p\">,</span><span class=\"w\"> </span><span class=\"k\">const</span><span class=\"w\"> </span><span class=\"kt\">int64_t</span><span class=\"w\"> </span><span class=\"n\">_stride_img_0</span><span class=\"p\">,</span><span class=\"w\"> </span><span class=\"k\">const</span><span class=\"w\"> </span><span class=\"kt\">int64_t</span><span 
class=\"w\"> </span><span class=\"n\">_stride_img_1</span><span class=\"p\">,</span><span class=\"w\"> </span><span class=\"k\">const</span><span class=\"w\"> </span><span class=\"kt\">int64_t</span><span class=\"w\"> </span><span class=\"n\">_stride_img_2</span><span class=\"p\">,</span><span class=\"w\"> </span><span class=\"k\">const</span><span class=\"w\"> </span><span class=\"kt\">double</span><span class=\"w\"> </span><span class=\"n\">w_2</span><span class=\"p\">)</span>\n",
        "<span class=\"p\">{</span>\n",
        "<span class=\"w\">   </span><span class=\"k\">for</span><span class=\"p\">(</span><span class=\"kt\">int64_t</span><span class=\"w\"> </span><span class=\"n\">ctr_0</span><span class=\"w\"> </span><span class=\"o\">=</span><span class=\"w\"> </span><span class=\"mf\">1L</span><span class=\"n\">L</span><span class=\"p\">;</span><span class=\"w\"> </span><span class=\"n\">ctr_0</span><span class=\"w\"> </span><span class=\"o\">&lt;</span><span class=\"w\"> </span><span class=\"n\">_size_dst_0</span><span class=\"w\"> </span><span class=\"o\">-</span><span class=\"w\"> </span><span class=\"mf\">1L</span><span class=\"n\">L</span><span class=\"p\">;</span><span class=\"w\"> </span><span class=\"n\">ctr_0</span><span class=\"w\"> </span><span class=\"o\">+=</span><span class=\"w\"> </span><span class=\"mf\">1L</span><span class=\"n\">L</span><span class=\"p\">)</span>\n",
        "<span class=\"w\">   </span><span class=\"p\">{</span>\n",
        "<span class=\"w\">      </span><span class=\"k\">for</span><span class=\"p\">(</span><span class=\"kt\">int64_t</span><span class=\"w\"> </span><span class=\"n\">ctr_1</span><span class=\"w\"> </span><span class=\"o\">=</span><span class=\"w\"> </span><span class=\"mf\">1L</span><span class=\"n\">L</span><span class=\"p\">;</span><span class=\"w\"> </span><span class=\"n\">ctr_1</span><span class=\"w\"> </span><span class=\"o\">&lt;</span><span class=\"w\"> </span><span class=\"n\">_size_dst_1</span><span class=\"w\"> </span><span class=\"o\">-</span><span class=\"w\"> </span><span class=\"mf\">1L</span><span class=\"n\">L</span><span class=\"p\">;</span><span class=\"w\"> </span><span class=\"n\">ctr_1</span><span class=\"w\"> </span><span class=\"o\">+=</span><span class=\"w\"> </span><span class=\"mf\">1L</span><span class=\"n\">L</span><span class=\"p\">)</span>\n",
        "<span class=\"w\">      </span><span class=\"p\">{</span>\n",
-       "<span class=\"w\">         </span><span class=\"n\">dst_data</span><span class=\"p\">[</span><span class=\"n\">ctr_0</span><span class=\"w\"> </span><span class=\"o\">*</span><span class=\"w\"> </span><span class=\"n\">_stride_dst_0</span><span class=\"w\"> </span><span class=\"o\">+</span><span class=\"w\"> </span><span class=\"n\">ctr_1</span><span class=\"w\"> </span><span class=\"o\">*</span><span class=\"w\"> </span><span class=\"n\">_stride_dst_1</span><span class=\"p\">]</span><span class=\"w\"> </span><span class=\"o\">=</span><span class=\"w\"> </span><span class=\"p\">(</span><span class=\"mf\">0.5</span><span class=\"w\"> </span><span class=\"o\">*</span><span class=\"w\"> </span><span class=\"n\">img_data</span><span class=\"p\">[(</span><span class=\"n\">ctr_0</span><span class=\"w\"> </span><span class=\"o\">+</span><span class=\"w\"> </span><span class=\"mf\">1L</span><span class=\"n\">L</span><span class=\"p\">)</span><span class=\"w\"> </span><span class=\"o\">*</span><span class=\"w\"> </span><span class=\"n\">_stride_img_0</span><span class=\"w\"> </span><span class=\"o\">+</span><span class=\"w\"> </span><span class=\"p\">(</span><span class=\"n\">ctr_1</span><span class=\"w\"> </span><span class=\"o\">+</span><span class=\"w\"> </span><span class=\"mi\">-1LL</span><span class=\"p\">)</span><span class=\"w\"> </span><span class=\"o\">*</span><span class=\"w\"> </span><span class=\"n\">_stride_img_1</span><span class=\"w\"> </span><span class=\"o\">+</span><span class=\"w\"> </span><span class=\"mf\">2L</span><span class=\"n\">L</span><span class=\"w\"> </span><span class=\"o\">*</span><span class=\"w\"> </span><span class=\"n\">_stride_img_2</span><span class=\"p\">]</span><span class=\"w\"> </span><span class=\"o\">-</span><span class=\"w\"> </span><span class=\"mf\">0.5</span><span class=\"w\"> </span><span class=\"o\">*</span><span class=\"w\"> </span><span class=\"n\">img_data</span><span class=\"p\">[(</span><span 
class=\"n\">ctr_0</span><span class=\"w\"> </span><span class=\"o\">+</span><span class=\"w\"> </span><span class=\"mf\">1L</span><span class=\"n\">L</span><span class=\"p\">)</span><span class=\"w\"> </span><span class=\"o\">*</span><span class=\"w\"> </span><span class=\"n\">_stride_img_0</span><span class=\"w\"> </span><span class=\"o\">+</span><span class=\"w\"> </span><span class=\"p\">(</span><span class=\"n\">ctr_1</span><span class=\"w\"> </span><span class=\"o\">+</span><span class=\"w\"> </span><span class=\"mf\">1L</span><span class=\"n\">L</span><span class=\"p\">)</span><span class=\"w\"> </span><span class=\"o\">*</span><span class=\"w\"> </span><span class=\"n\">_stride_img_1</span><span class=\"w\"> </span><span class=\"o\">+</span><span class=\"w\"> </span><span class=\"mf\">2L</span><span class=\"n\">L</span><span class=\"w\"> </span><span class=\"o\">*</span><span class=\"w\"> </span><span class=\"n\">_stride_img_2</span><span class=\"p\">]</span><span class=\"w\"> </span><span class=\"o\">-</span><span class=\"w\"> </span><span class=\"mf\">0.5</span><span class=\"w\"> </span><span class=\"o\">*</span><span class=\"w\"> </span><span class=\"n\">img_data</span><span class=\"p\">[(</span><span class=\"n\">ctr_0</span><span class=\"w\"> </span><span class=\"o\">+</span><span class=\"w\"> </span><span class=\"mi\">-1LL</span><span class=\"p\">)</span><span class=\"w\"> </span><span class=\"o\">*</span><span class=\"w\"> </span><span class=\"n\">_stride_img_0</span><span class=\"w\"> </span><span class=\"o\">+</span><span class=\"w\"> </span><span class=\"p\">(</span><span class=\"n\">ctr_1</span><span class=\"w\"> </span><span class=\"o\">+</span><span class=\"w\"> </span><span class=\"mf\">1L</span><span class=\"n\">L</span><span class=\"p\">)</span><span class=\"w\"> </span><span class=\"o\">*</span><span class=\"w\"> </span><span class=\"n\">_stride_img_1</span><span class=\"w\"> </span><span class=\"o\">+</span><span class=\"w\"> </span><span 
class=\"mf\">2L</span><span class=\"n\">L</span><span class=\"w\"> </span><span class=\"o\">*</span><span class=\"w\"> </span><span class=\"n\">_stride_img_2</span><span class=\"p\">]</span><span class=\"w\"> </span><span class=\"o\">-</span><span class=\"w\"> </span><span class=\"mf\">0.5</span><span class=\"w\"> </span><span class=\"o\">*</span><span class=\"w\"> </span><span class=\"n\">img_data</span><span class=\"p\">[(</span><span class=\"n\">ctr_0</span><span class=\"w\"> </span><span class=\"o\">+</span><span class=\"w\"> </span><span class=\"mi\">-1LL</span><span class=\"p\">)</span><span class=\"w\"> </span><span class=\"o\">*</span><span class=\"w\"> </span><span class=\"n\">_stride_img_0</span><span class=\"w\"> </span><span class=\"o\">+</span><span class=\"w\"> </span><span class=\"p\">(</span><span class=\"n\">ctr_1</span><span class=\"w\"> </span><span class=\"o\">+</span><span class=\"w\"> </span><span class=\"mi\">-1LL</span><span class=\"p\">)</span><span class=\"w\"> </span><span class=\"o\">*</span><span class=\"w\"> </span><span class=\"n\">_stride_img_1</span><span class=\"w\"> </span><span class=\"o\">+</span><span class=\"w\"> </span><span class=\"mf\">2L</span><span class=\"n\">L</span><span class=\"w\"> </span><span class=\"o\">*</span><span class=\"w\"> </span><span class=\"n\">_stride_img_2</span><span class=\"p\">]</span><span class=\"w\"> </span><span class=\"o\">+</span><span class=\"w\"> </span><span class=\"n\">w_2</span><span class=\"w\"> </span><span class=\"o\">*</span><span class=\"w\"> </span><span class=\"n\">img_data</span><span class=\"p\">[(</span><span class=\"n\">ctr_0</span><span class=\"w\"> </span><span class=\"o\">+</span><span class=\"w\"> </span><span class=\"mf\">1L</span><span class=\"n\">L</span><span class=\"p\">)</span><span class=\"w\"> </span><span class=\"o\">*</span><span class=\"w\"> </span><span class=\"n\">_stride_img_0</span><span class=\"w\"> </span><span class=\"o\">+</span><span class=\"w\"> 
</span><span class=\"n\">ctr_1</span><span class=\"w\"> </span><span class=\"o\">*</span><span class=\"w\"> </span><span class=\"n\">_stride_img_1</span><span class=\"w\"> </span><span class=\"o\">+</span><span class=\"w\"> </span><span class=\"mf\">2L</span><span class=\"n\">L</span><span class=\"w\"> </span><span class=\"o\">*</span><span class=\"w\"> </span><span class=\"n\">_stride_img_2</span><span class=\"p\">]</span><span class=\"w\"> </span><span class=\"o\">-</span><span class=\"w\"> </span><span class=\"n\">w_2</span><span class=\"w\"> </span><span class=\"o\">*</span><span class=\"w\"> </span><span class=\"n\">img_data</span><span class=\"p\">[(</span><span class=\"n\">ctr_0</span><span class=\"w\"> </span><span class=\"o\">+</span><span class=\"w\"> </span><span class=\"mi\">-1LL</span><span class=\"p\">)</span><span class=\"w\"> </span><span class=\"o\">*</span><span class=\"w\"> </span><span class=\"n\">_stride_img_0</span><span class=\"w\"> </span><span class=\"o\">+</span><span class=\"w\"> </span><span class=\"n\">ctr_1</span><span class=\"w\"> </span><span class=\"o\">*</span><span class=\"w\"> </span><span class=\"n\">_stride_img_1</span><span class=\"w\"> </span><span class=\"o\">+</span><span class=\"w\"> </span><span class=\"mf\">2L</span><span class=\"n\">L</span><span class=\"w\"> </span><span class=\"o\">*</span><span class=\"w\"> </span><span class=\"n\">_stride_img_2</span><span class=\"p\">])</span><span class=\"w\"> </span><span class=\"o\">*</span><span class=\"w\"> </span><span class=\"p\">(</span><span class=\"mf\">0.5</span><span class=\"w\"> </span><span class=\"o\">*</span><span class=\"w\"> </span><span class=\"n\">img_data</span><span class=\"p\">[(</span><span class=\"n\">ctr_0</span><span class=\"w\"> </span><span class=\"o\">+</span><span class=\"w\"> </span><span class=\"mf\">1L</span><span class=\"n\">L</span><span class=\"p\">)</span><span class=\"w\"> </span><span class=\"o\">*</span><span class=\"w\"> </span><span 
class=\"n\">_stride_img_0</span><span class=\"w\"> </span><span class=\"o\">+</span><span class=\"w\"> </span><span class=\"p\">(</span><span class=\"n\">ctr_1</span><span class=\"w\"> </span><span class=\"o\">+</span><span class=\"w\"> </span><span class=\"mi\">-1LL</span><span class=\"p\">)</span><span class=\"w\"> </span><span class=\"o\">*</span><span class=\"w\"> </span><span class=\"n\">_stride_img_1</span><span class=\"w\"> </span><span class=\"o\">+</span><span class=\"w\"> </span><span class=\"mf\">2L</span><span class=\"n\">L</span><span class=\"w\"> </span><span class=\"o\">*</span><span class=\"w\"> </span><span class=\"n\">_stride_img_2</span><span class=\"p\">]</span><span class=\"w\"> </span><span class=\"o\">-</span><span class=\"w\"> </span><span class=\"mf\">0.5</span><span class=\"w\"> </span><span class=\"o\">*</span><span class=\"w\"> </span><span class=\"n\">img_data</span><span class=\"p\">[(</span><span class=\"n\">ctr_0</span><span class=\"w\"> </span><span class=\"o\">+</span><span class=\"w\"> </span><span class=\"mf\">1L</span><span class=\"n\">L</span><span class=\"p\">)</span><span class=\"w\"> </span><span class=\"o\">*</span><span class=\"w\"> </span><span class=\"n\">_stride_img_0</span><span class=\"w\"> </span><span class=\"o\">+</span><span class=\"w\"> </span><span class=\"p\">(</span><span class=\"n\">ctr_1</span><span class=\"w\"> </span><span class=\"o\">+</span><span class=\"w\"> </span><span class=\"mf\">1L</span><span class=\"n\">L</span><span class=\"p\">)</span><span class=\"w\"> </span><span class=\"o\">*</span><span class=\"w\"> </span><span class=\"n\">_stride_img_1</span><span class=\"w\"> </span><span class=\"o\">+</span><span class=\"w\"> </span><span class=\"mf\">2L</span><span class=\"n\">L</span><span class=\"w\"> </span><span class=\"o\">*</span><span class=\"w\"> </span><span class=\"n\">_stride_img_2</span><span class=\"p\">]</span><span class=\"w\"> </span><span class=\"o\">-</span><span class=\"w\"> 
</span><span class=\"mf\">0.5</span><span class=\"w\"> </span><span class=\"o\">*</span><span class=\"w\"> </span><span class=\"n\">img_data</span><span class=\"p\">[(</span><span class=\"n\">ctr_0</span><span class=\"w\"> </span><span class=\"o\">+</span><span class=\"w\"> </span><span class=\"mi\">-1LL</span><span class=\"p\">)</span><span class=\"w\"> </span><span class=\"o\">*</span><span class=\"w\"> </span><span class=\"n\">_stride_img_0</span><span class=\"w\"> </span><span class=\"o\">+</span><span class=\"w\"> </span><span class=\"p\">(</span><span class=\"n\">ctr_1</span><span class=\"w\"> </span><span class=\"o\">+</span><span class=\"w\"> </span><span class=\"mf\">1L</span><span class=\"n\">L</span><span class=\"p\">)</span><span class=\"w\"> </span><span class=\"o\">*</span><span class=\"w\"> </span><span class=\"n\">_stride_img_1</span><span class=\"w\"> </span><span class=\"o\">+</span><span class=\"w\"> </span><span class=\"mf\">2L</span><span class=\"n\">L</span><span class=\"w\"> </span><span class=\"o\">*</span><span class=\"w\"> </span><span class=\"n\">_stride_img_2</span><span class=\"p\">]</span><span class=\"w\"> </span><span class=\"o\">-</span><span class=\"w\"> </span><span class=\"mf\">0.5</span><span class=\"w\"> </span><span class=\"o\">*</span><span class=\"w\"> </span><span class=\"n\">img_data</span><span class=\"p\">[(</span><span class=\"n\">ctr_0</span><span class=\"w\"> </span><span class=\"o\">+</span><span class=\"w\"> </span><span class=\"mi\">-1LL</span><span class=\"p\">)</span><span class=\"w\"> </span><span class=\"o\">*</span><span class=\"w\"> </span><span class=\"n\">_stride_img_0</span><span class=\"w\"> </span><span class=\"o\">+</span><span class=\"w\"> </span><span class=\"p\">(</span><span class=\"n\">ctr_1</span><span class=\"w\"> </span><span class=\"o\">+</span><span class=\"w\"> </span><span class=\"mi\">-1LL</span><span class=\"p\">)</span><span class=\"w\"> </span><span class=\"o\">*</span><span class=\"w\"> 
</span><span class=\"n\">_stride_img_1</span><span class=\"w\"> </span><span class=\"o\">+</span><span class=\"w\"> </span><span class=\"mf\">2L</span><span class=\"n\">L</span><span class=\"w\"> </span><span class=\"o\">*</span><span class=\"w\"> </span><span class=\"n\">_stride_img_2</span><span class=\"p\">]</span><span class=\"w\"> </span><span class=\"o\">+</span><span class=\"w\"> </span><span class=\"n\">w_2</span><span class=\"w\"> </span><span class=\"o\">*</span><span class=\"w\"> </span><span class=\"n\">img_data</span><span class=\"p\">[(</span><span class=\"n\">ctr_0</span><span class=\"w\"> </span><span class=\"o\">+</span><span class=\"w\"> </span><span class=\"mf\">1L</span><span class=\"n\">L</span><span class=\"p\">)</span><span class=\"w\"> </span><span class=\"o\">*</span><span class=\"w\"> </span><span class=\"n\">_stride_img_0</span><span class=\"w\"> </span><span class=\"o\">+</span><span class=\"w\"> </span><span class=\"n\">ctr_1</span><span class=\"w\"> </span><span class=\"o\">*</span><span class=\"w\"> </span><span class=\"n\">_stride_img_1</span><span class=\"w\"> </span><span class=\"o\">+</span><span class=\"w\"> </span><span class=\"mf\">2L</span><span class=\"n\">L</span><span class=\"w\"> </span><span class=\"o\">*</span><span class=\"w\"> </span><span class=\"n\">_stride_img_2</span><span class=\"p\">]</span><span class=\"w\"> </span><span class=\"o\">-</span><span class=\"w\"> </span><span class=\"n\">w_2</span><span class=\"w\"> </span><span class=\"o\">*</span><span class=\"w\"> </span><span class=\"n\">img_data</span><span class=\"p\">[(</span><span class=\"n\">ctr_0</span><span class=\"w\"> </span><span class=\"o\">+</span><span class=\"w\"> </span><span class=\"mi\">-1LL</span><span class=\"p\">)</span><span class=\"w\"> </span><span class=\"o\">*</span><span class=\"w\"> </span><span class=\"n\">_stride_img_0</span><span class=\"w\"> </span><span class=\"o\">+</span><span class=\"w\"> </span><span 
class=\"n\">ctr_1</span><span class=\"w\"> </span><span class=\"o\">*</span><span class=\"w\"> </span><span class=\"n\">_stride_img_1</span><span class=\"w\"> </span><span class=\"o\">+</span><span class=\"w\"> </span><span class=\"mf\">2L</span><span class=\"n\">L</span><span class=\"w\"> </span><span class=\"o\">*</span><span class=\"w\"> </span><span class=\"n\">_stride_img_2</span><span class=\"p\">]);</span>\n",
+       "<span class=\"w\">         </span><span class=\"n\">_data_dst</span><span class=\"p\">[</span><span class=\"n\">ctr_0</span><span class=\"w\"> </span><span class=\"o\">*</span><span class=\"w\"> </span><span class=\"n\">_stride_dst_0</span><span class=\"w\"> </span><span class=\"o\">+</span><span class=\"w\"> </span><span class=\"n\">ctr_1</span><span class=\"w\"> </span><span class=\"o\">*</span><span class=\"w\"> </span><span class=\"n\">_stride_dst_1</span><span class=\"p\">]</span><span class=\"w\"> </span><span class=\"o\">=</span><span class=\"w\"> </span><span class=\"p\">(</span><span class=\"mf\">0.5</span><span class=\"w\"> </span><span class=\"o\">*</span><span class=\"w\"> </span><span class=\"n\">_data_img</span><span class=\"p\">[(</span><span class=\"n\">ctr_0</span><span class=\"w\"> </span><span class=\"o\">+</span><span class=\"w\"> </span><span class=\"mf\">1L</span><span class=\"n\">L</span><span class=\"p\">)</span><span class=\"w\"> </span><span class=\"o\">*</span><span class=\"w\"> </span><span class=\"n\">_stride_img_0</span><span class=\"w\"> </span><span class=\"o\">+</span><span class=\"w\"> </span><span class=\"p\">(</span><span class=\"n\">ctr_1</span><span class=\"w\"> </span><span class=\"o\">+</span><span class=\"w\"> </span><span class=\"mi\">-1LL</span><span class=\"p\">)</span><span class=\"w\"> </span><span class=\"o\">*</span><span class=\"w\"> </span><span class=\"n\">_stride_img_1</span><span class=\"w\"> </span><span class=\"o\">+</span><span class=\"w\"> </span><span class=\"mf\">2L</span><span class=\"n\">L</span><span class=\"w\"> </span><span class=\"o\">*</span><span class=\"w\"> </span><span class=\"n\">_stride_img_2</span><span class=\"p\">]</span><span class=\"w\"> </span><span class=\"o\">+</span><span class=\"w\"> </span><span class=\"mf\">-0.5</span><span class=\"w\"> </span><span class=\"o\">*</span><span class=\"w\"> </span><span class=\"n\">_data_img</span><span class=\"p\">[(</span><span 
class=\"n\">ctr_0</span><span class=\"w\"> </span><span class=\"o\">+</span><span class=\"w\"> </span><span class=\"mf\">1L</span><span class=\"n\">L</span><span class=\"p\">)</span><span class=\"w\"> </span><span class=\"o\">*</span><span class=\"w\"> </span><span class=\"n\">_stride_img_0</span><span class=\"w\"> </span><span class=\"o\">+</span><span class=\"w\"> </span><span class=\"p\">(</span><span class=\"n\">ctr_1</span><span class=\"w\"> </span><span class=\"o\">+</span><span class=\"w\"> </span><span class=\"mf\">1L</span><span class=\"n\">L</span><span class=\"p\">)</span><span class=\"w\"> </span><span class=\"o\">*</span><span class=\"w\"> </span><span class=\"n\">_stride_img_1</span><span class=\"w\"> </span><span class=\"o\">+</span><span class=\"w\"> </span><span class=\"mf\">2L</span><span class=\"n\">L</span><span class=\"w\"> </span><span class=\"o\">*</span><span class=\"w\"> </span><span class=\"n\">_stride_img_2</span><span class=\"p\">]</span><span class=\"w\"> </span><span class=\"o\">+</span><span class=\"w\"> </span><span class=\"mf\">-0.5</span><span class=\"w\"> </span><span class=\"o\">*</span><span class=\"w\"> </span><span class=\"n\">_data_img</span><span class=\"p\">[(</span><span class=\"n\">ctr_0</span><span class=\"w\"> </span><span class=\"o\">+</span><span class=\"w\"> </span><span class=\"mi\">-1LL</span><span class=\"p\">)</span><span class=\"w\"> </span><span class=\"o\">*</span><span class=\"w\"> </span><span class=\"n\">_stride_img_0</span><span class=\"w\"> </span><span class=\"o\">+</span><span class=\"w\"> </span><span class=\"p\">(</span><span class=\"n\">ctr_1</span><span class=\"w\"> </span><span class=\"o\">+</span><span class=\"w\"> </span><span class=\"mf\">1L</span><span class=\"n\">L</span><span class=\"p\">)</span><span class=\"w\"> </span><span class=\"o\">*</span><span class=\"w\"> </span><span class=\"n\">_stride_img_1</span><span class=\"w\"> </span><span class=\"o\">+</span><span class=\"w\"> </span><span 
class=\"mf\">2L</span><span class=\"n\">L</span><span class=\"w\"> </span><span class=\"o\">*</span><span class=\"w\"> </span><span class=\"n\">_stride_img_2</span><span class=\"p\">]</span><span class=\"w\"> </span><span class=\"o\">+</span><span class=\"w\"> </span><span class=\"mf\">-0.5</span><span class=\"w\"> </span><span class=\"o\">*</span><span class=\"w\"> </span><span class=\"n\">_data_img</span><span class=\"p\">[(</span><span class=\"n\">ctr_0</span><span class=\"w\"> </span><span class=\"o\">+</span><span class=\"w\"> </span><span class=\"mi\">-1LL</span><span class=\"p\">)</span><span class=\"w\"> </span><span class=\"o\">*</span><span class=\"w\"> </span><span class=\"n\">_stride_img_0</span><span class=\"w\"> </span><span class=\"o\">+</span><span class=\"w\"> </span><span class=\"p\">(</span><span class=\"n\">ctr_1</span><span class=\"w\"> </span><span class=\"o\">+</span><span class=\"w\"> </span><span class=\"mi\">-1LL</span><span class=\"p\">)</span><span class=\"w\"> </span><span class=\"o\">*</span><span class=\"w\"> </span><span class=\"n\">_stride_img_1</span><span class=\"w\"> </span><span class=\"o\">+</span><span class=\"w\"> </span><span class=\"mf\">2L</span><span class=\"n\">L</span><span class=\"w\"> </span><span class=\"o\">*</span><span class=\"w\"> </span><span class=\"n\">_stride_img_2</span><span class=\"p\">]</span><span class=\"w\"> </span><span class=\"o\">+</span><span class=\"w\"> </span><span class=\"n\">w_2</span><span class=\"w\"> </span><span class=\"o\">*</span><span class=\"w\"> </span><span class=\"n\">_data_img</span><span class=\"p\">[(</span><span class=\"n\">ctr_0</span><span class=\"w\"> </span><span class=\"o\">+</span><span class=\"w\"> </span><span class=\"mf\">1L</span><span class=\"n\">L</span><span class=\"p\">)</span><span class=\"w\"> </span><span class=\"o\">*</span><span class=\"w\"> </span><span class=\"n\">_stride_img_0</span><span class=\"w\"> </span><span class=\"o\">+</span><span class=\"w\"> 
</span><span class=\"n\">ctr_1</span><span class=\"w\"> </span><span class=\"o\">*</span><span class=\"w\"> </span><span class=\"n\">_stride_img_1</span><span class=\"w\"> </span><span class=\"o\">+</span><span class=\"w\"> </span><span class=\"mf\">2L</span><span class=\"n\">L</span><span class=\"w\"> </span><span class=\"o\">*</span><span class=\"w\"> </span><span class=\"n\">_stride_img_2</span><span class=\"p\">]</span><span class=\"w\"> </span><span class=\"o\">-</span><span class=\"w\"> </span><span class=\"n\">w_2</span><span class=\"w\"> </span><span class=\"o\">*</span><span class=\"w\"> </span><span class=\"n\">_data_img</span><span class=\"p\">[(</span><span class=\"n\">ctr_0</span><span class=\"w\"> </span><span class=\"o\">+</span><span class=\"w\"> </span><span class=\"mi\">-1LL</span><span class=\"p\">)</span><span class=\"w\"> </span><span class=\"o\">*</span><span class=\"w\"> </span><span class=\"n\">_stride_img_0</span><span class=\"w\"> </span><span class=\"o\">+</span><span class=\"w\"> </span><span class=\"n\">ctr_1</span><span class=\"w\"> </span><span class=\"o\">*</span><span class=\"w\"> </span><span class=\"n\">_stride_img_1</span><span class=\"w\"> </span><span class=\"o\">+</span><span class=\"w\"> </span><span class=\"mf\">2L</span><span class=\"n\">L</span><span class=\"w\"> </span><span class=\"o\">*</span><span class=\"w\"> </span><span class=\"n\">_stride_img_2</span><span class=\"p\">])</span><span class=\"w\"> </span><span class=\"o\">*</span><span class=\"w\"> </span><span class=\"p\">(</span><span class=\"mf\">0.5</span><span class=\"w\"> </span><span class=\"o\">*</span><span class=\"w\"> </span><span class=\"n\">_data_img</span><span class=\"p\">[(</span><span class=\"n\">ctr_0</span><span class=\"w\"> </span><span class=\"o\">+</span><span class=\"w\"> </span><span class=\"mf\">1L</span><span class=\"n\">L</span><span class=\"p\">)</span><span class=\"w\"> </span><span class=\"o\">*</span><span class=\"w\"> </span><span 
class=\"n\">_stride_img_0</span><span class=\"w\"> </span><span class=\"o\">+</span><span class=\"w\"> </span><span class=\"p\">(</span><span class=\"n\">ctr_1</span><span class=\"w\"> </span><span class=\"o\">+</span><span class=\"w\"> </span><span class=\"mi\">-1LL</span><span class=\"p\">)</span><span class=\"w\"> </span><span class=\"o\">*</span><span class=\"w\"> </span><span class=\"n\">_stride_img_1</span><span class=\"w\"> </span><span class=\"o\">+</span><span class=\"w\"> </span><span class=\"mf\">2L</span><span class=\"n\">L</span><span class=\"w\"> </span><span class=\"o\">*</span><span class=\"w\"> </span><span class=\"n\">_stride_img_2</span><span class=\"p\">]</span><span class=\"w\"> </span><span class=\"o\">+</span><span class=\"w\"> </span><span class=\"mf\">-0.5</span><span class=\"w\"> </span><span class=\"o\">*</span><span class=\"w\"> </span><span class=\"n\">_data_img</span><span class=\"p\">[(</span><span class=\"n\">ctr_0</span><span class=\"w\"> </span><span class=\"o\">+</span><span class=\"w\"> </span><span class=\"mf\">1L</span><span class=\"n\">L</span><span class=\"p\">)</span><span class=\"w\"> </span><span class=\"o\">*</span><span class=\"w\"> </span><span class=\"n\">_stride_img_0</span><span class=\"w\"> </span><span class=\"o\">+</span><span class=\"w\"> </span><span class=\"p\">(</span><span class=\"n\">ctr_1</span><span class=\"w\"> </span><span class=\"o\">+</span><span class=\"w\"> </span><span class=\"mf\">1L</span><span class=\"n\">L</span><span class=\"p\">)</span><span class=\"w\"> </span><span class=\"o\">*</span><span class=\"w\"> </span><span class=\"n\">_stride_img_1</span><span class=\"w\"> </span><span class=\"o\">+</span><span class=\"w\"> </span><span class=\"mf\">2L</span><span class=\"n\">L</span><span class=\"w\"> </span><span class=\"o\">*</span><span class=\"w\"> </span><span class=\"n\">_stride_img_2</span><span class=\"p\">]</span><span class=\"w\"> </span><span class=\"o\">+</span><span class=\"w\"> 
</span><span class=\"mf\">-0.5</span><span class=\"w\"> </span><span class=\"o\">*</span><span class=\"w\"> </span><span class=\"n\">_data_img</span><span class=\"p\">[(</span><span class=\"n\">ctr_0</span><span class=\"w\"> </span><span class=\"o\">+</span><span class=\"w\"> </span><span class=\"mi\">-1LL</span><span class=\"p\">)</span><span class=\"w\"> </span><span class=\"o\">*</span><span class=\"w\"> </span><span class=\"n\">_stride_img_0</span><span class=\"w\"> </span><span class=\"o\">+</span><span class=\"w\"> </span><span class=\"p\">(</span><span class=\"n\">ctr_1</span><span class=\"w\"> </span><span class=\"o\">+</span><span class=\"w\"> </span><span class=\"mf\">1L</span><span class=\"n\">L</span><span class=\"p\">)</span><span class=\"w\"> </span><span class=\"o\">*</span><span class=\"w\"> </span><span class=\"n\">_stride_img_1</span><span class=\"w\"> </span><span class=\"o\">+</span><span class=\"w\"> </span><span class=\"mf\">2L</span><span class=\"n\">L</span><span class=\"w\"> </span><span class=\"o\">*</span><span class=\"w\"> </span><span class=\"n\">_stride_img_2</span><span class=\"p\">]</span><span class=\"w\"> </span><span class=\"o\">+</span><span class=\"w\"> </span><span class=\"mf\">-0.5</span><span class=\"w\"> </span><span class=\"o\">*</span><span class=\"w\"> </span><span class=\"n\">_data_img</span><span class=\"p\">[(</span><span class=\"n\">ctr_0</span><span class=\"w\"> </span><span class=\"o\">+</span><span class=\"w\"> </span><span class=\"mi\">-1LL</span><span class=\"p\">)</span><span class=\"w\"> </span><span class=\"o\">*</span><span class=\"w\"> </span><span class=\"n\">_stride_img_0</span><span class=\"w\"> </span><span class=\"o\">+</span><span class=\"w\"> </span><span class=\"p\">(</span><span class=\"n\">ctr_1</span><span class=\"w\"> </span><span class=\"o\">+</span><span class=\"w\"> </span><span class=\"mi\">-1LL</span><span class=\"p\">)</span><span class=\"w\"> </span><span class=\"o\">*</span><span 
class=\"w\"> </span><span class=\"n\">_stride_img_1</span><span class=\"w\"> </span><span class=\"o\">+</span><span class=\"w\"> </span><span class=\"mf\">2L</span><span class=\"n\">L</span><span class=\"w\"> </span><span class=\"o\">*</span><span class=\"w\"> </span><span class=\"n\">_stride_img_2</span><span class=\"p\">]</span><span class=\"w\"> </span><span class=\"o\">+</span><span class=\"w\"> </span><span class=\"n\">w_2</span><span class=\"w\"> </span><span class=\"o\">*</span><span class=\"w\"> </span><span class=\"n\">_data_img</span><span class=\"p\">[(</span><span class=\"n\">ctr_0</span><span class=\"w\"> </span><span class=\"o\">+</span><span class=\"w\"> </span><span class=\"mf\">1L</span><span class=\"n\">L</span><span class=\"p\">)</span><span class=\"w\"> </span><span class=\"o\">*</span><span class=\"w\"> </span><span class=\"n\">_stride_img_0</span><span class=\"w\"> </span><span class=\"o\">+</span><span class=\"w\"> </span><span class=\"n\">ctr_1</span><span class=\"w\"> </span><span class=\"o\">*</span><span class=\"w\"> </span><span class=\"n\">_stride_img_1</span><span class=\"w\"> </span><span class=\"o\">+</span><span class=\"w\"> </span><span class=\"mf\">2L</span><span class=\"n\">L</span><span class=\"w\"> </span><span class=\"o\">*</span><span class=\"w\"> </span><span class=\"n\">_stride_img_2</span><span class=\"p\">]</span><span class=\"w\"> </span><span class=\"o\">-</span><span class=\"w\"> </span><span class=\"n\">w_2</span><span class=\"w\"> </span><span class=\"o\">*</span><span class=\"w\"> </span><span class=\"n\">_data_img</span><span class=\"p\">[(</span><span class=\"n\">ctr_0</span><span class=\"w\"> </span><span class=\"o\">+</span><span class=\"w\"> </span><span class=\"mi\">-1LL</span><span class=\"p\">)</span><span class=\"w\"> </span><span class=\"o\">*</span><span class=\"w\"> </span><span class=\"n\">_stride_img_0</span><span class=\"w\"> </span><span class=\"o\">+</span><span class=\"w\"> </span><span 
class=\"n\">ctr_1</span><span class=\"w\"> </span><span class=\"o\">*</span><span class=\"w\"> </span><span class=\"n\">_stride_img_1</span><span class=\"w\"> </span><span class=\"o\">+</span><span class=\"w\"> </span><span class=\"mf\">2L</span><span class=\"n\">L</span><span class=\"w\"> </span><span class=\"o\">*</span><span class=\"w\"> </span><span class=\"n\">_stride_img_2</span><span class=\"p\">]);</span>\n",
        "<span class=\"w\">      </span><span class=\"p\">}</span>\n",
-       "\n",
        "<span class=\"w\">   </span><span class=\"p\">}</span>\n",
-       "\n",
        "<span class=\"p\">}</span>\n",
        "</pre></div>\n"
       ],
       "text/plain": [
-       "FUNC_PREFIX void kernel (const int64_t _size_dst_0, const int64_t _size_dst_1, const int64_t _stride_dst_0, const int64_t _stride_dst_1, const int64_t _stride_img_0, const int64_t _stride_img_1, const int64_t _stride_img_2, double * const  dst_data, double * const  img_data, const double w_2)\n",
+       "FUNC_PREFIX void kernel (double * RESTRICT const _data_dst, double * RESTRICT const _data_img, const int64_t _size_dst_0, const int64_t _size_dst_1, const int64_t _stride_dst_0, const int64_t _stride_dst_1, const int64_t _stride_img_0, const int64_t _stride_img_1, const int64_t _stride_img_2, const double w_2)\n",
        "{\n",
        "   for(int64_t ctr_0 = 1LL; ctr_0 < _size_dst_0 - 1LL; ctr_0 += 1LL)\n",
        "   {\n",
        "      for(int64_t ctr_1 = 1LL; ctr_1 < _size_dst_1 - 1LL; ctr_1 += 1LL)\n",
        "      {\n",
-       "         dst_data[ctr_0 * _stride_dst_0 + ctr_1 * _stride_dst_1] = (0.5 * img_data[(ctr_0 + 1LL) * _stride_img_0 + (ctr_1 + -1LL) * _stride_img_1 + 2LL * _stride_img_2] - 0.5 * img_data[(ctr_0 + 1LL) * _stride_img_0 + (ctr_1 + 1LL) * _stride_img_1 + 2LL * _stride_img_2] - 0.5 * img_data[(ctr_0 + -1LL) * _stride_img_0 + (ctr_1 + 1LL) * _stride_img_1 + 2LL * _stride_img_2] - 0.5 * img_data[(ctr_0 + -1LL) * _stride_img_0 + (ctr_1 + -1LL) * _stride_img_1 + 2LL * _stride_img_2] + w_2 * img_data[(ctr_0 + 1LL) * _stride_img_0 + ctr_1 * _stride_img_1 + 2LL * _stride_img_2] - w_2 * img_data[(ctr_0 + -1LL) * _stride_img_0 + ctr_1 * _stride_img_1 + 2LL * _stride_img_2]) * (0.5 * img_data[(ctr_0 + 1LL) * _stride_img_0 + (ctr_1 + -1LL) * _stride_img_1 + 2LL * _stride_img_2] - 0.5 * img_data[(ctr_0 + 1LL) * _stride_img_0 + (ctr_1 + 1LL) * _stride_img_1 + 2LL * _stride_img_2] - 0.5 * img_data[(ctr_0 + -1LL) * _stride_img_0 + (ctr_1 + 1LL) * _stride_img_1 + 2LL * _stride_img_2] - 0.5 * img_data[(ctr_0 + -1LL) * _stride_img_0 + (ctr_1 + -1LL) * _stride_img_1 + 2LL * _stride_img_2] + w_2 * img_data[(ctr_0 + 1LL) * _stride_img_0 + ctr_1 * _stride_img_1 + 2LL * _stride_img_2] - w_2 * img_data[(ctr_0 + -1LL) * _stride_img_0 + ctr_1 * _stride_img_1 + 2LL * _stride_img_2]);\n",
+       "         _data_dst[ctr_0 * _stride_dst_0 + ctr_1 * _stride_dst_1] = (0.5 * _data_img[(ctr_0 + 1LL) * _stride_img_0 + (ctr_1 + -1LL) * _stride_img_1 + 2LL * _stride_img_2] + -0.5 * _data_img[(ctr_0 + 1LL) * _stride_img_0 + (ctr_1 + 1LL) * _stride_img_1 + 2LL * _stride_img_2] + -0.5 * _data_img[(ctr_0 + -1LL) * _stride_img_0 + (ctr_1 + 1LL) * _stride_img_1 + 2LL * _stride_img_2] + -0.5 * _data_img[(ctr_0 + -1LL) * _stride_img_0 + (ctr_1 + -1LL) * _stride_img_1 + 2LL * _stride_img_2] + w_2 * _data_img[(ctr_0 + 1LL) * _stride_img_0 + ctr_1 * _stride_img_1 + 2LL * _stride_img_2] - w_2 * _data_img[(ctr_0 + -1LL) * _stride_img_0 + ctr_1 * _stride_img_1 + 2LL * _stride_img_2]) * (0.5 * _data_img[(ctr_0 + 1LL) * _stride_img_0 + (ctr_1 + -1LL) * _stride_img_1 + 2LL * _stride_img_2] + -0.5 * _data_img[(ctr_0 + 1LL) * _stride_img_0 + (ctr_1 + 1LL) * _stride_img_1 + 2LL * _stride_img_2] + -0.5 * _data_img[(ctr_0 + -1LL) * _stride_img_0 + (ctr_1 + 1LL) * _stride_img_1 + 2LL * _stride_img_2] + -0.5 * _data_img[(ctr_0 + -1LL) * _stride_img_0 + (ctr_1 + -1LL) * _stride_img_1 + 2LL * _stride_img_2] + w_2 * _data_img[(ctr_0 + 1LL) * _stride_img_0 + ctr_1 * _stride_img_1 + 2LL * _stride_img_2] - w_2 * _data_img[(ctr_0 + -1LL) * _stride_img_0 + ctr_1 * _stride_img_1 + 2LL * _stride_img_2]);\n",
        "      }\n",
-       "\n",
        "   }\n",
-       "\n",
        "}"
       ]
      },
@@ -1140,7 +1138,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 34,
    "metadata": {},
    "outputs": [
     {
@@ -1154,9 +1152,9 @@
        ".highlight .hll { background-color: #ffffcc }\n",
        ".highlight { background: #f8f8f8; }\n",
        ".highlight .c { color: #3D7B7B; font-style: italic } /* Comment */\n",
-       ".highlight .err { border: 1px solid #FF0000 } /* Error */\n",
+       ".highlight .err { border: 1px solid #F00 } /* Error */\n",
        ".highlight .k { color: #008000; font-weight: bold } /* Keyword */\n",
-       ".highlight .o { color: #666666 } /* Operator */\n",
+       ".highlight .o { color: #666 } /* Operator */\n",
        ".highlight .ch { color: #3D7B7B; font-style: italic } /* Comment.Hashbang */\n",
        ".highlight .cm { color: #3D7B7B; font-style: italic } /* Comment.Multiline */\n",
        ".highlight .cp { color: #9C6500 } /* Comment.Preproc */\n",
@@ -1173,34 +1171,34 @@
        ".highlight .gp { color: #000080; font-weight: bold } /* Generic.Prompt */\n",
        ".highlight .gs { font-weight: bold } /* Generic.Strong */\n",
        ".highlight .gu { color: #800080; font-weight: bold } /* Generic.Subheading */\n",
-       ".highlight .gt { color: #0044DD } /* Generic.Traceback */\n",
+       ".highlight .gt { color: #04D } /* Generic.Traceback */\n",
        ".highlight .kc { color: #008000; font-weight: bold } /* Keyword.Constant */\n",
        ".highlight .kd { color: #008000; font-weight: bold } /* Keyword.Declaration */\n",
        ".highlight .kn { color: #008000; font-weight: bold } /* Keyword.Namespace */\n",
        ".highlight .kp { color: #008000 } /* Keyword.Pseudo */\n",
        ".highlight .kr { color: #008000; font-weight: bold } /* Keyword.Reserved */\n",
        ".highlight .kt { color: #B00040 } /* Keyword.Type */\n",
-       ".highlight .m { color: #666666 } /* Literal.Number */\n",
+       ".highlight .m { color: #666 } /* Literal.Number */\n",
        ".highlight .s { color: #BA2121 } /* Literal.String */\n",
        ".highlight .na { color: #687822 } /* Name.Attribute */\n",
        ".highlight .nb { color: #008000 } /* Name.Builtin */\n",
-       ".highlight .nc { color: #0000FF; font-weight: bold } /* Name.Class */\n",
-       ".highlight .no { color: #880000 } /* Name.Constant */\n",
-       ".highlight .nd { color: #AA22FF } /* Name.Decorator */\n",
+       ".highlight .nc { color: #00F; font-weight: bold } /* Name.Class */\n",
+       ".highlight .no { color: #800 } /* Name.Constant */\n",
+       ".highlight .nd { color: #A2F } /* Name.Decorator */\n",
        ".highlight .ni { color: #717171; font-weight: bold } /* Name.Entity */\n",
        ".highlight .ne { color: #CB3F38; font-weight: bold } /* Name.Exception */\n",
-       ".highlight .nf { color: #0000FF } /* Name.Function */\n",
+       ".highlight .nf { color: #00F } /* Name.Function */\n",
        ".highlight .nl { color: #767600 } /* Name.Label */\n",
-       ".highlight .nn { color: #0000FF; font-weight: bold } /* Name.Namespace */\n",
+       ".highlight .nn { color: #00F; font-weight: bold } /* Name.Namespace */\n",
        ".highlight .nt { color: #008000; font-weight: bold } /* Name.Tag */\n",
        ".highlight .nv { color: #19177C } /* Name.Variable */\n",
-       ".highlight .ow { color: #AA22FF; font-weight: bold } /* Operator.Word */\n",
-       ".highlight .w { color: #bbbbbb } /* Text.Whitespace */\n",
-       ".highlight .mb { color: #666666 } /* Literal.Number.Bin */\n",
-       ".highlight .mf { color: #666666 } /* Literal.Number.Float */\n",
-       ".highlight .mh { color: #666666 } /* Literal.Number.Hex */\n",
-       ".highlight .mi { color: #666666 } /* Literal.Number.Integer */\n",
-       ".highlight .mo { color: #666666 } /* Literal.Number.Oct */\n",
+       ".highlight .ow { color: #A2F; font-weight: bold } /* Operator.Word */\n",
+       ".highlight .w { color: #BBB } /* Text.Whitespace */\n",
+       ".highlight .mb { color: #666 } /* Literal.Number.Bin */\n",
+       ".highlight .mf { color: #666 } /* Literal.Number.Float */\n",
+       ".highlight .mh { color: #666 } /* Literal.Number.Hex */\n",
+       ".highlight .mi { color: #666 } /* Literal.Number.Integer */\n",
+       ".highlight .mo { color: #666 } /* Literal.Number.Oct */\n",
        ".highlight .sa { color: #BA2121 } /* Literal.String.Affix */\n",
        ".highlight .sb { color: #BA2121 } /* Literal.String.Backtick */\n",
        ".highlight .sc { color: #BA2121 } /* Literal.String.Char */\n",
@@ -1215,12 +1213,12 @@
        ".highlight .s1 { color: #BA2121 } /* Literal.String.Single */\n",
        ".highlight .ss { color: #19177C } /* Literal.String.Symbol */\n",
        ".highlight .bp { color: #008000 } /* Name.Builtin.Pseudo */\n",
-       ".highlight .fm { color: #0000FF } /* Name.Function.Magic */\n",
+       ".highlight .fm { color: #00F } /* Name.Function.Magic */\n",
        ".highlight .vc { color: #19177C } /* Name.Variable.Class */\n",
        ".highlight .vg { color: #19177C } /* Name.Variable.Global */\n",
        ".highlight .vi { color: #19177C } /* Name.Variable.Instance */\n",
        ".highlight .vm { color: #19177C } /* Name.Variable.Magic */\n",
-       ".highlight .il { color: #666666 } /* Literal.Number.Integer.Long */</style>"
+       ".highlight .il { color: #666 } /* Literal.Number.Integer.Long */</style>"
       ],
       "text/plain": [
        "<IPython.core.display.HTML object>"
@@ -1232,34 +1230,30 @@
     {
      "data": {
       "text/html": [
-       "<div class=\"highlight\"><pre><span></span><span class=\"n\">FUNC_PREFIX</span><span class=\"w\"> </span><span class=\"kt\">void</span><span class=\"w\"> </span><span class=\"n\">kernel</span><span class=\"w\"> </span><span class=\"p\">(</span><span class=\"k\">const</span><span class=\"w\"> </span><span class=\"kt\">int64_t</span><span class=\"w\"> </span><span class=\"n\">_size_dst_0</span><span class=\"p\">,</span><span class=\"w\"> </span><span class=\"k\">const</span><span class=\"w\"> </span><span class=\"kt\">int64_t</span><span class=\"w\"> </span><span class=\"n\">_size_dst_1</span><span class=\"p\">,</span><span class=\"w\"> </span><span class=\"k\">const</span><span class=\"w\"> </span><span class=\"kt\">int64_t</span><span class=\"w\"> </span><span class=\"n\">_stride_dst_0</span><span class=\"p\">,</span><span class=\"w\"> </span><span class=\"k\">const</span><span class=\"w\"> </span><span class=\"kt\">int64_t</span><span class=\"w\"> </span><span class=\"n\">_stride_dst_1</span><span class=\"p\">,</span><span class=\"w\"> </span><span class=\"k\">const</span><span class=\"w\"> </span><span class=\"kt\">int64_t</span><span class=\"w\"> </span><span class=\"n\">_stride_img_0</span><span class=\"p\">,</span><span class=\"w\"> </span><span class=\"k\">const</span><span class=\"w\"> </span><span class=\"kt\">int64_t</span><span class=\"w\"> </span><span class=\"n\">_stride_img_1</span><span class=\"p\">,</span><span class=\"w\"> </span><span class=\"k\">const</span><span class=\"w\"> </span><span class=\"kt\">int64_t</span><span class=\"w\"> </span><span class=\"n\">_stride_img_2</span><span class=\"p\">,</span><span class=\"w\"> </span><span class=\"kt\">double</span><span class=\"w\"> </span><span class=\"o\">*</span><span class=\"w\"> </span><span class=\"k\">const</span><span class=\"w\">  </span><span class=\"n\">dst_data</span><span class=\"p\">,</span><span class=\"w\"> </span><span class=\"kt\">double</span><span class=\"w\"> </span><span 
class=\"o\">*</span><span class=\"w\"> </span><span class=\"k\">const</span><span class=\"w\">  </span><span class=\"n\">img_data</span><span class=\"p\">,</span><span class=\"w\"> </span><span class=\"k\">const</span><span class=\"w\"> </span><span class=\"kt\">double</span><span class=\"w\"> </span><span class=\"n\">w_2</span><span class=\"p\">)</span>\n",
+       "<div class=\"highlight\"><pre><span></span><span class=\"n\">FUNC_PREFIX</span><span class=\"w\"> </span><span class=\"kt\">void</span><span class=\"w\"> </span><span class=\"n\">kernel</span><span class=\"w\"> </span><span class=\"p\">(</span><span class=\"kt\">double</span><span class=\"w\"> </span><span class=\"o\">*</span><span class=\"w\"> </span><span class=\"n\">RESTRICT</span><span class=\"w\"> </span><span class=\"k\">const</span><span class=\"w\"> </span><span class=\"n\">_data_dst</span><span class=\"p\">,</span><span class=\"w\"> </span><span class=\"kt\">double</span><span class=\"w\"> </span><span class=\"o\">*</span><span class=\"w\"> </span><span class=\"n\">RESTRICT</span><span class=\"w\"> </span><span class=\"k\">const</span><span class=\"w\"> </span><span class=\"n\">_data_img</span><span class=\"p\">,</span><span class=\"w\"> </span><span class=\"k\">const</span><span class=\"w\"> </span><span class=\"kt\">int64_t</span><span class=\"w\"> </span><span class=\"n\">_size_dst_0</span><span class=\"p\">,</span><span class=\"w\"> </span><span class=\"k\">const</span><span class=\"w\"> </span><span class=\"kt\">int64_t</span><span class=\"w\"> </span><span class=\"n\">_size_dst_1</span><span class=\"p\">,</span><span class=\"w\"> </span><span class=\"k\">const</span><span class=\"w\"> </span><span class=\"kt\">int64_t</span><span class=\"w\"> </span><span class=\"n\">_stride_dst_0</span><span class=\"p\">,</span><span class=\"w\"> </span><span class=\"k\">const</span><span class=\"w\"> </span><span class=\"kt\">int64_t</span><span class=\"w\"> </span><span class=\"n\">_stride_dst_1</span><span class=\"p\">,</span><span class=\"w\"> </span><span class=\"k\">const</span><span class=\"w\"> </span><span class=\"kt\">int64_t</span><span class=\"w\"> </span><span class=\"n\">_stride_img_0</span><span class=\"p\">,</span><span class=\"w\"> </span><span class=\"k\">const</span><span class=\"w\"> </span><span class=\"kt\">int64_t</span><span 
class=\"w\"> </span><span class=\"n\">_stride_img_1</span><span class=\"p\">,</span><span class=\"w\"> </span><span class=\"k\">const</span><span class=\"w\"> </span><span class=\"kt\">int64_t</span><span class=\"w\"> </span><span class=\"n\">_stride_img_2</span><span class=\"p\">,</span><span class=\"w\"> </span><span class=\"k\">const</span><span class=\"w\"> </span><span class=\"kt\">double</span><span class=\"w\"> </span><span class=\"n\">w_2</span><span class=\"p\">)</span>\n",
        "<span class=\"p\">{</span>\n",
        "<span class=\"w\">   </span><span class=\"cp\">#pragma omp parallel for schedule(static) num_threads(2)</span>\n",
        "<span class=\"w\">   </span><span class=\"k\">for</span><span class=\"p\">(</span><span class=\"kt\">int64_t</span><span class=\"w\"> </span><span class=\"n\">ctr_0</span><span class=\"w\"> </span><span class=\"o\">=</span><span class=\"w\"> </span><span class=\"mf\">1L</span><span class=\"n\">L</span><span class=\"p\">;</span><span class=\"w\"> </span><span class=\"n\">ctr_0</span><span class=\"w\"> </span><span class=\"o\">&lt;</span><span class=\"w\"> </span><span class=\"n\">_size_dst_0</span><span class=\"w\"> </span><span class=\"o\">-</span><span class=\"w\"> </span><span class=\"mf\">1L</span><span class=\"n\">L</span><span class=\"p\">;</span><span class=\"w\"> </span><span class=\"n\">ctr_0</span><span class=\"w\"> </span><span class=\"o\">+=</span><span class=\"w\"> </span><span class=\"mf\">1L</span><span class=\"n\">L</span><span class=\"p\">)</span>\n",
        "<span class=\"w\">   </span><span class=\"p\">{</span>\n",
        "<span class=\"w\">      </span><span class=\"k\">for</span><span class=\"p\">(</span><span class=\"kt\">int64_t</span><span class=\"w\"> </span><span class=\"n\">ctr_1</span><span class=\"w\"> </span><span class=\"o\">=</span><span class=\"w\"> </span><span class=\"mf\">1L</span><span class=\"n\">L</span><span class=\"p\">;</span><span class=\"w\"> </span><span class=\"n\">ctr_1</span><span class=\"w\"> </span><span class=\"o\">&lt;</span><span class=\"w\"> </span><span class=\"n\">_size_dst_1</span><span class=\"w\"> </span><span class=\"o\">-</span><span class=\"w\"> </span><span class=\"mf\">1L</span><span class=\"n\">L</span><span class=\"p\">;</span><span class=\"w\"> </span><span class=\"n\">ctr_1</span><span class=\"w\"> </span><span class=\"o\">+=</span><span class=\"w\"> </span><span class=\"mf\">1L</span><span class=\"n\">L</span><span class=\"p\">)</span>\n",
        "<span class=\"w\">      </span><span class=\"p\">{</span>\n",
-       "<span class=\"w\">         </span><span class=\"n\">dst_data</span><span class=\"p\">[</span><span class=\"n\">ctr_0</span><span class=\"w\"> </span><span class=\"o\">*</span><span class=\"w\"> </span><span class=\"n\">_stride_dst_0</span><span class=\"w\"> </span><span class=\"o\">+</span><span class=\"w\"> </span><span class=\"n\">ctr_1</span><span class=\"w\"> </span><span class=\"o\">*</span><span class=\"w\"> </span><span class=\"n\">_stride_dst_1</span><span class=\"p\">]</span><span class=\"w\"> </span><span class=\"o\">=</span><span class=\"w\"> </span><span class=\"p\">(</span><span class=\"mf\">0.5</span><span class=\"w\"> </span><span class=\"o\">*</span><span class=\"w\"> </span><span class=\"n\">img_data</span><span class=\"p\">[(</span><span class=\"n\">ctr_0</span><span class=\"w\"> </span><span class=\"o\">+</span><span class=\"w\"> </span><span class=\"mf\">1L</span><span class=\"n\">L</span><span class=\"p\">)</span><span class=\"w\"> </span><span class=\"o\">*</span><span class=\"w\"> </span><span class=\"n\">_stride_img_0</span><span class=\"w\"> </span><span class=\"o\">+</span><span class=\"w\"> </span><span class=\"p\">(</span><span class=\"n\">ctr_1</span><span class=\"w\"> </span><span class=\"o\">+</span><span class=\"w\"> </span><span class=\"mi\">-1LL</span><span class=\"p\">)</span><span class=\"w\"> </span><span class=\"o\">*</span><span class=\"w\"> </span><span class=\"n\">_stride_img_1</span><span class=\"w\"> </span><span class=\"o\">+</span><span class=\"w\"> </span><span class=\"mf\">2L</span><span class=\"n\">L</span><span class=\"w\"> </span><span class=\"o\">*</span><span class=\"w\"> </span><span class=\"n\">_stride_img_2</span><span class=\"p\">]</span><span class=\"w\"> </span><span class=\"o\">-</span><span class=\"w\"> </span><span class=\"mf\">0.5</span><span class=\"w\"> </span><span class=\"o\">*</span><span class=\"w\"> </span><span class=\"n\">img_data</span><span class=\"p\">[(</span><span 
class=\"n\">ctr_0</span><span class=\"w\"> </span><span class=\"o\">+</span><span class=\"w\"> </span><span class=\"mf\">1L</span><span class=\"n\">L</span><span class=\"p\">)</span><span class=\"w\"> </span><span class=\"o\">*</span><span class=\"w\"> </span><span class=\"n\">_stride_img_0</span><span class=\"w\"> </span><span class=\"o\">+</span><span class=\"w\"> </span><span class=\"p\">(</span><span class=\"n\">ctr_1</span><span class=\"w\"> </span><span class=\"o\">+</span><span class=\"w\"> </span><span class=\"mf\">1L</span><span class=\"n\">L</span><span class=\"p\">)</span><span class=\"w\"> </span><span class=\"o\">*</span><span class=\"w\"> </span><span class=\"n\">_stride_img_1</span><span class=\"w\"> </span><span class=\"o\">+</span><span class=\"w\"> </span><span class=\"mf\">2L</span><span class=\"n\">L</span><span class=\"w\"> </span><span class=\"o\">*</span><span class=\"w\"> </span><span class=\"n\">_stride_img_2</span><span class=\"p\">]</span><span class=\"w\"> </span><span class=\"o\">-</span><span class=\"w\"> </span><span class=\"mf\">0.5</span><span class=\"w\"> </span><span class=\"o\">*</span><span class=\"w\"> </span><span class=\"n\">img_data</span><span class=\"p\">[(</span><span class=\"n\">ctr_0</span><span class=\"w\"> </span><span class=\"o\">+</span><span class=\"w\"> </span><span class=\"mi\">-1LL</span><span class=\"p\">)</span><span class=\"w\"> </span><span class=\"o\">*</span><span class=\"w\"> </span><span class=\"n\">_stride_img_0</span><span class=\"w\"> </span><span class=\"o\">+</span><span class=\"w\"> </span><span class=\"p\">(</span><span class=\"n\">ctr_1</span><span class=\"w\"> </span><span class=\"o\">+</span><span class=\"w\"> </span><span class=\"mf\">1L</span><span class=\"n\">L</span><span class=\"p\">)</span><span class=\"w\"> </span><span class=\"o\">*</span><span class=\"w\"> </span><span class=\"n\">_stride_img_1</span><span class=\"w\"> </span><span class=\"o\">+</span><span class=\"w\"> </span><span 
class=\"mf\">2L</span><span class=\"n\">L</span><span class=\"w\"> </span><span class=\"o\">*</span><span class=\"w\"> </span><span class=\"n\">_stride_img_2</span><span class=\"p\">]</span><span class=\"w\"> </span><span class=\"o\">-</span><span class=\"w\"> </span><span class=\"mf\">0.5</span><span class=\"w\"> </span><span class=\"o\">*</span><span class=\"w\"> </span><span class=\"n\">img_data</span><span class=\"p\">[(</span><span class=\"n\">ctr_0</span><span class=\"w\"> </span><span class=\"o\">+</span><span class=\"w\"> </span><span class=\"mi\">-1LL</span><span class=\"p\">)</span><span class=\"w\"> </span><span class=\"o\">*</span><span class=\"w\"> </span><span class=\"n\">_stride_img_0</span><span class=\"w\"> </span><span class=\"o\">+</span><span class=\"w\"> </span><span class=\"p\">(</span><span class=\"n\">ctr_1</span><span class=\"w\"> </span><span class=\"o\">+</span><span class=\"w\"> </span><span class=\"mi\">-1LL</span><span class=\"p\">)</span><span class=\"w\"> </span><span class=\"o\">*</span><span class=\"w\"> </span><span class=\"n\">_stride_img_1</span><span class=\"w\"> </span><span class=\"o\">+</span><span class=\"w\"> </span><span class=\"mf\">2L</span><span class=\"n\">L</span><span class=\"w\"> </span><span class=\"o\">*</span><span class=\"w\"> </span><span class=\"n\">_stride_img_2</span><span class=\"p\">]</span><span class=\"w\"> </span><span class=\"o\">+</span><span class=\"w\"> </span><span class=\"n\">w_2</span><span class=\"w\"> </span><span class=\"o\">*</span><span class=\"w\"> </span><span class=\"n\">img_data</span><span class=\"p\">[(</span><span class=\"n\">ctr_0</span><span class=\"w\"> </span><span class=\"o\">+</span><span class=\"w\"> </span><span class=\"mf\">1L</span><span class=\"n\">L</span><span class=\"p\">)</span><span class=\"w\"> </span><span class=\"o\">*</span><span class=\"w\"> </span><span class=\"n\">_stride_img_0</span><span class=\"w\"> </span><span class=\"o\">+</span><span class=\"w\"> 
</span><span class=\"n\">ctr_1</span><span class=\"w\"> </span><span class=\"o\">*</span><span class=\"w\"> </span><span class=\"n\">_stride_img_1</span><span class=\"w\"> </span><span class=\"o\">+</span><span class=\"w\"> </span><span class=\"mf\">2L</span><span class=\"n\">L</span><span class=\"w\"> </span><span class=\"o\">*</span><span class=\"w\"> </span><span class=\"n\">_stride_img_2</span><span class=\"p\">]</span><span class=\"w\"> </span><span class=\"o\">-</span><span class=\"w\"> </span><span class=\"n\">w_2</span><span class=\"w\"> </span><span class=\"o\">*</span><span class=\"w\"> </span><span class=\"n\">img_data</span><span class=\"p\">[(</span><span class=\"n\">ctr_0</span><span class=\"w\"> </span><span class=\"o\">+</span><span class=\"w\"> </span><span class=\"mi\">-1LL</span><span class=\"p\">)</span><span class=\"w\"> </span><span class=\"o\">*</span><span class=\"w\"> </span><span class=\"n\">_stride_img_0</span><span class=\"w\"> </span><span class=\"o\">+</span><span class=\"w\"> </span><span class=\"n\">ctr_1</span><span class=\"w\"> </span><span class=\"o\">*</span><span class=\"w\"> </span><span class=\"n\">_stride_img_1</span><span class=\"w\"> </span><span class=\"o\">+</span><span class=\"w\"> </span><span class=\"mf\">2L</span><span class=\"n\">L</span><span class=\"w\"> </span><span class=\"o\">*</span><span class=\"w\"> </span><span class=\"n\">_stride_img_2</span><span class=\"p\">])</span><span class=\"w\"> </span><span class=\"o\">*</span><span class=\"w\"> </span><span class=\"p\">(</span><span class=\"mf\">0.5</span><span class=\"w\"> </span><span class=\"o\">*</span><span class=\"w\"> </span><span class=\"n\">img_data</span><span class=\"p\">[(</span><span class=\"n\">ctr_0</span><span class=\"w\"> </span><span class=\"o\">+</span><span class=\"w\"> </span><span class=\"mf\">1L</span><span class=\"n\">L</span><span class=\"p\">)</span><span class=\"w\"> </span><span class=\"o\">*</span><span class=\"w\"> </span><span 
class=\"n\">_stride_img_0</span><span class=\"w\"> </span><span class=\"o\">+</span><span class=\"w\"> </span><span class=\"p\">(</span><span class=\"n\">ctr_1</span><span class=\"w\"> </span><span class=\"o\">+</span><span class=\"w\"> </span><span class=\"mi\">-1LL</span><span class=\"p\">)</span><span class=\"w\"> </span><span class=\"o\">*</span><span class=\"w\"> </span><span class=\"n\">_stride_img_1</span><span class=\"w\"> </span><span class=\"o\">+</span><span class=\"w\"> </span><span class=\"mf\">2L</span><span class=\"n\">L</span><span class=\"w\"> </span><span class=\"o\">*</span><span class=\"w\"> </span><span class=\"n\">_stride_img_2</span><span class=\"p\">]</span><span class=\"w\"> </span><span class=\"o\">-</span><span class=\"w\"> </span><span class=\"mf\">0.5</span><span class=\"w\"> </span><span class=\"o\">*</span><span class=\"w\"> </span><span class=\"n\">img_data</span><span class=\"p\">[(</span><span class=\"n\">ctr_0</span><span class=\"w\"> </span><span class=\"o\">+</span><span class=\"w\"> </span><span class=\"mf\">1L</span><span class=\"n\">L</span><span class=\"p\">)</span><span class=\"w\"> </span><span class=\"o\">*</span><span class=\"w\"> </span><span class=\"n\">_stride_img_0</span><span class=\"w\"> </span><span class=\"o\">+</span><span class=\"w\"> </span><span class=\"p\">(</span><span class=\"n\">ctr_1</span><span class=\"w\"> </span><span class=\"o\">+</span><span class=\"w\"> </span><span class=\"mf\">1L</span><span class=\"n\">L</span><span class=\"p\">)</span><span class=\"w\"> </span><span class=\"o\">*</span><span class=\"w\"> </span><span class=\"n\">_stride_img_1</span><span class=\"w\"> </span><span class=\"o\">+</span><span class=\"w\"> </span><span class=\"mf\">2L</span><span class=\"n\">L</span><span class=\"w\"> </span><span class=\"o\">*</span><span class=\"w\"> </span><span class=\"n\">_stride_img_2</span><span class=\"p\">]</span><span class=\"w\"> </span><span class=\"o\">-</span><span class=\"w\"> 
</span><span class=\"mf\">0.5</span><span class=\"w\"> </span><span class=\"o\">*</span><span class=\"w\"> </span><span class=\"n\">img_data</span><span class=\"p\">[(</span><span class=\"n\">ctr_0</span><span class=\"w\"> </span><span class=\"o\">+</span><span class=\"w\"> </span><span class=\"mi\">-1LL</span><span class=\"p\">)</span><span class=\"w\"> </span><span class=\"o\">*</span><span class=\"w\"> </span><span class=\"n\">_stride_img_0</span><span class=\"w\"> </span><span class=\"o\">+</span><span class=\"w\"> </span><span class=\"p\">(</span><span class=\"n\">ctr_1</span><span class=\"w\"> </span><span class=\"o\">+</span><span class=\"w\"> </span><span class=\"mf\">1L</span><span class=\"n\">L</span><span class=\"p\">)</span><span class=\"w\"> </span><span class=\"o\">*</span><span class=\"w\"> </span><span class=\"n\">_stride_img_1</span><span class=\"w\"> </span><span class=\"o\">+</span><span class=\"w\"> </span><span class=\"mf\">2L</span><span class=\"n\">L</span><span class=\"w\"> </span><span class=\"o\">*</span><span class=\"w\"> </span><span class=\"n\">_stride_img_2</span><span class=\"p\">]</span><span class=\"w\"> </span><span class=\"o\">-</span><span class=\"w\"> </span><span class=\"mf\">0.5</span><span class=\"w\"> </span><span class=\"o\">*</span><span class=\"w\"> </span><span class=\"n\">img_data</span><span class=\"p\">[(</span><span class=\"n\">ctr_0</span><span class=\"w\"> </span><span class=\"o\">+</span><span class=\"w\"> </span><span class=\"mi\">-1LL</span><span class=\"p\">)</span><span class=\"w\"> </span><span class=\"o\">*</span><span class=\"w\"> </span><span class=\"n\">_stride_img_0</span><span class=\"w\"> </span><span class=\"o\">+</span><span class=\"w\"> </span><span class=\"p\">(</span><span class=\"n\">ctr_1</span><span class=\"w\"> </span><span class=\"o\">+</span><span class=\"w\"> </span><span class=\"mi\">-1LL</span><span class=\"p\">)</span><span class=\"w\"> </span><span class=\"o\">*</span><span class=\"w\"> 
</span><span class=\"n\">_stride_img_1</span><span class=\"w\"> </span><span class=\"o\">+</span><span class=\"w\"> </span><span class=\"mf\">2L</span><span class=\"n\">L</span><span class=\"w\"> </span><span class=\"o\">*</span><span class=\"w\"> </span><span class=\"n\">_stride_img_2</span><span class=\"p\">]</span><span class=\"w\"> </span><span class=\"o\">+</span><span class=\"w\"> </span><span class=\"n\">w_2</span><span class=\"w\"> </span><span class=\"o\">*</span><span class=\"w\"> </span><span class=\"n\">img_data</span><span class=\"p\">[(</span><span class=\"n\">ctr_0</span><span class=\"w\"> </span><span class=\"o\">+</span><span class=\"w\"> </span><span class=\"mf\">1L</span><span class=\"n\">L</span><span class=\"p\">)</span><span class=\"w\"> </span><span class=\"o\">*</span><span class=\"w\"> </span><span class=\"n\">_stride_img_0</span><span class=\"w\"> </span><span class=\"o\">+</span><span class=\"w\"> </span><span class=\"n\">ctr_1</span><span class=\"w\"> </span><span class=\"o\">*</span><span class=\"w\"> </span><span class=\"n\">_stride_img_1</span><span class=\"w\"> </span><span class=\"o\">+</span><span class=\"w\"> </span><span class=\"mf\">2L</span><span class=\"n\">L</span><span class=\"w\"> </span><span class=\"o\">*</span><span class=\"w\"> </span><span class=\"n\">_stride_img_2</span><span class=\"p\">]</span><span class=\"w\"> </span><span class=\"o\">-</span><span class=\"w\"> </span><span class=\"n\">w_2</span><span class=\"w\"> </span><span class=\"o\">*</span><span class=\"w\"> </span><span class=\"n\">img_data</span><span class=\"p\">[(</span><span class=\"n\">ctr_0</span><span class=\"w\"> </span><span class=\"o\">+</span><span class=\"w\"> </span><span class=\"mi\">-1LL</span><span class=\"p\">)</span><span class=\"w\"> </span><span class=\"o\">*</span><span class=\"w\"> </span><span class=\"n\">_stride_img_0</span><span class=\"w\"> </span><span class=\"o\">+</span><span class=\"w\"> </span><span 
class=\"n\">ctr_1</span><span class=\"w\"> </span><span class=\"o\">*</span><span class=\"w\"> </span><span class=\"n\">_stride_img_1</span><span class=\"w\"> </span><span class=\"o\">+</span><span class=\"w\"> </span><span class=\"mf\">2L</span><span class=\"n\">L</span><span class=\"w\"> </span><span class=\"o\">*</span><span class=\"w\"> </span><span class=\"n\">_stride_img_2</span><span class=\"p\">]);</span>\n",
+       "<span class=\"w\">         </span><span class=\"n\">_data_dst</span><span class=\"p\">[</span><span class=\"n\">ctr_0</span><span class=\"w\"> </span><span class=\"o\">*</span><span class=\"w\"> </span><span class=\"n\">_stride_dst_0</span><span class=\"w\"> </span><span class=\"o\">+</span><span class=\"w\"> </span><span class=\"n\">ctr_1</span><span class=\"w\"> </span><span class=\"o\">*</span><span class=\"w\"> </span><span class=\"n\">_stride_dst_1</span><span class=\"p\">]</span><span class=\"w\"> </span><span class=\"o\">=</span><span class=\"w\"> </span><span class=\"p\">(</span><span class=\"mf\">0.5</span><span class=\"w\"> </span><span class=\"o\">*</span><span class=\"w\"> </span><span class=\"n\">_data_img</span><span class=\"p\">[(</span><span class=\"n\">ctr_0</span><span class=\"w\"> </span><span class=\"o\">+</span><span class=\"w\"> </span><span class=\"mf\">1L</span><span class=\"n\">L</span><span class=\"p\">)</span><span class=\"w\"> </span><span class=\"o\">*</span><span class=\"w\"> </span><span class=\"n\">_stride_img_0</span><span class=\"w\"> </span><span class=\"o\">+</span><span class=\"w\"> </span><span class=\"p\">(</span><span class=\"n\">ctr_1</span><span class=\"w\"> </span><span class=\"o\">+</span><span class=\"w\"> </span><span class=\"mi\">-1LL</span><span class=\"p\">)</span><span class=\"w\"> </span><span class=\"o\">*</span><span class=\"w\"> </span><span class=\"n\">_stride_img_1</span><span class=\"w\"> </span><span class=\"o\">+</span><span class=\"w\"> </span><span class=\"mf\">2L</span><span class=\"n\">L</span><span class=\"w\"> </span><span class=\"o\">*</span><span class=\"w\"> </span><span class=\"n\">_stride_img_2</span><span class=\"p\">]</span><span class=\"w\"> </span><span class=\"o\">+</span><span class=\"w\"> </span><span class=\"mf\">-0.5</span><span class=\"w\"> </span><span class=\"o\">*</span><span class=\"w\"> </span><span class=\"n\">_data_img</span><span class=\"p\">[(</span><span 
class=\"n\">ctr_0</span><span class=\"w\"> </span><span class=\"o\">+</span><span class=\"w\"> </span><span class=\"mf\">1L</span><span class=\"n\">L</span><span class=\"p\">)</span><span class=\"w\"> </span><span class=\"o\">*</span><span class=\"w\"> </span><span class=\"n\">_stride_img_0</span><span class=\"w\"> </span><span class=\"o\">+</span><span class=\"w\"> </span><span class=\"p\">(</span><span class=\"n\">ctr_1</span><span class=\"w\"> </span><span class=\"o\">+</span><span class=\"w\"> </span><span class=\"mf\">1L</span><span class=\"n\">L</span><span class=\"p\">)</span><span class=\"w\"> </span><span class=\"o\">*</span><span class=\"w\"> </span><span class=\"n\">_stride_img_1</span><span class=\"w\"> </span><span class=\"o\">+</span><span class=\"w\"> </span><span class=\"mf\">2L</span><span class=\"n\">L</span><span class=\"w\"> </span><span class=\"o\">*</span><span class=\"w\"> </span><span class=\"n\">_stride_img_2</span><span class=\"p\">]</span><span class=\"w\"> </span><span class=\"o\">+</span><span class=\"w\"> </span><span class=\"mf\">-0.5</span><span class=\"w\"> </span><span class=\"o\">*</span><span class=\"w\"> </span><span class=\"n\">_data_img</span><span class=\"p\">[(</span><span class=\"n\">ctr_0</span><span class=\"w\"> </span><span class=\"o\">+</span><span class=\"w\"> </span><span class=\"mi\">-1LL</span><span class=\"p\">)</span><span class=\"w\"> </span><span class=\"o\">*</span><span class=\"w\"> </span><span class=\"n\">_stride_img_0</span><span class=\"w\"> </span><span class=\"o\">+</span><span class=\"w\"> </span><span class=\"p\">(</span><span class=\"n\">ctr_1</span><span class=\"w\"> </span><span class=\"o\">+</span><span class=\"w\"> </span><span class=\"mf\">1L</span><span class=\"n\">L</span><span class=\"p\">)</span><span class=\"w\"> </span><span class=\"o\">*</span><span class=\"w\"> </span><span class=\"n\">_stride_img_1</span><span class=\"w\"> </span><span class=\"o\">+</span><span class=\"w\"> </span><span 
class=\"mf\">2L</span><span class=\"n\">L</span><span class=\"w\"> </span><span class=\"o\">*</span><span class=\"w\"> </span><span class=\"n\">_stride_img_2</span><span class=\"p\">]</span><span class=\"w\"> </span><span class=\"o\">+</span><span class=\"w\"> </span><span class=\"mf\">-0.5</span><span class=\"w\"> </span><span class=\"o\">*</span><span class=\"w\"> </span><span class=\"n\">_data_img</span><span class=\"p\">[(</span><span class=\"n\">ctr_0</span><span class=\"w\"> </span><span class=\"o\">+</span><span class=\"w\"> </span><span class=\"mi\">-1LL</span><span class=\"p\">)</span><span class=\"w\"> </span><span class=\"o\">*</span><span class=\"w\"> </span><span class=\"n\">_stride_img_0</span><span class=\"w\"> </span><span class=\"o\">+</span><span class=\"w\"> </span><span class=\"p\">(</span><span class=\"n\">ctr_1</span><span class=\"w\"> </span><span class=\"o\">+</span><span class=\"w\"> </span><span class=\"mi\">-1LL</span><span class=\"p\">)</span><span class=\"w\"> </span><span class=\"o\">*</span><span class=\"w\"> </span><span class=\"n\">_stride_img_1</span><span class=\"w\"> </span><span class=\"o\">+</span><span class=\"w\"> </span><span class=\"mf\">2L</span><span class=\"n\">L</span><span class=\"w\"> </span><span class=\"o\">*</span><span class=\"w\"> </span><span class=\"n\">_stride_img_2</span><span class=\"p\">]</span><span class=\"w\"> </span><span class=\"o\">+</span><span class=\"w\"> </span><span class=\"n\">w_2</span><span class=\"w\"> </span><span class=\"o\">*</span><span class=\"w\"> </span><span class=\"n\">_data_img</span><span class=\"p\">[(</span><span class=\"n\">ctr_0</span><span class=\"w\"> </span><span class=\"o\">+</span><span class=\"w\"> </span><span class=\"mf\">1L</span><span class=\"n\">L</span><span class=\"p\">)</span><span class=\"w\"> </span><span class=\"o\">*</span><span class=\"w\"> </span><span class=\"n\">_stride_img_0</span><span class=\"w\"> </span><span class=\"o\">+</span><span class=\"w\"> 
</span><span class=\"n\">ctr_1</span><span class=\"w\"> </span><span class=\"o\">*</span><span class=\"w\"> </span><span class=\"n\">_stride_img_1</span><span class=\"w\"> </span><span class=\"o\">+</span><span class=\"w\"> </span><span class=\"mf\">2L</span><span class=\"n\">L</span><span class=\"w\"> </span><span class=\"o\">*</span><span class=\"w\"> </span><span class=\"n\">_stride_img_2</span><span class=\"p\">]</span><span class=\"w\"> </span><span class=\"o\">-</span><span class=\"w\"> </span><span class=\"n\">w_2</span><span class=\"w\"> </span><span class=\"o\">*</span><span class=\"w\"> </span><span class=\"n\">_data_img</span><span class=\"p\">[(</span><span class=\"n\">ctr_0</span><span class=\"w\"> </span><span class=\"o\">+</span><span class=\"w\"> </span><span class=\"mi\">-1LL</span><span class=\"p\">)</span><span class=\"w\"> </span><span class=\"o\">*</span><span class=\"w\"> </span><span class=\"n\">_stride_img_0</span><span class=\"w\"> </span><span class=\"o\">+</span><span class=\"w\"> </span><span class=\"n\">ctr_1</span><span class=\"w\"> </span><span class=\"o\">*</span><span class=\"w\"> </span><span class=\"n\">_stride_img_1</span><span class=\"w\"> </span><span class=\"o\">+</span><span class=\"w\"> </span><span class=\"mf\">2L</span><span class=\"n\">L</span><span class=\"w\"> </span><span class=\"o\">*</span><span class=\"w\"> </span><span class=\"n\">_stride_img_2</span><span class=\"p\">])</span><span class=\"w\"> </span><span class=\"o\">*</span><span class=\"w\"> </span><span class=\"p\">(</span><span class=\"mf\">0.5</span><span class=\"w\"> </span><span class=\"o\">*</span><span class=\"w\"> </span><span class=\"n\">_data_img</span><span class=\"p\">[(</span><span class=\"n\">ctr_0</span><span class=\"w\"> </span><span class=\"o\">+</span><span class=\"w\"> </span><span class=\"mf\">1L</span><span class=\"n\">L</span><span class=\"p\">)</span><span class=\"w\"> </span><span class=\"o\">*</span><span class=\"w\"> </span><span 
class=\"n\">_stride_img_0</span><span class=\"w\"> </span><span class=\"o\">+</span><span class=\"w\"> </span><span class=\"p\">(</span><span class=\"n\">ctr_1</span><span class=\"w\"> </span><span class=\"o\">+</span><span class=\"w\"> </span><span class=\"mi\">-1LL</span><span class=\"p\">)</span><span class=\"w\"> </span><span class=\"o\">*</span><span class=\"w\"> </span><span class=\"n\">_stride_img_1</span><span class=\"w\"> </span><span class=\"o\">+</span><span class=\"w\"> </span><span class=\"mf\">2L</span><span class=\"n\">L</span><span class=\"w\"> </span><span class=\"o\">*</span><span class=\"w\"> </span><span class=\"n\">_stride_img_2</span><span class=\"p\">]</span><span class=\"w\"> </span><span class=\"o\">+</span><span class=\"w\"> </span><span class=\"mf\">-0.5</span><span class=\"w\"> </span><span class=\"o\">*</span><span class=\"w\"> </span><span class=\"n\">_data_img</span><span class=\"p\">[(</span><span class=\"n\">ctr_0</span><span class=\"w\"> </span><span class=\"o\">+</span><span class=\"w\"> </span><span class=\"mf\">1L</span><span class=\"n\">L</span><span class=\"p\">)</span><span class=\"w\"> </span><span class=\"o\">*</span><span class=\"w\"> </span><span class=\"n\">_stride_img_0</span><span class=\"w\"> </span><span class=\"o\">+</span><span class=\"w\"> </span><span class=\"p\">(</span><span class=\"n\">ctr_1</span><span class=\"w\"> </span><span class=\"o\">+</span><span class=\"w\"> </span><span class=\"mf\">1L</span><span class=\"n\">L</span><span class=\"p\">)</span><span class=\"w\"> </span><span class=\"o\">*</span><span class=\"w\"> </span><span class=\"n\">_stride_img_1</span><span class=\"w\"> </span><span class=\"o\">+</span><span class=\"w\"> </span><span class=\"mf\">2L</span><span class=\"n\">L</span><span class=\"w\"> </span><span class=\"o\">*</span><span class=\"w\"> </span><span class=\"n\">_stride_img_2</span><span class=\"p\">]</span><span class=\"w\"> </span><span class=\"o\">+</span><span class=\"w\"> 
</span><span class=\"mf\">-0.5</span><span class=\"w\"> </span><span class=\"o\">*</span><span class=\"w\"> </span><span class=\"n\">_data_img</span><span class=\"p\">[(</span><span class=\"n\">ctr_0</span><span class=\"w\"> </span><span class=\"o\">+</span><span class=\"w\"> </span><span class=\"mi\">-1LL</span><span class=\"p\">)</span><span class=\"w\"> </span><span class=\"o\">*</span><span class=\"w\"> </span><span class=\"n\">_stride_img_0</span><span class=\"w\"> </span><span class=\"o\">+</span><span class=\"w\"> </span><span class=\"p\">(</span><span class=\"n\">ctr_1</span><span class=\"w\"> </span><span class=\"o\">+</span><span class=\"w\"> </span><span class=\"mf\">1L</span><span class=\"n\">L</span><span class=\"p\">)</span><span class=\"w\"> </span><span class=\"o\">*</span><span class=\"w\"> </span><span class=\"n\">_stride_img_1</span><span class=\"w\"> </span><span class=\"o\">+</span><span class=\"w\"> </span><span class=\"mf\">2L</span><span class=\"n\">L</span><span class=\"w\"> </span><span class=\"o\">*</span><span class=\"w\"> </span><span class=\"n\">_stride_img_2</span><span class=\"p\">]</span><span class=\"w\"> </span><span class=\"o\">+</span><span class=\"w\"> </span><span class=\"mf\">-0.5</span><span class=\"w\"> </span><span class=\"o\">*</span><span class=\"w\"> </span><span class=\"n\">_data_img</span><span class=\"p\">[(</span><span class=\"n\">ctr_0</span><span class=\"w\"> </span><span class=\"o\">+</span><span class=\"w\"> </span><span class=\"mi\">-1LL</span><span class=\"p\">)</span><span class=\"w\"> </span><span class=\"o\">*</span><span class=\"w\"> </span><span class=\"n\">_stride_img_0</span><span class=\"w\"> </span><span class=\"o\">+</span><span class=\"w\"> </span><span class=\"p\">(</span><span class=\"n\">ctr_1</span><span class=\"w\"> </span><span class=\"o\">+</span><span class=\"w\"> </span><span class=\"mi\">-1LL</span><span class=\"p\">)</span><span class=\"w\"> </span><span class=\"o\">*</span><span 
class=\"w\"> </span><span class=\"n\">_stride_img_1</span><span class=\"w\"> </span><span class=\"o\">+</span><span class=\"w\"> </span><span class=\"mf\">2L</span><span class=\"n\">L</span><span class=\"w\"> </span><span class=\"o\">*</span><span class=\"w\"> </span><span class=\"n\">_stride_img_2</span><span class=\"p\">]</span><span class=\"w\"> </span><span class=\"o\">+</span><span class=\"w\"> </span><span class=\"n\">w_2</span><span class=\"w\"> </span><span class=\"o\">*</span><span class=\"w\"> </span><span class=\"n\">_data_img</span><span class=\"p\">[(</span><span class=\"n\">ctr_0</span><span class=\"w\"> </span><span class=\"o\">+</span><span class=\"w\"> </span><span class=\"mf\">1L</span><span class=\"n\">L</span><span class=\"p\">)</span><span class=\"w\"> </span><span class=\"o\">*</span><span class=\"w\"> </span><span class=\"n\">_stride_img_0</span><span class=\"w\"> </span><span class=\"o\">+</span><span class=\"w\"> </span><span class=\"n\">ctr_1</span><span class=\"w\"> </span><span class=\"o\">*</span><span class=\"w\"> </span><span class=\"n\">_stride_img_1</span><span class=\"w\"> </span><span class=\"o\">+</span><span class=\"w\"> </span><span class=\"mf\">2L</span><span class=\"n\">L</span><span class=\"w\"> </span><span class=\"o\">*</span><span class=\"w\"> </span><span class=\"n\">_stride_img_2</span><span class=\"p\">]</span><span class=\"w\"> </span><span class=\"o\">-</span><span class=\"w\"> </span><span class=\"n\">w_2</span><span class=\"w\"> </span><span class=\"o\">*</span><span class=\"w\"> </span><span class=\"n\">_data_img</span><span class=\"p\">[(</span><span class=\"n\">ctr_0</span><span class=\"w\"> </span><span class=\"o\">+</span><span class=\"w\"> </span><span class=\"mi\">-1LL</span><span class=\"p\">)</span><span class=\"w\"> </span><span class=\"o\">*</span><span class=\"w\"> </span><span class=\"n\">_stride_img_0</span><span class=\"w\"> </span><span class=\"o\">+</span><span class=\"w\"> </span><span 
class=\"n\">ctr_1</span><span class=\"w\"> </span><span class=\"o\">*</span><span class=\"w\"> </span><span class=\"n\">_stride_img_1</span><span class=\"w\"> </span><span class=\"o\">+</span><span class=\"w\"> </span><span class=\"mf\">2L</span><span class=\"n\">L</span><span class=\"w\"> </span><span class=\"o\">*</span><span class=\"w\"> </span><span class=\"n\">_stride_img_2</span><span class=\"p\">]);</span>\n",
        "<span class=\"w\">      </span><span class=\"p\">}</span>\n",
-       "\n",
        "<span class=\"w\">   </span><span class=\"p\">}</span>\n",
-       "\n",
        "<span class=\"p\">}</span>\n",
        "</pre></div>\n"
       ],
       "text/plain": [
-       "FUNC_PREFIX void kernel (const int64_t _size_dst_0, const int64_t _size_dst_1, const int64_t _stride_dst_0, const int64_t _stride_dst_1, const int64_t _stride_img_0, const int64_t _stride_img_1, const int64_t _stride_img_2, double * const  dst_data, double * const  img_data, const double w_2)\n",
+       "FUNC_PREFIX void kernel (double * RESTRICT const _data_dst, double * RESTRICT const _data_img, const int64_t _size_dst_0, const int64_t _size_dst_1, const int64_t _stride_dst_0, const int64_t _stride_dst_1, const int64_t _stride_img_0, const int64_t _stride_img_1, const int64_t _stride_img_2, const double w_2)\n",
        "{\n",
        "   #pragma omp parallel for schedule(static) num_threads(2)\n",
        "   for(int64_t ctr_0 = 1LL; ctr_0 < _size_dst_0 - 1LL; ctr_0 += 1LL)\n",
        "   {\n",
        "      for(int64_t ctr_1 = 1LL; ctr_1 < _size_dst_1 - 1LL; ctr_1 += 1LL)\n",
        "      {\n",
-       "         dst_data[ctr_0 * _stride_dst_0 + ctr_1 * _stride_dst_1] = (0.5 * img_data[(ctr_0 + 1LL) * _stride_img_0 + (ctr_1 + -1LL) * _stride_img_1 + 2LL * _stride_img_2] - 0.5 * img_data[(ctr_0 + 1LL) * _stride_img_0 + (ctr_1 + 1LL) * _stride_img_1 + 2LL * _stride_img_2] - 0.5 * img_data[(ctr_0 + -1LL) * _stride_img_0 + (ctr_1 + 1LL) * _stride_img_1 + 2LL * _stride_img_2] - 0.5 * img_data[(ctr_0 + -1LL) * _stride_img_0 + (ctr_1 + -1LL) * _stride_img_1 + 2LL * _stride_img_2] + w_2 * img_data[(ctr_0 + 1LL) * _stride_img_0 + ctr_1 * _stride_img_1 + 2LL * _stride_img_2] - w_2 * img_data[(ctr_0 + -1LL) * _stride_img_0 + ctr_1 * _stride_img_1 + 2LL * _stride_img_2]) * (0.5 * img_data[(ctr_0 + 1LL) * _stride_img_0 + (ctr_1 + -1LL) * _stride_img_1 + 2LL * _stride_img_2] - 0.5 * img_data[(ctr_0 + 1LL) * _stride_img_0 + (ctr_1 + 1LL) * _stride_img_1 + 2LL * _stride_img_2] - 0.5 * img_data[(ctr_0 + -1LL) * _stride_img_0 + (ctr_1 + 1LL) * _stride_img_1 + 2LL * _stride_img_2] - 0.5 * img_data[(ctr_0 + -1LL) * _stride_img_0 + (ctr_1 + -1LL) * _stride_img_1 + 2LL * _stride_img_2] + w_2 * img_data[(ctr_0 + 1LL) * _stride_img_0 + ctr_1 * _stride_img_1 + 2LL * _stride_img_2] - w_2 * img_data[(ctr_0 + -1LL) * _stride_img_0 + ctr_1 * _stride_img_1 + 2LL * _stride_img_2]);\n",
+       "         _data_dst[ctr_0 * _stride_dst_0 + ctr_1 * _stride_dst_1] = (0.5 * _data_img[(ctr_0 + 1LL) * _stride_img_0 + (ctr_1 + -1LL) * _stride_img_1 + 2LL * _stride_img_2] + -0.5 * _data_img[(ctr_0 + 1LL) * _stride_img_0 + (ctr_1 + 1LL) * _stride_img_1 + 2LL * _stride_img_2] + -0.5 * _data_img[(ctr_0 + -1LL) * _stride_img_0 + (ctr_1 + 1LL) * _stride_img_1 + 2LL * _stride_img_2] + -0.5 * _data_img[(ctr_0 + -1LL) * _stride_img_0 + (ctr_1 + -1LL) * _stride_img_1 + 2LL * _stride_img_2] + w_2 * _data_img[(ctr_0 + 1LL) * _stride_img_0 + ctr_1 * _stride_img_1 + 2LL * _stride_img_2] - w_2 * _data_img[(ctr_0 + -1LL) * _stride_img_0 + ctr_1 * _stride_img_1 + 2LL * _stride_img_2]) * (0.5 * _data_img[(ctr_0 + 1LL) * _stride_img_0 + (ctr_1 + -1LL) * _stride_img_1 + 2LL * _stride_img_2] + -0.5 * _data_img[(ctr_0 + 1LL) * _stride_img_0 + (ctr_1 + 1LL) * _stride_img_1 + 2LL * _stride_img_2] + -0.5 * _data_img[(ctr_0 + -1LL) * _stride_img_0 + (ctr_1 + 1LL) * _stride_img_1 + 2LL * _stride_img_2] + -0.5 * _data_img[(ctr_0 + -1LL) * _stride_img_0 + (ctr_1 + -1LL) * _stride_img_1 + 2LL * _stride_img_2] + w_2 * _data_img[(ctr_0 + 1LL) * _stride_img_0 + ctr_1 * _stride_img_1 + 2LL * _stride_img_2] - w_2 * _data_img[(ctr_0 + -1LL) * _stride_img_0 + ctr_1 * _stride_img_1 + 2LL * _stride_img_2]);\n",
        "      }\n",
-       "\n",
        "   }\n",
-       "\n",
        "}"
       ]
      },
@@ -1270,8 +1264,8 @@
    "source": [
     "ast = ps.create_kernel(\n",
     "    update_rule,\n",
-    "    cpu_optim = ps.CpuOptimConfig(\n",
-    "        openmp=ps.OpenMpConfig(num_threads=2))\n",
+    "    cpu = ps.CpuOptions(\n",
+    "        openmp=ps.OpenMpOptions(enable=True, num_threads=2))\n",
     "    )\n",
     "\n",
     "ps.show_code(ast)"
@@ -1289,7 +1283,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 73,
+   "execution_count": 35,
    "metadata": {},
    "outputs": [
     {
@@ -1303,9 +1297,9 @@
        ".highlight .hll { background-color: #ffffcc }\n",
        ".highlight { background: #f8f8f8; }\n",
        ".highlight .c { color: #3D7B7B; font-style: italic } /* Comment */\n",
-       ".highlight .err { border: 1px solid #FF0000 } /* Error */\n",
+       ".highlight .err { border: 1px solid #F00 } /* Error */\n",
        ".highlight .k { color: #008000; font-weight: bold } /* Keyword */\n",
-       ".highlight .o { color: #666666 } /* Operator */\n",
+       ".highlight .o { color: #666 } /* Operator */\n",
        ".highlight .ch { color: #3D7B7B; font-style: italic } /* Comment.Hashbang */\n",
        ".highlight .cm { color: #3D7B7B; font-style: italic } /* Comment.Multiline */\n",
        ".highlight .cp { color: #9C6500 } /* Comment.Preproc */\n",
@@ -1322,34 +1316,34 @@
        ".highlight .gp { color: #000080; font-weight: bold } /* Generic.Prompt */\n",
        ".highlight .gs { font-weight: bold } /* Generic.Strong */\n",
        ".highlight .gu { color: #800080; font-weight: bold } /* Generic.Subheading */\n",
-       ".highlight .gt { color: #0044DD } /* Generic.Traceback */\n",
+       ".highlight .gt { color: #04D } /* Generic.Traceback */\n",
        ".highlight .kc { color: #008000; font-weight: bold } /* Keyword.Constant */\n",
        ".highlight .kd { color: #008000; font-weight: bold } /* Keyword.Declaration */\n",
        ".highlight .kn { color: #008000; font-weight: bold } /* Keyword.Namespace */\n",
        ".highlight .kp { color: #008000 } /* Keyword.Pseudo */\n",
        ".highlight .kr { color: #008000; font-weight: bold } /* Keyword.Reserved */\n",
        ".highlight .kt { color: #B00040 } /* Keyword.Type */\n",
-       ".highlight .m { color: #666666 } /* Literal.Number */\n",
+       ".highlight .m { color: #666 } /* Literal.Number */\n",
        ".highlight .s { color: #BA2121 } /* Literal.String */\n",
        ".highlight .na { color: #687822 } /* Name.Attribute */\n",
        ".highlight .nb { color: #008000 } /* Name.Builtin */\n",
-       ".highlight .nc { color: #0000FF; font-weight: bold } /* Name.Class */\n",
-       ".highlight .no { color: #880000 } /* Name.Constant */\n",
-       ".highlight .nd { color: #AA22FF } /* Name.Decorator */\n",
+       ".highlight .nc { color: #00F; font-weight: bold } /* Name.Class */\n",
+       ".highlight .no { color: #800 } /* Name.Constant */\n",
+       ".highlight .nd { color: #A2F } /* Name.Decorator */\n",
        ".highlight .ni { color: #717171; font-weight: bold } /* Name.Entity */\n",
        ".highlight .ne { color: #CB3F38; font-weight: bold } /* Name.Exception */\n",
-       ".highlight .nf { color: #0000FF } /* Name.Function */\n",
+       ".highlight .nf { color: #00F } /* Name.Function */\n",
        ".highlight .nl { color: #767600 } /* Name.Label */\n",
-       ".highlight .nn { color: #0000FF; font-weight: bold } /* Name.Namespace */\n",
+       ".highlight .nn { color: #00F; font-weight: bold } /* Name.Namespace */\n",
        ".highlight .nt { color: #008000; font-weight: bold } /* Name.Tag */\n",
        ".highlight .nv { color: #19177C } /* Name.Variable */\n",
-       ".highlight .ow { color: #AA22FF; font-weight: bold } /* Operator.Word */\n",
-       ".highlight .w { color: #bbbbbb } /* Text.Whitespace */\n",
-       ".highlight .mb { color: #666666 } /* Literal.Number.Bin */\n",
-       ".highlight .mf { color: #666666 } /* Literal.Number.Float */\n",
-       ".highlight .mh { color: #666666 } /* Literal.Number.Hex */\n",
-       ".highlight .mi { color: #666666 } /* Literal.Number.Integer */\n",
-       ".highlight .mo { color: #666666 } /* Literal.Number.Oct */\n",
+       ".highlight .ow { color: #A2F; font-weight: bold } /* Operator.Word */\n",
+       ".highlight .w { color: #BBB } /* Text.Whitespace */\n",
+       ".highlight .mb { color: #666 } /* Literal.Number.Bin */\n",
+       ".highlight .mf { color: #666 } /* Literal.Number.Float */\n",
+       ".highlight .mh { color: #666 } /* Literal.Number.Hex */\n",
+       ".highlight .mi { color: #666 } /* Literal.Number.Integer */\n",
+       ".highlight .mo { color: #666 } /* Literal.Number.Oct */\n",
        ".highlight .sa { color: #BA2121 } /* Literal.String.Affix */\n",
        ".highlight .sb { color: #BA2121 } /* Literal.String.Backtick */\n",
        ".highlight .sc { color: #BA2121 } /* Literal.String.Char */\n",
@@ -1364,12 +1358,12 @@
        ".highlight .s1 { color: #BA2121 } /* Literal.String.Single */\n",
        ".highlight .ss { color: #19177C } /* Literal.String.Symbol */\n",
        ".highlight .bp { color: #008000 } /* Name.Builtin.Pseudo */\n",
-       ".highlight .fm { color: #0000FF } /* Name.Function.Magic */\n",
+       ".highlight .fm { color: #00F } /* Name.Function.Magic */\n",
        ".highlight .vc { color: #19177C } /* Name.Variable.Class */\n",
        ".highlight .vg { color: #19177C } /* Name.Variable.Global */\n",
        ".highlight .vi { color: #19177C } /* Name.Variable.Instance */\n",
        ".highlight .vm { color: #19177C } /* Name.Variable.Magic */\n",
-       ".highlight .il { color: #666666 } /* Literal.Number.Integer.Long */</style>"
+       ".highlight .il { color: #666 } /* Literal.Number.Integer.Long */</style>"
       ],
       "text/plain": [
        "<IPython.core.display.HTML object>"
@@ -1381,32 +1375,28 @@
     {
      "data": {
       "text/html": [
-       "<div class=\"highlight\"><pre><span></span><span class=\"n\">FUNC_PREFIX</span><span class=\"w\"> </span><span class=\"kt\">void</span><span class=\"w\"> </span><span class=\"n\">kernel</span><span class=\"w\"> </span><span class=\"p\">(</span><span class=\"kt\">double</span><span class=\"w\"> </span><span class=\"o\">*</span><span class=\"w\"> </span><span class=\"k\">const</span><span class=\"w\">  </span><span class=\"n\">I_data</span><span class=\"p\">,</span><span class=\"w\"> </span><span class=\"kt\">double</span><span class=\"w\"> </span><span class=\"o\">*</span><span class=\"w\"> </span><span class=\"k\">const</span><span class=\"w\">  </span><span class=\"n\">dst_data</span><span class=\"p\">)</span>\n",
+       "<div class=\"highlight\"><pre><span></span><span class=\"n\">FUNC_PREFIX</span><span class=\"w\"> </span><span class=\"kt\">void</span><span class=\"w\"> </span><span class=\"n\">kernel</span><span class=\"w\"> </span><span class=\"p\">(</span><span class=\"kt\">double</span><span class=\"w\"> </span><span class=\"o\">*</span><span class=\"w\"> </span><span class=\"n\">RESTRICT</span><span class=\"w\"> </span><span class=\"k\">const</span><span class=\"w\"> </span><span class=\"n\">_data_I</span><span class=\"p\">,</span><span class=\"w\"> </span><span class=\"kt\">double</span><span class=\"w\"> </span><span class=\"o\">*</span><span class=\"w\"> </span><span class=\"n\">RESTRICT</span><span class=\"w\"> </span><span class=\"k\">const</span><span class=\"w\"> </span><span class=\"n\">_data_dst</span><span class=\"p\">)</span>\n",
        "<span class=\"p\">{</span>\n",
-       "<span class=\"w\">   </span><span class=\"k\">for</span><span class=\"p\">(</span><span class=\"kt\">int64_t</span><span class=\"w\"> </span><span class=\"n\">ctr_0</span><span class=\"w\"> </span><span class=\"o\">=</span><span class=\"w\"> </span><span class=\"mf\">1L</span><span class=\"n\">L</span><span class=\"p\">;</span><span class=\"w\"> </span><span class=\"n\">ctr_0</span><span class=\"w\"> </span><span class=\"o\">&lt;</span><span class=\"w\"> </span><span class=\"mf\">81L</span><span class=\"n\">L</span><span class=\"p\">;</span><span class=\"w\"> </span><span class=\"n\">ctr_0</span><span class=\"w\"> </span><span class=\"o\">+=</span><span class=\"w\"> </span><span class=\"mf\">1L</span><span class=\"n\">L</span><span class=\"p\">)</span>\n",
+       "<span class=\"w\">   </span><span class=\"k\">for</span><span class=\"p\">(</span><span class=\"kt\">int64_t</span><span class=\"w\"> </span><span class=\"n\">ctr_0</span><span class=\"w\"> </span><span class=\"o\">=</span><span class=\"w\"> </span><span class=\"mf\">1L</span><span class=\"n\">L</span><span class=\"p\">;</span><span class=\"w\"> </span><span class=\"n\">ctr_0</span><span class=\"w\"> </span><span class=\"o\">&lt;</span><span class=\"w\"> </span><span class=\"mf\">202L</span><span class=\"n\">L</span><span class=\"p\">;</span><span class=\"w\"> </span><span class=\"n\">ctr_0</span><span class=\"w\"> </span><span class=\"o\">+=</span><span class=\"w\"> </span><span class=\"mf\">1L</span><span class=\"n\">L</span><span class=\"p\">)</span>\n",
        "<span class=\"w\">   </span><span class=\"p\">{</span>\n",
-       "<span class=\"w\">      </span><span class=\"k\">for</span><span class=\"p\">(</span><span class=\"kt\">int64_t</span><span class=\"w\"> </span><span class=\"n\">ctr_1</span><span class=\"w\"> </span><span class=\"o\">=</span><span class=\"w\"> </span><span class=\"mf\">1L</span><span class=\"n\">L</span><span class=\"p\">;</span><span class=\"w\"> </span><span class=\"n\">ctr_1</span><span class=\"w\"> </span><span class=\"o\">&lt;</span><span class=\"w\"> </span><span class=\"mf\">289L</span><span class=\"n\">L</span><span class=\"p\">;</span><span class=\"w\"> </span><span class=\"n\">ctr_1</span><span class=\"w\"> </span><span class=\"o\">+=</span><span class=\"w\"> </span><span class=\"mf\">1L</span><span class=\"n\">L</span><span class=\"p\">)</span>\n",
+       "<span class=\"w\">      </span><span class=\"k\">for</span><span class=\"p\">(</span><span class=\"kt\">int64_t</span><span class=\"w\"> </span><span class=\"n\">ctr_1</span><span class=\"w\"> </span><span class=\"o\">=</span><span class=\"w\"> </span><span class=\"mf\">1L</span><span class=\"n\">L</span><span class=\"p\">;</span><span class=\"w\"> </span><span class=\"n\">ctr_1</span><span class=\"w\"> </span><span class=\"o\">&lt;</span><span class=\"w\"> </span><span class=\"mf\">600L</span><span class=\"n\">L</span><span class=\"p\">;</span><span class=\"w\"> </span><span class=\"n\">ctr_1</span><span class=\"w\"> </span><span class=\"o\">+=</span><span class=\"w\"> </span><span class=\"mf\">1L</span><span class=\"n\">L</span><span class=\"p\">)</span>\n",
        "<span class=\"w\">      </span><span class=\"p\">{</span>\n",
-       "<span class=\"w\">         </span><span class=\"n\">dst_data</span><span class=\"p\">[</span><span class=\"n\">ctr_0</span><span class=\"w\"> </span><span class=\"o\">*</span><span class=\"w\"> </span><span class=\"mf\">290L</span><span class=\"n\">L</span><span class=\"w\"> </span><span class=\"o\">+</span><span class=\"w\"> </span><span class=\"n\">ctr_1</span><span class=\"p\">]</span><span class=\"w\"> </span><span class=\"o\">=</span><span class=\"w\"> </span><span class=\"mf\">-1.0</span><span class=\"w\"> </span><span class=\"o\">*</span><span class=\"w\"> </span><span class=\"n\">I_data</span><span class=\"p\">[(</span><span class=\"n\">ctr_0</span><span class=\"w\"> </span><span class=\"o\">+</span><span class=\"w\"> </span><span class=\"mf\">1L</span><span class=\"n\">L</span><span class=\"p\">)</span><span class=\"w\"> </span><span class=\"o\">*</span><span class=\"w\"> </span><span class=\"mf\">1160L</span><span class=\"n\">L</span><span class=\"w\"> </span><span class=\"o\">+</span><span class=\"w\"> </span><span class=\"p\">(</span><span class=\"n\">ctr_1</span><span class=\"w\"> </span><span class=\"o\">+</span><span class=\"w\"> </span><span class=\"mf\">1L</span><span class=\"n\">L</span><span class=\"p\">)</span><span class=\"w\"> </span><span class=\"o\">*</span><span class=\"w\"> </span><span class=\"mf\">4L</span><span class=\"n\">L</span><span class=\"w\"> </span><span class=\"o\">+</span><span class=\"w\"> </span><span class=\"mf\">1L</span><span class=\"n\">L</span><span class=\"p\">]</span><span class=\"w\"> </span><span class=\"o\">-</span><span class=\"w\"> </span><span class=\"n\">I_data</span><span class=\"p\">[(</span><span class=\"n\">ctr_0</span><span class=\"w\"> </span><span class=\"o\">+</span><span class=\"w\"> </span><span class=\"mi\">-1LL</span><span class=\"p\">)</span><span class=\"w\"> </span><span class=\"o\">*</span><span class=\"w\"> </span><span class=\"mf\">1160L</span><span class=\"n\">L</span><span 
class=\"w\"> </span><span class=\"o\">+</span><span class=\"w\"> </span><span class=\"p\">(</span><span class=\"n\">ctr_1</span><span class=\"w\"> </span><span class=\"o\">+</span><span class=\"w\"> </span><span class=\"mf\">1L</span><span class=\"n\">L</span><span class=\"p\">)</span><span class=\"w\"> </span><span class=\"o\">*</span><span class=\"w\"> </span><span class=\"mf\">4L</span><span class=\"n\">L</span><span class=\"w\"> </span><span class=\"o\">+</span><span class=\"w\"> </span><span class=\"mf\">1L</span><span class=\"n\">L</span><span class=\"p\">]</span><span class=\"w\"> </span><span class=\"o\">-</span><span class=\"w\"> </span><span class=\"n\">I_data</span><span class=\"p\">[(</span><span class=\"n\">ctr_0</span><span class=\"w\"> </span><span class=\"o\">+</span><span class=\"w\"> </span><span class=\"mi\">-1LL</span><span class=\"p\">)</span><span class=\"w\"> </span><span class=\"o\">*</span><span class=\"w\"> </span><span class=\"mf\">1160L</span><span class=\"n\">L</span><span class=\"w\"> </span><span class=\"o\">+</span><span class=\"w\"> </span><span class=\"p\">(</span><span class=\"n\">ctr_1</span><span class=\"w\"> </span><span class=\"o\">+</span><span class=\"w\"> </span><span class=\"mi\">-1LL</span><span class=\"p\">)</span><span class=\"w\"> </span><span class=\"o\">*</span><span class=\"w\"> </span><span class=\"mf\">4L</span><span class=\"n\">L</span><span class=\"w\"> </span><span class=\"o\">+</span><span class=\"w\"> </span><span class=\"mf\">1L</span><span class=\"n\">L</span><span class=\"p\">]</span><span class=\"w\"> </span><span class=\"o\">-</span><span class=\"w\"> </span><span class=\"mf\">2.0</span><span class=\"w\"> </span><span class=\"o\">*</span><span class=\"w\"> </span><span class=\"n\">I_data</span><span class=\"p\">[(</span><span class=\"n\">ctr_0</span><span class=\"w\"> </span><span class=\"o\">+</span><span class=\"w\"> </span><span class=\"mi\">-1LL</span><span class=\"p\">)</span><span class=\"w\"> 
</span><span class=\"o\">*</span><span class=\"w\"> </span><span class=\"mf\">1160L</span><span class=\"n\">L</span><span class=\"w\"> </span><span class=\"o\">+</span><span class=\"w\"> </span><span class=\"n\">ctr_1</span><span class=\"w\"> </span><span class=\"o\">*</span><span class=\"w\"> </span><span class=\"mf\">4L</span><span class=\"n\">L</span><span class=\"w\"> </span><span class=\"o\">+</span><span class=\"w\"> </span><span class=\"mf\">1L</span><span class=\"n\">L</span><span class=\"p\">]</span><span class=\"w\"> </span><span class=\"o\">+</span><span class=\"w\"> </span><span class=\"mf\">2.0</span><span class=\"w\"> </span><span class=\"o\">*</span><span class=\"w\"> </span><span class=\"n\">I_data</span><span class=\"p\">[(</span><span class=\"n\">ctr_0</span><span class=\"w\"> </span><span class=\"o\">+</span><span class=\"w\"> </span><span class=\"mf\">1L</span><span class=\"n\">L</span><span class=\"p\">)</span><span class=\"w\"> </span><span class=\"o\">*</span><span class=\"w\"> </span><span class=\"mf\">1160L</span><span class=\"n\">L</span><span class=\"w\"> </span><span class=\"o\">+</span><span class=\"w\"> </span><span class=\"n\">ctr_1</span><span class=\"w\"> </span><span class=\"o\">*</span><span class=\"w\"> </span><span class=\"mf\">4L</span><span class=\"n\">L</span><span class=\"w\"> </span><span class=\"o\">+</span><span class=\"w\"> </span><span class=\"mf\">1L</span><span class=\"n\">L</span><span class=\"p\">]</span><span class=\"w\"> </span><span class=\"o\">+</span><span class=\"w\"> </span><span class=\"n\">I_data</span><span class=\"p\">[(</span><span class=\"n\">ctr_0</span><span class=\"w\"> </span><span class=\"o\">+</span><span class=\"w\"> </span><span class=\"mf\">1L</span><span class=\"n\">L</span><span class=\"p\">)</span><span class=\"w\"> </span><span class=\"o\">*</span><span class=\"w\"> </span><span class=\"mf\">1160L</span><span class=\"n\">L</span><span class=\"w\"> </span><span class=\"o\">+</span><span 
class=\"w\"> </span><span class=\"p\">(</span><span class=\"n\">ctr_1</span><span class=\"w\"> </span><span class=\"o\">+</span><span class=\"w\"> </span><span class=\"mi\">-1LL</span><span class=\"p\">)</span><span class=\"w\"> </span><span class=\"o\">*</span><span class=\"w\"> </span><span class=\"mf\">4L</span><span class=\"n\">L</span><span class=\"w\"> </span><span class=\"o\">+</span><span class=\"w\"> </span><span class=\"mf\">1L</span><span class=\"n\">L</span><span class=\"p\">];</span>\n",
+       "<span class=\"w\">         </span><span class=\"n\">_data_dst</span><span class=\"p\">[</span><span class=\"n\">ctr_0</span><span class=\"w\"> </span><span class=\"o\">*</span><span class=\"w\"> </span><span class=\"mf\">601L</span><span class=\"n\">L</span><span class=\"w\"> </span><span class=\"o\">+</span><span class=\"w\"> </span><span class=\"n\">ctr_1</span><span class=\"p\">]</span><span class=\"w\"> </span><span class=\"o\">=</span><span class=\"w\"> </span><span class=\"mf\">-1.0</span><span class=\"w\"> </span><span class=\"o\">*</span><span class=\"w\"> </span><span class=\"n\">_data_I</span><span class=\"p\">[(</span><span class=\"n\">ctr_0</span><span class=\"w\"> </span><span class=\"o\">+</span><span class=\"w\"> </span><span class=\"mf\">1L</span><span class=\"n\">L</span><span class=\"p\">)</span><span class=\"w\"> </span><span class=\"o\">*</span><span class=\"w\"> </span><span class=\"mf\">2404L</span><span class=\"n\">L</span><span class=\"w\"> </span><span class=\"o\">+</span><span class=\"w\"> </span><span class=\"p\">(</span><span class=\"n\">ctr_1</span><span class=\"w\"> </span><span class=\"o\">+</span><span class=\"w\"> </span><span class=\"mf\">1L</span><span class=\"n\">L</span><span class=\"p\">)</span><span class=\"w\"> </span><span class=\"o\">*</span><span class=\"w\"> </span><span class=\"mf\">4L</span><span class=\"n\">L</span><span class=\"w\"> </span><span class=\"o\">+</span><span class=\"w\"> </span><span class=\"mf\">1L</span><span class=\"n\">L</span><span class=\"p\">]</span><span class=\"w\"> </span><span class=\"o\">-</span><span class=\"w\"> </span><span class=\"n\">_data_I</span><span class=\"p\">[(</span><span class=\"n\">ctr_0</span><span class=\"w\"> </span><span class=\"o\">+</span><span class=\"w\"> </span><span class=\"mi\">-1LL</span><span class=\"p\">)</span><span class=\"w\"> </span><span class=\"o\">*</span><span class=\"w\"> </span><span class=\"mf\">2404L</span><span class=\"n\">L</span><span 
class=\"w\"> </span><span class=\"o\">+</span><span class=\"w\"> </span><span class=\"p\">(</span><span class=\"n\">ctr_1</span><span class=\"w\"> </span><span class=\"o\">+</span><span class=\"w\"> </span><span class=\"mf\">1L</span><span class=\"n\">L</span><span class=\"p\">)</span><span class=\"w\"> </span><span class=\"o\">*</span><span class=\"w\"> </span><span class=\"mf\">4L</span><span class=\"n\">L</span><span class=\"w\"> </span><span class=\"o\">+</span><span class=\"w\"> </span><span class=\"mf\">1L</span><span class=\"n\">L</span><span class=\"p\">]</span><span class=\"w\"> </span><span class=\"o\">-</span><span class=\"w\"> </span><span class=\"n\">_data_I</span><span class=\"p\">[(</span><span class=\"n\">ctr_0</span><span class=\"w\"> </span><span class=\"o\">+</span><span class=\"w\"> </span><span class=\"mi\">-1LL</span><span class=\"p\">)</span><span class=\"w\"> </span><span class=\"o\">*</span><span class=\"w\"> </span><span class=\"mf\">2404L</span><span class=\"n\">L</span><span class=\"w\"> </span><span class=\"o\">+</span><span class=\"w\"> </span><span class=\"p\">(</span><span class=\"n\">ctr_1</span><span class=\"w\"> </span><span class=\"o\">+</span><span class=\"w\"> </span><span class=\"mi\">-1LL</span><span class=\"p\">)</span><span class=\"w\"> </span><span class=\"o\">*</span><span class=\"w\"> </span><span class=\"mf\">4L</span><span class=\"n\">L</span><span class=\"w\"> </span><span class=\"o\">+</span><span class=\"w\"> </span><span class=\"mf\">1L</span><span class=\"n\">L</span><span class=\"p\">]</span><span class=\"w\"> </span><span class=\"o\">+</span><span class=\"w\"> </span><span class=\"mf\">-2.0</span><span class=\"w\"> </span><span class=\"o\">*</span><span class=\"w\"> </span><span class=\"n\">_data_I</span><span class=\"p\">[(</span><span class=\"n\">ctr_0</span><span class=\"w\"> </span><span class=\"o\">+</span><span class=\"w\"> </span><span class=\"mi\">-1LL</span><span class=\"p\">)</span><span class=\"w\"> 
</span><span class=\"o\">*</span><span class=\"w\"> </span><span class=\"mf\">2404L</span><span class=\"n\">L</span><span class=\"w\"> </span><span class=\"o\">+</span><span class=\"w\"> </span><span class=\"n\">ctr_1</span><span class=\"w\"> </span><span class=\"o\">*</span><span class=\"w\"> </span><span class=\"mf\">4L</span><span class=\"n\">L</span><span class=\"w\"> </span><span class=\"o\">+</span><span class=\"w\"> </span><span class=\"mf\">1L</span><span class=\"n\">L</span><span class=\"p\">]</span><span class=\"w\"> </span><span class=\"o\">+</span><span class=\"w\"> </span><span class=\"mf\">2.0</span><span class=\"w\"> </span><span class=\"o\">*</span><span class=\"w\"> </span><span class=\"n\">_data_I</span><span class=\"p\">[(</span><span class=\"n\">ctr_0</span><span class=\"w\"> </span><span class=\"o\">+</span><span class=\"w\"> </span><span class=\"mf\">1L</span><span class=\"n\">L</span><span class=\"p\">)</span><span class=\"w\"> </span><span class=\"o\">*</span><span class=\"w\"> </span><span class=\"mf\">2404L</span><span class=\"n\">L</span><span class=\"w\"> </span><span class=\"o\">+</span><span class=\"w\"> </span><span class=\"n\">ctr_1</span><span class=\"w\"> </span><span class=\"o\">*</span><span class=\"w\"> </span><span class=\"mf\">4L</span><span class=\"n\">L</span><span class=\"w\"> </span><span class=\"o\">+</span><span class=\"w\"> </span><span class=\"mf\">1L</span><span class=\"n\">L</span><span class=\"p\">]</span><span class=\"w\"> </span><span class=\"o\">+</span><span class=\"w\"> </span><span class=\"n\">_data_I</span><span class=\"p\">[(</span><span class=\"n\">ctr_0</span><span class=\"w\"> </span><span class=\"o\">+</span><span class=\"w\"> </span><span class=\"mf\">1L</span><span class=\"n\">L</span><span class=\"p\">)</span><span class=\"w\"> </span><span class=\"o\">*</span><span class=\"w\"> </span><span class=\"mf\">2404L</span><span class=\"n\">L</span><span class=\"w\"> </span><span class=\"o\">+</span><span 
class=\"w\"> </span><span class=\"p\">(</span><span class=\"n\">ctr_1</span><span class=\"w\"> </span><span class=\"o\">+</span><span class=\"w\"> </span><span class=\"mi\">-1LL</span><span class=\"p\">)</span><span class=\"w\"> </span><span class=\"o\">*</span><span class=\"w\"> </span><span class=\"mf\">4L</span><span class=\"n\">L</span><span class=\"w\"> </span><span class=\"o\">+</span><span class=\"w\"> </span><span class=\"mf\">1L</span><span class=\"n\">L</span><span class=\"p\">];</span>\n",
        "<span class=\"w\">      </span><span class=\"p\">}</span>\n",
-       "\n",
        "<span class=\"w\">   </span><span class=\"p\">}</span>\n",
-       "\n",
        "<span class=\"p\">}</span>\n",
        "</pre></div>\n"
       ],
       "text/plain": [
-       "FUNC_PREFIX void kernel (double * const  I_data, double * const  dst_data)\n",
+       "FUNC_PREFIX void kernel (double * RESTRICT const _data_I, double * RESTRICT const _data_dst)\n",
        "{\n",
-       "   for(int64_t ctr_0 = 1LL; ctr_0 < 81LL; ctr_0 += 1LL)\n",
+       "   for(int64_t ctr_0 = 1LL; ctr_0 < 202LL; ctr_0 += 1LL)\n",
        "   {\n",
-       "      for(int64_t ctr_1 = 1LL; ctr_1 < 289LL; ctr_1 += 1LL)\n",
+       "      for(int64_t ctr_1 = 1LL; ctr_1 < 600LL; ctr_1 += 1LL)\n",
        "      {\n",
-       "         dst_data[ctr_0 * 290LL + ctr_1] = -1.0 * I_data[(ctr_0 + 1LL) * 1160LL + (ctr_1 + 1LL) * 4LL + 1LL] - I_data[(ctr_0 + -1LL) * 1160LL + (ctr_1 + 1LL) * 4LL + 1LL] - I_data[(ctr_0 + -1LL) * 1160LL + (ctr_1 + -1LL) * 4LL + 1LL] - 2.0 * I_data[(ctr_0 + -1LL) * 1160LL + ctr_1 * 4LL + 1LL] + 2.0 * I_data[(ctr_0 + 1LL) * 1160LL + ctr_1 * 4LL + 1LL] + I_data[(ctr_0 + 1LL) * 1160LL + (ctr_1 + -1LL) * 4LL + 1LL];\n",
+       "         _data_dst[ctr_0 * 601LL + ctr_1] = -1.0 * _data_I[(ctr_0 + 1LL) * 2404LL + (ctr_1 + 1LL) * 4LL + 1LL] - _data_I[(ctr_0 + -1LL) * 2404LL + (ctr_1 + 1LL) * 4LL + 1LL] - _data_I[(ctr_0 + -1LL) * 2404LL + (ctr_1 + -1LL) * 4LL + 1LL] + -2.0 * _data_I[(ctr_0 + -1LL) * 2404LL + ctr_1 * 4LL + 1LL] + 2.0 * _data_I[(ctr_0 + 1LL) * 2404LL + ctr_1 * 4LL + 1LL] + _data_I[(ctr_0 + 1LL) * 2404LL + (ctr_1 + -1LL) * 4LL + 1LL];\n",
        "      }\n",
-       "\n",
        "   }\n",
-       "\n",
        "}"
       ]
      },
@@ -1443,7 +1433,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 74,
+   "execution_count": 36,
    "metadata": {},
    "outputs": [
     {
@@ -1486,7 +1476,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.10.13"
+   "version": "3.10.14"
   }
  },
  "nbformat": 4,
diff --git a/src/pystencils/__init__.py b/src/pystencils/__init__.py
index 2bb4aac3d..8c59f7846 100644
--- a/src/pystencils/__init__.py
+++ b/src/pystencils/__init__.py
@@ -4,7 +4,7 @@ from .codegen import (
     Target,
     CreateKernelConfig,
     CpuOptions,
-    VectorizationConfig,
+    VectorizationOptions,
     OpenMpOptions,
     GpuOptions,
     AUTO
@@ -51,7 +51,7 @@ __all__ = [
     "make_slice",
     "CreateKernelConfig",
     "CpuOptions",
-    "VectorizationConfig",
+    "VectorizationOptions",
     "GpuOptions",
     "OpenMpOptions",
     "AUTO",
diff --git a/src/pystencils/backend/kernelcreation/iteration_space.py b/src/pystencils/backend/kernelcreation/iteration_space.py
index 031a0d843..f7425c06b 100644
--- a/src/pystencils/backend/kernelcreation/iteration_space.py
+++ b/src/pystencils/backend/kernelcreation/iteration_space.py
@@ -17,7 +17,6 @@ from ...types import PsStructType
 from ..exceptions import PsInputError, KernelConstraintsError
 
 if TYPE_CHECKING:
-    from ...codegen.config import _AUTO_TYPE
     from .context import KernelCreationContext
 
 
@@ -62,6 +61,7 @@ class FullIterationSpace(IterationSpace):
     @dataclass
     class Dimension:
         """One dimension of a dense iteration space"""
+
         start: PsExpression
         stop: PsExpression
         step: PsExpression
@@ -196,7 +196,7 @@ class FullIterationSpace(IterationSpace):
     def dimensions(self):
         """The dimensions of this iteration space"""
         return self._dimensions
-    
+
     @property
     def counters(self) -> tuple[PsSymbol, ...]:
         return tuple(dim.counter for dim in self._dimensions)
@@ -220,7 +220,7 @@ class FullIterationSpace(IterationSpace):
     def archetype_field(self) -> Field | None:
         """Field whose shape and memory layout act as archetypes for this iteration space's dimensions."""
         return self._archetype_field
-    
+
     @property
     def loop_order(self) -> tuple[int, ...]:
         """Return the loop order of this iteration space, ordered from slowest to fastest coordinate."""
@@ -242,7 +242,7 @@ class FullIterationSpace(IterationSpace):
         self, dimension: int | FullIterationSpace.Dimension | None = None
     ) -> PsExpression:
         """Construct an expression representing the actual number of unique points inside the iteration space.
-        
+
         Args:
             dimension: If an integer or a `Dimension` object is given, the number of iterations in that
                 dimension is computed. If `None`, the total number of iterations inside the entire space
@@ -417,14 +417,59 @@ def create_sparse_iteration_space(
 def create_full_iteration_space(
     ctx: KernelCreationContext,
     assignments: AssignmentCollection,
-    ghost_layers: None | _AUTO_TYPE | int | Sequence[int | tuple[int, int]] = None,
+    ghost_layers: None | int | Sequence[int | tuple[int, int]] = None,
     iteration_slice: None | int | slice | tuple[int | slice, ...] = None,
+    infer_ghost_layers: bool = False,
 ) -> IterationSpace:
+    """Create a dense iteration space from a sequence of assignments and iteration slice information.
+
+    This function finds all accesses to fields in the given assignment collection,
+    analyzes the set of fields involved,
+    and determines the iteration space bounds from these.
+    This requires that either all fields are of the same, fixed, shape, or all of them are
+    variable-shaped.
+    Also, all fields need to have the same memory layout of their spatial dimensions.
+
+    Args:
+        ctx: The kernel creation context
+        assignments: Collection of assignments the iteration space should be inferred from
+        ghost_layers: If set, strip off that many ghost layers from all sides of the iteration cuboid
+        iteration_slice: If set, constrain iteration to the given slice.
+            For details on the parsing of slices, see `AstFactory.parse_slice`.
+        infer_ghost_layers: If `True`, infer the number of ghost layers from the stencil ranges
+            used in the kernel.
+
+    Returns:
+        IterationSpace: The constructed iteration space.
+
+    Raises:
+        KernelConstraintsError: If field shape or memory layout conflicts are detected
+        ValueError: If the iteration slice could not be parsed
+
+    .. attention::
+        The ``ghost_layers`` and ``iteration_slice`` arguments are mutually exclusive.
+        Also, if ``infer_ghost_layers=True``, neither of them may be set.
+    """
+
     assert not ctx.fields.index_fields
 
-    if (ghost_layers is not None) and (iteration_slice is not None):
+    if not (
+        (ghost_layers is not None)
+        or (iteration_slice is not None)
+        or infer_ghost_layers
+    ):
         raise ValueError(
-            "At most one of `ghost_layers` and `iteration_slice` may be specified."
+            "One argument of `ghost_layers`, `iteration_slice`, and `infer_ghost_layers` must be set."
+        )
+
+    if (
+        int(ghost_layers is not None)
+        + int(iteration_slice is not None)
+        + int(infer_ghost_layers)
+        > 1
+    ):
+        raise ValueError(
+            "At most one of `ghost_layers`, `iteration_slice`, and `infer_ghost_layers` may be set."
         )
 
     #   Collect all relative accesses into domain fields
@@ -457,9 +502,7 @@ def create_full_iteration_space(
     # Otherwise, if an iteration slice was specified, use that
     # Otherwise, use the inferred ghost layers
 
-    from ...codegen.config import AUTO, _AUTO_TYPE
-
-    if ghost_layers is AUTO:
+    if infer_ghost_layers:
         if len(domain_field_accesses) > 0:
             inferred_gls = max(
                 [fa.required_ghost_layers for fa in domain_field_accesses]
@@ -472,7 +515,6 @@ def create_full_iteration_space(
             ctx, inferred_gls, archetype_field
         )
     elif ghost_layers is not None:
-        assert not isinstance(ghost_layers, _AUTO_TYPE)
         ctx.metadata["ghost_layers"] = ghost_layers
         return FullIterationSpace.create_with_ghost_layers(
             ctx, ghost_layers, archetype_field
diff --git a/src/pystencils/backend/platforms/cuda.py b/src/pystencils/backend/platforms/cuda.py
index 31686cb18..2559ac6d2 100644
--- a/src/pystencils/backend/platforms/cuda.py
+++ b/src/pystencils/backend/platforms/cuda.py
@@ -30,7 +30,7 @@ from ..literals import PsLiteral
 from ..functions import PsMathFunction, MathFunctions, CFunction
 
 if TYPE_CHECKING:
-    from ...codegen import GpuOptions, GpuThreadsRange
+    from ...codegen import GpuThreadsRange
 
 int32 = PsSignedIntegerType(width=32, const=False)
 
@@ -52,13 +52,15 @@ class CudaPlatform(GenericGpu):
     """Platform for CUDA-based GPUs."""
 
     def __init__(
-        self, ctx: KernelCreationContext, indexing_cfg: GpuOptions | None = None
+        self, ctx: KernelCreationContext,
+        omit_range_check: bool = False,
+        manual_launch_grid: bool = False,
     ) -> None:
         super().__init__(ctx)
 
-        from ...codegen.config import GpuOptions
+        self._omit_range_check = omit_range_check
+        self._manual_launch_grid = manual_launch_grid
 
-        self._cfg = indexing_cfg if indexing_cfg is not None else GpuOptions()
         self._typify = Typifier(ctx)
 
     @property
@@ -141,7 +143,7 @@ class CudaPlatform(GenericGpu):
     ) -> tuple[PsBlock, GpuThreadsRange | None]:
         dimensions = ispace.dimensions_in_loop_order()
 
-        if not self._cfg.manual_launch_grid:
+        if not self._manual_launch_grid:
             try:
                 threads_range = self.threads_from_ispace(ispace)
             except MaterializationError as e:
@@ -170,7 +172,7 @@ class CudaPlatform(GenericGpu):
                     )
                 )
             )
-            if not self._cfg.omit_range_check:
+            if not self._omit_range_check:
                 conds.append(PsLt(ctr, dim.stop))
 
         indexing_decls = indexing_decls[::-1]
@@ -213,7 +215,7 @@ class CudaPlatform(GenericGpu):
         ]
         body.statements = mappings + body.statements
 
-        if not self._cfg.omit_range_check:
+        if not self._omit_range_check:
             stop = PsExpression.make(ispace.index_list.shape[0])
             condition = PsLt(sparse_ctr, stop)
             ast = PsBlock([sparse_idx_decl, PsConditional(condition, body)])
diff --git a/src/pystencils/backend/platforms/sycl.py b/src/pystencils/backend/platforms/sycl.py
index b5ba7b6c4..594c87b14 100644
--- a/src/pystencils/backend/platforms/sycl.py
+++ b/src/pystencils/backend/platforms/sycl.py
@@ -19,7 +19,7 @@ from ..ast.expressions import (
     PsLe,
     PsTernary,
     PsLookup,
-    PsBufferAcc
+    PsBufferAcc,
 )
 from ..extensions.cpp import CppMethodCall
 
@@ -30,19 +30,21 @@ from ..exceptions import MaterializationError
 from ...types import PsCustomType, PsIeeeFloatType, constify, PsIntegerType
 
 if TYPE_CHECKING:
-    from ...codegen import GpuOptions, GpuThreadsRange
+    from ...codegen import GpuThreadsRange
 
 
 class SyclPlatform(GenericGpu):
 
     def __init__(
-        self, ctx: KernelCreationContext, indexing_cfg: GpuOptions | None = None
+        self,
+        ctx: KernelCreationContext,
+        omit_range_check: bool = False,
+        automatic_block_size: bool = False
     ):
         super().__init__(ctx)
 
-        from ...codegen.config import GpuOptions
-
-        self._cfg = indexing_cfg if indexing_cfg is not None else GpuOptions()
+        self._omit_range_check = omit_range_check
+        self._automatic_block_size = automatic_block_size
 
     @property
     def required_headers(self) -> set[str]:
@@ -138,7 +140,7 @@ class SyclPlatform(GenericGpu):
             indexing_decls.append(
                 PsDeclaration(ctr, dim.start + work_item_idx * dim.step)
             )
-            if not self._cfg.omit_range_check:
+            if not self._omit_range_check:
                 conds.append(PsLt(ctr, dim.stop))
 
         if conds:
@@ -156,7 +158,7 @@ class SyclPlatform(GenericGpu):
         self, body: PsBlock, ispace: SparseIterationSpace
     ) -> tuple[PsBlock, GpuThreadsRange]:
         factory = AstFactory(self._ctx)
-        
+
         id_type = PsCustomType("sycl::id< 1 >", const=True)
         id_symbol = PsExpression.make(self._ctx.get_symbol("id", id_type))
 
@@ -184,7 +186,7 @@ class SyclPlatform(GenericGpu):
         ]
         body.statements = mappings + body.statements
 
-        if not self._cfg.omit_range_check:
+        if not self._omit_range_check:
             stop = PsExpression.make(ispace.index_list.shape[0])
             condition = PsLt(sparse_ctr, stop)
             ast = PsBlock([sparse_idx_decl, PsConditional(condition, body)])
@@ -195,7 +197,7 @@ class SyclPlatform(GenericGpu):
         return ast, self.threads_from_ispace(ispace)
 
     def _item_type(self, rank: int):
-        if not self._cfg.sycl_automatic_block_size:
+        if not self._automatic_block_size:
             return PsCustomType(f"sycl::nd_item< {rank} >", const=True)
         else:
             return PsCustomType(f"sycl::item< {rank} >", const=True)
@@ -207,7 +209,7 @@ class SyclPlatform(GenericGpu):
         item_type = self._item_type(rank)
         item = PsExpression.make(self._ctx.get_symbol("sycl_item", item_type))
 
-        if not self._cfg.sycl_automatic_block_size:
+        if not self._automatic_block_size:
             rhs = CppMethodCall(item, "get_global_id", self._id_type(rank))
         else:
             rhs = CppMethodCall(item, "get_id", self._id_type(rank))
diff --git a/src/pystencils/backend/transformations/add_pragmas.py b/src/pystencils/backend/transformations/add_pragmas.py
index b033e4d58..f44b89c72 100644
--- a/src/pystencils/backend/transformations/add_pragmas.py
+++ b/src/pystencils/backend/transformations/add_pragmas.py
@@ -1,6 +1,5 @@
 from __future__ import annotations
 from dataclasses import dataclass
-from typing import TYPE_CHECKING
 
 from typing import Sequence
 from collections import defaultdict
@@ -10,8 +9,6 @@ from ..ast import PsAstNode
 from ..ast.structural import PsBlock, PsLoop, PsPragma
 from ..ast.expressions import PsExpression
 
-if TYPE_CHECKING:
-    from ...codegen.config import OpenMpOptions
 
 __all__ = ["InsertPragmasAtLoops", "LoopPragma", "AddOpenMP"]
 
@@ -105,19 +102,37 @@ class AddOpenMP:
     `OpenMpConfig` configuration.
     """
 
-    def __init__(self, ctx: KernelCreationContext, omp_params: OpenMpOptions) -> None:
+    def __init__(
+        self,
+        ctx: KernelCreationContext,
+        nesting_depth: int = 0,
+        num_threads: int | None = None,
+        schedule: str | None = None,
+        collapse: int | None = None,
+        omit_parallel: bool = False,
+    ) -> None:
         pragma_text = "omp"
-        pragma_text += " parallel" if not omp_params.omit_parallel_construct else ""
-        pragma_text += f" for schedule({omp_params.schedule})"
 
-        if omp_params.num_threads is not None:
-            pragma_text += f" num_threads({str(omp_params.num_threads)})"
+        if not omit_parallel:
+            pragma_text += " parallel"
+
+        pragma_text += " for"
+
+        if schedule is not None:
+            pragma_text += f" schedule({schedule})"
+
+        if num_threads is not None:
+            pragma_text += f" num_threads({str(num_threads)})"
 
-        if omp_params.collapse > 0:
-            pragma_text += f" collapse({str(omp_params.collapse)})"
+        if collapse is not None:
+            if collapse <= 0:
+                raise ValueError(
+                    f"Invalid value for OpenMP `collapse` clause: {collapse}"
+                )
+            pragma_text += f" collapse({str(collapse)})"
 
         self._insert_pragmas = InsertPragmasAtLoops(
-            ctx, [LoopPragma(pragma_text, omp_params.nesting_depth)]
+            ctx, [LoopPragma(pragma_text, nesting_depth)]
         )
 
     def __call__(self, node: PsAstNode) -> PsAstNode:
diff --git a/src/pystencils/codegen/__init__.py b/src/pystencils/codegen/__init__.py
index da33f9ee2..3780527c6 100644
--- a/src/pystencils/codegen/__init__.py
+++ b/src/pystencils/codegen/__init__.py
@@ -2,7 +2,7 @@ from .target import Target
 from .config import (
     CreateKernelConfig,
     CpuOptions,
-    VectorizationConfig,
+    VectorizationOptions,
     OpenMpOptions,
     GpuOptions,
     AUTO,
@@ -15,7 +15,7 @@ __all__ = [
     "Target",
     "CreateKernelConfig",
     "CpuOptions",
-    "VectorizationConfig",
+    "VectorizationOptions",
     "OpenMpOptions",
     "GpuOptions",
     "AUTO",
diff --git a/src/pystencils/codegen/config.py b/src/pystencils/codegen/config.py
index 4e17ccc48..9abf51222 100644
--- a/src/pystencils/codegen/config.py
+++ b/src/pystencils/codegen/config.py
@@ -4,10 +4,9 @@ from typing import TYPE_CHECKING
 from warnings import warn
 from abc import ABC
 from collections.abc import Collection
-from copy import copy
 
 from typing import Sequence, Generic, TypeVar, Callable, Any, cast
-from dataclasses import dataclass, InitVar, replace, fields
+from dataclasses import dataclass, InitVar, fields
 
 from .target import Target
 from ..field import Field, FieldType
@@ -15,7 +14,6 @@ from ..field import Field, FieldType
 from ..types import (
     PsIntegerType,
     UserTypeSpec,
-    PsIeeeFloatType,
     PsScalarType,
     create_type,
 )
@@ -26,10 +24,6 @@ if TYPE_CHECKING:
     from ..jit import JitBase
 
 
-class PsOptionsError(Exception):
-    """Indicates an option clash in the `CreateKernelConfig`."""
-
-
 Option_T = TypeVar("Option_T")
 Arg_T = TypeVar("Arg_T")
 
@@ -51,6 +45,9 @@ class Option(Generic[Option_T, Arg_T]):
     Through the validator, options may also be set from arguments of a different type (`Arg_T`)
     than their value type (`Option_T`). If `Arg_T` is different from `Option_T`,
     the validator must perform the conversion from the former to the latter.
+
+    .. note::
+        `Arg_T` must always be a supertype of `Option_T`.
     """
 
     def __init__(
@@ -102,10 +99,32 @@ class Option(Generic[Option_T, Arg_T]):
         delattr(obj, self._lookup)
 
 
-class BasicOption(Option[Option_T, Option_T]): ...
+class BasicOption(Option[Option_T, Option_T]): ...  # noqa: E701
 
 
 class ConfigBase(ABC):
+    """Base class for configuration categories.
+
+    This class implements query and retrieval mechanisms for configuration options,
+    as well as deepcopy functionality for categories.
+
+    Subclasses of `ConfigBase` must be `dataclasses`,
+    and all of their instance fields must have one of two descriptor types:
+    - Either `Option`, for scalar options;
+    - Or `Category` for option subcategories.
+
+    `Option` fields must be assigned immutable values, but are otherwise unconstrained.
+    `Category` subobjects must be subclasses of `ConfigBase`.
+
+    **Retrieval** Options set to `None` are considered *unset*, i.e. the user has not provided a value.
+    Through the `Option` descriptor, these options can still have a default value.
+    To retrieve either the user-set value if one exists, or the default value otherwise, use `get_option`.
+
+    **Deep-Copy** When a configuration object is copied, all of its subcategories must be copied along with it,
+    such that changes in the original do not affect the copy, and vice versa.
+    Such a deep copy is performed by the `copy <ConfigBase.copy>` method.
+    """
+
     def get_option(self, name: str) -> Any:
         """Get the value set for the specified option, or the option's default value if none has been set."""
         descr: Option = type(self).__dict__[name]
@@ -125,6 +144,31 @@ class ConfigBase(ABC):
                 if new_val is not None:
                     setattr(self, f.name, new_val)
 
+    def copy(self):
+        """Perform a semi-deep copy of this configuration object.
+
+        This will recursively copy any config subobjects
+        (categories, i.e. subclasses of `ConfigBase` wrapped in the `Category` descriptor)
+        nested in this configuration object. Any other fields will be copied by reference.
+        """
+
+        #   IMPLEMENTATION NOTES
+        #
+        #   We do not need to call `copy` on any subcategories here, since the `Category`
+        #   descriptor already calls `copy` in its `__set__` method,
+        #   which is invoked during the constructor call in the `return` statement.
+        #   Calling `copy` here would result in copying category objects twice.
+        #
+        #   We cannot use the standard library `copy.copy` here, since it merely duplicates
+        #   the instance dictionary and does not call the constructor.
+
+        config_fields = fields(self)  # type: ignore
+        kwargs = dict()
+        for field in config_fields:
+            val = getattr(self, field.name)
+            kwargs[field.name] = val
+        return type(self)(**kwargs)
+
 
 Category_T = TypeVar("Category_T", bound=ConfigBase)
 
@@ -151,7 +195,7 @@ class Category(Generic[Category_T]):
         return cast(Category_T, getattr(obj, self._lookup, None))
 
     def __set__(self, obj, cat: Category_T):
-        setattr(obj, self._lookup, copy(cat))
+        setattr(obj, self._lookup, cat.copy())
 
 
 class _AUTO_TYPE: ...  # noqa: E701
@@ -191,15 +235,9 @@ class OpenMpOptions(ConfigBase):
     Use this option only if you intend to wrap the kernel into an external ``#pragma omp parallel`` region.
     """
 
-    def __post_init__(self):
-        if self.omit_parallel_construct and self.num_threads is not None:
-            raise PsOptionsError(
-                "Cannot specify `num_threads` if `omit_parallel_construct` is set."
-            )
-
 
 @dataclass
-class VectorizationConfig(ConfigBase):
+class VectorizationOptions(ConfigBase):
     """Configuration for the auto-vectorizer.
 
     If any flag in this configuration is set to a value not supported by the CPU specified
@@ -277,7 +315,7 @@ class CpuOptions(ConfigBase):
     """Options governing OpenMP-instrumentation.
     """
 
-    vectorize: Category[OpenMpOptions] = Category(OpenMpOptions())
+    vectorize: Category[VectorizationOptions] = Category(VectorizationOptions())
     """Options governing intrinsic vectorization.
     """
 
@@ -294,14 +332,6 @@ class CpuOptions(ConfigBase):
     to produce cacheline zeroing instructions where possible.
     """
 
-    def get_vectorization_config(self) -> VectorizationConfig | None:
-        if self.vectorize is True:
-            return VectorizationConfig()
-        elif isinstance(self.vectorize, VectorizationConfig):
-            return self.vectorize
-        else:
-            return None
-
 
 @dataclass
 class GpuOptions(ConfigBase):
@@ -326,8 +356,13 @@ class GpuOptions(ConfigBase):
     The launch grid will then have to be specified manually at runtime.
     """
 
-    sycl_automatic_block_size: BasicOption[bool] = BasicOption(True)
-    """If set to `True` while generating for `Target.SYCL`, let the SYCL runtime decide on the block size.
+
+@dataclass
+class SyclOptions(ConfigBase):
+    """Options specific to the `SYCL <Target.SYCL>` target."""
+
+    automatic_block_size: BasicOption[bool] = BasicOption(True)
+    """If set to `True`, let the SYCL runtime decide on the block size.
 
     If set to `True`, the kernel is generated for execution via
     `parallel_for <https://registry.khronos.org/SYCL/specs/sycl-2020/html/sycl-2020.html#_parallel_for_invoke>`_
@@ -431,12 +466,15 @@ class CreateKernelConfig(ConfigBase):
 
     """Target-Specific Options"""
 
-    cpu_optim: Category[CpuOptions] = Category(CpuOptions())
+    cpu: Category[CpuOptions] = Category(CpuOptions())
     """Options for CPU kernels."""
 
-    gpu_indexing: Category[GpuOptions] = Category(GpuOptions())
+    gpu: Category[GpuOptions] = Category(GpuOptions())
     """Options for GPU Kernels."""
 
+    sycl: Category[SyclOptions] = Category(SyclOptions())
+    """Options for SYCL kernels."""
+
     @index_dtype.validate
     def validate_index_type(self, spec: UserTypeSpec):
         dtype = create_type(spec)
@@ -496,11 +534,8 @@ class CreateKernelConfig(ConfigBase):
                 try:
                     from ..jit.gpu_cupy import CupyJit
 
-                    if (
-                        self.gpu_indexing is not None
-                        and self.gpu_indexing.block_size is not None
-                    ):
-                        return CupyJit(self.gpu_indexing.block_size)
+                    if self.gpu is not None and self.gpu.block_size is not None:
+                        return CupyJit(self.gpu.block_size)
                     else:
                         return CupyJit()
 
@@ -533,8 +568,6 @@ class CreateKernelConfig(ConfigBase):
         cpu_vectorize_info: dict | None,
         gpu_indexing_params: dict | None,
     ):  # pragma: no cover
-        optim: CpuOptions | None = None
-
         if data_type is not None:
             _deprecated_option("data_type", "default_dtype")
             warn(
@@ -546,27 +579,33 @@ class CreateKernelConfig(ConfigBase):
 
         if cpu_openmp is not None:
             _deprecated_option("cpu_openmp", "cpu_optim.openmp")
+            warn(
+                "Setting the deprecated `cpu_openmp` option will override any options "
+                "passed in the `cpu.openmp` category.",
+                UserWarning,
+            )
 
-            deprecated_omp: OpenMpOptions | bool
+            deprecated_omp = OpenMpOptions()
             match cpu_openmp:
                 case True:
-                    deprecated_omp = OpenMpOptions()
+                    deprecated_omp.enable = True  # legacy `cpu_openmp=True` must switch OpenMP on
                 case False:
-                    deprecated_omp = False
+                    deprecated_omp.enable = False
                 case int():
-                    deprecated_omp = OpenMpOptions(num_threads=cpu_openmp)
+                    deprecated_omp.enable = True
+                    deprecated_omp.num_threads = cpu_openmp
                 case _:
-                    raise PsOptionsError(
+                    raise ValueError(
                         f"Invalid option for `cpu_openmp`: {cpu_openmp}"
                     )
 
-            optim = CpuOptions(openmp=deprecated_omp)
+            self.cpu.openmp = deprecated_omp
 
         if cpu_vectorize_info is not None:
             _deprecated_option("cpu_vectorize_info", "cpu_optim.vectorize")
             if "instruction_set" in cpu_vectorize_info:
                 if self.target != Target.GenericCPU:
-                    raise PsOptionsError(
+                    raise ValueError(
                         "Setting 'instruction_set' in the deprecated 'cpu_vectorize_info' option is only "
                         "valid if `target == Target.CPU`."
                     )
@@ -585,7 +624,7 @@ class CreateKernelConfig(ConfigBase):
                     case "avx512vl":
                         vec_target = Target.X86_AVX512 | Target._VL
                     case _:
-                        raise PsOptionsError(
+                        raise ValueError(
                             f'Value {isa} in `cpu_vectorize_info["instruction_set"]` is not supported.'
                         )
 
@@ -598,7 +637,14 @@ class CreateKernelConfig(ConfigBase):
 
                 self.target = vec_target
 
-            deprecated_vec_opts = VectorizationConfig(
+            warn(
+                "Setting the deprecated `cpu_vectorize_info` will override any options "
+                "passed in the `cpu.vectorize` category.",
+                UserWarning,
+            )
+
+            deprecated_vec_opts = VectorizationOptions(
+                enable=True,
                 assume_inner_stride_one=cpu_vectorize_info.get(
                     "assume_inner_stride_one", False
                 ),
@@ -606,28 +652,16 @@ class CreateKernelConfig(ConfigBase):
                 use_nontemporal_stores=cpu_vectorize_info.get("nontemporal", False),
             )
 
-            if optim is not None:
-                optim = replace(optim, vectorize=deprecated_vec_opts)
-            else:
-                optim = CpuOptions(vectorize=deprecated_vec_opts)
-
-        if optim is not None:
-            if self.cpu_optim is not None:
-                raise PsOptionsError(
-                    "Cannot specify both `cpu_optim` and a deprecated legacy optimization option at the same time."
-                )
-            else:
-                self.cpu_optim = optim
+            self.cpu.vectorize = deprecated_vec_opts
 
         if gpu_indexing_params is not None:
             _deprecated_option("gpu_indexing_params", "gpu_indexing")
+            warn(
+                "Setting the deprecated `gpu_indexing_params` will override any options "
+                "passed in the `gpu` category."
+            )
 
-            if self.gpu_indexing is not None:
-                raise PsOptionsError(
-                    "Cannot specify both `gpu_indexing` and the deprecated `gpu_indexing_params` at the same time."
-                )
-
-            self.gpu_indexing = GpuOptions(
+            self.gpu = GpuOptions(
                 block_size=gpu_indexing_params.get("block_size", None)
             )
 
diff --git a/src/pystencils/codegen/driver.py b/src/pystencils/codegen/driver.py
index 47bcb905d..6f44e718d 100644
--- a/src/pystencils/codegen/driver.py
+++ b/src/pystencils/codegen/driver.py
@@ -3,7 +3,14 @@ from typing import cast, Sequence, Iterable, TYPE_CHECKING
 from dataclasses import dataclass, replace
 
 from .target import Target
-from .config import CreateKernelConfig, OpenMpOptions, VectorizationConfig, AUTO, GhostLayerSpec, IterationSliceSpec
+from .config import (
+    CreateKernelConfig,
+    VectorizationOptions,
+    AUTO,
+    _AUTO_TYPE,
+    GhostLayerSpec,
+    IterationSliceSpec,
+)
 from .kernel import Kernel, GpuKernel, GpuThreadsRange
 from .properties import PsSymbolProperty, FieldShape, FieldStride, FieldBasePtr
 from .parameters import Parameter
@@ -115,15 +122,17 @@ class DefaultKernelCreationDriver:
             int(cfg.is_option_set("ghost_layers"))
             + int(cfg.is_option_set("iteration_slice"))
             + int(cfg.is_option_set("index_field"))
-        ) 
-        
+        )
+
         if num_ispace_options_set > 1:
             raise ValueError(
                 "At most one of the options 'ghost_layers' 'iteration_slice' and 'index_field' may be set."
             )
-        
+
         self._ghost_layers: GhostLayerSpec | None = cfg.get_option("ghost_layers")
-        self._iteration_slice: IterationSliceSpec | None = cfg.get_option("iteration_slice")
+        self._iteration_slice: IterationSliceSpec | None = cfg.get_option(
+            "iteration_slice"
+        )
         self._index_field: Field | None = cfg.get_option("index_field")
 
         if num_ispace_options_set == 0:
@@ -235,22 +244,26 @@ class DefaultKernelCreationDriver:
         )
         analysis(assignments)
 
-        if self._cfg.index_field is not None:
+        if self._index_field is not None:
             ispace = create_sparse_iteration_space(
                 self._ctx, assignments, index_field=self._cfg.index_field
             )
         else:
-            gls = self._cfg.ghost_layers
-            islice = self._cfg.iteration_slice
-
-            if gls is None and islice is None:
-                gls = AUTO
+            gls: GhostLayerSpec | None
+            if self._ghost_layers == AUTO:
+                infer_gls = True
+                gls = None
+            else:
+                assert not isinstance(self._ghost_layers, _AUTO_TYPE)
+                infer_gls = False
+                gls = self._ghost_layers
 
             ispace = create_full_iteration_space(
                 self._ctx,
                 assignments,
                 ghost_layers=gls,
-                iteration_slice=islice,
+                iteration_slice=self._iteration_slice,
+                infer_ghost_layers=infer_gls,
             )
 
         self._ctx.set_iteration_space(ispace)
@@ -279,7 +292,7 @@ class DefaultKernelCreationDriver:
         if self._intermediates is not None:
             self._intermediates.cpu_hoist_invariants = kernel_ast.clone()
 
-        cpu_cfg = self._cfg.cpu_optim
+        cpu_cfg = self._cfg.cpu
 
         if cpu_cfg is None:
             return kernel_ast
@@ -288,30 +301,41 @@ class DefaultKernelCreationDriver:
             raise NotImplementedError("Loop blocking not implemented yet.")
 
         kernel_ast = self._vectorize(kernel_ast)
+        kernel_ast = self._add_openmp(kernel_ast)
+
+        if cpu_cfg.use_cacheline_zeroing:
+            raise NotImplementedError("CL-zeroing not implemented yet")
+
+        return kernel_ast
+
+    def _add_openmp(self, kernel_ast: PsBlock) -> PsBlock:
+        omp_options = self._cfg.cpu.openmp
+        enable_omp: bool = omp_options.get_option("enable")
 
-        if cpu_cfg.openmp is not False:
+        if enable_omp:
             from ..backend.transformations import AddOpenMP
 
-            params = (
-                cpu_cfg.openmp
-                if isinstance(cpu_cfg.openmp, OpenMpOptions)
-                else OpenMpOptions()
+            add_omp = AddOpenMP(
+                self._ctx,
+                nesting_depth=omp_options.get_option("nesting_depth"),
+                num_threads=omp_options.get_option("num_threads"),
+                schedule=omp_options.get_option("schedule"),
+                collapse=omp_options.get_option("collapse"),
+                omit_parallel=omp_options.get_option("omit_parallel_construct"),
             )
-            add_omp = AddOpenMP(self._ctx, params)
             kernel_ast = cast(PsBlock, add_omp(kernel_ast))
 
             if self._intermediates is not None:
                 self._intermediates.cpu_openmp = kernel_ast.clone()
 
-        if cpu_cfg.use_cacheline_zeroing:
-            raise NotImplementedError("CL-zeroing not implemented yet")
-
         return kernel_ast
 
     def _vectorize(self, kernel_ast: PsBlock) -> PsBlock:
-        assert self._cfg.cpu_optim is not None
-        vec_config = self._cfg.cpu_optim.get_vectorization_config()
-        if vec_config is None:
+        vec_options = self._cfg.cpu.vectorize
+
+        enable_vec = vec_options.get_option("enable")
+
+        if not enable_vec:
             return kernel_ast
 
         from ..backend.transformations import LoopVectorizer, SelectIntrinsics
@@ -328,7 +352,9 @@ class DefaultKernelCreationDriver:
         inner_loop_dim = ispace.dimensions[inner_loop_coord]
 
         #   Apply stride (TODO: and alignment) assumptions
-        if vec_config.assume_inner_stride_one:
+        assume_unit_stride: bool = vec_options.get_option("assume_inner_stride_one")
+
+        if assume_unit_stride:
             for field in self._ctx.fields:
                 buf = self._ctx.get_buffer(field)
                 inner_stride = buf.strides[inner_loop_coord]
@@ -344,14 +370,14 @@ class DefaultKernelCreationDriver:
                     #   TODO: Communicate assumption to runtime system via a precondition
 
         #   Call loop vectorizer
-        if vec_config.lanes is None:
-            lanes = VectorizationConfig.default_lanes(
+        num_lanes: int | None = vec_options.get_option("lanes")
+
+        if num_lanes is None:
+            num_lanes = VectorizationOptions.default_lanes(
                 self._target, cast(PsScalarType, self._ctx.default_dtype)
             )
-        else:
-            lanes = vec_config.lanes
 
-        vectorizer = LoopVectorizer(self._ctx, lanes)
+        vectorizer = LoopVectorizer(self._ctx, num_lanes)
 
         def loop_predicate(loop: PsLoop):
             return loop.counter.symbol == inner_loop_dim.counter
@@ -397,15 +423,30 @@ class DefaultKernelCreationDriver:
                 )
 
         elif Target._GPU in self._target:
+            gpu_opts = self._cfg.gpu
+            omit_range_check: bool = gpu_opts.get_option("omit_range_check")
+
             match self._target:
                 case Target.SYCL:
                     from ..backend.platforms import SyclPlatform
 
-                    return SyclPlatform(self._ctx, self._cfg.gpu_indexing)
+                    auto_block_size: bool = self._cfg.sycl.get_option("automatic_block_size")
+
+                    return SyclPlatform(
+                        self._ctx,
+                        omit_range_check=omit_range_check,
+                        automatic_block_size=auto_block_size,
+                    )
                 case Target.CUDA:
                     from ..backend.platforms import CudaPlatform
 
-                    return CudaPlatform(self._ctx, self._cfg.gpu_indexing)
+                    manual_grid = gpu_opts.get_option("manual_launch_grid")
+
+                    return CudaPlatform(
+                        self._ctx,
+                        omit_range_check=omit_range_check,
+                        manual_launch_grid=manual_grid,
+                    )
 
         raise NotImplementedError(
             f"Code generation for target {self._target} not implemented"
diff --git a/src/pystencils/types/types.py b/src/pystencils/types/types.py
index 7645a452f..825ac1d5d 100644
--- a/src/pystencils/types/types.py
+++ b/src/pystencils/types/types.py
@@ -35,7 +35,7 @@ class PsCustomType(PsType):
         return self._name
 
     def c_string(self) -> str:
-        return f"{self._const_string()} {self._name}"
+        return f"{self._const_string()}{self._name}"
 
     def __repr__(self) -> str:
         return f"CustomType( {self.name}, const={self.const} )"
diff --git a/tests/codegen/test_config.py b/tests/codegen/test_config.py
index 715830e70..0f7591b3e 100644
--- a/tests/codegen/test_config.py
+++ b/tests/codegen/test_config.py
@@ -2,8 +2,17 @@ import pytest
 
 from dataclasses import dataclass
 import numpy as np
-from pystencils.codegen.config import BasicOption, Option, Category, ConfigBase, CreateKernelConfig
-from pystencils.types.quick import Int, UInt
+from pystencils.codegen.config import (
+    BasicOption,
+    Option,
+    Category,
+    ConfigBase,
+    CreateKernelConfig,
+    CpuOptions
+)
+from pystencils.field import Field, FieldType
+from pystencils.types.quick import Int, UInt, Fp, Ptr
+from pystencils.types import PsVectorType
 
 
 def test_descriptors():
@@ -11,16 +20,19 @@ def test_descriptors():
     @dataclass
     class SampleCategory(ConfigBase):
         val1: BasicOption[int] = BasicOption(2)
-        val2: Option[bool, str] = Option(False)
+        val2: Option[bool, str | bool] = Option(False)
 
         @val2.validate
-        def validate_val2(self, v: str):
-            if v.lower() in ("off", "false", "no"):
-                return False
-            elif v.lower() in ("on", "true", "yes"):
-                return True
-            
-            raise ValueError()
+        def validate_val2(self, v: str | bool):
+            if isinstance(v, str):
+                if v.lower() in ("off", "false", "no"):
+                    return False
+                elif v.lower() in ("on", "true", "yes"):
+                    return True
+
+                raise ValueError()
+            else:
+                return v
 
     @dataclass
     class SampleConfig(ConfigBase):
@@ -28,7 +40,7 @@ def test_descriptors():
         val: BasicOption[str] = BasicOption("fallback")
 
     cfg = SampleConfig()
-    
+
     #   Check unset and default values
     assert cfg.val is None
     assert cfg.get_option("val") == "fallback"
@@ -55,13 +67,41 @@ def test_descriptors():
     cfg.cat = c
     assert cfg.cat.val1 == 32
     assert cfg.cat.val2 is True
-    
+
     assert cfg.cat is not c
     c.val1 = 13
     assert cfg.cat.val1 == 32
 
+    #   Check that category objects on two config objects are not the same
+    cfg1 = SampleConfig()
+    cfg2 = SampleConfig()
+
+    assert cfg1.cat is not cfg2.cat
+
+
+def test_category_init():
+    cfg1 = CreateKernelConfig()
+    cfg2 = CreateKernelConfig()
+
+    assert cfg1.cpu is not cfg2.cpu
+    assert cfg1.cpu.openmp is not cfg2.cpu.openmp
+    assert cfg1.cpu.vectorize is not cfg2.cpu.vectorize
+    assert cfg1.gpu is not cfg2.gpu
+
+
+def test_category_copy():
+    cfg = CreateKernelConfig()
+    cpu_repl = CpuOptions()
+    cpu_repl.openmp.num_threads = 42
+
+    cfg.cpu = cpu_repl
+    assert cfg.cpu.openmp.num_threads == 42
+    assert cfg.cpu is not cpu_repl
+    assert cfg.cpu.openmp is not cpu_repl.openmp
+
 
 def test_config_validation():
+    #   Check index dtype validation
     cfg = CreateKernelConfig(index_dtype="int32")
     assert cfg.index_dtype == Int(32)
     cfg.index_dtype = np.uint64
@@ -72,3 +112,27 @@ def test_config_validation():
 
     with pytest.raises(ValueError):
         cfg.index_dtype = "double"
+
+    #   Check default dtype validation
+    cfg = CreateKernelConfig(default_dtype="float32")
+    assert cfg.default_dtype == Fp(32)
+    cfg.default_dtype = np.int64
+    assert cfg.default_dtype == Int(64)
+
+    with pytest.raises(ValueError):
+        cfg.default_dtype = PsVectorType(Fp(64), 4)
+
+    with pytest.raises(ValueError):
+        _ = CreateKernelConfig(default_dtype=Ptr(Fp(32)))
+
+    #   Check index field validation
+    idx_field = Field.create_generic(
+        "idx", spatial_dimensions=1, field_type=FieldType.INDEXED
+    )
+    cfg.index_field = idx_field
+    assert cfg.index_field == idx_field
+
+    with pytest.raises(ValueError):
+        cfg.index_field = Field.create_generic(
+            "idx", spatial_dimensions=1, field_type=FieldType.GENERIC
+        )
diff --git a/tests/fixtures.py b/tests/fixtures.py
index 8c7f12015..71e54bad8 100644
--- a/tests/fixtures.py
+++ b/tests/fixtures.py
@@ -31,15 +31,17 @@ except ImportError:
 AVAILABLE_TARGETS += ps.Target.available_vector_cpu_targets()
 TARGET_IDS = [t.name for t in AVAILABLE_TARGETS]
 
+
 @pytest.fixture(params=AVAILABLE_TARGETS, ids=TARGET_IDS)
 def target(request) -> ps.Target:
     """Provides all code generation targets available on the current hardware"""
     return request.param
 
+
 @pytest.fixture
 def gen_config(target: ps.Target):
     """Default codegen configuration for the current target.
-    
+
     For GPU targets, set default indexing options.
     For vector-CPU targets, set default vectorization config.
     """
@@ -47,25 +49,24 @@ def gen_config(target: ps.Target):
     gen_config = ps.CreateKernelConfig(target=target)
 
     if target.is_vector_cpu():
-        gen_config = replace(
-            gen_config,
-            cpu_optim=ps.CpuOptions(
-                vectorize=ps.VectorizationConfig(assume_inner_stride_one=True)
-            ),
-        )
+        gen_config.cpu.vectorize.enable = True
+        gen_config.cpu.vectorize.assume_inner_stride_one = True
 
     return gen_config
 
+
 @pytest.fixture()
 def xp(target: ps.Target) -> ModuleType:
     """Primary array module for the current target.
-    
+
     Returns:
         `cupy` if `target == Target.CUDA`, and `numpy` otherwise
     """
     if target == ps.Target.CUDA:
         import cupy as xp
+
         return xp
     else:
         import numpy as np
+
         return np
diff --git a/tests/kernelcreation/test_buffer_gpu.py b/tests/kernelcreation/test_buffer_gpu.py
index 873cc1213..db8cc2596 100644
--- a/tests/kernelcreation/test_buffer_gpu.py
+++ b/tests/kernelcreation/test_buffer_gpu.py
@@ -300,7 +300,7 @@ def test_iteration_slices(gpu_indexing):
         gpu_dst_arr.fill(0)
 
         config = CreateKernelConfig(target=Target.GPU, iteration_slice=pack_slice,
-                                    gpu_indexing=gpu_indexing)
+                                    gpu=gpu_indexing)
 
         pack_code = create_kernel(pack_eqs, config=config)
         pack_kernel = pack_code.compile()
@@ -313,7 +313,7 @@ def test_iteration_slices(gpu_indexing):
             unpack_eqs.append(eq)
 
         config = CreateKernelConfig(target=Target.GPU, iteration_slice=pack_slice,
-                                    gpu_indexing=gpu_indexing)
+                                    gpu=gpu_indexing)
 
         unpack_code = create_kernel(unpack_eqs, config=config)
         unpack_kernel = unpack_code.compile()
diff --git a/tests/kernelcreation/test_gpu.py b/tests/kernelcreation/test_gpu.py
index 57de84b7a..97f0c0fa9 100644
--- a/tests/kernelcreation/test_gpu.py
+++ b/tests/kernelcreation/test_gpu.py
@@ -112,7 +112,7 @@ def test_ghost_layer():
     update_rule = Assignment(dst_field[0, 0], src_field[0, 0])
     ghost_layers = [(1, 2), (2, 1)]
 
-    config = CreateKernelConfig(target=Target.GPU, ghost_layers=ghost_layers, gpu_indexing="line")
+    config = CreateKernelConfig(target=Target.GPU, ghost_layers=ghost_layers, gpu="line")
     ast = create_kernel(sympy_cse_on_assignment_list([update_rule]), config=config)
     kernel = ast.compile()
 
@@ -135,7 +135,7 @@ def test_setting_value():
     f = Field.create_generic("f", 2)
     update_rule = [Assignment(f(0), sp.Symbol("value"))]
 
-    config = CreateKernelConfig(target=Target.GPU, gpu_indexing="line", iteration_slice=iteration_slice)
+    config = CreateKernelConfig(target=Target.GPU, gpu="line", iteration_slice=iteration_slice)
     ast = create_kernel(sympy_cse_on_assignment_list(update_rule), config=config)
     kernel = ast.compile()
 
@@ -207,7 +207,7 @@ def test_four_dimensional_kernel(gpu_indexing, layout, shape):
     f = Field.create_from_numpy_array("f", arr_cpu)
     update_rule = [Assignment(f.center, sp.Symbol("value"))]
 
-    config = CreateKernelConfig(target=Target.GPU, gpu_indexing=gpu_indexing, iteration_slice=iteration_slice)
+    config = CreateKernelConfig(target=Target.GPU, gpu=gpu_indexing, iteration_slice=iteration_slice)
     ast = create_kernel(update_rule, config=config)
     kernel = ast.compile()
 
diff --git a/tests/kernelcreation/test_iteration_slices.py b/tests/kernelcreation/test_iteration_slices.py
index 5c7b4d8cb..892d2d949 100644
--- a/tests/kernelcreation/test_iteration_slices.py
+++ b/tests/kernelcreation/test_iteration_slices.py
@@ -104,6 +104,9 @@ def test_symbolic_slice(gen_config: CreateKernelConfig, xp):
     update = Assignment(f.center(), 1)
     islice = make_slice[sy:ey, sx:ex]
     gen_config = replace(gen_config, iteration_slice=islice)
+
+    print(repr(gen_config))
+
     kernel = create_kernel(update, gen_config).compile()
 
     for slic in [make_slice[:, :], make_slice[1:-1, 2:-2], make_slice[8:14, 7:11]]:
@@ -140,9 +143,7 @@ def test_triangle_pattern(gen_config: CreateKernelConfig, xp):
     gen_config = replace(gen_config, iteration_slice=islice)
 
     if gen_config.target == Target.CUDA:
-        gen_config = replace(
-            gen_config, gpu_indexing=GpuOptions(manual_launch_grid=True)
-        )
+        gen_config.gpu.manual_launch_grid = True
 
     kernel = create_kernel(update, gen_config).compile()
 
@@ -170,12 +171,10 @@ def test_red_black_pattern(gen_config: CreateKernelConfig, xp):
     outer_counter = DEFAULTS.spatial_counters[0]
     start = sp.Piecewise((0, sp.Eq(int_rem(outer_counter, 2), 0)), (1, True))
     islice = make_slice[:, start::2]
-    gen_config = replace(gen_config, iteration_slice=islice)
+    gen_config.iteration_slice = islice
 
     if gen_config.target == Target.CUDA:
-        gen_config = replace(
-            gen_config, gpu_indexing=GpuOptions(manual_launch_grid=True)
-        )
+        gen_config.gpu.manual_launch_grid = True
 
     try:
         kernel = create_kernel(update, gen_config).compile()
diff --git a/tests/nbackend/kernelcreation/test_openmp.py b/tests/nbackend/kernelcreation/test_openmp.py
index ae775ca20..07a2f1026 100644
--- a/tests/nbackend/kernelcreation/test_openmp.py
+++ b/tests/nbackend/kernelcreation/test_openmp.py
@@ -15,20 +15,21 @@ from pystencils.backend.ast.structural import PsLoop, PsPragma
 
 @pytest.mark.parametrize("nesting_depth", range(3))
 @pytest.mark.parametrize("schedule", ["static", "static,16", "dynamic", "auto"])
-@pytest.mark.parametrize("collapse", range(3))
+@pytest.mark.parametrize("collapse", [None, 1, 2])
 @pytest.mark.parametrize("omit_parallel_construct", range(3))
 def test_openmp(nesting_depth, schedule, collapse, omit_parallel_construct):
     f, g = fields("f, g: [3D]")
     asm = Assignment(f.center(0), g.center(0))
 
     omp = OpenMpOptions(
+        enable=True,
         nesting_depth=nesting_depth,
         schedule=schedule,
         collapse=collapse,
         omit_parallel_construct=omit_parallel_construct,
     )
     gen_config = CreateKernelConfig(
-        target=Target.CPU, cpu_optim=CpuOptions(openmp=omp)
+        target=Target.CPU, cpu=CpuOptions(openmp=omp)
     )
 
     kernel = create_kernel(asm, gen_config)
@@ -55,7 +56,7 @@ def test_openmp(nesting_depth, schedule, collapse, omit_parallel_construct):
     expected_tokens = {"omp", "for", f"schedule({omp.schedule})"}
     if not omp.omit_parallel_construct:
         expected_tokens.add("parallel")
-    if omp.collapse > 0:
+    if omp.collapse is not None:
         expected_tokens.add(f"collapse({omp.collapse})")
 
     assert tokens == expected_tokens
diff --git a/tests/nbackend/kernelcreation/test_options.py b/tests/nbackend/kernelcreation/test_options.py
deleted file mode 100644
index fefcc98fe..000000000
--- a/tests/nbackend/kernelcreation/test_options.py
+++ /dev/null
@@ -1,28 +0,0 @@
-import pytest
-
-from pystencils.field import Field, FieldType
-from pystencils.types.quick import *
-from pystencils.codegen.config import (
-    CreateKernelConfig,
-    PsOptionsError,
-)
-
-
-def test_invalid_iteration_region_options():
-    idx_field = Field.create_generic(
-        "idx", spatial_dimensions=1, field_type=FieldType.INDEXED
-    )
-    with pytest.raises(PsOptionsError):
-        CreateKernelConfig(
-            ghost_layers=2, iteration_slice=(slice(1, -1), slice(1, -1))
-        )
-    with pytest.raises(PsOptionsError):
-        CreateKernelConfig(ghost_layers=2, index_field=idx_field)
-
-
-def test_index_field_options():
-    with pytest.raises(PsOptionsError):
-        idx_field = Field.create_generic(
-            "idx", spatial_dimensions=1, field_type=FieldType.GENERIC
-        )
-        CreateKernelConfig(index_field=idx_field)
diff --git a/tests/nbackend/transformations/test_add_pragmas.py b/tests/nbackend/transformations/test_add_pragmas.py
index 1d8dd1ded..c1749fe28 100644
--- a/tests/nbackend/transformations/test_add_pragmas.py
+++ b/tests/nbackend/transformations/test_add_pragmas.py
@@ -12,6 +12,7 @@ from pystencils.backend.ast import dfs_preorder
 from pystencils.backend.ast.structural import PsBlock, PsPragma, PsLoop
 from pystencils.backend.transformations import InsertPragmasAtLoops, LoopPragma
 
+
 def test_insert_pragmas():
     ctx = KernelCreationContext()
     factory = AstFactory(ctx)
diff --git a/tests/test_quicktests.py b/tests/test_quicktests.py
index 3e7f4f071..d27a5e61b 100644
--- a/tests/test_quicktests.py
+++ b/tests/test_quicktests.py
@@ -74,8 +74,8 @@ def test_basic_vectorization():
     ast = ps.create_kernel(
         update_rule,
         target=target,
-        cpu_optim=ps.CpuOptions(
-            vectorize=ps.VectorizationConfig(assume_inner_stride_one=True)
+        cpu=ps.CpuOptions(
+            vectorize=ps.VectorizationOptions(enable=True, assume_inner_stride_one=True)
         ),
     )
 
-- 
GitLab


From 315cc8c0cdafb0b2f91a559c680f246cfb8b62e7 Mon Sep 17 00:00:00 2001
From: Frederik Hennig <frederik.hennig@fau.de>
Date: Mon, 20 Jan 2025 15:02:11 +0100
Subject: [PATCH 4/7] Update documentation pages.

 - Update docs on config system
 - Strictly separate API reference from user guides
 - Rename "Reference Guides" to "User Manual"
 - Clean up API docs
 - Fix various docstring errors
---
 docs/Makefile                                 |  2 +-
 docs/source/api/codegen.rst                   | 38 ++++++++---
 docs/source/api/jit.rst                       |  4 +-
 docs/source/api/symbolic/assignments.md       | 16 +++++
 docs/source/api/{ => symbolic}/field.rst      |  4 +-
 docs/source/api/symbolic/index.md             |  9 +++
 .../api/{ => symbolic}/sympyextensions.rst    |  4 +-
 docs/source/{reference => api}/types.rst      |  0
 docs/source/contributing/index.md             |  2 +-
 docs/source/index.rst                         | 15 ++---
 docs/source/{migration.rst => migration.md}   | 67 +++++++++++++------
 .../{reference => user_manual}/gpu_kernels.md |  9 +--
 .../kernelcreation.md                         |  9 +--
 .../symbolic_language.rst                     |  7 +-
 src/pystencils/__init__.py                    |  8 ---
 .../backend/transformations/add_pragmas.py    |  3 +-
 src/pystencils/codegen/__init__.py            |  8 ---
 src/pystencils/codegen/config.py              | 44 ++++++------
 18 files changed, 144 insertions(+), 105 deletions(-)
 create mode 100644 docs/source/api/symbolic/assignments.md
 rename docs/source/api/{ => symbolic}/field.rst (97%)
 create mode 100644 docs/source/api/symbolic/index.md
 rename docs/source/api/{ => symbolic}/sympyextensions.rst (97%)
 rename docs/source/{reference => api}/types.rst (100%)
 rename docs/source/{migration.rst => migration.md} (54%)
 rename docs/source/{reference => user_manual}/gpu_kernels.md (97%)
 rename docs/source/{reference => user_manual}/kernelcreation.md (99%)
 rename docs/source/{reference => user_manual}/symbolic_language.rst (96%)

diff --git a/docs/Makefile b/docs/Makefile
index a293f14ee..0cfe1ab8b 100644
--- a/docs/Makefile
+++ b/docs/Makefile
@@ -22,7 +22,7 @@ html:
 	@$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
 
 clean:
-	rm -rf source/reference/generated
 	rm -rf source/api/generated
+	rm -rf source/api/symbolic/generated
 	rm -rf source/backend/generated
 	@$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
\ No newline at end of file
diff --git a/docs/source/api/codegen.rst b/docs/source/api/codegen.rst
index d65e9a358..1fb83fe5f 100644
--- a/docs/source/api/codegen.rst
+++ b/docs/source/api/codegen.rst
@@ -1,5 +1,5 @@
-pystencils.codegen
-==================
+Code Generation
+===============
 
 .. module:: pystencils.codegen
 
@@ -15,16 +15,19 @@ Invocation
 Configuration
 -------------
 
+.. module:: pystencils.codegen.config
+
 .. autosummary::
   :toctree: generated
   :nosignatures:
-  :template: autosummary/entire_class.rst
+  :template: autosummary/recursive_class.rst
 
   CreateKernelConfig
-  CpuOptimConfig
-  OpenMpConfig
-  VectorizationConfig
-  GpuIndexingConfig
+  CpuOptions
+  OpenMpOptions
+  VectorizationOptions
+  GpuOptions
+  SyclOptions
 
 .. autosummary::
   :toctree: generated
@@ -32,9 +35,24 @@ Configuration
 
   AUTO
 
+.. dropdown:: Configuration System Implementation Details
+
+  .. autosummary::
+    :toctree: generated
+    :nosignatures:
+    :template: autosummary/entire_class.rst
+
+    Option
+    BasicOption
+    Category
+    ConfigBase
+
+
 Target Specification
 --------------------
 
+.. module:: pystencils.codegen.target
+
 .. autosummary::
   :toctree: generated
   :nosignatures:
@@ -45,12 +63,14 @@ Target Specification
 Code Generation Drivers
 -----------------------
 
+.. module:: pystencils.codegen.driver
+
 .. autosummary::
   :toctree: generated
   :nosignatures:
   :template: autosummary/entire_class.rst
 
-  driver.DefaultKernelCreationDriver
+  DefaultKernelCreationDriver
 
 .. autosummary::
   :toctree: generated
@@ -61,6 +81,8 @@ Code Generation Drivers
 Output Code Objects
 -------------------
 
+.. currentmodule:: pystencils.codegen
+
 .. autosummary::
   :toctree: generated
   :nosignatures:
diff --git a/docs/source/api/jit.rst b/docs/source/api/jit.rst
index 7bcd9989c..f2e271db3 100644
--- a/docs/source/api/jit.rst
+++ b/docs/source/api/jit.rst
@@ -1,5 +1,5 @@
-pystencils.jit
-==============
+JIT Compilation
+===============
 
 .. module:: pystencils.jit
 
diff --git a/docs/source/api/symbolic/assignments.md b/docs/source/api/symbolic/assignments.md
new file mode 100644
index 000000000..69446a8a5
--- /dev/null
+++ b/docs/source/api/symbolic/assignments.md
@@ -0,0 +1,16 @@
+# Assignments and AssignmentCollection
+
+```{eval-rst}
+
+.. py:class:: pystencils.Assignment
+
+    Monkeypatched variant of `sympy.codegen.ast.Assignment`.
+    Represents an assignment of an expression to a symbol.
+
+.. autosummary::
+    :toctree: generated
+    :nosignatures:
+    :template: autosummary/recursive_class.rst
+
+    pystencils.AssignmentCollection
+```
diff --git a/docs/source/api/field.rst b/docs/source/api/symbolic/field.rst
similarity index 97%
rename from docs/source/api/field.rst
rename to docs/source/api/symbolic/field.rst
index 79cc12a3a..33219c059 100644
--- a/docs/source/api/field.rst
+++ b/docs/source/api/symbolic/field.rst
@@ -1,5 +1,5 @@
-pystencils.field
-================
+Fields
+======
 
 .. module:: pystencils.field
 
diff --git a/docs/source/api/symbolic/index.md b/docs/source/api/symbolic/index.md
new file mode 100644
index 000000000..fad3df20b
--- /dev/null
+++ b/docs/source/api/symbolic/index.md
@@ -0,0 +1,9 @@
+# Symbolic Toolbox
+
+:::{toctree}
+:maxdepth: 1
+
+field
+assignments
+sympyextensions
+:::
diff --git a/docs/source/api/sympyextensions.rst b/docs/source/api/symbolic/sympyextensions.rst
similarity index 97%
rename from docs/source/api/sympyextensions.rst
rename to docs/source/api/symbolic/sympyextensions.rst
index d377f998e..e3d10fbdf 100644
--- a/docs/source/api/sympyextensions.rst
+++ b/docs/source/api/symbolic/sympyextensions.rst
@@ -1,5 +1,5 @@
-pystencils.sympyextensions
-==========================
+Extensions to SymPy
+===================
 
 .. module:: pystencils.sympyextensions
 
diff --git a/docs/source/reference/types.rst b/docs/source/api/types.rst
similarity index 100%
rename from docs/source/reference/types.rst
rename to docs/source/api/types.rst
diff --git a/docs/source/contributing/index.md b/docs/source/contributing/index.md
index 39e68b06f..04ad821ce 100644
--- a/docs/source/contributing/index.md
+++ b/docs/source/contributing/index.md
@@ -1,4 +1,4 @@
-# Contributor Guide
+# Contribution Guide
 
 Welcome to the Contributor's Guide to pystencils!
 If you are interested in contributing to the development of pystencils, this is the place to start.
diff --git a/docs/source/index.rst b/docs/source/index.rst
index 5ddec09f2..cb455c8b4 100644
--- a/docs/source/index.rst
+++ b/docs/source/index.rst
@@ -77,19 +77,18 @@ Topics
 
 .. toctree::
   :maxdepth: 1
-  :caption: Reference Guides
+  :caption: User Manual
 
-  reference/symbolic_language
-  reference/kernelcreation
-  reference/gpu_kernels
-  reference/types
+  user_manual/symbolic_language
+  user_manual/kernelcreation
+  user_manual/gpu_kernels
 
 .. toctree::
   :maxdepth: 1
-  :caption: API
+  :caption: API Reference
 
-  api/field
-  api/sympyextensions
+  api/symbolic/index
+  api/types
   api/codegen
   api/jit
 
diff --git a/docs/source/migration.rst b/docs/source/migration.md
similarity index 54%
rename from docs/source/migration.rst
rename to docs/source/migration.md
index ea59d8881..c3cb17d0f 100644
--- a/docs/source/migration.rst
+++ b/docs/source/migration.md
@@ -1,36 +1,62 @@
-.. _page_v2_migration:
+---
+jupytext:
+  formats: md:myst
+  text_representation:
+    extension: .md
+    format_name: myst
+kernelspec:
+  display_name: Python 3 (ipykernel)
+  language: python
+  name: python3
+mystnb:
+  execution_mode: cache
+---
 
-***************************
-Version 2.0 Migration Guide
-***************************
+(page_v2_migration)=
+# Version 2.0 Migration Guide
 
 With version 2.0, many APIs of *pystencils* will be changed; old interfaces are being deprecated
 and new systems are put in place.
 This page is a still-incomplete list of these changes, with advice on how to migrate your code
 from pystencils 1.x to pystencils 2.0.
 
-Kernel Creation
-===============
+```{code-cell} ipython3
+:tags: [remove-cell]
 
-Configuration
--------------
+import pystencils as ps
+```
 
-The API of `create_kernel`, and the configuration options of the `CreateKernelConfig`, have changed significantly:
+
+## Kernel Creation
+
+### Configuration
+
+The API of {any}`create_kernel`, and the configuration options of the {any}`CreateKernelConfig`, have changed significantly.
+The `CreateKernelConfig` class has been refined to be safe to copy and edit incrementally.
+The recommended way of setting up the code generator is now *incremental configuration*:
+
+```{code-cell} ipython3
+cfg = ps.CreateKernelConfig()
+cfg.default_dtype = "float32"
+cfg.cpu.openmp.enable = True
+cfg.cpu.openmp.num_threads = 8
+cfg.ghost_layers = 2
+```
 
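-- *Data Types:* `CreateKernelConfig` now takes to parameters to control data types in your kernels:
+- *Data Types:* `CreateKernelConfig` now takes two parameters to control data types in your kernels: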
   the ``default_dtype`` is applied to all numerical computations, while the ``index_dtype`` is used
   for all index calculations and loop counters.
+- *CPU Optimization Options:* Should now be set via the {any}`cpu <CpuOptions>` option category and its subcategories.
    
 .. dropdown:: Deprecated options of `CreateKernelConfig`
 
     - ``data_type``: Use ``default_dtype`` instead
-    - ``cpu_openmp``: Set OpenMP-Options via an `OpenMpConfig`  in the ``cpu_optim`` (`CpuOptimConfig`) instead.
-    - ``cpu_vectorize_info``: Set vectorization options via a `VectorizationConfig` in the ``cpu_optim`` option instead
-    - ``gpu_indexing_params``: Set GPU indexing options via a `GpuIndexingConfig` in the ``gpu_indexing`` option instead
+    - ``cpu_openmp``: Set OpenMP-Options in the `cpu.openmp <OpenMpOptions>` category instead.
+    - ``cpu_vectorize_info``: Set vectorization options in the `cpu.vectorize <VectorizationOptions>` category instead
+    - ``gpu_indexing_params``: Set GPU indexing options in the `gpu <GpuOptions>` category instead
 
 
-Type Checking
--------------
+### Type Checking
 
 The old type checking system of pystencils' code generator has been replaced by a new type inference and validation
 mechanism whose rules are much stricter than before.
@@ -38,24 +64,23 @@ While running `create_kernel`, you may now encounter a `TypificationError` where
 If this happens, it is probable that you have been doing some illegal, maybe dangerous, or at least unsafe things with data types
 (like inserting integers into a floating-point context without casting them, or mixing types of different precisions or signedness).
 If you are sure the error is not your fault, please file an issue at our
-`bug tracker <https://i10git.cs.fau.de/pycodegen/pystencils/-/issues>`_.
+[bug tracker](https://i10git.cs.fau.de/pycodegen/pystencils/-/issues).
 
-Type System
-===========
+## Type System
 
-The ``pystencils.typing`` module has been entirely replaced by the new `pystencils.types` module,
+The ``pystencils.typing`` module has been entirely replaced by the new {any}`pystencils.types` module,
 which is home to a completely new type system.
-The primary interaction points with this system are still the `TypedSymbol` class and the `create_type` routine.
+The primary interaction points with this system are still the {any}`TypedSymbol` class and the {any}`create_type` routine.
 Code using any of these two should not require any changes, except:
 
 - *Importing `TypedSymbol` and `create_type`:* Both `TypedSymbol` and `create_type` should now be imported directly
   from the ``pystencils`` namespace.
 - *Custom data types:* `TypedSymbol` used to accept arbitrary strings as data types.
-  This is no longer possible; instead, import `pystencils.types.PsCustomType` and use it to describe
+  This is no longer possible; instead, import {any}`pystencils.types.PsCustomType` and use it to describe
   custom data types unknown to pystencils, as in ``TypedSymbol("xs", PsCustomType("std::vector< int >"))``
 
 All old data type classes (such as ``BasicType``, ``PointerType``, ``StructType``, etc.) have been removed
-and replaced by the class hierarchy below `PsType`.
+and replaced by the class hierarchy below {any}`PsType`.
 Directly using any of these type classes in the frontend is discouraged unless absolutely necessary;
 in most cases, `create_type` suffices.
 
diff --git a/docs/source/reference/gpu_kernels.md b/docs/source/user_manual/gpu_kernels.md
similarity index 97%
rename from docs/source/reference/gpu_kernels.md
rename to docs/source/user_manual/gpu_kernels.md
index 786840d18..4db2d7944 100644
--- a/docs/source/reference/gpu_kernels.md
+++ b/docs/source/user_manual/gpu_kernels.md
@@ -159,15 +159,10 @@ kernel = ps.create_kernel(assignments, cfg).compile()
 ```
 
 This warns us that the threads range could not be determined automatically.
-We can disable this warning by setting `manual_launch_grid` in the GPU indexing options:
+We can disable this warning by setting `manual_launch_grid` in the GPU option category:
 
 ```{code-cell}
-cfg = ps.CreateKernelConfig(
-    # ... other options ...
-    gpu_indexing=ps.GpuIndexingConfig(
-        manual_launch_grid=True
-    )
-)
+cfg.gpu.manual_launch_grid = True
 ```
 
 Now, to execute our kernel, we have to manually specify its launch grid:
diff --git a/docs/source/reference/kernelcreation.md b/docs/source/user_manual/kernelcreation.md
similarity index 99%
rename from docs/source/reference/kernelcreation.md
rename to docs/source/user_manual/kernelcreation.md
index 248855fc1..c85c8f99d 100644
--- a/docs/source/reference/kernelcreation.md
+++ b/docs/source/user_manual/kernelcreation.md
@@ -485,13 +485,10 @@ h = sp.Symbol("h")
 cfg = ps.CreateKernelConfig(
   target=ps.Target.X86_AVX512,
   default_dtype="float32",
-  cpu_optim=ps.CpuOptimConfig(
-    openmp=True,
-    vectorize=ps.VectorizationConfig(
-        assume_inner_stride_one=True
-    )
-  )
 )
+cfg.cpu.openmp.enable = True
+cfg.cpu.vectorize.enable = True
+cfg.cpu.vectorize.assume_inner_stride_one = True
 
 assignments = [
   ps.Assignment(
diff --git a/docs/source/reference/symbolic_language.rst b/docs/source/user_manual/symbolic_language.rst
similarity index 96%
rename from docs/source/reference/symbolic_language.rst
rename to docs/source/user_manual/symbolic_language.rst
index 63b94e04d..6d219306e 100644
--- a/docs/source/reference/symbolic_language.rst
+++ b/docs/source/user_manual/symbolic_language.rst
@@ -42,10 +42,6 @@ Assignments are the fundamental components of pystencils kernels;
 they are used both for assigning expressions to symbols
 and for writing values to fields.
 
-.. py:class:: pystencils.Assignment
-
-    Slightly monkey-patched version of `sympy.codegen.ast.Assignment`.
-
 Assignments are combined and structured inside `assignment collections <pystencils.AssignmentCollection>`.
 An assignment collection contains two separate lists of assignments:
 
@@ -56,10 +52,9 @@ An assignment collection contains two separate lists of assignments:
   into fields.
 
 .. autosummary::
-    :toctree: generated
     :nosignatures:
-    :template: autosummary/recursive_class.rst
 
+    pystencils.Assignment
     pystencils.AssignmentCollection
 
 
diff --git a/src/pystencils/__init__.py b/src/pystencils/__init__.py
index 8c59f7846..a23ce185d 100644
--- a/src/pystencils/__init__.py
+++ b/src/pystencils/__init__.py
@@ -3,10 +3,6 @@
 from .codegen import (
     Target,
     CreateKernelConfig,
-    CpuOptions,
-    VectorizationOptions,
-    OpenMpOptions,
-    GpuOptions,
     AUTO
 )
 from .defaults import DEFAULTS
@@ -50,10 +46,6 @@ __all__ = [
     "create_numeric_type",
     "make_slice",
     "CreateKernelConfig",
-    "CpuOptions",
-    "VectorizationOptions",
-    "GpuOptions",
-    "OpenMpOptions",
     "AUTO",
     "create_kernel",
     "create_staggered_kernel",
diff --git a/src/pystencils/backend/transformations/add_pragmas.py b/src/pystencils/backend/transformations/add_pragmas.py
index f44b89c72..0e6d314ac 100644
--- a/src/pystencils/backend/transformations/add_pragmas.py
+++ b/src/pystencils/backend/transformations/add_pragmas.py
@@ -98,8 +98,7 @@ class InsertPragmasAtLoops:
 class AddOpenMP:
     """Apply OpenMP directives to loop nests.
 
-    This transformation augments the AST with OpenMP pragmas according to the given
-    `OpenMpConfig` configuration.
+    This transformation augments the AST with OpenMP pragmas according to the given configuration.
     """
 
     def __init__(
diff --git a/src/pystencils/codegen/__init__.py b/src/pystencils/codegen/__init__.py
index 3780527c6..e13f911dd 100644
--- a/src/pystencils/codegen/__init__.py
+++ b/src/pystencils/codegen/__init__.py
@@ -1,10 +1,6 @@
 from .target import Target
 from .config import (
     CreateKernelConfig,
-    CpuOptions,
-    VectorizationOptions,
-    OpenMpOptions,
-    GpuOptions,
     AUTO,
 )
 from .parameters import Parameter
@@ -14,10 +10,6 @@ from .driver import create_kernel, get_driver
 __all__ = [
     "Target",
     "CreateKernelConfig",
-    "CpuOptions",
-    "VectorizationOptions",
-    "OpenMpOptions",
-    "GpuOptions",
     "AUTO",
     "Parameter",
     "Kernel",
diff --git a/src/pystencils/codegen/config.py b/src/pystencils/codegen/config.py
index 9abf51222..cb457f673 100644
--- a/src/pystencils/codegen/config.py
+++ b/src/pystencils/codegen/config.py
@@ -25,7 +25,11 @@ if TYPE_CHECKING:
 
 
 Option_T = TypeVar("Option_T")
+"""Type variable for option values"""
+
+
 Arg_T = TypeVar("Arg_T")
+"""Type variable for option arguments"""
 
 
 class Option(Generic[Option_T, Arg_T]):
@@ -35,19 +39,19 @@ class Option(Generic[Option_T, Arg_T]):
     It maintains a default value for the option that is used when no value
     was specified by the user.
 
-    In configuration options, the value `None` stands for `unset`.
+    In configuration options, the value `None` stands for ``unset``.
     It can therefore not be used to set an option to the meaning "not any", or "empty"
     - for these, special values need to be used.
 
     The Option allows a validator function to be specified,
     which will be called to perform sanity checks on user-provided values.
 
-    Through the validator, options may also be set from arguments of a different type (`Arg_T`)
-    than their value type (`Option_T`). If `Arg_T` is different from `Option_T`,
+    Through the validator, options may also be set from arguments of a different type (``Arg_T``)
+    than their value type (``Option_T``). If ``Arg_T`` is different from ``Option_T``,
     the validator must perform the conversion from the former to the latter.
 
     .. note::
-        `Arg_T` must always be a supertype of `Option_T`.
+        ``Arg_T`` must always be a supertype of ``Option_T``.
     """
 
     def __init__(
@@ -99,7 +103,8 @@ class Option(Generic[Option_T, Arg_T]):
         delattr(obj, self._lookup)
 
 
-class BasicOption(Option[Option_T, Option_T]): ...  # noqa: E701
+class BasicOption(Option[Option_T, Option_T]):
+    """Subclass of Option where ``Arg_T == Option_T``."""
 
 
 class ConfigBase(ABC):
@@ -171,6 +176,7 @@ class ConfigBase(ABC):
 
 
 Category_T = TypeVar("Category_T", bound=ConfigBase)
+"""Type variable for option categories."""
 
 
 class Category(Generic[Category_T]):
@@ -212,7 +218,7 @@ Currently, these options permit `AUTO`:
 
 @dataclass
 class OpenMpOptions(ConfigBase):
-    """Parameters controlling kernel parallelization using OpenMP."""
+    """Configuration options controlling automatic OpenMP instrumentation."""
 
     enable: BasicOption[bool] = BasicOption(False)
     """Enable OpenMP instrumentation"""
@@ -238,11 +244,7 @@ class OpenMpOptions(ConfigBase):
 
 @dataclass
 class VectorizationOptions(ConfigBase):
-    """Configuration for the auto-vectorizer.
-
-    If any flag in this configuration is set to a value not supported by the CPU specified
-    in `CreateKernelConfig.target`, an error will be raised.
-    """
+    """Configuration for the auto-vectorizer."""
 
     enable: BasicOption[bool] = BasicOption(False)
     """Enable intrinsic vectorization."""
@@ -305,11 +307,7 @@ class VectorizationOptions(ConfigBase):
 
 @dataclass
 class CpuOptions(ConfigBase):
-    """Configuration for the CPU optimizer.
-
-    If any flag in this configuration is set to a value not supported by the CPU specified
-    in `CreateKernelConfig.target`, an error will be raised.
-    """
+    """Configuration options specific to CPU targets."""
 
     openmp: Category[OpenMpOptions] = Category(OpenMpOptions())
     """Options governing OpenMP-instrumentation.
@@ -335,7 +333,7 @@ class CpuOptions(ConfigBase):
 
 @dataclass
 class GpuOptions(ConfigBase):
-    """Configure index translation behaviour for kernels generated for GPU targets."""
+    """Configuration options specific to GPU targets."""
 
     omit_range_check: BasicOption[bool] = BasicOption(False)
     """If set to `True`, omit the iteration counter range check.
@@ -467,13 +465,13 @@ class CreateKernelConfig(ConfigBase):
     """Target-Specific Options"""
 
     cpu: Category[CpuOptions] = Category(CpuOptions())
-    """Options for CPU kernels."""
+    """Options for CPU kernels. See `CpuOptions`."""
 
     gpu: Category[GpuOptions] = Category(GpuOptions())
-    """Options for GPU Kernels."""
+    """Options for GPU kernels. See `GpuOptions`."""
 
     sycl: Category[SyclOptions] = Category(SyclOptions())
-    """Options for SYCL kernels."""
+    """Options for SYCL kernels. See `SyclOptions`."""
 
     @index_dtype.validate
     def validate_index_type(self, spec: UserTypeSpec):
@@ -503,13 +501,13 @@ class CreateKernelConfig(ConfigBase):
     """Deprecated; use `default_dtype` instead"""
 
     cpu_openmp: InitVar[bool | int | None] = None
-    """Deprecated; use `cpu_optim.openmp <CpuOptimConfig.openmp>` instead."""
+    """Deprecated; use `cpu.openmp <CpuOptions.openmp>` instead."""
 
     cpu_vectorize_info: InitVar[dict | None] = None
-    """Deprecated; use `cpu_optim.vectorize <CpuOptimConfig.vectorize>` instead."""
+    """Deprecated; use `cpu.vectorize <CpuOptions.vectorize>` instead."""
 
     gpu_indexing_params: InitVar[dict | None] = None
-    """Deprecated; use `gpu_indexing` instead."""
+    """Deprecated; set options in the `gpu` category instead."""
 
     #   Getters
 
-- 
GitLab


From 958a499f4a529c4ca1636e306fdecc327da13759 Mon Sep 17 00:00:00 2001
From: Frederik Hennig <frederik.hennig@fau.de>
Date: Mon, 20 Jan 2025 15:27:51 +0100
Subject: [PATCH 5/7] fix testsuite

---
 .../01_tutorial_getting_started.ipynb         |  10 ++++---
 tests/kernelcreation/test_buffer_gpu.py       |   6 ++--
 tests/kernelcreation/test_iteration_slices.py |   5 ++--
 tests/nbackend/kernelcreation/test_openmp.py  |  26 +++++++-----------
 .../test_data/datahandling_save_test.npz      | Bin 428 -> 410 bytes
 tests/test_quicktests.py                      |  11 +++-----
 6 files changed, 24 insertions(+), 34 deletions(-)

diff --git a/docs/source/tutorials/01_tutorial_getting_started.ipynb b/docs/source/tutorials/01_tutorial_getting_started.ipynb
index 04dc50e51..f6c92a6bb 100644
--- a/docs/source/tutorials/01_tutorial_getting_started.ipynb
+++ b/docs/source/tutorials/01_tutorial_getting_started.ipynb
@@ -1138,7 +1138,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 34,
+   "execution_count": null,
    "metadata": {},
    "outputs": [
     {
@@ -1262,11 +1262,13 @@
     }
    ],
    "source": [
+    "cfg = ps.CreateKernelConfig()\n",
+    "cfg.cpu.openmp.enable = True\n",
+    "cfg.cpu.openmp.num_threads = 2\n",
     "ast = ps.create_kernel(\n",
     "    update_rule,\n",
-    "    cpu = ps.CpuOptions(\n",
-    "        openmp=ps.OpenMpOptions(enable=True, num_threads=2))\n",
-    "    )\n",
+    "    cfg\n",
+    ")\n",
     "\n",
     "ps.show_code(ast)"
    ]
diff --git a/tests/kernelcreation/test_buffer_gpu.py b/tests/kernelcreation/test_buffer_gpu.py
index db8cc2596..0b5019fba 100644
--- a/tests/kernelcreation/test_buffer_gpu.py
+++ b/tests/kernelcreation/test_buffer_gpu.py
@@ -299,8 +299,7 @@ def test_iteration_slices(gpu_indexing):
         gpu_src_arr.set(src_arr)
         gpu_dst_arr.fill(0)
 
-        config = CreateKernelConfig(target=Target.GPU, iteration_slice=pack_slice,
-                                    gpu=gpu_indexing)
+        config = CreateKernelConfig(target=Target.GPU, iteration_slice=pack_slice)
 
         pack_code = create_kernel(pack_eqs, config=config)
         pack_kernel = pack_code.compile()
@@ -312,8 +311,7 @@ def test_iteration_slices(gpu_indexing):
             eq = Assignment(dst_field(idx), buffer(idx))
             unpack_eqs.append(eq)
 
-        config = CreateKernelConfig(target=Target.GPU, iteration_slice=pack_slice,
-                                    gpu=gpu_indexing)
+        config = CreateKernelConfig(target=Target.GPU, iteration_slice=pack_slice)
 
         unpack_code = create_kernel(unpack_eqs, config=config)
         unpack_kernel = unpack_code.compile()
diff --git a/tests/kernelcreation/test_iteration_slices.py b/tests/kernelcreation/test_iteration_slices.py
index 892d2d949..02b6b9922 100644
--- a/tests/kernelcreation/test_iteration_slices.py
+++ b/tests/kernelcreation/test_iteration_slices.py
@@ -13,7 +13,6 @@ from pystencils import (
     make_slice,
     Target,
     CreateKernelConfig,
-    GpuOptions,
     DynamicType,
 )
 from pystencils.sympyextensions.integer_functions import int_rem
@@ -81,7 +80,7 @@ def test_numerical_slices(gen_config: CreateKernelConfig, xp, islice):
     try:
         kernel = create_kernel(update, gen_config).compile()
     except NotImplementedError:
-        if gen_config.target.is_vector_cpu():
+        if gen_config.get_target().is_vector_cpu():
             #   TODO Gather/Scatter not implemented yet
             pytest.xfail("Gather/Scatter not available yet")
 
@@ -179,7 +178,7 @@ def test_red_black_pattern(gen_config: CreateKernelConfig, xp):
     try:
         kernel = create_kernel(update, gen_config).compile()
     except NotImplementedError:
-        if gen_config.target.is_vector_cpu():
+        if gen_config.get_target().is_vector_cpu():
             pytest.xfail("Gather/Scatter not implemented yet")
 
     if isinstance(kernel, CupyKernelWrapper):
diff --git a/tests/nbackend/kernelcreation/test_openmp.py b/tests/nbackend/kernelcreation/test_openmp.py
index 07a2f1026..4e24cd1b2 100644
--- a/tests/nbackend/kernelcreation/test_openmp.py
+++ b/tests/nbackend/kernelcreation/test_openmp.py
@@ -4,8 +4,6 @@ from pystencils import (
     Assignment,
     create_kernel,
     CreateKernelConfig,
-    CpuOptions,
-    OpenMpOptions,
     Target,
 )
 
@@ -21,16 +19,12 @@ def test_openmp(nesting_depth, schedule, collapse, omit_parallel_construct):
     f, g = fields("f, g: [3D]")
     asm = Assignment(f.center(0), g.center(0))
 
-    omp = OpenMpOptions(
-        enable=True,
-        nesting_depth=nesting_depth,
-        schedule=schedule,
-        collapse=collapse,
-        omit_parallel_construct=omit_parallel_construct,
-    )
-    gen_config = CreateKernelConfig(
-        target=Target.CPU, cpu=CpuOptions(openmp=omp)
-    )
+    gen_config = CreateKernelConfig(target=Target.CPU)
+    gen_config.cpu.openmp.enable = True
+    gen_config.cpu.openmp.nesting_depth = nesting_depth
+    gen_config.cpu.openmp.schedule = schedule
+    gen_config.cpu.openmp.collapse = collapse
+    gen_config.cpu.openmp.omit_parallel_construct = omit_parallel_construct
 
     kernel = create_kernel(asm, gen_config)
     ast = kernel.body
@@ -53,10 +47,10 @@ def test_openmp(nesting_depth, schedule, collapse, omit_parallel_construct):
     pragma = find_omp_pragma(ast)
     tokens = set(pragma.text.split())
 
-    expected_tokens = {"omp", "for", f"schedule({omp.schedule})"}
-    if not omp.omit_parallel_construct:
+    expected_tokens = {"omp", "for", f"schedule({schedule})"}
+    if not omit_parallel_construct:
         expected_tokens.add("parallel")
-    if omp.collapse is not None:
-        expected_tokens.add(f"collapse({omp.collapse})")
+    if collapse is not None:
+        expected_tokens.add(f"collapse({collapse})")
 
     assert tokens == expected_tokens
diff --git a/tests/runtime/test_data/datahandling_save_test.npz b/tests/runtime/test_data/datahandling_save_test.npz
index 22202358a4fa1d1cea4db89c0889f5bca636598b..d363a8a0aba1bb78a06314a19b887eb4c4975334 100644
GIT binary patch
literal 410
zcmWIWW@Zs#U|`??Vnv4TVm_%5Ad7*Ofq|VtgrT@7Sud}kl953GECiAPO9ScIZ^U0o
z3!FR=a4cZ$yh%}WVwU7BU6409ZQ;7b3+7FW4+)wwLwtVxlu2Ad{F++6tX$&hDq>5R
zc1o#PaXF-{T)8-4wS(G&B!*`GZ;QWZ*n0I}`m&5M0Iy?Gic9G07)B-$W?W$d3JM5l
iU<A?7kP7f7R#Puf6Vyim-mGjOGnjxd3rI_WO#}cXWnnh}

literal 428
zcmWIWW@Zs#U|`??Vnv4TVm_%@Ad7*Ofq|VtgrT@7Sud}kl953GECiAPO9ScIZ^U0o
z3!FR=a4cZ$yh%}WVwU7BU6409ZQ;7b3+7FW4+)wwLwtVxlu2Ad{F++6R|5m|&w1#0
zgl-SIU~n-Ih&Eig61XB%LAb!M;UN3(z$z0Nx621J>=G6f+c7W%;B{3>amfI=ijhf#
q8CPh50tW&b7(p~N>;k-r)ie#L3F@@~Z&o&t8B9Q!1*CPrCISFllx#cz

diff --git a/tests/test_quicktests.py b/tests/test_quicktests.py
index d27a5e61b..9cefc84c5 100644
--- a/tests/test_quicktests.py
+++ b/tests/test_quicktests.py
@@ -71,13 +71,10 @@ def test_basic_vectorization():
     update_rule = [
         ps.Assignment(g[0, 0], f[0, 0] + f[-1, 0] + f[1, 0] + f[0, 1] + f[0, -1] + 42.0)
     ]
-    ast = ps.create_kernel(
-        update_rule,
-        target=target,
-        cpu=ps.CpuOptions(
-            vectorize=ps.VectorizationOptions(enable=True, assume_inner_stride_one=True)
-        ),
-    )
+    cfg = ps.CreateKernelConfig(target=target)
+    cfg.cpu.vectorize.enable = True
+    cfg.cpu.vectorize.assume_inner_stride_one = True
+    ast = ps.create_kernel(update_rule, cfg)
 
     func = ast.compile()
 
-- 
GitLab


From 3e1cf7b09fd5b409027b01cb08d19072d329fd5c Mon Sep 17 00:00:00 2001
From: Frederik Hennig <frederik.hennig@fau.de>
Date: Mon, 20 Jan 2025 15:36:32 +0100
Subject: [PATCH 6/7] add test_override

---
 tests/codegen/test_config.py | 23 +++++++++++++++++++++++
 1 file changed, 23 insertions(+)

diff --git a/tests/codegen/test_config.py b/tests/codegen/test_config.py
index 0f7591b3e..f7f29b760 100644
--- a/tests/codegen/test_config.py
+++ b/tests/codegen/test_config.py
@@ -136,3 +136,26 @@ def test_config_validation():
         cfg.index_field = Field.create_generic(
             "idx", spatial_dimensions=1, field_type=FieldType.GENERIC
         )
+
+
+def test_override():
+    cfg1 = CreateKernelConfig()
+    cfg1.function_name = "test"
+    cfg1.cpu.openmp.schedule = "dynamic"
+    cfg1.gpu.manual_launch_grid = False
+    cfg1.allow_double_writes = True
+
+    cfg2 = CreateKernelConfig()
+    cfg2.function_name = "func"
+    cfg2.cpu.openmp.schedule = "static(5)"
+    cfg2.cpu.vectorize.lanes = 12
+    cfg2.allow_double_writes = False
+
+    cfg1.override(cfg2)
+
+    assert cfg1.function_name == "func"
+    assert cfg1.cpu.openmp.schedule == "static(5)"
+    assert cfg1.cpu.openmp.enable is None
+    assert cfg1.cpu.vectorize.lanes == 12
+    assert cfg1.cpu.vectorize.assume_aligned is None
+    assert cfg1.allow_double_writes is False
-- 
GitLab


From 9c97cccb51c5596922cd027f0c9fc95bbf65e3a7 Mon Sep 17 00:00:00 2001
From: Frederik Hennig <frederik.hennig@fau.de>
Date: Wed, 22 Jan 2025 13:47:09 +0100
Subject: [PATCH 7/7] some minor code + docs cleanup

---
 docs/source/api/codegen.md                    | 180 ++++++++++++++++++
 docs/source/api/codegen.rst                   |  94 ---------
 docs/source/migration.md                      |  12 +-
 .../backend/kernelcreation/iteration_space.py |   6 +-
 src/pystencils/codegen/config.py              |  19 +-
 .../test_data/datahandling_save_test.npz      | Bin 410 -> 420 bytes
 6 files changed, 195 insertions(+), 116 deletions(-)
 create mode 100644 docs/source/api/codegen.md
 delete mode 100644 docs/source/api/codegen.rst

diff --git a/docs/source/api/codegen.md b/docs/source/api/codegen.md
new file mode 100644
index 000000000..b739a4f33
--- /dev/null
+++ b/docs/source/api/codegen.md
@@ -0,0 +1,180 @@
+# Code Generation
+
+## Invocation
+
+```{eval-rst}
+.. module:: pystencils.codegen
+
+.. autosummary::
+  :toctree: generated
+  :nosignatures:
+
+  create_kernel
+```
+  
+## Configuration
+
+```{eval-rst}
+.. module:: pystencils.codegen.config
+```
+
+The code generation driver (`create_kernel`, but also `DefaultKernelCreationDriver`) can be configured by
+passing it a `CreateKernelConfig` object.
+This object can be constructed incrementally:
+
+```Python
+cfg = ps.CreateKernelConfig()
+cfg.default_dtype = "float32"
+cfg.target = ps.Target.X86_AVX
+cfg.cpu.openmp.enable = True
+cfg.cpu.vectorize.enable = True
+cfg.cpu.vectorize.assume_inner_stride_one = True
+```
+
+### Options and Option Categories
+
+The following options and option categories are exposed by the configuration object:
+
+#### Target Specification
+
+```{eval-rst}
+.. currentmodule:: pystencils.codegen.config
+
+.. autosummary::
+
+  ~CreateKernelConfig.target
+```
+
+#### Data Types
+
+```{eval-rst}
+.. autosummary::
+
+  ~CreateKernelConfig.default_dtype
+  ~CreateKernelConfig.index_dtype
+```
+
+#### Iteration Space
+
+```{eval-rst}
+.. autosummary::
+
+  ~CreateKernelConfig.ghost_layers
+  ~CreateKernelConfig.iteration_slice
+  ~CreateKernelConfig.index_field
+```
+
+#### Kernel Constraint Checks
+
+```{eval-rst}
+.. autosummary::
+
+  ~CreateKernelConfig.allow_double_writes
+  ~CreateKernelConfig.skip_independence_check
+```
+
+#### Target-Specific Options
+
+The following categories with target-specific options are exposed:
+
+| | |
+|---------------------------|--------------------------|
+| {any}`cpu <CpuOptions>`   | Options for CPU kernels  |
+| {any}`gpu <GpuOptions>`   | Options for GPU kernels  |
+| {any}`sycl <SyclOptions>` | Options for SYCL kernels |
+
+
+#### Kernel Object and Just-In-Time Compilation
+
+```{eval-rst}
+.. autosummary::
+
+  ~CreateKernelConfig.function_name
+  ~CreateKernelConfig.jit
+```
+
+### Configuration System Classes
+
+```{eval-rst}
+
+.. autosummary::
+  :toctree: generated
+  :nosignatures:
+  :template: autosummary/recursive_class.rst
+
+  CreateKernelConfig
+  CpuOptions
+  OpenMpOptions
+  VectorizationOptions
+  GpuOptions
+  SyclOptions
+
+.. autosummary::
+  :toctree: generated
+  :nosignatures:
+
+  AUTO
+
+.. dropdown:: Implementation Details
+
+  .. autosummary::
+    :toctree: generated
+    :nosignatures:
+    :template: autosummary/entire_class.rst
+
+    Option
+    BasicOption
+    Category
+    ConfigBase
+
+```
+
+## Target Specification
+
+```{eval-rst}
+
+.. module:: pystencils.codegen.target
+
+.. autosummary::
+  :toctree: generated
+  :nosignatures:
+  :template: autosummary/recursive_class.rst
+
+  Target
+
+```
+
+## Code Generation Drivers
+
+```{eval-rst}
+.. module:: pystencils.codegen.driver
+
+.. autosummary::
+  :toctree: generated
+  :nosignatures:
+  :template: autosummary/entire_class.rst
+
+  DefaultKernelCreationDriver
+
+.. autosummary::
+  :toctree: generated
+  :nosignatures:
+
+  get_driver
+```
+
+## Output Code Objects
+
+```{eval-rst}
+.. currentmodule:: pystencils.codegen
+
+.. autosummary::
+  :toctree: generated
+  :nosignatures:
+  :template: autosummary/entire_class.rst
+
+  Kernel
+  GpuKernel
+  Parameter
+  GpuThreadsRange
+```
diff --git a/docs/source/api/codegen.rst b/docs/source/api/codegen.rst
deleted file mode 100644
index 1fb83fe5f..000000000
--- a/docs/source/api/codegen.rst
+++ /dev/null
@@ -1,94 +0,0 @@
-Code Generation
-===============
-
-.. module:: pystencils.codegen
-
-Invocation
-----------
-
-.. autosummary::
-  :toctree: generated
-  :nosignatures:
-
-  create_kernel
-  
-Configuration
--------------
-
-.. module:: pystencils.codegen.config
-
-.. autosummary::
-  :toctree: generated
-  :nosignatures:
-  :template: autosummary/recursive_class.rst
-
-  CreateKernelConfig
-  CpuOptions
-  OpenMpOptions
-  VectorizationOptions
-  GpuOptions
-  SyclOptions
-
-.. autosummary::
-  :toctree: generated
-  :nosignatures:
-
-  AUTO
-
-.. dropdown:: Configuration System Implementation Details
-
-  .. autosummary::
-    :toctree: generated
-    :nosignatures:
-    :template: autosummary/entire_class.rst
-
-    Option
-    BasicOption
-    Category
-    ConfigBase
-
-
-Target Specification
---------------------
-
-.. module:: pystencils.codegen.target
-
-.. autosummary::
-  :toctree: generated
-  :nosignatures:
-  :template: autosummary/recursive_class.rst
-
-  Target
-
-Code Generation Drivers
------------------------
-
-.. module:: pystencils.codegen.driver
-
-.. autosummary::
-  :toctree: generated
-  :nosignatures:
-  :template: autosummary/entire_class.rst
-
-  DefaultKernelCreationDriver
-
-.. autosummary::
-  :toctree: generated
-  :nosignatures:
-
-  get_driver
-
-Output Code Objects
--------------------
-
-.. currentmodule:: pystencils.codegen
-
-.. autosummary::
-  :toctree: generated
-  :nosignatures:
-  :template: autosummary/entire_class.rst
-
-  Kernel
-  GpuKernel
-  Parameter
-  GpuThreadsRange
diff --git a/docs/source/migration.md b/docs/source/migration.md
index c3cb17d0f..bb4a2cffb 100644
--- a/docs/source/migration.md
+++ b/docs/source/migration.md
@@ -48,12 +48,14 @@ cfg.ghost_layers = 2
   for all index calculations and loop counters.
 - *CPU Optimization Options:* Should now be set via the {any}`cpu <CpuOptions>` option category and its subcategories.
    
-.. dropdown:: Deprecated options of `CreateKernelConfig`
+:::{dropdown} Deprecated options of `CreateKernelConfig`
 
-    - ``data_type``: Use ``default_dtype`` instead
-    - ``cpu_openmp``: Set OpenMP-Options in the `cpu.openmp <OpenMpOptions>` category instead.
-    - ``cpu_vectorize_info``: Set vectorization options in the `cpu.vectorize <VectorizationOptions>` category instead
-    - ``gpu_indexing_params``: Set GPU indexing options in the `gpu <GpuOptions>` category instead
+- ``data_type``: Use ``default_dtype`` instead.
+- ``cpu_openmp``: Set OpenMP options in the {any}`cpu.openmp <OpenMpOptions>` category instead.
+- ``cpu_vectorize_info``: Set vectorization options in the {any}`cpu.vectorize <VectorizationOptions>` category instead.
+- ``gpu_indexing_params``: Set GPU indexing options in the {any}`gpu <GpuOptions>` category instead.
+
+:::
 
 
 ### Type Checking
diff --git a/src/pystencils/backend/kernelcreation/iteration_space.py b/src/pystencils/backend/kernelcreation/iteration_space.py
index f7425c06b..313377fd8 100644
--- a/src/pystencils/backend/kernelcreation/iteration_space.py
+++ b/src/pystencils/backend/kernelcreation/iteration_space.py
@@ -453,11 +453,7 @@ def create_full_iteration_space(
 
     assert not ctx.fields.index_fields
 
-    if not (
-        (ghost_layers is not None)
-        or (iteration_slice is not None)
-        or infer_ghost_layers
-    ):
+    if (ghost_layers is None) and (iteration_slice is None) and not infer_ghost_layers:
         raise ValueError(
             "One argument of `ghost_layers`, `iteration_slice`, and `infer_ghost_layers` must be set."
         )
diff --git a/src/pystencils/codegen/config.py b/src/pystencils/codegen/config.py
index cb457f673..cbb3f4f32 100644
--- a/src/pystencils/codegen/config.py
+++ b/src/pystencils/codegen/config.py
@@ -82,17 +82,17 @@ class Option(Generic[Option_T, Arg_T]):
     def is_set(self, obj) -> bool:
         return getattr(obj, self._lookup, None) is not None
 
-    def __set_name__(self, owner, name: str):
+    def __set_name__(self, owner: ConfigBase, name: str):
         self._name = name
         self._lookup = f"_{name}"
 
-    def __get__(self, obj, objtype=None) -> Option_T | None:
+    def __get__(self, obj: ConfigBase, objtype: type[ConfigBase] | None = None) -> Option_T | None:
         if obj is None:
             return None
 
         return getattr(obj, self._lookup, None)
 
-    def __set__(self, obj, arg: Arg_T | None):
+    def __set__(self, obj: ConfigBase, arg: Arg_T | None):
         if arg is not None and self._validator is not None:
             value = self._validator(obj, arg)
         else:
@@ -190,17 +190,17 @@ class Category(Generic[Category_T]):
     def __init__(self, default: Category_T):
         self._default = default
 
-    def __set_name__(self, owner, name: str):
+    def __set_name__(self, owner: ConfigBase, name: str):
         self._name = name
         self._lookup = f"_{name}"
 
-    def __get__(self, obj, objtype=None) -> Category_T:
+    def __get__(self, obj: ConfigBase, objtype: type[ConfigBase] | None = None) -> Category_T:
         if obj is None:
             return self._default
 
         return cast(Category_T, getattr(obj, self._lookup, None))
 
-    def __set__(self, obj, cat: Category_T):
+    def __set__(self, obj: ConfigBase, cat: Category_T):
         setattr(obj, self._lookup, cat.copy())
 
 
@@ -208,12 +208,7 @@ class _AUTO_TYPE: ...  # noqa: E701
 
 
 AUTO = _AUTO_TYPE()
-"""Special value that can be passed to some options for invoking automatic behaviour.
-
-Currently, these options permit `AUTO`:
-
-- `ghost_layers <CreateKernelConfig.ghost_layers>`
-"""
+"""Special value that can be passed to some options for invoking automatic behaviour."""
 
 
 @dataclass
diff --git a/tests/runtime/test_data/datahandling_save_test.npz b/tests/runtime/test_data/datahandling_save_test.npz
index d363a8a0aba1bb78a06314a19b887eb4c4975334..486c7ee74d4421d563c3b1c2e3739d8db6308b07 100644
GIT binary patch
literal 420
zcmWIWW@Zs#U|`??Vnv4TVm_%zAd7*Ofq|VtgrT@7Sud}kl953GECiAPO9ScIZ^U0o
z3!FR=a4cZ$yh%}WVwU7BU6409ZQ;7b3+7FW4+)wwLwtVxlu2Ad{F++6R|5m|&w1#0
zgl-SIU~n-Ih&Eig61XB%LAb!M;UN3(z^W;7OWiZstS&PI;B`?-amfI<h>=Nz8CNKQ
m0tEsZ7(p~N%mTcL)iepH3F@r?Z&o&t8B9Q!1*BELCISH25ot;Q

literal 410
zcmWIWW@Zs#U|`??Vnv4TVm_%5Ad7*Ofq|VtgrT@7Sud}kl953GECiAPO9ScIZ^U0o
z3!FR=a4cZ$yh%}WVwU7BU6409ZQ;7b3+7FW4+)wwLwtVxlu2Ad{F++6tX$&hDq>5R
zc1o#PaXF-{T)8-4wS(G&B!*`GZ;QWZ*n0I}`m&5M0Iy?Gic9G07)B-$W?W$d3JM5l
iU<A?7kP7f7R#Puf6Vyim-mGjOGnjxd3rI_WO#}cXWnnh}

-- 
GitLab