
Compare revisions

Changes are shown as if the source revision was being merged into the target revision.

Commits on Source (6)
Showing 856 additions and 154 deletions
__pycache__
.ipynb_checkpoints
.coverage*
coverage.xml
*.pyc
*.vti
/build
......
......@@ -203,7 +203,6 @@ minimal-sympy-master:
script:
- python -m pip install --upgrade git+https://github.com/sympy/sympy.git
- python quicktest.py
allow_failure: true
tags:
- docker
- cuda
......@@ -255,30 +254,25 @@ pycodegen-integration:
# -------------------- Code Quality ---------------------------------------------------------------------
flake8-lint:
.qa-base:
stage: "Code Quality"
image: i10git.cs.fau.de:5005/pycodegen/pycodegen/nox:alpine
needs: []
except:
variables:
- $ENABLE_NIGHTLY_BUILDS
image: i10git.cs.fau.de:5005/pycodegen/pycodegen/full
script:
- flake8 src/pystencils
tags:
- docker
mypy-typecheck:
stage: "Code Quality"
except:
variables:
- $ENABLE_NIGHTLY_BUILDS
image: i10git.cs.fau.de:5005/pycodegen/pycodegen/full
before_script:
- pip install -e .[tests]
lint:
extends: .qa-base
script:
- mypy src/pystencils
tags:
- docker
- nox --session lint
typecheck:
extends: .qa-base
script:
- nox --session typecheck
# -------------------- Unit Tests ---------------------------------------------------------------------
......@@ -286,18 +280,12 @@ mypy-typecheck:
tests-and-coverage:
stage: "Unit Tests"
needs: []
image: i10git.cs.fau.de:5005/pycodegen/pycodegen/full:cupy12.3
before_script:
- pip install -e .[tests]
image: i10git.cs.fau.de:5005/pycodegen/pycodegen/nox:ubuntu24.04-cuda12.6
script:
- env
- pip list
- export NUM_CORES=$(nproc --all)
- mkdir -p ~/.config/matplotlib
- echo "backend:template" > ~/.config/matplotlib/matplotlibrc
- mkdir public
- pytest -v -n $NUM_CORES --cov-report html --cov-report xml --cov-report term --cov=. -m "not longrun" --html test-report/index.html --junitxml=report.xml
- python -m coverage xml
- nox --session "testsuite(cupy12)"
tags:
- docker
- cuda11
......@@ -318,14 +306,11 @@ tests-and-coverage:
build-documentation:
image: i10git.cs.fau.de:5005/pycodegen/pycodegen/full:cupy12.3
image: i10git.cs.fau.de:5005/pycodegen/pycodegen/nox:ubuntu24.04-cuda12.6
stage: docs
needs: []
before_script:
- pip install -e .[doc]
script:
- cd docs
- make html SPHINXOPTS="-W --keep-going"
- nox --session docs -- --fail-on-warnings
tags:
- docker
- cuda11
......@@ -335,7 +320,7 @@ build-documentation:
pages:
image: i10git.cs.fau.de:5005/pycodegen/pycodegen/full
image: alpine:latest
stage: deploy
needs: ["tests-and-coverage", "build-documentation"]
script:
......
# Development Workflow
This page contains instructions on how to get started with developing pystencils.
## Prepare the Git Repository
The pystencils Git repository is hosted at [i10git.cs.fau.de](https://i10git.cs.fau.de), the GitLab instance of the
[Chair for Systems Simulation](https://www.cs10.tf.fau.de/) at [FAU Erlangen-Nürnberg](https://fau.de).
In order to contribute code to pystencils, you will need to acquire an account there; to do so,
please follow the instructions on the GitLab landing page.
### Create a Fork
Only the core developers of pystencils have write-access to the primary repository.
To contribute, you will therefore have to create a fork of that repository
by navigating to the [repository page](https://i10git.cs.fau.de/pycodegen/pystencils)
and selecting *Fork* there.
In this fork, you may freely create branches and develop code, which can later be merged back into the primary repository
via merge requests.
### Create a Local Clone
Once you have a fork of the repository, you can clone it to your local machine using the git command-line.
:::{note}
To clone via SSH, which is recommended, you will first have to [register an SSH key](https://docs.gitlab.com/ee/user/ssh.html).
:::
Open up a shell and navigate to a directory you want to work in.
Then, enter
```bash
git clone git@i10git.cs.fau.de:<your-username>/pystencils.git
```
to clone your fork of pystencils.
:::{note}
To keep up to date with the upstream repository, you can add it as a secondary remote to your clone:
```bash
git remote add upstream git@i10git.cs.fau.de:pycodegen/pystencils.git
```
You can set your clone's `master` branch to track the upstream `master` like this:
```bash
git pull --set-upstream upstream master
```
:::
## Set Up the Python Environment
To develop pystencils, you will need at least the following software installed on your machine:
- Python 3.10 or later: Since pystencils' minimum supported version is Python 3.10, we recommend that you work with Python 3.10 directly.
- An up-to-date C++ compiler, used by pystencils to JIT-compile generated code
- [Nox](https://nox.thea.codes/en/stable/), which we use for test automation.
Nox will be used extensively in the instructions on testing below.
- Optionally [CUDA](https://developer.nvidia.com/cuda-toolkit),
  if you have an Nvidia GPU and plan to work on pystencils' GPU capabilities
Once you have these, set up a [virtual environment](https://docs.python.org/3/library/venv.html) for development.
This ensures that your system's installation of Python is kept clean, and isolates your development environment
from outside influence.
Use the following commands to create a virtual environment at `.venv` and perform an editable install of pystencils into it:
```bash
python -m venv .venv
source .venv/bin/activate
export PIP_REQUIRE_VIRTUALENV=true
pip install -e .[dev]
```
:::{note}
Setting `PIP_REQUIRE_VIRTUALENV` makes pip refuse to install packages outside of a virtual environment.
Consider setting this variable globally in your shell's configuration file.
:::
You are now ready to go! Create a new git branch to work on, open up an IDE, and start coding.
Make sure your IDE recognizes the virtual environment you created, though.
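For example, a feature branch can be created and published to your fork like this (the branch name `my-feature` is only a placeholder):
```bash
git switch -c my-feature       # create and check out a new branch
git push -u origin my-feature  # publish it to your fork and set up branch tracking
```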
## Static Code Analysis
### PEP8 Code Style
We use [flake8](https://github.com/PyCQA/flake8/tree/main) to check our code for compliance with the
[PEP8](https://peps.python.org/pep-0008/) code style.
You can run `flake8` either directly or through Nox to check your code for style errors:
::::{grid}
:::{grid-item}
```bash
nox -s lint
```
:::
:::{grid-item}
```bash
flake8 src/pystencils
```
:::
::::
### Static Type Checking
New code added to pystencils is required to carry type annotations,
and its types are checked using [mypy](https://mypy.readthedocs.io/en/stable/index.html#).
To discover type errors, run *mypy* either directly or via Nox:
::::{grid}
:::{grid-item}
```bash
nox -s typecheck
```
:::
:::{grid-item}
```bash
mypy src/pystencils
```
:::
::::
:::{note}
Type checking is currently restricted to the `codegen`, `jit`, `backend`, and `types` modules,
since most code in the remaining modules is significantly older and is not comprehensively
type-annotated. As more modules are updated with type annotations, this list will expand in the future.
If you think a new module is ready to be type-checked, add an exception clause for it in the `mypy.ini` file, as illustrated in the sketch after this note.
:::
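As an illustration, a hypothetical module `pystencils.mymodule` (not an actual pystencils module) could be opted in to type checking with an entry following the pattern already used in `mypy.ini`:
```ini
# Hypothetical example: enable type checking for an additional module
[mypy-pystencils.mymodule.*]
ignore_errors = False
```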
## Running the Test Suite
Pystencils comes with an extensive and steadily growing suite of unit tests.
To run the test suite, invoke a variant of the Nox `testsuite` session.
There are several variants of the `testsuite` session, depending on whether you are testing with or without CUDA,
and on which version of Python you wish to test with.
You can list the available sessions using `nox -l`.
Select one of the `testsuite` variants and run it via `nox -s "testsuite(<variant>)"`, e.g.
```bash
nox -s "testsuite(cpu)"
```
for the CPU-only suite.
During the testsuite run, coverage information is collected and displayed using [coverage.py](https://coverage.readthedocs.io/en/7.6.10/).
You can display a detailed overview of code coverage by opening the generated `htmlcov/index.html` page.
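For instance, on a typical Linux desktop you can open the report straight from the shell (assuming coverage.py's default `htmlcov` output directory):
```bash
xdg-open htmlcov/index.html
```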
## Building the Documentation
The pystencils documentation pages are written in MyST Markdown and ReStructuredText,
located in the `docs` folder, and built using Sphinx.
To build the documentation pages of pystencils, simply run the `docs` Nox session:
```bash
nox -s docs
```
This will emit the generated HTML pages to `docs/build/html`.
The `docs` session accepts two parameters to customize its execution:
- `--clean`: Clean the page generator's output before building
- `--fail-on-warnings`: Have the build fail (finish with a nonzero exit code) if Sphinx emits any warnings.
You must pass any of these to the session command *after a pair of dashes* (`--`); e.g.:
```bash
nox -s docs -- --clean
```
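The two parameters are evaluated independently by the session, so they can also be combined in a single invocation:
```bash
nox -s docs -- --clean --fail-on-warnings
```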
# Contributor Guide
Welcome to the Contributor's Guide to pystencils!
If you are interested in contributing to the development of pystencils, this is the place to start.
Pystencils is an open-source package licensed under the [AGPL v3](https://www.gnu.org/licenses/agpl-3.0.en.html).
As such, the act of contributing to pystencils by submitting a merge request is taken as agreement to the terms of the license.
:::{toctree}
:maxdepth: 2
dev-workflow
:::
......@@ -95,8 +95,9 @@ Topics
.. toctree::
:maxdepth: 1
:caption: Advanced
:caption: Topics
contributing/index
migration
backend/index
......
......@@ -11,6 +11,12 @@ ignore_errors = False
[mypy-pystencils.types.*]
ignore_errors = False
[mypy-pystencils.codegen.*]
ignore_errors = False
[mypy-pystencils.jit.*]
ignore_errors = False
[mypy-setuptools.*]
ignore_missing_imports=true
......@@ -22,3 +28,6 @@ ignore_missing_imports=true
[mypy-cupy.*]
ignore_missing_imports=true
[mypy-cpuinfo.*]
ignore_missing_imports=true
from __future__ import annotations
from typing import Sequence
import os
import nox
import subprocess
import re
nox.options.sessions = ["lint", "typecheck", "testsuite"]
def get_cuda_version(session: nox.Session) -> None | tuple[int, ...]:
query_args = ["nvcc", "--version"]
try:
query_result = subprocess.run(query_args, capture_output=True)
except FileNotFoundError:
return None
matches = re.findall(r"release \d+\.\d+", str(query_result.stdout))
if matches:
match = matches[0]
version_string = match.split()[-1]
try:
return tuple(int(v) for v in version_string.split("."))
except ValueError:
pass
session.warn("nvcc was found, but I am unable to determine the CUDA version.")
return None
def install_cupy(
session: nox.Session, cupy_version: str, skip_if_no_cuda: bool = False
):
if cupy_version is not None:
cuda_version = get_cuda_version(session)
if cuda_version is None or cuda_version[0] not in (11, 12):
if skip_if_no_cuda:
session.skip(
"No compatible installation of CUDA found - Need either CUDA 11 or 12"
)
else:
session.warn(
"Running without cupy: no compatbile installation of CUDA found. Need either CUDA 11 or 12."
)
return
cuda_major = cuda_version[0]
cupy_package = f"cupy-cuda{cuda_major}x=={cupy_version}"
session.install(cupy_package)
def check_external_doc_dependencies(session: nox.Session):
dot_args = ["dot", "--version"]
try:
_ = subprocess.run(dot_args, capture_output=True)
except FileNotFoundError:
session.error(
"Unable to build documentation: "
"Command `dot` from the `graphviz` package (https://www.graphviz.org/) is not available"
)
def editable_install(session: nox.Session, opts: Sequence[str] = ()):
if opts:
opts_str = "[" + ",".join(opts) + "]"
else:
opts_str = ""
session.install("-e", f".{opts_str}")
@nox.session(python="3.10", tags=["qa", "code-quality"])
def lint(session: nox.Session):
"""Lint code using flake8"""
session.install("flake8")
session.run("flake8", "src/pystencils")
@nox.session(python="3.10", tags=["qa", "code-quality"])
def typecheck(session: nox.Session):
"""Run MyPy for static type checking"""
editable_install(session)
session.install("mypy")
session.run("mypy", "src/pystencils")
@nox.parametrize("cupy_version", [None, "12", "13"], ids=["cpu", "cupy12", "cupy13"])
@nox.session(python="3.10", tags=["test"])
def testsuite(session: nox.Session, cupy_version: str | None):
if cupy_version is not None:
install_cupy(session, cupy_version, skip_if_no_cuda=True)
editable_install(session, ["alltrafos", "use_cython", "interactive", "testsuite"])
num_cores = os.cpu_count()
session.run(
"pytest",
"-v",
"-n",
str(num_cores),
"--cov-report=term",
"--cov=.",
"-m",
"not longrun",
"--html",
"test-report/index.html",
"--junitxml=report.xml",
)
session.run("coverage", "html")
session.run("coverage", "xml")
@nox.session(python=["3.10"], tags=["docs"])
def docs(session: nox.Session):
"""Build the documentation pages"""
check_external_doc_dependencies(session)
install_cupy(session, "12.3")
editable_install(session, ["doc"])
env = {}
session_args = session.posargs
if "--fail-on-warnings" in session_args:
env["SPHINXOPTS"] = "-W --keep-going"
session.chdir("docs")
if "--clean" in session_args:
session.run("make", "clean", external=True)
session.run("make", "html", external=True, env=env)
......@@ -44,6 +44,11 @@ interactive = [
use_cython = [
'Cython'
]
dev = [
"flake8",
"mypy",
"black",
]
doc = [
'sphinx',
'pydata-sphinx-theme==0.15.4',
......@@ -52,9 +57,12 @@ doc = [
'sphinx_autodoc_typehints',
'pandoc',
'sphinx_design',
'myst-nb'
'myst-nb',
'matplotlib',
'ipywidgets',
'graphviz',
]
tests = [
testsuite = [
'pytest',
'pytest-cov',
'pytest-html',
......@@ -68,6 +76,7 @@ tests = [
'matplotlib',
'py-cpuinfo',
'randomgen>=1.18',
'scipy'
]
[build-system]
......
......@@ -23,6 +23,7 @@ filterwarnings =
ignore:Using or importing the ABCs from 'collections' instead of from 'collections.abc':DeprecationWarning
ignore:Animation was deleted without rendering anything:UserWarning
# Coverage Configuration
[run]
branch = True
source = src/pystencils
......@@ -31,6 +32,7 @@ source = src/pystencils
omit = doc/*
tests/*
setup.py
noxfile.py
quicktest.py
conftest.py
versioneer.py
......
......@@ -126,7 +126,7 @@ class KernelCreationContext:
symb.apply_dtype(dtype)
return symb
def get_new_symbol(self, name: str, dtype: PsType | None = None) -> PsSymbol:
"""Always create a new symbol, deduplicating its name if another symbol with the same name already exists."""
......@@ -173,15 +173,11 @@ class KernelCreationContext:
) -> PsSymbol:
"""Canonically duplicates the given symbol.
A new symbol with the new name ``symb.name + "__<counter>"`` and optionally a different data type
A new symbol with the new name ``symb.name + "__<counter>"`` and optionally a different data type
is created, added to the symbol table, and returned.
The ``counter`` reflects the number of previously created duplicates of this symbol.
"""
if (result := self._symbol_ctr_pattern.search(symb.name)) is not None:
span = result.span()
basename = symb.name[: span[0]]
else:
basename = symb.name
basename = self.basename(symb)
if new_dtype is None:
new_dtype = symb.dtype
......@@ -194,6 +190,14 @@ class KernelCreationContext:
return self.get_symbol(dup_name, new_dtype)
assert False, "unreachable code"
def basename(self, symb: PsSymbol) -> str:
"""Returns the original name a symbol had before duplication."""
if (result := self._symbol_ctr_pattern.search(symb.name)) is not None:
span = result.span()
return symb.name[: span[0]]
else:
return symb.name
@property
def symbols(self) -> Iterable[PsSymbol]:
"""Return an iterable of all symbols listed in the symbol table."""
......
......@@ -57,7 +57,7 @@ class GenericCpu(Platform):
def select_function(self, call: PsCall) -> PsExpression:
assert isinstance(call.function, PsMathFunction)
func = call.function.func
dtype = call.get_dtype()
arg_types = (dtype,) * func.num_args
......@@ -87,13 +87,13 @@ class GenericCpu(Platform):
call.function = cfunc
return call
if isinstance(dtype, PsIntegerType):
match func:
case MathFunctions.Abs:
zero = PsExpression.make(PsConstant(0, dtype))
arg = call.args[0]
return PsTernary(PsGe(arg, zero), arg, - arg)
return PsTernary(PsGe(arg, zero), arg, -arg)
case MathFunctions.Min:
arg1, arg2 = call.args
return PsTernary(PsLe(arg1, arg2), arg1, arg2)
......@@ -131,7 +131,10 @@ class GenericCpu(Platform):
PsLookup(
PsBufferAcc(
ispace.index_list.base_pointer,
(PsExpression.make(ispace.sparse_counter), factory.parse_index(0)),
(
PsExpression.make(ispace.sparse_counter),
factory.parse_index(0),
),
),
coord.name,
),
......@@ -158,26 +161,33 @@ class GenericVectorCpu(GenericCpu, ABC):
@abstractmethod
def type_intrinsic(self, vector_type: PsVectorType) -> PsCustomType:
"""Return the intrinsic vector type for the given generic vector type,
or raise an `MaterializationError` if type is not supported."""
or raise a `MaterializationError` if type is not supported."""
@abstractmethod
def constant_intrinsic(self, c: PsConstant) -> PsExpression:
"""Return an expression that initializes a constant vector,
or raise an `MaterializationError` if not supported."""
or raise a `MaterializationError` if not supported."""
@abstractmethod
def op_intrinsic(
self, expr: PsExpression, operands: Sequence[PsExpression]
) -> PsExpression:
"""Return an expression intrinsically invoking the given operation
or raise an `MaterializationError` if not supported."""
or raise a `MaterializationError` if not supported."""
@abstractmethod
def math_func_intrinsic(
self, expr: PsCall, operands: Sequence[PsExpression]
) -> PsExpression:
"""Return an expression intrinsically invoking the given mathematical
function or raise a `MaterializationError` if not supported."""
@abstractmethod
def vector_load(self, acc: PsVecMemAcc) -> PsExpression:
"""Return an expression intrinsically performing a vector load,
or raise an `MaterializationError` if not supported."""
or raise a `MaterializationError` if not supported."""
@abstractmethod
def vector_store(self, acc: PsVecMemAcc, arg: PsExpression) -> PsExpression:
"""Return an expression intrinsically performing a vector store,
or raise an `MaterializationError` if not supported."""
or raise a `MaterializationError` if not supported."""
......@@ -13,7 +13,9 @@ from ..ast.expressions import (
PsSub,
PsMul,
PsDiv,
PsConstantExpr
PsConstantExpr,
PsCast,
PsCall,
)
from ..ast.vector import PsVecMemAcc, PsVecBroadcast
from ...types import PsCustomType, PsVectorType, PsPointerType
......@@ -23,15 +25,15 @@ from ..exceptions import MaterializationError
from .generic_cpu import GenericVectorCpu
from ..kernelcreation import KernelCreationContext
from ...types.quick import Fp, SInt
from ..functions import CFunction
from ...types.quick import Fp, UInt, SInt
from ..functions import CFunction, PsMathFunction, MathFunctions
class X86VectorArch(Enum):
SSE = 128
AVX = 256
AVX512 = 512
AVX512_FP16 = AVX512 + 1 # TODO improve modelling?
AVX512_FP16 = AVX512 + 1 # TODO improve modelling?
def __ge__(self, other: X86VectorArch) -> bool:
return self.value >= other.value
......@@ -78,7 +80,7 @@ class X86VectorArch(Enum):
)
return suffix
def intrin_type(self, vtype: PsVectorType):
scalar_type = vtype.scalar_type
match scalar_type:
......@@ -96,9 +98,7 @@ class X86VectorArch(Enum):
)
if vtype.width > self.max_vector_width:
raise MaterializationError(
f"x86/{self} does not support {vtype}"
)
raise MaterializationError(f"x86/{self} does not support {vtype}")
return PsCustomType(f"__m{vtype.width}{suffix}")
......@@ -161,26 +161,124 @@ class X86VectorCpu(GenericVectorCpu):
match expr:
case PsUnOp() | PsBinOp():
func = _x86_op_intrin(self._vector_arch, expr, expr.get_dtype())
return func(*operands)
intrinsic = func(*operands)
intrinsic.dtype = func.return_type
return intrinsic
case _:
raise MaterializationError(f"Cannot map {type(expr)} to x86 intrinsic")
def math_func_intrinsic(
self, expr: PsCall, operands: Sequence[PsExpression]
) -> PsExpression:
assert isinstance(expr.function, PsMathFunction)
vtype = expr.get_dtype()
assert isinstance(vtype, PsVectorType)
prefix = self._vector_arch.intrin_prefix(vtype)
suffix = self._vector_arch.intrin_suffix(vtype)
rtype = atype = self._vector_arch.intrin_type(vtype)
match expr.function.func:
case (
MathFunctions.Exp
| MathFunctions.Log
| MathFunctions.Sin
| MathFunctions.Cos
| MathFunctions.Tan
| MathFunctions.Sinh
| MathFunctions.Cosh
| MathFunctions.ASin
| MathFunctions.ACos
| MathFunctions.ATan
| MathFunctions.ATan2
| MathFunctions.Pow
):
raise MaterializationError(
"Trigonometry, exp, log, and pow require SVML."
)
case MathFunctions.Floor | MathFunctions.Ceil if vtype.is_float():
opstr = expr.function.func.function_name
if vtype.width > 256:
raise MaterializationError("512bit ceil/floor require SVML.")
case MathFunctions.Min | MathFunctions.Max:
opstr = expr.function.func.function_name
if (
vtype.is_int()
and vtype.scalar_type.width == 64
and self._vector_arch < X86VectorArch.AVX512
):
raise MaterializationError(
"64bit integer (signed and unsigned) min/max intrinsics require AVX512."
)
case MathFunctions.Abs:
assert len(operands) == 1, "abs takes exactly one argument."
op = operands[0]
match vtype.scalar_type:
case UInt():
return op
case SInt(width):
opstr = expr.function.func.function_name
if width == 64 and self._vector_arch < X86VectorArch.AVX512:
raise MaterializationError(
"64bit integer abs intrinsic requires AVX512."
)
case Fp():
neg_zero = self.constant_intrinsic(PsConstant(-0.0, vtype))
opstr = "andnot"
func = CFunction(
f"{prefix}_{opstr}_{suffix}", (atype,) * 2, rtype
)
return func(neg_zero, op)
case _:
raise MaterializationError(
f"x86/{self} does not support {expr.function.func.function_name} on type {vtype}."
)
if expr.function.func in [
MathFunctions.ATan2,
MathFunctions.Min,
MathFunctions.Max,
]:
num_args = 2
else:
num_args = 1
func = CFunction(f"{prefix}_{opstr}_{suffix}", (atype,) * num_args, rtype)
return func(*operands)
def vector_load(self, acc: PsVecMemAcc) -> PsExpression:
if acc.stride is None:
load_func = _x86_packed_load(self._vector_arch, acc.dtype, False)
return load_func(
PsAddressOf(PsMemAcc(acc.pointer, acc.offset))
)
load_func, addr_type = _x86_packed_load(self._vector_arch, acc.dtype, False)
addr: PsExpression = PsAddressOf(PsMemAcc(acc.pointer, acc.offset))
if addr_type:
addr = PsCast(addr_type, addr)
intrinsic = load_func(addr)
intrinsic.dtype = load_func.return_type
return intrinsic
else:
raise NotImplementedError("Gather loads not implemented yet.")
def vector_store(self, acc: PsVecMemAcc, arg: PsExpression) -> PsExpression:
if acc.stride is None:
store_func = _x86_packed_store(self._vector_arch, acc.dtype, False)
return store_func(
PsAddressOf(PsMemAcc(acc.pointer, acc.offset)),
arg,
store_func, addr_type = _x86_packed_store(
self._vector_arch, acc.dtype, False
)
addr: PsExpression = PsAddressOf(PsMemAcc(acc.pointer, acc.offset))
if addr_type:
addr = PsCast(addr_type, addr)
intrinsic = store_func(addr, arg)
intrinsic.dtype = store_func.return_type
return intrinsic
else:
raise NotImplementedError("Scatter stores not implemented yet.")
......@@ -188,26 +286,46 @@ class X86VectorCpu(GenericVectorCpu):
@cache
def _x86_packed_load(
varch: X86VectorArch, vtype: PsVectorType, aligned: bool
) -> CFunction:
) -> tuple[CFunction, PsPointerType | None]:
prefix = varch.intrin_prefix(vtype)
suffix = varch.intrin_suffix(vtype)
ptr_type = PsPointerType(vtype.scalar_type, const=True)
return CFunction(
f"{prefix}_load{'' if aligned else 'u'}_{suffix}", (ptr_type,), vtype
if isinstance(vtype.scalar_type, SInt):
suffix = f"si{vtype.width}"
addr_type = PsPointerType(varch.intrin_type(vtype))
else:
suffix = varch.intrin_suffix(vtype)
addr_type = None
return (
CFunction(
f"{prefix}_load{'' if aligned else 'u'}_{suffix}", (ptr_type,), vtype
),
addr_type,
)
@cache
def _x86_packed_store(
varch: X86VectorArch, vtype: PsVectorType, aligned: bool
) -> CFunction:
) -> tuple[CFunction, PsPointerType | None]:
prefix = varch.intrin_prefix(vtype)
suffix = varch.intrin_suffix(vtype)
ptr_type = PsPointerType(vtype.scalar_type, const=True)
return CFunction(
f"{prefix}_store{'' if aligned else 'u'}_{suffix}",
(ptr_type, vtype),
PsCustomType("void"),
if isinstance(vtype.scalar_type, SInt):
suffix = f"si{vtype.width}"
addr_type = PsPointerType(varch.intrin_type(vtype))
else:
suffix = varch.intrin_suffix(vtype)
addr_type = None
return (
CFunction(
f"{prefix}_store{'' if aligned else 'u'}_{suffix}",
(ptr_type, vtype),
PsCustomType("void"),
),
addr_type,
)
......@@ -223,7 +341,7 @@ def _x86_op_intrin(
case PsVecBroadcast():
opstr = "set1"
if vtype.scalar_type == SInt(64) and vtype.vector_entries <= 4:
suffix += "x"
suffix += "x"
atype = vtype.scalar_type
case PsAdd():
opstr = "add"
......@@ -239,8 +357,52 @@ def _x86_op_intrin(
opstr = "mul"
case PsDiv():
opstr = "div"
case PsCast(target_type, arg):
atype = arg.dtype
widest_type = max(vtype, atype, key=lambda t: t.width)
assert target_type == vtype, "type mismatch"
assert isinstance(atype, PsVectorType)
def panic(detail: str = ""):
raise MaterializationError(
f"Unable to select intrinsic for type conversion: "
f"{varch.name} does not support packed conversion from {atype} to {target_type}. {detail}\n"
f" at: {op}"
)
if atype == vtype:
panic("Use `EliminateConstants` to eliminate trivial casts.")
match (atype.scalar_type, vtype.scalar_type):
# Not supported: cvtepi8_pX, cvtpX_epi8, cvtepi16_p[sd], cvtp[sd]_epi16
case (
(SInt(8), Fp())
| (Fp(), SInt(8))
| (SInt(16), Fp(32))
| (SInt(16), Fp(64))
| (Fp(32), SInt(16))
| (Fp(64), SInt(16))
):
panic()
# AVX512 only: cvtepi64_pX, cvtpX_epi64
case (SInt(64), Fp()) | (
Fp(),
SInt(64),
) if varch < X86VectorArch.AVX512:
panic()
# AVX512 only: cvtepiA_epiT if A > T
case (SInt(a), SInt(t)) if a > t and varch < X86VectorArch.AVX512:
panic()
case _:
prefix = varch.intrin_prefix(widest_type)
opstr = f"cvt{varch.intrin_suffix(atype)}"
case _:
raise MaterializationError(f"Unable to select operation intrinsic for {type(op)}")
raise MaterializationError(
f"Unable to select operation intrinsic for {type(op)}"
)
num_args = 1 if isinstance(op, PsUnOp) else 2
return CFunction(f"{prefix}_{opstr}_{suffix}", (atype,) * num_args, rtype)
from __future__ import annotations
from typing import overload
from textwrap import indent
from typing import cast, overload
from dataclasses import dataclass
......@@ -11,7 +12,13 @@ from ..constants import PsConstant
from ..functions import PsMathFunction
from ..ast import PsAstNode
from ..ast.structural import PsBlock, PsDeclaration, PsAssignment
from ..ast.structural import (
PsBlock,
PsDeclaration,
PsAssignment,
PsLoop,
PsEmptyLeafMixIn,
)
from ..ast.expressions import (
PsExpression,
PsAddressOf,
......@@ -24,6 +31,7 @@ from ..ast.expressions import (
PsCall,
PsMemAcc,
PsBufferAcc,
PsSubscript,
PsAdd,
PsMul,
PsSub,
......@@ -148,6 +156,10 @@ class VectorizationContext:
)
return PsVectorType(scalar_type, self._lanes)
def axis_ctr_dependees(self, symbols: set[PsSymbol]) -> set[PsSymbol]:
"""Returns all symbols in `symbols` that depend on the axis counter."""
return symbols & (self.vectorized_symbols.keys() | {self.axis.counter})
@dataclass
class Affine:
......@@ -246,7 +258,7 @@ class AstVectorizer:
def __call__(self, node: PsAstNode, vc: VectorizationContext) -> PsAstNode:
"""Perform subtree vectorization.
Args:
node: Root of the subtree that should be vectorized
vc: Object describing the current vectorization context
......@@ -273,6 +285,14 @@ class AstVectorizer:
return PsDeclaration(vec_lhs, vec_rhs)
case PsAssignment(lhs, rhs):
if (
isinstance(lhs, PsSymbolExpr)
and lhs.symbol in vc.vectorized_symbols
):
return PsAssignment(
self.visit_expr(lhs, vc), self.visit_expr(rhs, vc)
)
if not isinstance(lhs, (PsMemAcc, PsBufferAcc)):
raise VectorizationError(f"Unable to vectorize assignment to {lhs}")
......@@ -286,6 +306,29 @@ class AstVectorizer:
rhs_vec = self.visit_expr(rhs, vc)
return PsAssignment(lhs_vec, rhs_vec)
case PsLoop(counter, start, stop, step, body):
# Check that loop bounds are lane-invariant
free_symbols = (
self._collect_symbols(start)
| self._collect_symbols(stop)
| self._collect_symbols(step)
)
vec_dependencies = vc.axis_ctr_dependees(free_symbols)
if vec_dependencies:
raise VectorizationError(
"Unable to vectorize loop depending on vectorized symbols:\n"
f" Offending dependencies:\n"
f" {vec_dependencies}\n"
f" Found in loop:\n"
f"{indent(str(node), ' ')}"
)
vectorized_body = cast(PsBlock, self.visit(body, vc))
return PsLoop(counter, start, stop, step, vectorized_body)
case PsEmptyLeafMixIn():
return node
case _:
raise NotImplementedError(f"Vectorization of {node} is not implemented")
......@@ -426,6 +469,23 @@ class AstVectorizer:
# Buffer access is lane-invariant
vec_expr = PsVecBroadcast(vc.lanes, expr.clone())
case PsSubscript(array, index):
# Check that array expression and indices are lane-invariant
free_symbols = self._collect_symbols(array).union(
*[self._collect_symbols(i) for i in index]
)
vec_dependencies = vc.axis_ctr_dependees(free_symbols)
if vec_dependencies:
raise VectorizationError(
"Unable to vectorize array subscript depending on vectorized symbols:\n"
f" Offending dependencies:\n"
f" {vec_dependencies}\n"
f" Found in expression:\n"
f"{indent(str(expr), ' ')}"
)
vec_expr = PsVecBroadcast(vc.lanes, expr.clone())
case _:
raise NotImplementedError(
f"Vectorization of {type(expr)} is not implemented"
......
......@@ -40,13 +40,7 @@ from ..ast.util import AstEqWrapper
from ..constants import PsConstant
from ..memory import PsSymbol
from ..functions import PsMathFunction
from ...types import (
PsNumericType,
PsBoolType,
PsScalarType,
PsVectorType,
constify
)
from ...types import PsNumericType, PsBoolType, PsScalarType, PsVectorType, constify
__all__ = ["EliminateConstants"]
......@@ -261,6 +255,9 @@ class EliminateConstants:
assert isinstance(target_type, PsNumericType)
return PsConstantExpr(c.reinterpret_as(target_type)), True
case PsCast(target_type, op) if target_type == op.get_dtype():
return op, all(subtree_constness)
case PsVecBroadcast(lanes, PsConstantExpr(c)):
scalar_type = c.get_dtype()
assert isinstance(scalar_type, PsScalarType)
......@@ -358,7 +355,10 @@ class EliminateConstants:
from ...utils import c_intdiv
folded = PsConstant(c_intdiv(v1, v2), dtype)
elif isinstance(dtype, PsNumericType) and dtype.is_float():
elif (
isinstance(dtype, PsNumericType)
and dtype.is_float()
):
folded = PsConstant(v1 / v2, dtype)
if folded is not None:
......
......@@ -4,11 +4,12 @@ from typing import cast
from ..kernelcreation import KernelCreationContext
from ..memory import PsSymbol
from ..ast.structural import PsAstNode, PsDeclaration, PsAssignment, PsStatement
from ..ast.expressions import PsExpression
from ...types import PsVectorType, constify, deconstify
from ..ast.expressions import PsExpression, PsCall, PsCast, PsLiteral
from ...types import PsCustomType, PsVectorType, constify, deconstify
from ..ast.expressions import PsSymbolExpr, PsConstantExpr, PsUnOp, PsBinOp
from ..ast.vector import PsVecMemAcc
from ..exceptions import MaterializationError
from ..functions import CFunction, PsMathFunction
from ..platforms import GenericVectorCpu
......@@ -39,22 +40,34 @@ class SelectionContext:
class SelectIntrinsics:
"""Lower IR vector types to intrinsic vector types, and IR vector operations to intrinsic vector operations.
This transformation will replace all vectorial IR elements by conforming implementations using
compiler intrinsics for the given execution platform.
Args:
ctx: The current kernel creation context
platform: Platform object representing the target hardware, which provides the intrinsics
use_builtin_convertvector: If `True`, type conversions between SIMD
vectors use the compiler builtin ``__builtin_convertvector``
instead of instrinsics. It is supported by Clang >= 3.7, GCC >= 9.1,
and ICX. Not supported by ICC or MSVC. Activate if you need type
conversions not natively supported by your CPU, e.g. conversion from
64bit integer to double on an x86 AVX machine. Defaults to `False`.
Raises:
MaterializationError: If a vector type or operation cannot be represented by intrinsics
on the given platform
"""
def __init__(self, ctx: KernelCreationContext, platform: GenericVectorCpu):
def __init__(
self,
ctx: KernelCreationContext,
platform: GenericVectorCpu,
use_builtin_convertvector: bool = False,
):
self._ctx = ctx
self._platform = platform
self._use_builtin_convertvector = use_builtin_convertvector
def __call__(self, node: PsAstNode) -> PsAstNode:
return self.visit(node, SelectionContext(self._ctx, self._platform))
......@@ -68,11 +81,11 @@ class SelectIntrinsics:
lhs_new = cast(PsSymbolExpr, self.visit_expr(lhs, sc))
rhs_new = self.visit_expr(rhs, sc)
return PsDeclaration(lhs_new, rhs_new)
case PsAssignment(lhs, rhs) if isinstance(lhs, PsVecMemAcc):
case PsAssignment(lhs, rhs) if isinstance(lhs, PsVecMemAcc):
new_rhs = self.visit_expr(rhs, sc)
return PsStatement(self._platform.vector_store(lhs, new_rhs))
case _:
node.children = [self.visit(c, sc) for c in node.children]
......@@ -89,6 +102,22 @@ class SelectIntrinsics:
case PsConstantExpr(c):
return self._platform.constant_intrinsic(c)
case PsCast(target_type, operand) if self._use_builtin_convertvector:
assert isinstance(target_type, PsVectorType)
op = self.visit_expr(operand, sc)
rtype = PsCustomType(
f"{target_type.scalar_type.c_string()} __attribute__((__vector_size__({target_type.itemsize})))"
)
target_type_literal = PsExpression.make(PsLiteral(rtype.name, rtype))
func = CFunction(
"__builtin_convertvector", (op.get_dtype(), rtype), target_type
)
intrinsic = func(op, target_type_literal)
intrinsic.dtype = func.return_type
return intrinsic
case PsUnOp(operand):
op = self.visit_expr(operand, sc)
return self._platform.op_intrinsic(expr, [op])
......@@ -102,6 +131,10 @@ class SelectIntrinsics:
case PsVecMemAcc():
return self._platform.vector_load(expr)
case PsCall(function, args) if isinstance(function, PsMathFunction):
arguments = [self.visit_expr(a, sc) for a in args]
return self._platform.math_func_intrinsic(expr, arguments)
case _:
raise MaterializationError(
f"Unable to select intrinsic implementation for {expr}"
......
......@@ -448,6 +448,7 @@ class CreateKernelConfig:
if cpu_openmp is not None:
_deprecated_option("cpu_openmp", "cpu_optim.openmp")
deprecated_omp: OpenMpConfig | bool
match cpu_openmp:
case True:
deprecated_omp = OpenMpConfig()
......
......@@ -116,6 +116,7 @@ class DefaultKernelCreationDriver:
self._target = self._cfg.get_target()
self._platform = self._get_platform()
self._intermediates: CodegenIntermediates | None
if retain_intermediates:
self._intermediates = CodegenIntermediates()
else:
......
......@@ -115,6 +115,18 @@ class Target(Flag):
return avail_targets.pop()
else:
return Target.GenericCPU
@staticmethod
def available_targets() -> list[Target]:
targets = [Target.GenericCPU]
try:
import cupy # noqa: F401
targets.append(Target.CUDA)
except ImportError:
pass
targets += Target.available_vector_cpu_targets()
return targets
@staticmethod
def available_vector_cpu_targets() -> list[Target]:
......
......@@ -31,11 +31,13 @@ except ImportError:
AVAILABLE_TARGETS += ps.Target.available_vector_cpu_targets()
TARGET_IDS = [t.name for t in AVAILABLE_TARGETS]
@pytest.fixture(params=AVAILABLE_TARGETS, ids=TARGET_IDS)
def target(request) -> ps.Target:
"""Provides all code generation targets available on the current hardware"""
return request.param
@pytest.fixture
def gen_config(target: ps.Target):
"""Default codegen configuration for the current target.
......@@ -56,6 +58,7 @@ def gen_config(target: ps.Target):
return gen_config
@pytest.fixture()
def xp(target: ps.Target) -> ModuleType:
"""Primary array module for the current target.
......
import sympy as sp
import numpy as np
import pytest
from pystencils import fields, create_kernel, CreateKernelConfig, Target, Assignment, Field
from dataclasses import replace
from itertools import product
from pystencils import (
fields,
create_kernel,
CreateKernelConfig,
Target,
Assignment,
Field,
)
from pystencils.backend.ast import dfs_preorder
from pystencils.backend.ast.expressions import PsCall
......@@ -34,38 +43,42 @@ def binary_function(name, xp):
}[name]
@pytest.mark.parametrize("target", (Target.GenericCPU, Target.CUDA))
AVAIL_TARGETS = Target.available_targets()
@pytest.mark.parametrize(
"function_name",
(
"exp",
"log",
"sin",
"cos",
"tan",
"sinh",
"cosh",
"asin",
"acos",
"atan",
"abs",
"floor",
"ceil",
),
"function_name, target",
list(
product(
(
"exp",
"log",
"sin",
"cos",
"tan",
"sinh",
"cosh",
"asin",
"acos",
"atan",
),
[t for t in AVAIL_TARGETS if Target._X86 not in t],
)
)
+ list(
product(
["floor", "ceil"], [t for t in AVAIL_TARGETS if Target._AVX512 not in t]
)
)
+ list(product(["abs"], AVAIL_TARGETS)),
)
@pytest.mark.parametrize("dtype", (np.float32, np.float64))
def test_unary_functions(target, function_name, dtype):
if target == Target.CUDA:
xp = pytest.importorskip("cupy")
else:
xp = np
def test_unary_functions(gen_config, xp, function_name, dtype):
sp_func, xp_func = unary_function(function_name, xp)
resolution = np.finfo(dtype).resolution
inp = xp.array(
[[0.1, 0.2, 0.3], [-0.8, -1.6, -12.592], [xp.pi, xp.e, 0.0]], dtype=dtype
)
# Array size should be larger than eight, such that vectorized kernels don't just run their remainder loop
inp = xp.array([0.1, 0.2, 0.0, -0.8, -1.6, -12.592, xp.pi, xp.e, -0.3], dtype=dtype)
outp = xp.zeros_like(inp)
reference = xp_func(inp)
......@@ -74,7 +87,7 @@ def test_unary_functions(target, function_name, dtype):
outp_field = inp_field.new_field_with_different_name("outp")
asms = [Assignment(outp_field.center(), sp_func(inp_field.center()))]
gen_config = CreateKernelConfig(target=target, default_dtype=dtype)
gen_config = replace(gen_config, default_dtype=dtype)
kernel = create_kernel(asms, gen_config)
kfunc = kernel.compile()
......@@ -83,28 +96,26 @@ def test_unary_functions(target, function_name, dtype):
xp.testing.assert_allclose(outp, reference, rtol=resolution)
@pytest.mark.parametrize("target", (Target.GenericCPU, Target.CUDA))
@pytest.mark.parametrize("function_name", ("min", "max", "pow", "atan2"))
@pytest.mark.parametrize(
"function_name,target",
list(product(["min", "max"], AVAIL_TARGETS))
+ list(
product(["pow", "atan2"], [t for t in AVAIL_TARGETS if Target._X86 not in t])
),
)
@pytest.mark.parametrize("dtype", (np.float32, np.float64))
def test_binary_functions(target, function_name, dtype):
if target == Target.CUDA:
xp = pytest.importorskip("cupy")
else:
xp = np
sp_func, np_func = binary_function(function_name, xp)
def test_binary_functions(gen_config, xp, function_name, dtype):
sp_func, xp_func = binary_function(function_name, xp)
resolution: dtype = np.finfo(dtype).resolution
inp = xp.array(
[[0.1, 0.2, 0.3], [-0.8, -1.6, -12.592], [xp.pi, xp.e, 0.0]], dtype=dtype
)
inp = xp.array([0.1, 0.2, 0.3, -0.8, -1.6, -12.592, xp.pi, xp.e, 0.0], dtype=dtype)
inp2 = xp.array(
[[3.1, -0.5, 21.409], [11.0, 1.0, -14e3], [2.0 * xp.pi, -xp.e, 0.0]],
[3.1, -0.5, 21.409, 11.0, 1.0, -14e3, 2.0 * xp.pi, -xp.e, 0.0],
dtype=dtype,
)
outp = xp.zeros_like(inp)
reference = np_func(inp, inp2)
reference = xp_func(inp, inp2)
inp_field = Field.create_from_numpy_array("inp", inp)
inp2_field = Field.create_from_numpy_array("inp2", inp)
......@@ -115,7 +126,7 @@ def test_binary_functions(target, function_name, dtype):
outp_field.center(), sp_func(inp_field.center(), inp2_field.center())
)
]
gen_config = CreateKernelConfig(target=target, default_dtype=dtype)
gen_config = replace(gen_config, default_dtype=dtype)
kernel = create_kernel(asms, gen_config)
kfunc = kernel.compile()
......@@ -124,26 +135,107 @@ def test_binary_functions(target, function_name, dtype):
xp.testing.assert_allclose(outp, reference, rtol=resolution)
@pytest.mark.parametrize('a', [sp.Symbol('a'), fields('a: float64[2d]').center])
dtype_and_target_for_integer_funcs = pytest.mark.parametrize(
"dtype, target",
list(product([np.int32], [t for t in AVAIL_TARGETS if t is not Target.CUDA]))
+ list(
product(
[np.int64],
[
t
for t in AVAIL_TARGETS
if t not in (Target.X86_SSE, Target.X86_AVX, Target.CUDA)
],
)
),
)
@dtype_and_target_for_integer_funcs
def test_integer_abs(gen_config, xp, dtype):
sp_func, xp_func = unary_function("abs", xp)
smallest = np.iinfo(dtype).min
largest = np.iinfo(dtype).max
inp = xp.array([-1, 0, 1, 3, -5, -312, smallest + 1, largest], dtype=dtype)
outp = xp.zeros_like(inp)
reference = xp_func(inp)
inp_field = Field.create_from_numpy_array("inp", inp)
outp_field = inp_field.new_field_with_different_name("outp")
asms = [Assignment(outp_field.center(), sp_func(inp_field.center()))]
gen_config = replace(gen_config, default_dtype=dtype)
kernel = create_kernel(asms, gen_config)
kfunc = kernel.compile()
kfunc(inp=inp, outp=outp)
xp.testing.assert_array_equal(outp, reference)
@pytest.mark.parametrize("function_name", ("min", "max"))
@dtype_and_target_for_integer_funcs
def test_integer_binary_functions(gen_config, xp, function_name, dtype):
sp_func, xp_func = binary_function(function_name, xp)
smallest = np.iinfo(dtype).min
largest = np.iinfo(dtype).max
inp1 = xp.array([-1, 0, 1, 3, -5, -312, smallest + 1, largest], dtype=dtype)
inp2 = xp.array([3, -5, 1, 12, 1, 11, smallest + 42, largest - 3], dtype=dtype)
outp = xp.zeros_like(inp1)
reference = xp_func(inp1, inp2)
inp_field = Field.create_from_numpy_array("inp1", inp1)
inp2_field = Field.create_from_numpy_array("inp2", inp2)
outp_field = inp_field.new_field_with_different_name("outp")
asms = [
Assignment(
outp_field.center(), sp_func(inp_field.center(), inp2_field.center())
)
]
gen_config = replace(gen_config, default_dtype=dtype)
kernel = create_kernel(asms, gen_config)
kfunc = kernel.compile()
kfunc(inp1=inp1, inp2=inp2, outp=outp)
xp.testing.assert_array_equal(outp, reference)
@pytest.mark.parametrize("a", [sp.Symbol("a"), fields("a: float64[2d]").center])
def test_avoid_pow(a):
x = fields('x: float64[2d]')
x = fields("x: float64[2d]")
up = Assignment(x.center_vector[0], 2 * a ** 2 / 3)
up = Assignment(x.center_vector[0], 2 * a**2 / 3)
func = create_kernel(up)
powers = list(dfs_preorder(func.body, lambda n: isinstance(n, PsCall) and "pow" in n.function.name))
powers = list(
dfs_preorder(
func.body, lambda n: isinstance(n, PsCall) and "pow" in n.function.name
)
)
assert not powers
@pytest.mark.xfail(reason="fast_div not available yet")
def test_avoid_pow_fast_div():
x = fields('x: float64[2d]')
a = fields('a: float64[2d]').center
x = fields("x: float64[2d]")
a = fields("a: float64[2d]").center
up = Assignment(x.center_vector[0], fast_division(1, (a**2)))
func = create_kernel(up, config=CreateKernelConfig(target=Target.GPU))
powers = list(dfs_preorder(func.body, lambda n: isinstance(n, PsCall) and "pow" in n.function.name))
powers = list(
dfs_preorder(
func.body, lambda n: isinstance(n, PsCall) and "pow" in n.function.name
)
)
assert not powers
......@@ -151,14 +243,23 @@ def test_avoid_pow_move_constants():
# At the end of the kernel creation the function move_constants_before_loop will be called
# This function additionally contains substitutions for symbols with the same value
# Thus it simplifies the equations again
x = fields('x: float64[2d]')
x = fields("x: float64[2d]")
a, b, c = sp.symbols("a, b, c")
up = [Assignment(a, 0.0),
Assignment(b, 0.0),
Assignment(c, 0.0),
Assignment(x.center_vector[0], a**2/18 - a*b/6 - a/18 + b**2/18 + b/18 - c**2/36)]
up = [
Assignment(a, 0.0),
Assignment(b, 0.0),
Assignment(c, 0.0),
Assignment(
x.center_vector[0],
a**2 / 18 - a * b / 6 - a / 18 + b**2 / 18 + b / 18 - c**2 / 36,
),
]
func = create_kernel(up)
powers = list(dfs_preorder(func.body, lambda n: isinstance(n, PsCall) and "pow" in n.function.name))
powers = list(
dfs_preorder(
func.body, lambda n: isinstance(n, PsCall) and "pow" in n.function.name
)
)
assert not powers