diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml
index 9f80cd261f75d6677ddf70f28331cd99f4fa7397..6c58a26bdaa7bbe4282a8683aa9578634c383051 100644
--- a/.gitlab-ci.yml
+++ b/.gitlab-ci.yml
@@ -286,7 +286,7 @@ mypy-typecheck:
 tests-and-coverage:
   stage: "Unit Tests"
   needs: []
-  image: i10git.cs.fau.de:5005/pycodegen/pycodegen/full
+  image: i10git.cs.fau.de:5005/pycodegen/pycodegen/full:cupy12.3
   before_script:
     - pip install -e .[tests]
   script:
@@ -318,7 +318,7 @@ tests-and-coverage:
 
 
 build-documentation:
-  image: i10git.cs.fau.de:5005/pycodegen/pycodegen/full
+  image: i10git.cs.fau.de:5005/pycodegen/pycodegen/full:cupy12.3
   stage: docs
   needs: []
   before_script:
diff --git a/docs/Makefile b/docs/Makefile
index 7720d146e574ad59bf65669f2c0acdcecc15e4c1..a293f14ee04261a3d46ac9e6b0924b5b62107a6b 100644
--- a/docs/Makefile
+++ b/docs/Makefile
@@ -12,14 +12,17 @@ BUILDDIR      = build
 help:
 	@$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
 
-.PHONY: help Makefile
+.PHONY: help html clean
 
 # Catch-all target: route all unknown targets to Sphinx using the new
 # "make mode" option.  $(O) is meant as a shortcut for $(SPHINXOPTS).
-%: Makefile
+# NOTE: the catch-all rule ("%: Makefile") is disabled; only the explicit targets below are available.
+
+html:
 	@$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
 
 clean:
 	rm -rf source/reference/generated
+	rm -rf source/api/generated
 	rm -rf source/backend/generated
 	@$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
\ No newline at end of file
diff --git a/docs/source/api/codegen.rst b/docs/source/api/codegen.rst
new file mode 100644
index 0000000000000000000000000000000000000000..d65e9a358296d017e11395050e5767d82d6569ac
--- /dev/null
+++ b/docs/source/api/codegen.rst
@@ -0,0 +1,72 @@
+pystencils.codegen
+==================
+
+.. module:: pystencils.codegen
+
+Invocation
+----------
+
+.. autosummary::
+  :toctree: generated
+  :nosignatures:
+
+  create_kernel
+
+Configuration
+-------------
+
+.. autosummary::
+  :toctree: generated
+  :nosignatures:
+  :template: autosummary/entire_class.rst
+
+  CreateKernelConfig
+  CpuOptimConfig
+  OpenMpConfig
+  VectorizationConfig
+  GpuIndexingConfig
+
+.. autosummary::
+  :toctree: generated
+  :nosignatures:
+
+  AUTO
+
+Target Specification
+--------------------
+
+.. autosummary::
+  :toctree: generated
+  :nosignatures:
+  :template: autosummary/recursive_class.rst
+
+  Target
+
+Code Generation Drivers
+-----------------------
+
+.. autosummary::
+  :toctree: generated
+  :nosignatures:
+  :template: autosummary/entire_class.rst
+
+  driver.DefaultKernelCreationDriver
+
+.. autosummary::
+  :toctree: generated
+  :nosignatures:
+
+  get_driver
+
+Output Code Objects
+-------------------
+
+.. autosummary::
+  :toctree: generated
+  :nosignatures:
+  :template: autosummary/entire_class.rst
+
+  Kernel
+  GpuKernel
+  Parameter
+  GpuThreadsRange
diff --git a/docs/source/reference/api/field.rst b/docs/source/api/field.rst
similarity index 100%
rename from docs/source/reference/api/field.rst
rename to docs/source/api/field.rst
diff --git a/docs/source/api/jit.rst b/docs/source/api/jit.rst
new file mode 100644
index 0000000000000000000000000000000000000000..7bcd9989c9f7871eb085e55b7161d1deddda87fc
--- /dev/null
+++ b/docs/source/api/jit.rst
@@ -0,0 +1,40 @@
+pystencils.jit
+==============
+
+.. module:: pystencils.jit
+
+Base Infrastructure
+-------------------
+
+.. autosummary::
+  :toctree: generated
+  :nosignatures:
+  :template: autosummary/entire_class.rst
+
+  KernelWrapper
+  JitBase
+  NoJit
+
+.. autodata:: no_jit
+
+Legacy CPU JIT
+--------------
+
+.. autosummary::
+  :toctree: generated
+  :nosignatures:
+  :template: autosummary/entire_class.rst
+
+  LegacyCpuJit
+
+CuPy-based GPU JIT
+------------------
+
+.. autosummary::
+  :toctree: generated
+  :nosignatures:
+  :template: autosummary/entire_class.rst
+
+  CupyJit
+  CupyKernelWrapper
+  LaunchGrid
diff --git a/docs/source/reference/api/sympyextensions.rst b/docs/source/api/sympyextensions.rst
similarity index 100%
rename from docs/source/reference/api/sympyextensions.rst
rename to docs/source/api/sympyextensions.rst
diff --git a/docs/source/backend/index.rst b/docs/source/backend/index.rst
index 74b57e27b6f267c04a0a08e2eead88cb99726b4e..5ab8dbd34eb37fbc38230f3db0506c572d4b6964 100644
--- a/docs/source/backend/index.rst
+++ b/docs/source/backend/index.rst
@@ -18,7 +18,6 @@ who wish to customize or extend the behaviour of the code generator in their app
     platforms
     transformations
     errors
-    jit
     extensions
 
 Internal Representation
diff --git a/docs/source/backend/jit.rst b/docs/source/backend/jit.rst
deleted file mode 100644
index f7a02dbd4d331046c99c8afccebf5764fc1225ec..0000000000000000000000000000000000000000
--- a/docs/source/backend/jit.rst
+++ /dev/null
@@ -1,6 +0,0 @@
-************************
-Just-In-Time Compilation
-************************
-
-.. automodule:: pystencils.backend.jit
-    :members:
diff --git a/docs/source/backend/objects.rst b/docs/source/backend/objects.rst
index 0505685661f82a467eab3ae2adee38fb0adf6bd3..942e6070f2c997c7bf3e59d67e7c44bd53806e12 100644
--- a/docs/source/backend/objects.rst
+++ b/docs/source/backend/objects.rst
@@ -76,7 +76,7 @@ The above alignment property, for instance, may be added to a pointer symbol by
 to document its assumption that the pointer be properly aligned, in order to emit aligned load and store instructions.
 It then becomes the responsibility of the runtime system embedding the kernel to check this prequesite before calling the kernel.
 To make sure this information becomes visible, any properties attached to symbols exposed as kernel parameters will also
-be added to their respective `KernelParameter` instance.
+be added to their respective `Parameter` instance.
 
 Buffers
 -------
@@ -110,7 +110,7 @@ The context makes sure to avoid name conflicts between buffers.
 API Documentation
 =================
 
-.. automodule:: pystencils.backend.properties
+.. automodule:: pystencils.codegen.properties
     :members:
 
 .. automodule:: pystencils.backend.memory
diff --git a/docs/source/index.rst b/docs/source/index.rst
index 66582cb4b6e847fc6b3022c1726257d866b3ba85..6aa09bdbdf74e070f03d450d37ea501230d88c02 100644
--- a/docs/source/index.rst
+++ b/docs/source/index.rst
@@ -69,28 +69,36 @@ Topics
 ------
 
 .. toctree::
-   :maxdepth: 1
-   :caption: Getting Started
+  :maxdepth: 1
+  :caption: Getting Started
 
-   installation
-   tutorials/index
+  installation
+  tutorials/index
 
 .. toctree::
-   :maxdepth: 1
-   :caption: Reference Guides
+  :maxdepth: 1
+  :caption: Reference Guides
 
-   reference/symbolic_language
-   reference/kernelcreation
-   reference/gpu_kernels
-   reference/types
-   reference/api/index
+  reference/symbolic_language
+  reference/kernelcreation
+  reference/gpu_kernels
+  reference/types
 
 .. toctree::
-   :maxdepth: 1
-   :caption: Advanced
+  :maxdepth: 1
+  :caption: API
 
-   migration
-   backend/index
+  api/field
+  api/sympyextensions
+  api/codegen
+  api/jit
+
+.. toctree::
+  :maxdepth: 1
+  :caption: Advanced
+
+  migration
+  backend/index
 
 Projects using pystencils
 -------------------------
diff --git a/docs/source/reference/api/codegen.rst b/docs/source/reference/api/codegen.rst
deleted file mode 100644
index 6418f32f6fe6d78267a373150fe7a2257c5e0b97..0000000000000000000000000000000000000000
--- a/docs/source/reference/api/codegen.rst
+++ /dev/null
@@ -1,29 +0,0 @@
-Code Generator and Configuration
-================================
-
-.. module:: pystencils.kernelcreation
-
-.. autosummary::
-  :toctree: generated
-  :nosignatures:
-
-  create_kernel
-
-.. module:: pystencils.config
-
-.. autosummary::
-  :toctree: generated
-  :nosignatures:
-  :template: autosummary/entire_class.rst
-
-  CreateKernelConfig
-  CpuOptimConfig
-  OpenMpConfig
-  VectorizationConfig
-  GpuIndexingConfig
-
-.. autosummary::
-  :toctree: generated
-  :nosignatures:
-
-  AUTO
\ No newline at end of file
diff --git a/docs/source/reference/api/index.rst b/docs/source/reference/api/index.rst
deleted file mode 100644
index b19c6303eb578f4b621febe507519ae3822df20a..0000000000000000000000000000000000000000
--- a/docs/source/reference/api/index.rst
+++ /dev/null
@@ -1,13 +0,0 @@
-***
-API
-***
-
-Modules
-=======
-
-.. toctree::
-    :maxdepth: 1
-
-    field
-    sympyextensions
-    codegen
diff --git a/docs/source/reference/gpu_kernels.md b/docs/source/reference/gpu_kernels.md
index 1e9456bf7e22e14a4470c737af91b0dc1a5d949d..786840d182b0e06d4e26085cf6a95dbcb31d16b2 100644
--- a/docs/source/reference/gpu_kernels.md
+++ b/docs/source/reference/gpu_kernels.md
@@ -49,9 +49,9 @@ ps.inspect(kernel)
 ```
 
 The `kernel` object returned by the code generator in above snippet is an instance
-of the {py:class}`GpuKernelFunction` class.
-It extends {py:class}`KernelFunction` with some GPU-specific information.
-In particular, it defines the {any}`threads_range <GpuKernelFunction.threads_range>`
+of the {py:class}`GpuKernel` class.
+It extends {py:class}`Kernel` with some GPU-specific information.
+In particular, it defines the {any}`threads_range <GpuKernel.threads_range>`
 property, which tells us how many threads the kernel is expecting to be executed with:
 
 ```{code-cell} ipython3
@@ -208,12 +208,10 @@ only a part of the triangle is being processed.
 
 ```{eval-rst}
 .. autosummary::
-  :toctree: generated
   :nosignatures:
-  :template: autosummary/recursive_class.rst
 
-  pystencils.backend.kernelfunction.GpuKernelFunction
-  pystencils.backend.jit.gpu_cupy.CupyKernelWrapper
+  pystencils.codegen.GpuKernel
+  pystencils.jit.gpu_cupy.CupyKernelWrapper
 ```
 
 :::{admonition} Developers To Do:
@@ -226,4 +224,4 @@ only a part of the triangle is being processed.
 [cupy]: https://cupy.dev "CuPy Homepage"
 [numpy]: https://numpy.org "NumPy Homepage"
 [nvcc]: https://docs.nvidia.com/cuda/cuda-compiler-driver-nvcc/index.html "NVIDIA CUDA Compiler Driver"
-[cupy-docs]: https://docs.cupy.dev/en/stable/overview.html "CuPy Documentation"
\ No newline at end of file
+[cupy-docs]: https://docs.cupy.dev/en/stable/overview.html "CuPy Documentation"
diff --git a/docs/source/reference/kernelcreation.md b/docs/source/reference/kernelcreation.md
index af8c01456cdcfebe911c3b26c913fb36b0c744cd..248855fc1c755d9fee4c62c7db07d469d3ac84ed 100644
--- a/docs/source/reference/kernelcreation.md
+++ b/docs/source/reference/kernelcreation.md
@@ -34,17 +34,19 @@ and their effects on the generated kernel.
 
 ## Running the Code Generator
 
-The primary way to invoke the code generation engine is through the `create_kernel` function.
+The primary way to invoke the code generation engine is through the {any}`create_kernel` function.
 It takes two arguments:
 - the list of assignment that make up the kernel (optionally wrapped as an ``AssignmentCollection``),
-- and a configuration object, an instance of {any}`CreateKernelConfig <pystencils.config.CreateKernelConfig>`.
+- and a configuration object, an instance of {any}`CreateKernelConfig <pystencils.codegen.CreateKernelConfig>`.
 
 ```{eval-rst}
+.. currentmodule:: pystencils.codegen
+
 .. autosummary::
   :nosignatures:
 
-  pystencils.kernelcreation.create_kernel
-  pystencils.config.CreateKernelConfig
+  create_kernel
+  CreateKernelConfig
 ```
 
 For a simple kernel, an invocation of the code generator might look like this:
@@ -82,7 +84,7 @@ The above snippet defines a five-point-stencil Jacobi update. A few noteworthy t
 
 ## Inspecting the Generated Code
 
-The object returned by the code generator, here named `kernel`, is an instance of the {any}`KernelFunction` class.
+The object returned by the code generator, here named `kernel`, is an instance of the {any}`Kernel` class.
 This object stores the kernel's name, its list of parameters, the set of fields it operates on, and its hardware target.
 Also, it of course holds the kernel itself, in the form of an [abstract syntax tree](https://en.wikipedia.org/wiki/Abstract_syntax_tree) (AST).
 This tree can be printed out as compilable code in the target language (C++ or, in this case, CUDA),
@@ -110,21 +112,14 @@ their interaction and effects, use cases and caveats.
 Pystencils supports code generation for a variety of CPU and GPU hardware.
 
 ```{eval-rst}
-.. currentmodule:: pystencils.config
+.. currentmodule:: pystencils.codegen
 
 .. autosummary::
   :nosignatures:
 
   CreateKernelConfig.target
-
-.. module:: pystencils.target
-
-.. autosummary::
-  :toctree: generated
-  :nosignatures:
-  :template: autosummary/recursive_class.rst
-
   Target
+
 ```
 
 ### Data Types
@@ -176,7 +171,7 @@ are using the `int32` data type, as specified in {py:data}`index_dtype <CreateKe
 ```{code-cell} ipython3
 :tags: [remove-input]
 
-driver = ps.kernelcreation.get_driver(cfg, retain_intermediates=True)
+driver = ps.codegen.get_driver(cfg, retain_intermediates=True)
 kernel = driver(assignments)
 ps.inspect(driver.intermediates.materialized_ispace, show_cpp=False)
 ```
@@ -186,7 +181,7 @@ To learn more about inspecting code after different stages of the code generator
 :::
 
 ```{eval-rst}
-.. currentmodule:: pystencils.config
+.. currentmodule:: pystencils.codegen
 
 .. autosummary::
   :nosignatures:
@@ -220,7 +215,7 @@ only one of which can be specified at a time:
 :::
 
 ```{eval-rst}
-.. currentmodule:: pystencils.config
+.. currentmodule:: pystencils.codegen
 
 .. autosummary::
   :nosignatures:
@@ -260,7 +255,7 @@ boundary values or exchange data in MPI-parallel simulations.
 ##### Automatic Ghost Layers
 
 The easiest way to define an iteration space with ghost layers
-is to set `ghost_layers=ps.config.AUTO`, which is also the default
+is to set `ghost_layers=ps.AUTO`, which is also the default
 when no iteration space options are specified.
 In this case, the code generator will examine the kernel to find the maximum range
 of its stencil -- that is, the maximum neighbor offset encountered in any field access.
@@ -281,11 +276,11 @@ To illustrate, the following kernel accesses neighbor nodes with a maximum offse
 ```{code-cell} ipython3
 ranged_update = ps.Assignment(u.center(), v[-2, -1] + v[2, 1])
 
-cfg = ps.CreateKernelConfig(ghost_layers=ps.config.AUTO)
+cfg = ps.CreateKernelConfig(ghost_layers=ps.AUTO)
 kernel = ps.create_kernel(ranged_update, cfg)
 ```
 
-With `ghost_layers=ps.config.AUTO`, its iteration space will look like this (yellow cells are included, purple cells excluded).
+With `ghost_layers=ps.AUTO`, its iteration space will look like this (yellow cells are included, purple cells excluded).
 
 ```{code-cell} ipython3
 :tags: [remove-input]
@@ -506,22 +501,7 @@ assignments = [
 ```
 
 ```{code-cell} ipython3
-driver = ps.kernelcreation.get_driver(cfg, retain_intermediates=True)
+driver = ps.codegen.get_driver(cfg, retain_intermediates=True)
 kernel = driver(assignments)
 ps.inspect(driver.intermediates)
 ```
-
-## API: Kernel Parameters and Function Objects
-
-```{eval-rst}
-.. module:: pystencils.backend.kernelfunction
-
-.. autosummary::
-  :toctree: generated
-  :nosignatures:
-  :template: autosummary/entire_class.rst
-
-  KernelParameter
-  KernelFunction
-  GpuKernelFunction
-```
diff --git a/docs/source/tutorials/01_tutorial_getting_started.ipynb b/docs/source/tutorials/01_tutorial_getting_started.ipynb
index baa3aac6ac9ad5a42db9244ff03d5f34e246530f..5ce765fcea33088463c5e5274cab8fb5654f6229 100644
--- a/docs/source/tutorials/01_tutorial_getting_started.ipynb
+++ b/docs/source/tutorials/01_tutorial_getting_started.ipynb
@@ -1140,7 +1140,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 72,
+   "execution_count": null,
    "metadata": {},
    "outputs": [
     {
@@ -1270,8 +1270,8 @@
    "source": [
     "ast = ps.create_kernel(\n",
     "    update_rule,\n",
-    "    cpu_optim = ps.config.CpuOptimConfig(\n",
-    "        openmp=ps.config.OpenMpConfig(num_threads=2))\n",
+    "    cpu_optim = ps.CpuOptimConfig(\n",
+    "        openmp=ps.OpenMpConfig(num_threads=2))\n",
     "    )\n",
     "\n",
     "ps.show_code(ast)"
@@ -1472,7 +1472,7 @@
  "metadata": {
   "anaconda-cloud": {},
   "kernelspec": {
-   "display_name": "Python 3 (ipykernel)",
+   "display_name": ".venv",
    "language": "python",
    "name": "python3"
   },
diff --git a/pytest.ini b/pytest.ini
index b43b0f00ce528fc8ca1cdd8355c0168d98b6e608..281eaa21ec0b07aabfbb1a16c3ad9938946e6371 100644
--- a/pytest.ini
+++ b/pytest.ini
@@ -40,6 +40,7 @@ omit = doc/*
        src/pystencils/cache.py
        src/pystencils/pacxx/benchmark.py
        src/pystencils/_version.py
+       src/pystencils/_deprecation.py
        src/pystencils/old
        venv/
 
@@ -62,6 +63,9 @@ exclude_lines =
        if False:
        if __name__ == .__main__.:
 
+       # Don't cover type checking imports
+       if TYPE_CHECKING:
+
 skip_covered = True
 fail_under = 80
 
diff --git a/src/pystencils/__init__.py b/src/pystencils/__init__.py
index b2cdeca07d6040e198f23f1b5666352fdfb991be..4f8b2660777dbe7a1fa535cfbbc2e64458a0d692 100644
--- a/src/pystencils/__init__.py
+++ b/src/pystencils/__init__.py
@@ -1,6 +1,14 @@
 """Module to generate stencil kernels in C or CUDA using sympy expressions and call them as Python functions"""
 
-from .target import Target
+from .codegen import (
+    Target,
+    CreateKernelConfig,
+    CpuOptimConfig,
+    VectorizationConfig,
+    OpenMpConfig,
+    GpuIndexingConfig,
+    AUTO
+)
 from .defaults import DEFAULTS
 from . import fd
 from . import stencil as stencil
@@ -9,17 +17,10 @@ from .inspection import inspect
 from .field import Field, FieldType, fields
 from .types import create_type, create_numeric_type
 from .cache import clear_cache
-from .config import (
-    CreateKernelConfig,
-    CpuOptimConfig,
-    VectorizationConfig,
-    OpenMpConfig,
-    GpuIndexingConfig,
-)
 from .kernel_decorator import kernel, kernel_config
 from .kernelcreation import create_kernel, create_staggered_kernel
-from .backend.kernelfunction import KernelFunction
-from .backend.jit import no_jit
+from .codegen import Kernel
+from .jit import no_jit
 from .backend.exceptions import KernelConstraintsError
 from .slicing import make_slice
 from .spatial_coordinates import (
@@ -53,9 +54,10 @@ __all__ = [
     "VectorizationConfig",
     "GpuIndexingConfig",
     "OpenMpConfig",
+    "AUTO",
     "create_kernel",
     "create_staggered_kernel",
-    "KernelFunction",
+    "Kernel",
     "KernelConstraintsError",
     "Target",
     "no_jit",
diff --git a/src/pystencils/_deprecation.py b/src/pystencils/_deprecation.py
index 29ee648a7a9655e556986ae5404e335b94924fc9..68218f0ae3992cfca35db7f44d251be6fe609ea7 100644
--- a/src/pystencils/_deprecation.py
+++ b/src/pystencils/_deprecation.py
@@ -5,4 +5,5 @@ def _deprecated(feature, instead, version="2.1"):
         f"{feature} is deprecated and will be removed in pystencils {version}."
         f"Use {instead} instead.",
         DeprecationWarning,
+        stacklevel=2
     )
diff --git a/src/pystencils/backend/__init__.py b/src/pystencils/backend/__init__.py
index b947a112ecb2be7762fefdf54afd4dffc185c319..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 100644
--- a/src/pystencils/backend/__init__.py
+++ b/src/pystencils/backend/__init__.py
@@ -1,14 +0,0 @@
-from .kernelfunction import (
-    KernelParameter,
-    KernelFunction,
-    GpuKernelFunction,
-)
-
-from .constraints import KernelParamsConstraint
-
-__all__ = [
-    "KernelParameter",
-    "KernelFunction",
-    "GpuKernelFunction",
-    "KernelParamsConstraint",
-]
diff --git a/src/pystencils/backend/constraints.py b/src/pystencils/backend/constraints.py
deleted file mode 100644
index 229f6718c65e5e4941e33aa09b5363f5962abae5..0000000000000000000000000000000000000000
--- a/src/pystencils/backend/constraints.py
+++ /dev/null
@@ -1,22 +0,0 @@
-from __future__ import annotations
-
-from typing import Any, TYPE_CHECKING
-from dataclasses import dataclass
-
-if TYPE_CHECKING:
-    from .kernelfunction import KernelParameter
-
-
-@dataclass
-class KernelParamsConstraint:
-    condition: Any  # FIXME Implement conditions
-    message: str = ""
-
-    def to_code(self):
-        raise NotImplementedError()
-
-    def get_parameters(self) -> set[KernelParameter]:
-        raise NotImplementedError()
-
-    def __str__(self) -> str:
-        return f"{self.message} [{self.condition}]"
diff --git a/src/pystencils/backend/emission/base_printer.py b/src/pystencils/backend/emission/base_printer.py
index 50cd1bfeaf7f97295e4c6c557fa26acf933c04b7..a4358bbf328b65aaf5e45eff5a2083ef067285a6 100644
--- a/src/pystencils/backend/emission/base_printer.py
+++ b/src/pystencils/backend/emission/base_printer.py
@@ -1,8 +1,9 @@
 from __future__ import annotations
 from enum import Enum
 from abc import ABC, abstractmethod
+from typing import TYPE_CHECKING
 
-from ...target import Target
+from ...codegen import Target
 
 from ..ast.structural import (
     PsAstNode,
@@ -59,7 +60,8 @@ from ..memory import PsSymbol
 from ..constants import PsConstant
 from ...types import PsType
 
-from ..kernelfunction import KernelFunction, GpuKernelFunction
+if TYPE_CHECKING:
+    from ...codegen import Kernel
 
 
 class EmissionError(Exception):
@@ -172,8 +174,9 @@ class BasePrinter(ABC):
     def __init__(self, indent_width=3):
         self._indent_width = indent_width
 
-    def __call__(self, obj: PsAstNode | KernelFunction) -> str:
-        if isinstance(obj, KernelFunction):
+    def __call__(self, obj: PsAstNode | Kernel) -> str:
+        from ...codegen import Kernel
+        if isinstance(obj, Kernel):
             sig = self.print_signature(obj)
             body_code = self.visit(obj.body, PrinterCtx())
             return f"{sig}\n{body_code}"
@@ -372,7 +375,7 @@ class BasePrinter(ABC):
                     f"BasePrinter does not know how to print {type(node)}"
                 )
 
-    def print_signature(self, func: KernelFunction) -> str:
+    def print_signature(self, func: Kernel) -> str:
         prefix = self._func_prefix(func)
         params_str = ", ".join(
             f"{self._type_str(p.dtype)} {p.name}" for p in func.parameters
@@ -380,8 +383,10 @@ class BasePrinter(ABC):
         signature = " ".join([prefix, "void", func.name, f"({params_str})"])
         return signature
 
-    def _func_prefix(self, func: KernelFunction):
-        if isinstance(func, GpuKernelFunction) and func.target == Target.CUDA:
+    def _func_prefix(self, func: Kernel):
+        from ...codegen import GpuKernel
+
+        if isinstance(func, GpuKernel) and func.target == Target.CUDA:
             return "__global__"
         else:
             return "FUNC_PREFIX"
diff --git a/src/pystencils/backend/emission/c_printer.py b/src/pystencils/backend/emission/c_printer.py
index 95e27bd66732f04e3d20767cf8c6d35d0cfd2450..90a7e54e22b3eb14866c9260c85247baf8b4f340 100644
--- a/src/pystencils/backend/emission/c_printer.py
+++ b/src/pystencils/backend/emission/c_printer.py
@@ -1,18 +1,23 @@
+from __future__ import annotations
+from typing import TYPE_CHECKING
+
 from pystencils.backend.ast.astnode import PsAstNode
 from pystencils.backend.constants import PsConstant
 from pystencils.backend.emission.base_printer import PrinterCtx, EmissionError
 from pystencils.backend.memory import PsSymbol
 from .base_printer import BasePrinter
 
-from ..kernelfunction import KernelFunction
 from ...types import PsType, PsArrayType, PsScalarType, PsTypeError
 from ..ast.expressions import PsBufferAcc
 from ..ast.vector import PsVecMemAcc
 
+if TYPE_CHECKING:
+    from ...codegen import Kernel
+
 
-def emit_code(kernel: KernelFunction):
+def emit_code(ast: PsAstNode | Kernel):
     printer = CAstPrinter()
-    return printer(kernel)
+    return printer(ast)
 
 
 class CAstPrinter(BasePrinter):
diff --git a/src/pystencils/backend/emission/ir_printer.py b/src/pystencils/backend/emission/ir_printer.py
index 124ce200d3aab9e3b111dd0481bd8bc7faad817f..ffb65181ccd71ff95dffd6d006617dadc6809eea 100644
--- a/src/pystencils/backend/emission/ir_printer.py
+++ b/src/pystencils/backend/emission/ir_printer.py
@@ -1,3 +1,6 @@
+from __future__ import annotations
+from typing import TYPE_CHECKING
+
 from pystencils.backend.constants import PsConstant
 from pystencils.backend.emission.base_printer import PrinterCtx
 from pystencils.backend.memory import PsSymbol
@@ -9,8 +12,11 @@ from ..ast import PsAstNode
 from ..ast.expressions import PsBufferAcc
 from ..ast.vector import PsVecMemAcc, PsVecBroadcast
 
+if TYPE_CHECKING:
+    from ...codegen import Kernel
+
 
-def emit_ir(ir: PsAstNode):
+def emit_ir(ir: PsAstNode | Kernel):
     """Emit the IR as C-like pseudo-code for inspection."""
     ir_printer = IRAstPrinter()
     return ir_printer(ir)
diff --git a/src/pystencils/backend/kernelcreation/context.py b/src/pystencils/backend/kernelcreation/context.py
index 1cf159cf4a07b85122cb574f334b99509cee000b..39fb8ef6dac855553b7e18d2a688c67ca45fb227 100644
--- a/src/pystencils/backend/kernelcreation/context.py
+++ b/src/pystencils/backend/kernelcreation/context.py
@@ -10,7 +10,6 @@ from ...field import Field, FieldType
 from ...sympyextensions.typed_sympy import TypedSymbol, DynamicType
 
 from ..memory import PsSymbol, PsBuffer
-from ..properties import FieldShape, FieldStride
 from ..constants import PsConstant
 from ...types import (
     PsType,
@@ -19,7 +18,6 @@ from ...types import (
     PsPointerType,
     deconstify,
 )
-from ..constraints import KernelParamsConstraint
 from ..exceptions import PsInternalCompilerError, KernelConstraintsError
 
 from .iteration_space import IterationSpace, FullIterationSpace, SparseIterationSpace
@@ -82,7 +80,6 @@ class KernelCreationContext:
 
         self._ispace: IterationSpace | None = None
 
-        self._constraints: list[KernelParamsConstraint] = []
         self._req_headers: set[str] = set()
 
         self._metadata: dict[str, Any] = dict()
@@ -97,15 +94,6 @@ class KernelCreationContext:
         """Data type used by default for index expressions"""
         return self._index_dtype
 
-    #   Constraints
-
-    def add_constraints(self, *constraints: KernelParamsConstraint):
-        self._constraints += constraints
-
-    @property
-    def constraints(self) -> tuple[KernelParamsConstraint, ...]:
-        return tuple(self._constraints)
-
     @property
     def metadata(self) -> dict[str, Any]:
         return self._metadata
@@ -371,6 +359,8 @@ class KernelCreationContext:
             buf_shape += [convert_size(1)]
             buf_strides += [convert_size(1)]
 
+        from ...codegen.properties import FieldShape, FieldStride
+
         for i, size in enumerate(buf_shape):
             if isinstance(size, PsSymbol):
                 size.add_property(FieldShape(field, i))
@@ -410,6 +400,8 @@ class KernelCreationContext:
         buf_shape: list[PsSymbol | PsConstant]
 
         if isinstance(buffer_len, TypedSymbol):
+            from ...codegen.properties import FieldShape
+
             idx_type = self._normalize_type(buffer_len)
             len_symb = self.get_symbol(buffer_len.name, idx_type)
             len_symb.add_property(FieldShape(field, 0))
diff --git a/src/pystencils/backend/kernelcreation/iteration_space.py b/src/pystencils/backend/kernelcreation/iteration_space.py
index 9df9883ce67d6d856335a7a7a9537f829b7df11e..031a0d843f3f5a648f2cd8c390134ba308c1c833 100644
--- a/src/pystencils/backend/kernelcreation/iteration_space.py
+++ b/src/pystencils/backend/kernelcreation/iteration_space.py
@@ -6,7 +6,6 @@ from functools import reduce
 from operator import mul
 
 from ...defaults import DEFAULTS
-from ...config import _AUTO_TYPE, AUTO
 from ...simp import AssignmentCollection
 from ...field import Field, FieldType
 
@@ -18,6 +17,7 @@ from ...types import PsStructType
 from ..exceptions import PsInputError, KernelConstraintsError
 
 if TYPE_CHECKING:
+    from ...codegen.config import _AUTO_TYPE
     from .context import KernelCreationContext
 
 
@@ -457,6 +457,8 @@ def create_full_iteration_space(
     # Otherwise, if an iteration slice was specified, use that
     # Otherwise, use the inferred ghost layers
 
+    from ...codegen.config import AUTO, _AUTO_TYPE
+
     if ghost_layers is AUTO:
         if len(domain_field_accesses) > 0:
             inferred_gls = max(
diff --git a/src/pystencils/backend/kernelfunction.py b/src/pystencils/backend/kernelfunction.py
deleted file mode 100644
index e2161590e7023728d55b35f7355d25ea94e21438..0000000000000000000000000000000000000000
--- a/src/pystencils/backend/kernelfunction.py
+++ /dev/null
@@ -1,342 +0,0 @@
-from __future__ import annotations
-
-from warnings import warn
-from typing import Callable, Sequence, Iterable, Any, TYPE_CHECKING
-from itertools import chain
-
-from .._deprecation import _deprecated
-
-from .ast.structural import PsBlock
-from .ast.analysis import collect_required_headers, collect_undefined_symbols
-from .memory import PsSymbol
-from .properties import (
-    PsSymbolProperty,
-    _FieldProperty,
-    FieldShape,
-    FieldStride,
-    FieldBasePtr,
-)
-from .kernelcreation.context import KernelCreationContext
-from .platforms import Platform, GpuThreadsRange
-
-from .constraints import KernelParamsConstraint
-from ..types import PsType
-
-from ..target import Target
-from ..field import Field
-from ..sympyextensions import TypedSymbol
-
-if TYPE_CHECKING:
-    from .jit import JitBase
-
-
-class KernelParameter:
-    """Parameter to a `KernelFunction`."""
-
-    __match_args__ = ("name", "dtype", "properties")
-
-    def __init__(
-        self, name: str, dtype: PsType, properties: Iterable[PsSymbolProperty] = ()
-    ):
-        self._name = name
-        self._dtype = dtype
-        self._properties: frozenset[PsSymbolProperty] = (
-            frozenset(properties) if properties is not None else frozenset()
-        )
-        self._fields: tuple[Field, ...] = tuple(
-            sorted(
-                set(
-                    p.field  # type: ignore
-                    for p in filter(
-                        lambda p: isinstance(p, _FieldProperty), self._properties
-                    )
-                ),
-                key=lambda f: f.name
-            )
-        )
-
-    @property
-    def name(self):
-        return self._name
-
-    @property
-    def dtype(self):
-        return self._dtype
-
-    def _hashable_contents(self):
-        return (self._name, self._dtype, self._properties)
-
-    #   TODO: Need?
-    def __hash__(self) -> int:
-        return hash(self._hashable_contents())
-
-    def __eq__(self, other: object) -> bool:
-        if not isinstance(other, KernelParameter):
-            return False
-
-        return (
-            type(self) is type(other)
-            and self._hashable_contents() == other._hashable_contents()
-        )
-
-    def __str__(self) -> str:
-        return self._name
-
-    def __repr__(self) -> str:
-        return f"{type(self).__name__}(name = {self._name}, dtype = {self._dtype})"
-
-    @property
-    def symbol(self) -> TypedSymbol:
-        return TypedSymbol(self.name, self.dtype)
-
-    @property
-    def fields(self) -> Sequence[Field]:
-        """Set of fields associated with this parameter."""
-        return self._fields
-
-    def get_properties(
-        self, prop_type: type[PsSymbolProperty] | tuple[type[PsSymbolProperty], ...]
-    ) -> set[PsSymbolProperty]:
-        """Retrieve all properties of the given type(s) attached to this parameter"""
-        return set(filter(lambda p: isinstance(p, prop_type), self._properties))
-
-    @property
-    def properties(self) -> frozenset[PsSymbolProperty]:
-        return self._properties
-
-    @property
-    def is_field_parameter(self) -> bool:
-        return bool(self._fields)
-
-    #   Deprecated legacy properties
-    #   These are kept mostly for the legacy waLBerla code generation system
-
-    @property
-    def is_field_pointer(self) -> bool:
-        warn(
-            "`is_field_pointer` is deprecated and will be removed in a future version of pystencils. "
-            "Use `param.get_properties(FieldBasePtr)` instead.",
-            DeprecationWarning,
-        )
-        return bool(self.get_properties(FieldBasePtr))
-
-    @property
-    def is_field_stride(self) -> bool:
-        warn(
-            "`is_field_stride` is deprecated and will be removed in a future version of pystencils. "
-            "Use `param.get_properties(FieldStride)` instead.",
-            DeprecationWarning,
-        )
-        return bool(self.get_properties(FieldStride))
-
-    @property
-    def is_field_shape(self) -> bool:
-        warn(
-            "`is_field_shape` is deprecated and will be removed in a future version of pystencils. "
-            "Use `param.get_properties(FieldShape)` instead.",
-            DeprecationWarning,
-        )
-        return bool(self.get_properties(FieldShape))
-
-    @property
-    def field_name(self) -> str:
-        warn(
-            "`field_name` is deprecated and will be removed in a future version of pystencils. "
-            "Use `param.fields[0].name` instead.",
-            DeprecationWarning,
-        )
-        return self._fields[0].name
-
-
-class KernelFunction:
-    """A pystencils kernel function.
-
-    The kernel function is the final result of the translation process.
-    It is immutable, and its AST should not be altered any more, either, as this
-    might invalidate information about the kernel already stored in the `KernelFunction` object.
-    """
-
-    def __init__(
-        self,
-        body: PsBlock,
-        target: Target,
-        name: str,
-        parameters: Sequence[KernelParameter],
-        required_headers: set[str],
-        constraints: Sequence[KernelParamsConstraint],
-        jit: JitBase,
-    ):
-        self._body: PsBlock = body
-        self._target = target
-        self._name = name
-        self._params = tuple(parameters)
-        self._required_headers = required_headers
-        self._constraints = tuple(constraints)
-        self._jit = jit
-        self._metadata: dict[str, Any] = dict()
-
-    @property
-    def metadata(self) -> dict[str, Any]:
-        return self._metadata
-
-    @property
-    def body(self) -> PsBlock:
-        return self._body
-
-    @property
-    def target(self) -> Target:
-        return self._target
-
-    @property
-    def name(self) -> str:
-        return self._name
-
-    @name.setter
-    def name(self, n: str):
-        self._name = n
-
-    @property
-    def function_name(self) -> str:
-        _deprecated("function_name", "name")
-        return self._name
-
-    @function_name.setter
-    def function_name(self, n: str):
-        _deprecated("function_name", "name")
-        self._name = n
-
-    @property
-    def parameters(self) -> tuple[KernelParameter, ...]:
-        return self._params
-
-    def get_parameters(self) -> tuple[KernelParameter, ...]:
-        _deprecated("KernelFunction.get_parameters", "KernelFunction.parameters")
-        return self.parameters
-
-    def get_fields(self) -> set[Field]:
-        return set(chain.from_iterable(p.fields for p in self._params))
-
-    @property
-    def fields_accessed(self) -> set[Field]:
-        warn(
-            "`fields_accessed` is deprecated and will be removed in a future version of pystencils. "
-            "Use `get_fields` instead.",
-            DeprecationWarning,
-        )
-        return self.get_fields()
-
-    @property
-    def required_headers(self) -> set[str]:
-        return self._required_headers
-
-    @property
-    def constraints(self) -> tuple[KernelParamsConstraint, ...]:
-        return self._constraints
-
-    def compile(self) -> Callable[..., None]:
-        """Invoke the underlying just-in-time compiler to obtain the kernel as an executable Python function."""
-        return self._jit.compile(self)
-
-
-def create_cpu_kernel_function(
-    ctx: KernelCreationContext,
-    platform: Platform,
-    body: PsBlock,
-    function_name: str,
-    target_spec: Target,
-    jit: JitBase,
-):
-    undef_symbols = collect_undefined_symbols(body)
-
-    params = _get_function_params(ctx, undef_symbols)
-    req_headers = _get_headers(ctx, platform, body)
-
-    kfunc = KernelFunction(
-        body, target_spec, function_name, params, req_headers, ctx.constraints, jit
-    )
-    kfunc.metadata.update(ctx.metadata)
-    return kfunc
-
-
-class GpuKernelFunction(KernelFunction):
-    """Internal representation of a kernel function targeted at CUDA GPUs."""
-
-    def __init__(
-        self,
-        body: PsBlock,
-        threads_range: GpuThreadsRange | None,
-        target: Target,
-        name: str,
-        parameters: Sequence[KernelParameter],
-        required_headers: set[str],
-        constraints: Sequence[KernelParamsConstraint],
-        jit: JitBase,
-    ):
-        super().__init__(
-            body, target, name, parameters, required_headers, constraints, jit
-        )
-        self._threads_range = threads_range
-
-    @property
-    def threads_range(self) -> GpuThreadsRange | None:
-        """Object exposing the total size of the launch grid this kernel expects to be executed with."""
-        return self._threads_range
-
-
-def create_gpu_kernel_function(
-    ctx: KernelCreationContext,
-    platform: Platform,
-    body: PsBlock,
-    threads_range: GpuThreadsRange | None,
-    function_name: str,
-    target_spec: Target,
-    jit: JitBase,
-):
-    undef_symbols = collect_undefined_symbols(body)
-
-    if threads_range is not None:
-        for threads in threads_range.num_work_items:
-            undef_symbols |= collect_undefined_symbols(threads)
-
-    params = _get_function_params(ctx, undef_symbols)
-    req_headers = _get_headers(ctx, platform, body)
-
-    kfunc = GpuKernelFunction(
-        body,
-        threads_range,
-        target_spec,
-        function_name,
-        params,
-        req_headers,
-        ctx.constraints,
-        jit,
-    )
-    kfunc.metadata.update(ctx.metadata)
-    return kfunc
-
-
-def _get_function_params(ctx: KernelCreationContext, symbols: Iterable[PsSymbol]):
-    params: list[KernelParameter] = []
-
-    from pystencils.backend.memory import BufferBasePtr
-
-    for symb in symbols:
-        props: set[PsSymbolProperty] = set()
-        for prop in symb.properties:
-            match prop:
-                case FieldShape() | FieldStride():
-                    props.add(prop)
-                case BufferBasePtr(buf):
-                    field = ctx.find_field(buf.name)
-                    props.add(FieldBasePtr(field))
-        params.append(KernelParameter(symb.name, symb.get_dtype(), props))
-
-    params.sort(key=lambda p: p.name)
-    return params
-
-
-def _get_headers(ctx: KernelCreationContext, platform: Platform, body: PsBlock):
-    req_headers = collect_required_headers(body)
-    req_headers |= platform.required_headers
-    req_headers |= ctx.required_headers
-    return req_headers
diff --git a/src/pystencils/backend/memory.py b/src/pystencils/backend/memory.py
index fcfae9f908da2da9cc01f422d285979c02a7a26e..7a5d62f691d81a0f251329c47216f65a981ef291 100644
--- a/src/pystencils/backend/memory.py
+++ b/src/pystencils/backend/memory.py
@@ -6,7 +6,7 @@ from dataclasses import dataclass
 from ..types import PsType, PsTypeError, deconstify, PsIntegerType, PsPointerType
 from .exceptions import PsInternalCompilerError
 from .constants import PsConstant
-from .properties import PsSymbolProperty, UniqueSymbolProperty
+from ..codegen.properties import PsSymbolProperty, UniqueSymbolProperty
 
 
 class PsSymbol:
diff --git a/src/pystencils/backend/platforms/__init__.py b/src/pystencils/backend/platforms/__init__.py
index 9332453c6c1b60255f1869f011bfa661ee670ea0..589841db87efb598ffeed20d4d11db7ffcd452cc 100644
--- a/src/pystencils/backend/platforms/__init__.py
+++ b/src/pystencils/backend/platforms/__init__.py
@@ -1,6 +1,6 @@
 from .platform import Platform
 from .generic_cpu import GenericCpu, GenericVectorCpu
-from .generic_gpu import GenericGpu, GpuThreadsRange
+from .generic_gpu import GenericGpu
 from .cuda import CudaPlatform
 from .x86 import X86VectorCpu, X86VectorArch
 from .sycl import SyclPlatform
@@ -12,7 +12,6 @@ __all__ = [
     "X86VectorCpu",
     "X86VectorArch",
     "GenericGpu",
-    "GpuThreadsRange",
     "CudaPlatform",
     "SyclPlatform",
 ]
diff --git a/src/pystencils/backend/platforms/cuda.py b/src/pystencils/backend/platforms/cuda.py
index 7ebbd4fd4762821d3beb6f0cbbc04a33f775ecdf..f146cfbfd3d3d4f106bc82571bd613798644f241 100644
--- a/src/pystencils/backend/platforms/cuda.py
+++ b/src/pystencils/backend/platforms/cuda.py
@@ -1,8 +1,10 @@
+from __future__ import annotations
 from warnings import warn
+from typing import TYPE_CHECKING
 
 from ...types import constify
 from ..exceptions import MaterializationError
-from .generic_gpu import GenericGpu, GpuThreadsRange
+from .generic_gpu import GenericGpu
 
 from ..kernelcreation import (
     Typifier,
@@ -26,7 +28,9 @@ from ..ast.expressions import PsLt, PsAnd
 from ...types import PsSignedIntegerType, PsIeeeFloatType
 from ..literals import PsLiteral
 from ..functions import PsMathFunction, MathFunctions, CFunction
-from ...config import GpuIndexingConfig
+
+if TYPE_CHECKING:
+    from ...codegen import GpuIndexingConfig, GpuThreadsRange
 
 int32 = PsSignedIntegerType(width=32, const=False)
 
@@ -51,6 +55,9 @@ class CudaPlatform(GenericGpu):
         self, ctx: KernelCreationContext, indexing_cfg: GpuIndexingConfig | None = None
     ) -> None:
         super().__init__(ctx)
+
+        from ...codegen.config import GpuIndexingConfig
+
         self._cfg = indexing_cfg if indexing_cfg is not None else GpuIndexingConfig()
         self._typify = Typifier(ctx)
 
@@ -136,7 +143,7 @@ class CudaPlatform(GenericGpu):
 
         if not self._cfg.manual_launch_grid:
             try:
-                threads_range = GpuThreadsRange.from_ispace(ispace)
+                threads_range = self.threads_from_ispace(ispace)
             except MaterializationError as e:
                 warn(
                     str(e.args[0])
@@ -214,7 +221,7 @@ class CudaPlatform(GenericGpu):
             body.statements = [sparse_idx_decl] + body.statements
             ast = body
 
-        return ast, GpuThreadsRange.from_ispace(ispace)
+        return ast, self.threads_from_ispace(ispace)
 
     def _linear_thread_idx(self, coord: int):
         block_size = BLOCK_DIM[coord]
diff --git a/src/pystencils/backend/platforms/generic_gpu.py b/src/pystencils/backend/platforms/generic_gpu.py
index 975d39d22ec7edeb2e6e640a4460c30509756973..15df36cdd9cf416a8438f12816cbe00cfaeea204 100644
--- a/src/pystencils/backend/platforms/generic_gpu.py
+++ b/src/pystencils/backend/platforms/generic_gpu.py
@@ -1,5 +1,5 @@
 from __future__ import annotations
-from typing import Sequence
+from typing import TYPE_CHECKING
 from abc import abstractmethod
 
 from ..ast.expressions import PsExpression
@@ -12,55 +12,33 @@ from ..kernelcreation.iteration_space import (
 from .platform import Platform
 from ..exceptions import MaterializationError
 
+if TYPE_CHECKING:
+    from ...codegen.kernel import GpuThreadsRange
 
-class GpuThreadsRange:
-    """Number of threads required by a GPU kernel, in order (x, y, z)."""
 
-    @staticmethod
-    def from_ispace(ispace: IterationSpace) -> GpuThreadsRange:
+class GenericGpu(Platform):
+    @abstractmethod
+    def materialize_iteration_space(
+        self, body: PsBlock, ispace: IterationSpace
+    ) -> tuple[PsBlock, GpuThreadsRange | None]:
+        pass
+
+    @classmethod
+    def threads_from_ispace(cls, ispace: IterationSpace) -> GpuThreadsRange:
+        from ...codegen.kernel import GpuThreadsRange
+
         if isinstance(ispace, FullIterationSpace):
-            return GpuThreadsRange._from_full_ispace(ispace)
+            return cls._threads_from_full_ispace(ispace)
         elif isinstance(ispace, SparseIterationSpace):
             work_items = (PsExpression.make(ispace.index_list.shape[0]),)
             return GpuThreadsRange(work_items)
         else:
             assert False
 
-    def __init__(
-        self,
-        num_work_items: Sequence[PsExpression],
-    ):
-        self._dim = len(num_work_items)
-        self._num_work_items = tuple(num_work_items)
-
-    # @property
-    # def grid_size(self) -> tuple[PsExpression, ...]:
-    #     return self._grid_size
-
-    # @property
-    # def block_size(self) -> tuple[PsExpression, ...]:
-    #     return self._block_size
-
-    @property
-    def num_work_items(self) -> tuple[PsExpression, ...]:
-        """Number of work items in (x, y, z)-order."""
-        return self._num_work_items
-
-    @property
-    def dim(self) -> int:
-        return self._dim
-    
-    def __str__(self) -> str:
-        rep = "GpuThreadsRange { "
-        rep += "; ".join(f"{x}: {w}" for x, w in zip("xyz", self._num_work_items))
-        rep += " }"
-        return rep
-    
-    def _repr_html_(self) -> str:
-        return str(self)
-
-    @staticmethod
-    def _from_full_ispace(ispace: FullIterationSpace) -> GpuThreadsRange:
+    @classmethod
+    def _threads_from_full_ispace(cls, ispace: FullIterationSpace) -> GpuThreadsRange:
+        from ...codegen.kernel import GpuThreadsRange
+
         dimensions = ispace.dimensions_in_loop_order()[::-1]
         if len(dimensions) > 3:
             raise NotImplementedError(
@@ -81,11 +59,3 @@ class GpuThreadsRange:
 
         work_items = [ispace.actual_iterations(dim) for dim in dimensions]
         return GpuThreadsRange(work_items)
-
-
-class GenericGpu(Platform):
-    @abstractmethod
-    def materialize_iteration_space(
-        self, body: PsBlock, ispace: IterationSpace
-    ) -> tuple[PsBlock, GpuThreadsRange | None]:
-        pass
diff --git a/src/pystencils/backend/platforms/sycl.py b/src/pystencils/backend/platforms/sycl.py
index ec5e7eda05d0417a764d26294206c6c0dcf7d02d..9c04d6074b4feb0e63deddeb5a94cf11d920a0c0 100644
--- a/src/pystencils/backend/platforms/sycl.py
+++ b/src/pystencils/backend/platforms/sycl.py
@@ -1,3 +1,6 @@
+from __future__ import annotations
+from typing import TYPE_CHECKING
+
 from ..functions import CFunction, PsMathFunction, MathFunctions
 from ..kernelcreation.iteration_space import (
     IterationSpace,
@@ -22,10 +25,12 @@ from ..extensions.cpp import CppMethodCall
 
 from ..kernelcreation import KernelCreationContext, AstFactory
 from ..constants import PsConstant
-from .generic_gpu import GenericGpu, GpuThreadsRange
+from .generic_gpu import GenericGpu
 from ..exceptions import MaterializationError
 from ...types import PsCustomType, PsIeeeFloatType, constify, PsIntegerType
-from ...config import GpuIndexingConfig
+
+if TYPE_CHECKING:
+    from ...codegen import GpuIndexingConfig, GpuThreadsRange
 
 
 class SyclPlatform(GenericGpu):
@@ -34,6 +39,9 @@ class SyclPlatform(GenericGpu):
         self, ctx: KernelCreationContext, indexing_cfg: GpuIndexingConfig | None = None
     ):
         super().__init__(ctx)
+
+        from ...codegen.config import GpuIndexingConfig
+
         self._cfg = indexing_cfg if indexing_cfg is not None else GpuIndexingConfig()
 
     @property
@@ -109,7 +117,7 @@ class SyclPlatform(GenericGpu):
         id_decl = self._id_declaration(rank, id_symbol)
 
         dimensions = ispace.dimensions_in_loop_order()
-        launch_config = GpuThreadsRange.from_ispace(ispace)
+        launch_config = self.threads_from_ispace(ispace)
 
         indexing_decls = [id_decl]
         conds = []
@@ -184,7 +192,7 @@ class SyclPlatform(GenericGpu):
             body.statements = [sparse_idx_decl] + body.statements
             ast = body
 
-        return ast, GpuThreadsRange.from_ispace(ispace)
+        return ast, self.threads_from_ispace(ispace)
 
     def _item_type(self, rank: int):
         if not self._cfg.sycl_automatic_block_size:
diff --git a/src/pystencils/backend/transformations/add_pragmas.py b/src/pystencils/backend/transformations/add_pragmas.py
index d01f428744eb0c151f473c9fcdcd8e9bdb0cc271..78e721f3850e0075a8079131b84ae558abb50062 100644
--- a/src/pystencils/backend/transformations/add_pragmas.py
+++ b/src/pystencils/backend/transformations/add_pragmas.py
@@ -1,4 +1,6 @@
+from __future__ import annotations
 from dataclasses import dataclass
+from typing import TYPE_CHECKING
 
 from typing import Sequence
 from collections import defaultdict
@@ -8,7 +10,8 @@ from ..ast import PsAstNode
 from ..ast.structural import PsBlock, PsLoop, PsPragma
 from ..ast.expressions import PsExpression
 
-from ...config import OpenMpConfig
+if TYPE_CHECKING:
+    from ...codegen.config import OpenMpConfig
 
 __all__ = ["InsertPragmasAtLoops", "LoopPragma", "AddOpenMP"]
 
diff --git a/src/pystencils/backend/transformations/canonicalize_symbols.py b/src/pystencils/backend/transformations/canonicalize_symbols.py
index f5b356432a56cc8c2a33eba6ad533947b9f2b2ad..c0406c25d820df0a1c3821074395b8709b482113 100644
--- a/src/pystencils/backend/transformations/canonicalize_symbols.py
+++ b/src/pystencils/backend/transformations/canonicalize_symbols.py
@@ -72,7 +72,6 @@ class CanonicalizeSymbols:
                     symb.dtype = constify(symb.dtype)
 
         #   Any symbols still alive now are function params or globals
-        #   Might use that to populate KernelFunction
         self._last_result = cc
 
         return node
diff --git a/src/pystencils/boundaries/boundaryhandling.py b/src/pystencils/boundaries/boundaryhandling.py
index fe8dd7d0059940841277f954cc322a42d2d744b6..1f6e3d126365de0af08ee98ddd26d1600af15027 100644
--- a/src/pystencils/boundaries/boundaryhandling.py
+++ b/src/pystencils/boundaries/boundaryhandling.py
@@ -12,7 +12,7 @@ from pystencils.types import PsIntegerType
 from pystencils.types.quick import Arr, SInt
 from pystencils.gpu.gpu_array_handler import GPUArrayHandler
 from pystencils.field import Field, FieldType
-from pystencils.backend.properties import FieldBasePtr
+from pystencils.codegen.properties import FieldBasePtr
 
 try:
     # noinspection PyPep8Naming
diff --git a/src/pystencils/codegen/__init__.py b/src/pystencils/codegen/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..e27b94b9ebec9197eb044cd05b5d55fd5ae17f1f
--- /dev/null
+++ b/src/pystencils/codegen/__init__.py
@@ -0,0 +1,28 @@
+from .target import Target
+from .config import (
+    CreateKernelConfig,
+    CpuOptimConfig,
+    VectorizationConfig,
+    OpenMpConfig,
+    GpuIndexingConfig,
+    AUTO,
+)
+from .parameters import Parameter
+from .kernel import Kernel, GpuKernel, GpuThreadsRange
+from .driver import create_kernel, get_driver
+
+__all__ = [
+    "Target",
+    "CreateKernelConfig",
+    "CpuOptimConfig",
+    "VectorizationConfig",
+    "OpenMpConfig",
+    "GpuIndexingConfig",
+    "AUTO",
+    "Parameter",
+    "Kernel",
+    "GpuKernel",
+    "GpuThreadsRange",
+    "create_kernel",
+    "get_driver",
+]
diff --git a/src/pystencils/config.py b/src/pystencils/codegen/config.py
similarity index 97%
rename from src/pystencils/config.py
rename to src/pystencils/codegen/config.py
index 506f7fd786ca3b38fec1cef83c9ec289206b4662..01161620c4a86355ba69daf613b1d468835e58c1 100644
--- a/src/pystencils/config.py
+++ b/src/pystencils/codegen/config.py
@@ -8,9 +8,9 @@ from typing import Sequence
 from dataclasses import dataclass, InitVar, replace
 
 from .target import Target
-from .field import Field, FieldType
+from ..field import Field, FieldType
 
-from .types import (
+from ..types import (
     PsIntegerType,
     UserTypeSpec,
     PsIeeeFloatType,
@@ -18,18 +18,17 @@ from .types import (
     create_type,
 )
 
-from .defaults import DEFAULTS
+from ..defaults import DEFAULTS
 
 if TYPE_CHECKING:
-    from .backend.jit import JitBase
+    from ..jit import JitBase
 
 
 class PsOptionsError(Exception):
     """Indicates an option clash in the `CreateKernelConfig`."""
 
 
-class _AUTO_TYPE:
-    ...
+class _AUTO_TYPE: ...  # noqa: E701
 
 
 AUTO = _AUTO_TYPE()
@@ -228,7 +227,7 @@ class CreateKernelConfig:
     """Just-in-time compiler used to compile and load the kernel for invocation from the current Python environment.
     
     If left at `None`, a default just-in-time compiler will be inferred from the `target` parameter.
-    To explicitly disable JIT compilation, pass `pystencils.backend.jit.no_jit`.
+    To explicitly disable JIT compilation, pass `pystencils.no_jit <pystencils.jit.no_jit>`.
     """
 
     function_name: str = "kernel"
@@ -341,12 +340,12 @@ class CreateKernelConfig:
         """Returns either the user-specified JIT compiler, or infers one from the target if none is given."""
         if self.jit is None:
             if self.target.is_cpu():
-                from .backend.jit import LegacyCpuJit
+                from ..jit import LegacyCpuJit
 
                 return LegacyCpuJit()
             elif self.target == Target.CUDA:
                 try:
-                    from .backend.jit.gpu_cupy import CupyJit
+                    from ..jit.gpu_cupy import CupyJit
 
                     if (
                         self.gpu_indexing is not None
@@ -357,12 +356,12 @@ class CreateKernelConfig:
                         return CupyJit()
 
                 except ImportError:
-                    from .backend.jit import no_jit
+                    from ..jit import no_jit
 
                     return no_jit
 
             elif self.target == Target.SYCL:
-                from .backend.jit import no_jit
+                from ..jit import no_jit
 
                 return no_jit
             else:
@@ -434,7 +433,7 @@ class CreateKernelConfig:
         cpu_openmp: bool | int | None,
         cpu_vectorize_info: dict | None,
         gpu_indexing_params: dict | None,
-    ):
+    ):  # pragma: no cover
         optim: CpuOptimConfig | None = None
 
         if data_type is not None:
@@ -533,7 +532,7 @@ class CreateKernelConfig:
             )
 
 
-def _deprecated_option(name, instead):
+def _deprecated_option(name, instead):  # pragma: no cover
     from warnings import warn
 
     warn(
diff --git a/src/pystencils/codegen/driver.py b/src/pystencils/codegen/driver.py
new file mode 100644
index 0000000000000000000000000000000000000000..7bdec96cc0bd32eac08365b24186d319c27fb36a
--- /dev/null
+++ b/src/pystencils/codegen/driver.py
@@ -0,0 +1,541 @@
+from __future__ import annotations
+from typing import cast, Sequence, Iterable, TYPE_CHECKING
+from dataclasses import dataclass, replace
+
+from .target import Target
+from .config import CreateKernelConfig, OpenMpConfig, VectorizationConfig, AUTO
+from .kernel import Kernel, GpuKernel, GpuThreadsRange
+from .properties import PsSymbolProperty, FieldShape, FieldStride, FieldBasePtr
+from .parameters import Parameter
+
+from ..types import create_numeric_type, PsIntegerType, PsScalarType
+
+from ..backend.memory import PsSymbol
+from ..backend.ast import PsAstNode
+from ..backend.ast.structural import PsBlock, PsLoop
+from ..backend.ast.analysis import collect_undefined_symbols, collect_required_headers
+from ..backend.kernelcreation import (
+    KernelCreationContext,
+    KernelAnalysis,
+    FreezeExpressions,
+    Typifier,
+)
+from ..backend.constants import PsConstant
+from ..backend.kernelcreation.iteration_space import (
+    create_sparse_iteration_space,
+    create_full_iteration_space,
+    FullIterationSpace,
+)
+from ..backend.platforms import (
+    Platform,
+    GenericCpu,
+    GenericVectorCpu,
+    GenericGpu,
+)
+from ..backend.exceptions import VectorizationError
+
+from ..backend.transformations import (
+    EliminateConstants,
+    LowerToC,
+    SelectFunctions,
+    CanonicalizeSymbols,
+    HoistLoopInvariantDeclarations,
+)
+
+from ..simp import AssignmentCollection
+from sympy.codegen.ast import AssignmentBase
+
+if TYPE_CHECKING:
+    from ..jit import JitBase
+
+
+__all__ = ["create_kernel"]
+
+
+def create_kernel(
+    assignments: AssignmentCollection | Sequence[AssignmentBase] | AssignmentBase,
+    config: CreateKernelConfig | None = None,
+    **kwargs,
+) -> Kernel:
+    """Create a kernel function from a set of assignments.
+
+    Args:
+        assignments: The kernel's sequence of assignments, expressed using SymPy
+        config: The configuration for the kernel translator
+        kwargs: If ``config`` is not set, it is created from the keyword arguments;
+            if it is set, its options will be overridden by any keyword arguments.
+
+    Returns:
+        The numerical kernel in pystencils' internal representation, ready to be
+        exported or compiled
+    """
+
+    if not config:  # no configuration given: start from the defaults
+        config = CreateKernelConfig()
+
+    if kwargs:
+        config = replace(config, **kwargs)  # builds a new config object with the overrides
+
+    driver = DefaultKernelCreationDriver(config)
+    return driver(assignments)
+
+
+def get_driver(
+    cfg: CreateKernelConfig, *, retain_intermediates: bool = False
+) -> DefaultKernelCreationDriver:
+    """Create a `DefaultKernelCreationDriver` from the given configuration.
+
+    Args:
+        cfg: Configuration for the code generator
+        retain_intermediates: If `True`, the driver keeps copies of the intermediate
+            results of its stages; read them via the driver's ``intermediates`` property.
+    """
+    return DefaultKernelCreationDriver(cfg, retain_intermediates)
+
+
+class DefaultKernelCreationDriver:
+    """Drives the default kernel creation sequence.
+
+    Args:
+        cfg: Configuration for the code generator
+        retain_intermediates: If `True`, instructs the driver to keep copies of
+            the intermediate results of its stages for later inspection.
+    """
+
+    def __init__(self, cfg: CreateKernelConfig, retain_intermediates: bool = False):
+        self._cfg = cfg
+
+        idx_dtype = create_numeric_type(self._cfg.index_dtype)
+        assert isinstance(idx_dtype, PsIntegerType)  # the index type must be an integer type
+
+        self._ctx = KernelCreationContext(
+            default_dtype=create_numeric_type(self._cfg.default_dtype),
+            index_dtype=idx_dtype,
+        )
+
+        self._target = self._cfg.get_target()
+        self._platform = self._get_platform()  # reads self._ctx and self._target set above
+
+        if retain_intermediates:
+            self._intermediates = CodegenIntermediates()
+        else:
+            self._intermediates = None
+
+    @property
+    def intermediates(self) -> CodegenIntermediates | None:
+        return self._intermediates  # None unless retain_intermediates was requested
+
+    def __call__(
+        self,
+        assignments: AssignmentCollection | Sequence[AssignmentBase] | AssignmentBase,
+    ) -> Kernel:
+        kernel_body = self.parse_kernel_body(assignments)
+
+        match self._platform:
+            case GenericCpu():
+                kernel_ast = self._platform.materialize_iteration_space(
+                    kernel_body, self._ctx.get_iteration_space()
+                )
+            case GenericGpu():  # GPU platforms additionally return the thread range
+                kernel_ast, gpu_threads = self._platform.materialize_iteration_space(
+                    kernel_body, self._ctx.get_iteration_space()
+                )
+
+        if self._intermediates is not None:
+            self._intermediates.materialized_ispace = kernel_ast.clone()
+
+        #   Fold and extract constants
+        elim_constants = EliminateConstants(self._ctx, extract_constant_exprs=True)
+        kernel_ast = cast(PsBlock, elim_constants(kernel_ast))
+
+        if self._intermediates is not None:
+            self._intermediates.constants_eliminated = kernel_ast.clone()
+
+        #   Target-Specific optimizations
+        if self._cfg.target.is_cpu():
+            kernel_ast = self._transform_for_cpu(kernel_ast)
+
+        #   Note: After this point, the AST may contain intrinsics, so type-dependent
+        #   transformations cannot be run any more
+
+        #   Lowering
+        lower_to_c = LowerToC(self._ctx)
+        kernel_ast = cast(PsBlock, lower_to_c(kernel_ast))
+
+        select_functions = SelectFunctions(self._platform)
+        kernel_ast = cast(PsBlock, select_functions(kernel_ast))
+
+        if self._intermediates is not None:
+            self._intermediates.lowered = kernel_ast.clone()
+
+        #   Late canonicalization pass: Canonicalize new symbols introduced by LowerToC
+
+        canonicalize = CanonicalizeSymbols(self._ctx, True)
+        kernel_ast = cast(PsBlock, canonicalize(kernel_ast))
+
+        if self._cfg.target.is_cpu():
+            return create_cpu_kernel_function(
+                self._ctx,
+                self._platform,
+                kernel_ast,
+                self._cfg.function_name,
+                self._cfg.target,
+                self._cfg.get_jit(),
+            )
+        else:
+            return create_gpu_kernel_function(
+                self._ctx,
+                self._platform,
+                kernel_ast,
+                gpu_threads,  # bound only in the GenericGpu branch of the match above
+                self._cfg.function_name,
+                self._cfg.target,
+                self._cfg.get_jit(),
+            )
+
+    def parse_kernel_body(
+        self,
+        assignments: AssignmentCollection | Sequence[AssignmentBase] | AssignmentBase,
+    ) -> PsBlock:
+        if isinstance(assignments, AssignmentBase):
+            assignments = [assignments]  # wrap a single assignment into a list
+
+        if not isinstance(assignments, AssignmentCollection):
+            assignments = AssignmentCollection(assignments)  # type: ignore
+
+        _ = _parse_simplification_hints(assignments)  # raises if "split_groups" is requested
+
+        analysis = KernelAnalysis(
+            self._ctx,
+            not self._cfg.skip_independence_check,
+            not self._cfg.allow_double_writes,
+        )
+        analysis(assignments)
+
+        if self._cfg.index_field is not None:
+            ispace = create_sparse_iteration_space(
+                self._ctx, assignments, index_field=self._cfg.index_field
+            )
+        else:
+            gls = self._cfg.ghost_layers
+            islice = self._cfg.iteration_slice
+
+            if gls is None and islice is None:
+                gls = AUTO  # neither option given: fall back to AUTO ghost layers
+
+            ispace = create_full_iteration_space(
+                self._ctx,
+                assignments,
+                ghost_layers=gls,
+                iteration_slice=islice,
+            )
+
+        self._ctx.set_iteration_space(ispace)
+
+        freeze = FreezeExpressions(self._ctx)
+        kernel_body = freeze(assignments)
+
+        typify = Typifier(self._ctx)
+        kernel_body = typify(kernel_body)
+
+        if self._intermediates is not None:
+            self._intermediates.parsed_body = kernel_body.clone()
+
+        return kernel_body
+
+    def _transform_for_cpu(self, kernel_ast: PsBlock) -> PsBlock:
+        canonicalize = CanonicalizeSymbols(self._ctx, True)
+        kernel_ast = cast(PsBlock, canonicalize(kernel_ast))
+
+        if self._intermediates is not None:
+            self._intermediates.cpu_canonicalize = kernel_ast.clone()
+
+        hoist_invariants = HoistLoopInvariantDeclarations(self._ctx)
+        kernel_ast = cast(PsBlock, hoist_invariants(kernel_ast))
+
+        if self._intermediates is not None:
+            self._intermediates.cpu_hoist_invariants = kernel_ast.clone()
+
+        cpu_cfg = self._cfg.cpu_optim
+
+        if cpu_cfg is None:  # no CPU optimization options given: stop here
+            return kernel_ast
+
+        if cpu_cfg.loop_blocking:
+            raise NotImplementedError("Loop blocking not implemented yet.")
+
+        kernel_ast = self._vectorize(kernel_ast)
+
+        if cpu_cfg.openmp is not False:
+            from ..backend.transformations import AddOpenMP
+
+            params = (
+                cpu_cfg.openmp
+                if isinstance(cpu_cfg.openmp, OpenMpConfig)
+                else OpenMpConfig()  # any other non-False value selects default settings
+            )
+            add_omp = AddOpenMP(self._ctx, params)
+            kernel_ast = cast(PsBlock, add_omp(kernel_ast))
+
+            if self._intermediates is not None:
+                self._intermediates.cpu_openmp = kernel_ast.clone()
+
+        if cpu_cfg.use_cacheline_zeroing:
+            raise NotImplementedError("CL-zeroing not implemented yet")
+
+        return kernel_ast
+
+    def _vectorize(self, kernel_ast: PsBlock) -> PsBlock:
+        assert self._cfg.cpu_optim is not None
+        vec_config = self._cfg.cpu_optim.get_vectorization_config()
+        if vec_config is None:  # vectorization not requested: AST is returned unchanged
+            return kernel_ast
+
+        from ..backend.transformations import LoopVectorizer, SelectIntrinsics
+
+        assert isinstance(self._platform, GenericVectorCpu)
+
+        ispace = self._ctx.get_iteration_space()
+        if not isinstance(ispace, FullIterationSpace):
+            raise VectorizationError(
+                "Unable to vectorize kernel: The kernel is not using a dense iteration space."
+            )
+
+        inner_loop_coord = ispace.loop_order[-1]  # last entry of the loop order
+        inner_loop_dim = ispace.dimensions[inner_loop_coord]
+
+        #   Apply stride (TODO: and alignment) assumptions
+        if vec_config.assume_inner_stride_one:
+            for field in self._ctx.fields:
+                buf = self._ctx.get_buffer(field)
+                inner_stride = buf.strides[inner_loop_coord]
+                if isinstance(inner_stride, PsConstant):
+                    if inner_stride.value != 1:
+                        raise VectorizationError(
+                            f"Unable to apply assumption 'assume_inner_stride_one': "
+                            f"Field {field} has fixed stride {inner_stride} "
+                            f"set in the inner coordinate {inner_loop_coord}."
+                        )
+                else:
+                    buf.strides[inner_loop_coord] = PsConstant(1, buf.index_type)
+                    #   TODO: Communicate assumption to runtime system via a precondition
+
+        #   Call loop vectorizer
+        if vec_config.lanes is None:
+            lanes = VectorizationConfig.default_lanes(
+                self._target, cast(PsScalarType, self._ctx.default_dtype)
+            )
+        else:
+            lanes = vec_config.lanes
+
+        vectorizer = LoopVectorizer(self._ctx, lanes)
+
+        def loop_predicate(loop: PsLoop):  # selects loops over the inner dimension's counter
+            return loop.counter.symbol == inner_loop_dim.counter
+
+        kernel_ast = vectorizer.vectorize_select_loops(kernel_ast, loop_predicate)
+
+        if self._intermediates is not None:
+            self._intermediates.cpu_vectorize = kernel_ast.clone()
+
+        select_intrin = SelectIntrinsics(self._ctx, self._platform)
+        kernel_ast = cast(PsBlock, select_intrin(kernel_ast))
+
+        if self._intermediates is not None:
+            self._intermediates.cpu_select_intrins = kernel_ast.clone()
+
+        return kernel_ast
+
+    def _get_platform(self) -> Platform:
+        if Target._CPU in self._target:
+            if Target._X86 in self._target:
+                from ..backend.platforms.x86 import X86VectorArch, X86VectorCpu
+
+                arch: X86VectorArch
+
+                if Target._SSE in self._target:
+                    arch = X86VectorArch.SSE
+                elif Target._AVX in self._target:
+                    arch = X86VectorArch.AVX
+                elif Target._AVX512 in self._target:
+                    if Target._FP16 in self._target:
+                        arch = X86VectorArch.AVX512_FP16
+                    else:
+                        arch = X86VectorArch.AVX512
+                else:
+                    assert False, "unreachable code"
+
+                return X86VectorCpu(self._ctx, arch)
+            elif self._target == Target.GenericCPU:
+                return GenericCpu(self._ctx)
+            else:
+                raise NotImplementedError(
+                    f"No platform is currently available for CPU target {self._target}"
+                )
+
+        elif Target._GPU in self._target:
+            match self._target:  # GPU targets other than these two fall through to the raise
+                case Target.SYCL:
+                    from ..backend.platforms import SyclPlatform
+
+                    return SyclPlatform(self._ctx, self._cfg.gpu_indexing)
+                case Target.CUDA:
+                    from ..backend.platforms import CudaPlatform
+
+                    return CudaPlatform(self._ctx, self._cfg.gpu_indexing)
+
+        raise NotImplementedError(
+            f"Code generation for target {self._target} not implemented"
+        )
+
+
+def create_cpu_kernel_function(
+    ctx: KernelCreationContext,
+    platform: Platform,
+    body: PsBlock,
+    function_name: str,
+    target_spec: Target,
+    jit: JitBase,
+) -> Kernel:
+    undef_symbols = collect_undefined_symbols(body)  # these become the kernel's parameters
+
+    params = _get_function_params(ctx, undef_symbols)
+    req_headers = _get_headers(ctx, platform, body)
+
+    kfunc = Kernel(body, target_spec, function_name, params, req_headers, jit)
+    kfunc.metadata.update(ctx.metadata)  # copy the context's metadata entries onto the kernel
+    return kfunc
+
+
+def create_gpu_kernel_function(
+    ctx: KernelCreationContext,
+    platform: Platform,
+    body: PsBlock,
+    threads_range: GpuThreadsRange | None,
+    function_name: str,
+    target_spec: Target,
+    jit: JitBase,
+) -> GpuKernel:
+    undef_symbols = collect_undefined_symbols(body)  # these become the kernel's parameters
+
+    if threads_range is not None:
+        for threads in threads_range.num_work_items:  # symbols used only by the thread range
+            undef_symbols |= collect_undefined_symbols(threads)
+
+    params = _get_function_params(ctx, undef_symbols)
+    req_headers = _get_headers(ctx, platform, body)
+
+    kfunc = GpuKernel(
+        body,
+        threads_range,
+        target_spec,
+        function_name,
+        params,
+        req_headers,
+        jit,
+    )
+    kfunc.metadata.update(ctx.metadata)  # copy the context's metadata entries onto the kernel
+    return kfunc
+
+
+def _get_function_params(
+    ctx: KernelCreationContext, symbols: Iterable[PsSymbol]
+) -> list[Parameter]:
+    params: list[Parameter] = []  # one Parameter per symbol, sorted by name before returning
+
+    from pystencils.backend.memory import BufferBasePtr
+
+    for symb in symbols:
+        props: set[PsSymbolProperty] = set()
+        for prop in symb.properties:  # properties matching neither case below are dropped
+            match prop:
+                case FieldShape() | FieldStride():
+                    props.add(prop)
+                case BufferBasePtr(buf):  # replaced by a FieldBasePtr for the field of that name
+                    field = ctx.find_field(buf.name)
+                    props.add(FieldBasePtr(field))
+        params.append(Parameter(symb.name, symb.get_dtype(), props))
+
+    params.sort(key=lambda p: p.name)
+    return params
+
+
+def _get_headers(
+    ctx: KernelCreationContext, platform: Platform, body: PsBlock
+) -> set[str]:
+    req_headers = collect_required_headers(body)  # headers required by the AST itself
+    req_headers |= platform.required_headers  # plus those of the platform
+    req_headers |= ctx.required_headers  # plus those registered on the context
+    return req_headers
+
+
+@dataclass
+class StageResult:
+    ast: PsAstNode  # AST stored for the stage
+    label: str  # slot description, or the slot's attribute name if none was given
+
+
+class StageResultSlot:
+    def __init__(self, description: str | None = None):
+        self._description = description
+        self._name: str  # assigned in __set_name__
+        self._lookup: str  # assigned in __set_name__
+
+    def __set_name__(self, owner, name: str):
+        self._name = name
+        self._lookup = f"_{name}"  # instance attribute that holds the stored AST
+
+    def __get__(self, obj, objtype=None) -> StageResult | None:
+        if obj is None:  # accessed on the owning class rather than on an instance
+            return None
+
+        ast = getattr(obj, self._lookup, None)  # None if the slot was never assigned
+        if ast is not None:
+            descr = self._name if self._description is None else self._description
+            return StageResult(ast, descr)
+        else:
+            return None
+
+    def __set__(self, obj, val: PsAstNode | None):
+        setattr(obj, self._lookup, val)
+
+
+class CodegenIntermediates:
+    """Intermediate results produced by the code generator."""
+
+    parsed_body = StageResultSlot("Freeze & Type Deduction")
+    materialized_ispace = StageResultSlot("Iteration Space Materialization")
+    constants_eliminated = StageResultSlot("Constant Elimination")
+    cpu_canonicalize = StageResultSlot("CPU: Symbol Canonicalization")
+    cpu_hoist_invariants = StageResultSlot("CPU: Hoisting of Loop Invariants")
+    cpu_vectorize = StageResultSlot("CPU: Vectorization")
+    cpu_select_intrins = StageResultSlot("CPU: Intrinsics Selection")
+    cpu_openmp = StageResultSlot("CPU: OpenMP Instrumentation")
+    lowered = StageResultSlot("C Language Lowering")
+
+    @property
+    def available_stages(self) -> Sequence[StageResult]:
+        all_results: list[StageResult | None] = [
+            getattr(self, name)  # goes through the slot descriptor: StageResult or None
+            for name, slot in CodegenIntermediates.__dict__.items()
+            if isinstance(slot, StageResultSlot)
+        ]
+        return tuple(filter(lambda r: r is not None, all_results))  # type: ignore
+
+
+def create_staggered_kernel(
+    assignments, target: Target = Target.CPU, gpu_exclusive_conditions=False, **kwargs
+):
+    raise NotImplementedError(  # placeholder: always raises, all arguments are ignored
+        "Staggered kernels are not yet implemented for pystencils 2.0"
+    )
+
+
+#   Internals
+
+
+def _parse_simplification_hints(ac: AssignmentCollection):
+    if "split_groups" in ac.simplification_hints:  # the only hint inspected here
+        raise NotImplementedError(
+            "Loop splitting was requested, but is not implemented yet"
+        )
diff --git a/src/pystencils/codegen/kernel.py b/src/pystencils/codegen/kernel.py
new file mode 100644
index 0000000000000000000000000000000000000000..3adc47876dc36af02ee307dde25ad5d7250cd3fb
--- /dev/null
+++ b/src/pystencils/codegen/kernel.py
@@ -0,0 +1,171 @@
+from __future__ import annotations
+
+from warnings import warn
+from typing import Callable, Sequence, Any, TYPE_CHECKING
+from itertools import chain
+
+from .target import Target
+from .parameters import Parameter
+from ..backend.ast.structural import PsBlock
+from ..backend.ast.expressions import PsExpression
+from ..field import Field
+
+from .._deprecation import _deprecated
+
+if TYPE_CHECKING:
+    from ..jit import JitBase
+
+
+class Kernel:
+    """A pystencils kernel.
+
+    The kernel object is the final result of the translation process.
+    It is immutable, and its AST should not be altered any more, either, as this
+    might invalidate information about the kernel already stored in the kernel object.
+    """
+
+    def __init__(
+        self,
+        body: PsBlock,
+        target: Target,
+        name: str,
+        parameters: Sequence[Parameter],
+        required_headers: set[str],
+        jit: JitBase,
+    ):
+        self._body: PsBlock = body
+        self._target = target
+        self._name = name
+        self._params = tuple(parameters)  # stored as a tuple regardless of the input sequence type
+        self._required_headers = required_headers
+        self._jit = jit
+        self._metadata: dict[str, Any] = dict()
+
+    @property
+    def metadata(self) -> dict[str, Any]:
+        return self._metadata  # the internal dict itself, not a copy
+
+    @property
+    def body(self) -> PsBlock:
+        return self._body
+
+    @property
+    def target(self) -> Target:
+        return self._target
+
+    @property
+    def name(self) -> str:
+        return self._name
+
+    @name.setter
+    def name(self, n: str):  # NOTE(review): a setter contradicts "immutable" in the class docstring
+        self._name = n
+
+    @property
+    def function_name(self) -> str:  # pragma: no cover
+        _deprecated("function_name", "name")
+        return self._name
+
+    @function_name.setter
+    def function_name(self, n: str):  # pragma: no cover
+        _deprecated("function_name", "name")
+        self._name = n
+
+    @property
+    def parameters(self) -> tuple[Parameter, ...]:
+        return self._params
+
+    def get_parameters(self) -> tuple[Parameter, ...]:  # pragma: no cover
+        _deprecated("Kernel.get_parameters", "Kernel.parameters")
+        return self.parameters
+
+    def get_fields(self) -> set[Field]:
+        return set(chain.from_iterable(p.fields for p in self._params))  # union over all parameters
+
+    @property
+    def fields_accessed(self) -> set[Field]:  # pragma: no cover
+        warn(
+            "`fields_accessed` is deprecated and will be removed in a future version of pystencils. "
+            "Use `get_fields` instead.",
+            DeprecationWarning,
+        )
+        return self.get_fields()
+
+    @property
+    def required_headers(self) -> set[str]:
+        return self._required_headers
+
+    def get_c_code(self) -> str:
+        from ..backend.emission import CAstPrinter
+
+        printer = CAstPrinter()
+        return printer(self)  # the printer receives the whole kernel object, not only its body
+
+    def get_ir_code(self) -> str:
+        from ..backend.emission import IRAstPrinter
+
+        printer = IRAstPrinter()
+        return printer(self)  # the printer receives the whole kernel object, not only its body
+
+    def compile(self) -> Callable[..., None]:
+        """Invoke the underlying just-in-time compiler to obtain the kernel as an executable Python function."""
+        return self._jit.compile(self)
+
+
+class GpuKernel(Kernel):
+    """Internal representation of a kernel function targeted at GPUs (CUDA or SYCL targets)."""
+
+    def __init__(
+        self,
+        body: PsBlock,
+        threads_range: GpuThreadsRange | None,
+        target: Target,
+        name: str,
+        parameters: Sequence[Parameter],
+        required_headers: set[str],
+        jit: JitBase,
+    ):
+        super().__init__(body, target, name, parameters, required_headers, jit)
+        self._threads_range = threads_range  # may be None
+
+    @property
+    def threads_range(self) -> GpuThreadsRange | None:
+        """Object exposing the total size of the launch grid this kernel expects to be executed with."""
+        return self._threads_range
+
+
+class GpuThreadsRange:
+    """Number of threads required by a GPU kernel, in order (x, y, z)."""
+
+    def __init__(
+        self,
+        num_work_items: Sequence[PsExpression],
+    ):
+        self._dim = len(num_work_items)  # dimensionality = number of entries given
+        self._num_work_items = tuple(num_work_items)
+
+    # @property
+    # def grid_size(self) -> tuple[PsExpression, ...]:
+    #     return self._grid_size
+
+    # @property
+    # def block_size(self) -> tuple[PsExpression, ...]:
+    #     return self._block_size
+
+    @property
+    def num_work_items(self) -> tuple[PsExpression, ...]:
+        """Number of work items in (x, y, z)-order."""
+        return self._num_work_items
+
+    @property
+    def dim(self) -> int:
+        return self._dim
+
+    def __str__(self) -> str:
+        rep = "GpuThreadsRange { "
+        rep += "; ".join(f"{x}: {w}" for x, w in zip("xyz", self._num_work_items))  # zip stops after z
+        rep += " }"
+        return rep
+
+    def _repr_html_(self) -> str:
+        return str(self)  # same plain text as __str__; no HTML markup is added
diff --git a/src/pystencils/codegen/parameters.py b/src/pystencils/codegen/parameters.py
new file mode 100644
index 0000000000000000000000000000000000000000..d8411266ee514d4270a7a9d1c2fb24383f005329
--- /dev/null
+++ b/src/pystencils/codegen/parameters.py
@@ -0,0 +1,133 @@
+from __future__ import annotations
+
+from warnings import warn
+from typing import Sequence, Iterable
+
+from .properties import (
+    PsSymbolProperty,
+    _FieldProperty,
+    FieldShape,
+    FieldStride,
+    FieldBasePtr,
+)
+from ..types import PsType
+from ..field import Field
+from ..sympyextensions import TypedSymbol
+
+
+class Parameter:
+    """Parameter to an output object of the code generator."""
+
+    __match_args__ = ("name", "dtype", "properties")
+
+    def __init__(
+        self, name: str, dtype: PsType, properties: Iterable[PsSymbolProperty] = ()
+    ):
+        self._name = name
+        self._dtype = dtype
+        self._properties: frozenset[PsSymbolProperty] = (
+            frozenset(properties) if properties is not None else frozenset()  # None is tolerated
+        )
+        self._fields: tuple[Field, ...] = tuple(  # distinct fields of all field properties
+            sorted(
+                set(
+                    p.field  # type: ignore
+                    for p in filter(
+                        lambda p: isinstance(p, _FieldProperty), self._properties
+                    )
+                ),
+                key=lambda f: f.name,
+            )
+        )
+
+    @property
+    def name(self):
+        return self._name
+
+    @property
+    def dtype(self):
+        return self._dtype
+
+    def _hashable_contents(self):
+        return (self._name, self._dtype, self._properties)  # basis of both __hash__ and __eq__
+
+    #   TODO: Need?
+    def __hash__(self) -> int:
+        return hash(self._hashable_contents())
+
+    def __eq__(self, other: object) -> bool:
+        if not isinstance(other, Parameter):
+            return False
+
+        return (
+            type(self) is type(other)  # instances of different (sub)classes never compare equal
+            and self._hashable_contents() == other._hashable_contents()
+        )
+
+    def __str__(self) -> str:
+        return self._name
+
+    def __repr__(self) -> str:
+        return f"{type(self).__name__}(name = {self._name}, dtype = {self._dtype})"
+
+    @property
+    def symbol(self) -> TypedSymbol:
+        return TypedSymbol(self.name, self.dtype)  # a new symbol is built on every access
+
+    @property
+    def fields(self) -> Sequence[Field]:
+        """Fields associated with this parameter, as a tuple sorted by field name."""
+        return self._fields
+
+    def get_properties(
+        self, prop_type: type[PsSymbolProperty] | tuple[type[PsSymbolProperty], ...]
+    ) -> set[PsSymbolProperty]:
+        """Retrieve all properties of the given type(s) attached to this parameter"""
+        return set(filter(lambda p: isinstance(p, prop_type), self._properties))
+
+    @property
+    def properties(self) -> frozenset[PsSymbolProperty]:
+        return self._properties
+
+    @property
+    def is_field_parameter(self) -> bool:
+        return bool(self._fields)  # True if at least one field property is attached
+
+    #   Deprecated legacy properties
+    #   These are kept mostly for the legacy waLBerla code generation system
+
+    @property
+    def is_field_pointer(self) -> bool:  # pragma: no cover
+        warn(
+            "`is_field_pointer` is deprecated and will be removed in a future version of pystencils. "
+            "Use `param.get_properties(FieldBasePtr)` instead.",
+            DeprecationWarning,
+        )
+        return bool(self.get_properties(FieldBasePtr))
+
+    @property
+    def is_field_stride(self) -> bool:  # pragma: no cover
+        warn(
+            "`is_field_stride` is deprecated and will be removed in a future version of pystencils. "
+            "Use `param.get_properties(FieldStride)` instead.",
+            DeprecationWarning,
+        )
+        return bool(self.get_properties(FieldStride))
+
+    @property
+    def is_field_shape(self) -> bool:  # pragma: no cover
+        warn(
+            "`is_field_shape` is deprecated and will be removed in a future version of pystencils. "
+            "Use `param.get_properties(FieldShape)` instead.",
+            DeprecationWarning,
+        )
+        return bool(self.get_properties(FieldShape))
+
+    @property
+    def field_name(self) -> str:  # pragma: no cover
+        warn(
+            "`field_name` is deprecated and will be removed in a future version of pystencils. "
+            "Use `param.fields[0].name` instead.",
+            DeprecationWarning,
+        )
+        return self._fields[0].name  # IndexError if no field is associated with this parameter
diff --git a/src/pystencils/backend/properties.py b/src/pystencils/codegen/properties.py
similarity index 100%
rename from src/pystencils/backend/properties.py
rename to src/pystencils/codegen/properties.py
diff --git a/src/pystencils/target.py b/src/pystencils/codegen/target.py
similarity index 100%
rename from src/pystencils/target.py
rename to src/pystencils/codegen/target.py
diff --git a/src/pystencils/datahandling/__init__.py b/src/pystencils/datahandling/__init__.py
index 76a494255a69c7cb880d362ff6eb1835a8f1e33a..ff1a12c96d749b9d5405a62f606ab1ca77e06de5 100644
--- a/src/pystencils/datahandling/__init__.py
+++ b/src/pystencils/datahandling/__init__.py
@@ -3,7 +3,7 @@ import warnings
 from typing import Tuple, Union
 
 from .datahandling_interface import DataHandling
-from ..target import Target
+from ..codegen.target import Target
 from .serial_datahandling import SerialDataHandling
 
 try:
diff --git a/src/pystencils/datahandling/datahandling_interface.py b/src/pystencils/datahandling/datahandling_interface.py
index f42c4ef138e04eede2719cbce69f9975b656cb30..867bbf062d7307187c6a72f465fe54177bbfacc1 100644
--- a/src/pystencils/datahandling/datahandling_interface.py
+++ b/src/pystencils/datahandling/datahandling_interface.py
@@ -3,7 +3,7 @@ from typing import Callable, Dict, Iterable, Optional, Sequence, Tuple, Union
 
 import numpy as np
 
-from pystencils.target import Target
+from ..codegen import Target
 from pystencils.field import Field, FieldType
 
 
diff --git a/src/pystencils/datahandling/parallel_datahandling.py b/src/pystencils/datahandling/parallel_datahandling.py
index f3f7305228f6ba2c68f04c39c05ddb6c8ff7610c..8c7ce6e628f44b0a40cbe6c91e2605eb08176c23 100644
--- a/src/pystencils/datahandling/parallel_datahandling.py
+++ b/src/pystencils/datahandling/parallel_datahandling.py
@@ -9,7 +9,7 @@ from pystencils.datahandling.blockiteration import block_iteration, sliced_block
 from pystencils.datahandling.datahandling_interface import DataHandling
 from pystencils.field import Field, FieldType
 from pystencils.utils import DotDict
-from pystencils.backend.properties import FieldBasePtr
+from pystencils.codegen.properties import FieldBasePtr
 from pystencils import Target
 
 
diff --git a/src/pystencils/datahandling/serial_datahandling.py b/src/pystencils/datahandling/serial_datahandling.py
index 6a5ce573085b0380196e208c7d19ec16cf5fbb37..73b749ca46171c86f6183789bff2731efb8a1a5d 100644
--- a/src/pystencils/datahandling/serial_datahandling.py
+++ b/src/pystencils/datahandling/serial_datahandling.py
@@ -6,7 +6,7 @@ import numpy as np
 
 from pystencils.datahandling.blockiteration import SerialBlock
 from pystencils.datahandling.datahandling_interface import DataHandling
-from pystencils.target import Target
+from ..codegen import Target
 from pystencils.field import (Field, FieldType, create_numpy_array_with_layout,
                               layout_string_to_tuple, spatial_layout_string_to_tuple)
 from pystencils.gpu.gpu_array_handler import GPUArrayHandler, GPUNotAvailableHandler
diff --git a/src/pystencils/display_utils.py b/src/pystencils/display_utils.py
index 7f110c9c06f97fd37f17e734f5501f856216e56f..919dea4a8b568143065e8361fc695a044c69d541 100644
--- a/src/pystencils/display_utils.py
+++ b/src/pystencils/display_utils.py
@@ -2,9 +2,8 @@ from typing import Any, Dict, Optional
 
 import sympy as sp
 
-from pystencils.backend import KernelFunction
-from pystencils.kernel_wrapper import KernelWrapper as OldKernelWrapper
-from .backend.jit import KernelWrapper
+from .codegen import Kernel
+from .jit import KernelWrapper
 
 
 def to_dot(expr: sp.Expr, graph_style: Optional[Dict[str, Any]] = None, short=True):
@@ -43,32 +42,27 @@ def highlight_cpp(code: str):
     return HTML(highlight(code, CppLexer(), HtmlFormatter()))
 
 
-def get_code_obj(ast: KernelWrapper | KernelFunction, custom_backend=None):
+def get_code_obj(ast: KernelWrapper | Kernel, custom_backend=None):
     """Returns an object to display generated code (C/C++ or CUDA)
 
     Can either be displayed as HTML in Jupyter notebooks or printed as normal string.
     """
-    from pystencils.backend.emission import emit_code
-
-    if isinstance(ast, OldKernelWrapper):
-        ast = ast.ast
-    elif isinstance(ast, KernelWrapper):
-        ast = ast.kernel_function
+    if isinstance(ast, KernelWrapper):
+        func = ast.kernel_function
+    else:
+        func = ast
 
     class CodeDisplay:
-        def __init__(self, ast_input):
-            self.ast = ast_input
-
         def _repr_html_(self):
-            return highlight_cpp(emit_code(self.ast)).__html__()
+            return highlight_cpp(func.get_c_code()).__html__()
 
         def __str__(self):
-            return emit_code(self.ast)
+            return func.get_c_code()
 
         def __repr__(self):
-            return emit_code(self.ast)
+            return func.get_c_code()
 
-    return CodeDisplay(ast)
+    return CodeDisplay()
 
 
 def get_code_str(ast, custom_backend=None):
@@ -88,7 +82,7 @@ def _isnotebook():
         return False
 
 
-def show_code(ast: KernelWrapper | KernelFunction, custom_backend=None):
+def show_code(ast: KernelWrapper | Kernel, custom_backend=None):
     code = get_code_obj(ast, custom_backend)
 
     if _isnotebook():
diff --git a/src/pystencils/enums.py b/src/pystencils/enums.py
index 86048059d67e6132223825e3b94588b35e30796e..bcea50e84cfba9190a2353245c7c29168443ca13 100644
--- a/src/pystencils/enums.py
+++ b/src/pystencils/enums.py
@@ -1,4 +1,4 @@
-from .target import Target as _Target
+from .codegen import Target as _Target
 
 from warnings import warn
 
diff --git a/src/pystencils/inspection.py b/src/pystencils/inspection.py
index cb03a1c8da31a479784e0c99185eb6c6f760533d..2c43fc73ddeb92748d9c571bb422ab7dcf1a64fc 100644
--- a/src/pystencils/inspection.py
+++ b/src/pystencils/inspection.py
@@ -2,8 +2,8 @@ from typing import overload
 
 from .backend.ast import PsAstNode
 from .backend.emission import CAstPrinter, IRAstPrinter, EmissionError
-from .backend.kernelfunction import KernelFunction
-from .kernelcreation import StageResult, CodegenIntermediates
+from .codegen import Kernel
+from .codegen.driver import StageResult, CodegenIntermediates
 from abc import ABC, abstractmethod
 
 _UNABLE_TO_DISPLAY_CPP = """
@@ -37,7 +37,7 @@ class CodeInspectionBase(ABC):
         self._ir_printer = IRAstPrinter(annotate_constants=False)
         self._c_printer = CAstPrinter()
 
-    def _ir_tab(self, ir_obj: PsAstNode | KernelFunction):
+    def _ir_tab(self, ir_obj: PsAstNode | Kernel):
         import ipywidgets as widgets
 
         ir = self._ir_printer(ir_obj)
@@ -45,7 +45,7 @@ class CodeInspectionBase(ABC):
         self._apply_tab_layout(ir_tab)
         return ir_tab
 
-    def _cpp_tab(self, ir_obj: PsAstNode | KernelFunction):
+    def _cpp_tab(self, ir_obj: PsAstNode | Kernel):
         import ipywidgets as widgets
 
         try:
@@ -64,7 +64,7 @@ class CodeInspectionBase(ABC):
         self._apply_tab_layout(cpp_tab)
         return cpp_tab
 
-    def _graphviz_tab(self, ir_obj: PsAstNode | KernelFunction):
+    def _graphviz_tab(self, ir_obj: PsAstNode | Kernel):
         import ipywidgets as widgets
 
         graphviz_tab = widgets.HTML(_GRAPHVIZ_NOT_IMPLEMENTED)
@@ -137,7 +137,7 @@ class AstInspection(CodeInspectionBase):
 class KernelInspection(CodeInspectionBase):
     def __init__(
         self,
-        kernel: KernelFunction,
+        kernel: Kernel,
         show_ir: bool = True,
         show_cpp: bool = True,
         show_graph: bool = True,
@@ -229,7 +229,7 @@ def inspect(obj: PsAstNode): ...
 
 
 @overload
-def inspect(obj: KernelFunction): ...
+def inspect(obj: Kernel): ...
 
 
 @overload
@@ -246,7 +246,7 @@ def inspect(obj, show_ir: bool = True, show_cpp: bool = True, show_graph: bool =
     When run inside a Jupyter notebook, this function displays an inspection widget
     for the following types of objects:
     - `PsAstNode`
-    - `KernelFunction`
+    - `Kernel`
     - `StageResult`
     - `CodegenIntermediates`
     """
@@ -258,7 +258,7 @@ def inspect(obj, show_ir: bool = True, show_cpp: bool = True, show_graph: bool =
             preview = AstInspection(
-                obj, show_ir=show_ir, show_cpp=show_cpp, show_graph=show_cpp
+                obj, show_ir=show_ir, show_cpp=show_cpp, show_graph=show_graph
             )
-        case KernelFunction():
+        case Kernel():
             preview = KernelInspection(
-                obj, show_ir=show_ir, show_cpp=show_cpp, show_graph=show_cpp
+                obj, show_ir=show_ir, show_cpp=show_cpp, show_graph=show_graph
             )
diff --git a/src/pystencils/backend/jit/__init__.py b/src/pystencils/jit/__init__.py
similarity index 90%
rename from src/pystencils/backend/jit/__init__.py
rename to src/pystencils/jit/__init__.py
index f45cb9bff09d07fd85fd59957bf3582a3eb7f80f..1ef8378d3000e95b12bb6a3a17062fb6488e1729 100644
--- a/src/pystencils/backend/jit/__init__.py
+++ b/src/pystencils/jit/__init__.py
@@ -2,7 +2,7 @@
 JIT compilation is realized by subclasses of `JitBase`.
 A JIT compiler may freely be created and configured by the user.
 It can then be passed to `create_kernel` using the ``jit`` argument of
-`CreateKernelConfig`, in which case it is hooked into the `KernelFunction.compile` method
+`CreateKernelConfig`, in which case it is hooked into the `Kernel.compile` method
 of the generated kernel function::
 
     my_jit = MyJit()
@@ -24,7 +24,7 @@ It is due to be replaced in the near future.
 
 from .jit import JitBase, NoJit, KernelWrapper
 from .legacy_cpu import LegacyCpuJit
-from .gpu_cupy import CupyJit
+from .gpu_cupy import CupyJit, CupyKernelWrapper, LaunchGrid
 
 no_jit = NoJit()
 """Disables just-in-time compilation for a kernel."""
@@ -36,4 +36,6 @@ __all__ = [
     "NoJit",
     "no_jit",
     "CupyJit",
+    "CupyKernelWrapper",
+    "LaunchGrid"
 ]
diff --git a/src/pystencils/backend/jit/cpu_extension_module.py b/src/pystencils/jit/cpu_extension_module.py
similarity index 86%
rename from src/pystencils/backend/jit/cpu_extension_module.py
rename to src/pystencils/jit/cpu_extension_module.py
index 4412f8879a346d5c3635271e9d3700fed041435f..befb033e6f7969a5ffd9bc7742e9e7ab691da47d 100644
--- a/src/pystencils/backend/jit/cpu_extension_module.py
+++ b/src/pystencils/jit/cpu_extension_module.py
@@ -9,22 +9,19 @@ from textwrap import indent
 
 import numpy as np
 
-from ..exceptions import PsInternalCompilerError
-from ..kernelfunction import (
-    KernelFunction,
-    KernelParameter,
+from ..codegen import (
+    Kernel,
+    Parameter,
 )
-from ..properties import FieldBasePtr, FieldShape, FieldStride
-from ..constraints import KernelParamsConstraint
-from ...types import (
+from ..codegen.properties import FieldBasePtr, FieldShape, FieldStride
+from ..types import (
     PsType,
     PsUnsignedIntegerType,
     PsSignedIntegerType,
     PsIeeeFloatType,
 )
-from ...types.quick import Fp, SInt, UInt
-from ...field import Field
-from ..emission import emit_code
+from ..types.quick import Fp, SInt, UInt
+from ..field import Field
 
 
 class PsKernelExtensioNModule:
@@ -38,11 +35,11 @@ class PsKernelExtensioNModule:
         self._module_name = module_name
 
         if custom_backend is not None:
-            raise PsInternalCompilerError(
+            raise ValueError(
                 "The `custom_backend` parameter exists only for interface compatibility and cannot be set."
             )
 
-        self._kernels: dict[str, KernelFunction] = dict()
+        self._kernels: dict[str, Kernel] = dict()
         self._code_string: str | None = None
         self._code_hash: str | None = None
 
@@ -50,7 +47,7 @@ class PsKernelExtensioNModule:
     def module_name(self) -> str:
         return self._module_name
 
-    def add_function(self, kernel_function: KernelFunction, name: str | None = None):
+    def add_function(self, kernel_function: Kernel, name: str | None = None):
         if name is None:
             name = kernel_function.name
 
@@ -98,7 +95,7 @@ class PsKernelExtensioNModule:
             old_name = kernel.name
             kernel.name = f"kernel_{name}"
 
-            code += emit_code(kernel)
+            code += kernel.get_c_code()
             code += "\n"
             code += emit_call_wrapper(name, kernel)
             code += "\n"
@@ -122,14 +119,14 @@ class PsKernelExtensioNModule:
         print(self._code_string, file=file)
 
 
-def emit_call_wrapper(function_name: str, kernel: KernelFunction) -> str:
+def emit_call_wrapper(function_name: str, kernel: Kernel) -> str:
     builder = CallWrapperBuilder()
 
     for p in kernel.parameters:
         builder.extract_parameter(p)
 
-    for c in kernel.constraints:
-        builder.check_constraint(c)
+    # for c in kernel.constraints:
+    #     builder.check_constraint(c)
 
     builder.call(kernel, kernel.parameters)
 
@@ -206,8 +203,8 @@ if( !kwargs || !PyDict_Check(kwargs) ) {{
         self._array_extractions: dict[Field, str] = dict()
         self._array_frees: dict[Field, str] = dict()
 
-        self._array_assoc_var_extractions: dict[KernelParameter, str] = dict()
-        self._scalar_extractions: dict[KernelParameter, str] = dict()
+        self._array_assoc_var_extractions: dict[Parameter, str] = dict()
+        self._scalar_extractions: dict[Parameter, str] = dict()
 
         self._constraint_checks: list[str] = []
 
@@ -223,7 +220,7 @@ if( !kwargs || !PyDict_Check(kwargs) ) {{
                 return "PyLong_AsUnsignedLong"
 
             case _:
-                raise PsInternalCompilerError(
+                raise ValueError(
                     f"Don't know how to cast Python objects to {dtype}"
                 )
 
@@ -267,7 +264,7 @@ if( !kwargs || !PyDict_Check(kwargs) ) {{
 
         return self._array_buffers[field]
 
-    def extract_scalar(self, param: KernelParameter) -> str:
+    def extract_scalar(self, param: Parameter) -> str:
         if param not in self._scalar_extractions:
             extract_func = self._scalar_extractor(param.dtype)
             code = self.TMPL_EXTRACT_SCALAR.format(
@@ -279,7 +276,7 @@ if( !kwargs || !PyDict_Check(kwargs) ) {{
 
         return param.name
 
-    def extract_array_assoc_var(self, param: KernelParameter) -> str:
+    def extract_array_assoc_var(self, param: Parameter) -> str:
         if param not in self._array_assoc_var_extractions:
             field = param.fields[0]
             buffer = self.extract_field(field)
@@ -305,31 +302,31 @@ if( !kwargs || !PyDict_Check(kwargs) ) {{
 
         return param.name
 
-    def extract_parameter(self, param: KernelParameter):
+    def extract_parameter(self, param: Parameter):
         if param.is_field_parameter:
             self.extract_array_assoc_var(param)
         else:
             self.extract_scalar(param)
 
-    def check_constraint(self, constraint: KernelParamsConstraint):
-        variables = constraint.get_parameters()
+#     def check_constraint(self, constraint: KernelParamsConstraint):
+#         variables = constraint.get_parameters()
 
-        for var in variables:
-            self.extract_parameter(var)
+#         for var in variables:
+#             self.extract_parameter(var)
 
-        cond = constraint.to_code()
+#         cond = constraint.to_code()
 
-        code = f"""
-if(!({cond}))
-{{
-    PyErr_SetString(PyExc_ValueError, "Violated constraint: {constraint}"); 
-    return NULL;
-}}
-"""
+#         code = f"""
+# if(!({cond}))
+# {{
+#     PyErr_SetString(PyExc_ValueError, "Violated constraint: {constraint}"); 
+#     return NULL;
+# }}
+# """
 
-        self._constraint_checks.append(code)
+#         self._constraint_checks.append(code)
 
-    def call(self, kernel: KernelFunction, params: tuple[KernelParameter, ...]):
+    def call(self, kernel: Kernel, params: tuple[Parameter, ...]):
         param_list = ", ".join(p.name for p in params)
         self._call = f"{kernel.name} ({param_list});"
 
diff --git a/src/pystencils/backend/jit/gpu_cupy.py b/src/pystencils/jit/gpu_cupy.py
similarity index 90%
rename from src/pystencils/backend/jit/gpu_cupy.py
rename to src/pystencils/jit/gpu_cupy.py
index 1dd18767160a626ff7972ebb78f83bb3e64a1efc..c208ac2196151d079ca5081f1377c55d18a9393c 100644
--- a/src/pystencils/backend/jit/gpu_cupy.py
+++ b/src/pystencils/jit/gpu_cupy.py
@@ -8,21 +8,20 @@ try:
 except ImportError:
     HAVE_CUPY = False
 
-from ...target import Target
-from ...field import FieldType
+from ..codegen import Target
+from ..field import FieldType
 
-from ...types import PsType
+from ..types import PsType
 from .jit import JitBase, JitError, KernelWrapper
-from ..kernelfunction import (
-    KernelFunction,
-    GpuKernelFunction,
-    KernelParameter,
+from ..codegen import (
+    Kernel,
+    GpuKernel,
+    Parameter,
 )
-from ..properties import FieldShape, FieldStride, FieldBasePtr
-from ..emission import emit_code
-from ...types import PsStructType
+from ..codegen.properties import FieldShape, FieldStride, FieldBasePtr
+from ..types import PsStructType
 
-from ...include import get_pystencils_include_path
+from ..include import get_pystencils_include_path
 
 
 @dataclass
@@ -34,18 +33,18 @@ class LaunchGrid:
 class CupyKernelWrapper(KernelWrapper):
     def __init__(
         self,
-        kfunc: GpuKernelFunction,
+        kfunc: GpuKernel,
         raw_kernel: Any,
         block_size: tuple[int, int, int],
     ):
-        self._kfunc: GpuKernelFunction = kfunc
+        self._kfunc: GpuKernel = kfunc
         self._raw_kernel = raw_kernel
         self._block_size = block_size
         self._num_blocks: tuple[int, int, int] | None = None
         self._args_cache: dict[Any, tuple] = dict()
 
     @property
-    def kernel_function(self) -> GpuKernelFunction:
+    def kernel_function(self) -> GpuKernel:
         return self._kfunc
 
     @property
@@ -105,7 +104,7 @@ class CupyKernelWrapper(KernelWrapper):
         field_shapes = set()
         index_shapes = set()
 
-        def check_shape(field_ptr: KernelParameter, arr: cp.ndarray):
+        def check_shape(field_ptr: Parameter, arr: cp.ndarray):
             field = field_ptr.fields[0]
 
             if field.has_fixed_shape:
@@ -190,7 +189,7 @@ class CupyKernelWrapper(KernelWrapper):
                 add_arg(kparam.name, val, kparam.dtype)
 
         #   Determine launch grid
-        from ..ast.expressions import evaluate_expression
+        from ..backend.ast.expressions import evaluate_expression
 
         symbolic_threads_range = self._kfunc.threads_range
 
@@ -243,13 +242,13 @@ class CupyJit(JitBase):
             tuple(default_block_size) + (1,) * (3 - len(default_block_size)),
         )
 
-    def compile(self, kfunc: KernelFunction) -> KernelWrapper:
+    def compile(self, kfunc: Kernel) -> KernelWrapper:
         if not HAVE_CUPY:
             raise JitError(
                 "`cupy` is not installed: just-in-time-compilation of CUDA kernels is unavailable."
             )
 
-        if not isinstance(kfunc, GpuKernelFunction) or kfunc.target != Target.CUDA:
+        if not isinstance(kfunc, GpuKernel) or kfunc.target != Target.CUDA:
             raise ValueError(
                 "The CupyJit just-in-time compiler only accepts kernels generated for CUDA or HIP"
             )
@@ -269,7 +268,7 @@ class CupyJit(JitBase):
         options.append("-I" + get_pystencils_include_path())
         return tuple(options)
 
-    def _prelude(self, kfunc: GpuKernelFunction) -> str:
-        headers = self._runtime_headers
+    def _prelude(self, kfunc: GpuKernel) -> str:
+        headers = set(self._runtime_headers)  # copy, so `|=` does not mutate the shared set
         headers |= kfunc.required_headers
 
@@ -286,6 +285,6 @@ class CupyJit(JitBase):
 
         return code
 
-    def _kernel_code(self, kfunc: GpuKernelFunction) -> str:
-        kernel_code = emit_code(kfunc)
+    def _kernel_code(self, kfunc: GpuKernel) -> str:
+        kernel_code = kfunc.get_c_code()
         return f'extern "C" {kernel_code}'
diff --git a/src/pystencils/backend/jit/jit.py b/src/pystencils/jit/jit.py
similarity index 74%
rename from src/pystencils/backend/jit/jit.py
rename to src/pystencils/jit/jit.py
index 2d091c4a009f27ba1d1efb2e7bab37021ff001dd..4998c14adfdc810a93d1a1f96cc310ac81c65f5d 100644
--- a/src/pystencils/backend/jit/jit.py
+++ b/src/pystencils/jit/jit.py
@@ -3,8 +3,7 @@ from typing import Sequence, TYPE_CHECKING
 from abc import ABC, abstractmethod
 
 if TYPE_CHECKING:
-    from ..kernelfunction import KernelFunction, KernelParameter
-    from ...target import Target
+    from ..codegen import Kernel, Parameter, Target
 
 
 class JitError(Exception):
@@ -14,7 +13,7 @@ class JitError(Exception):
 class KernelWrapper(ABC):
     """Wrapper around a compiled and executable pystencils kernel."""
 
-    def __init__(self, kfunc: KernelFunction) -> None:
+    def __init__(self, kfunc: Kernel) -> None:
         self._kfunc = kfunc
 
     @abstractmethod
@@ -22,11 +21,11 @@ class KernelWrapper(ABC):
         pass
 
     @property
-    def kernel_function(self) -> KernelFunction:
+    def kernel_function(self) -> Kernel:
         return self._kfunc
     
     @property
-    def ast(self) -> KernelFunction:
+    def ast(self) -> Kernel:
         return self._kfunc
     
     @property
@@ -34,7 +33,7 @@ class KernelWrapper(ABC):
         return self._kfunc.target
     
     @property
-    def parameters(self) -> Sequence[KernelParameter]:
+    def parameters(self) -> Sequence[Parameter]:
         return self._kfunc.parameters
 
     @property
@@ -48,14 +47,14 @@ class JitBase(ABC):
     """Base class for just-in-time compilation interfaces implemented in pystencils."""
 
     @abstractmethod
-    def compile(self, kernel: KernelFunction) -> KernelWrapper:
+    def compile(self, kernel: Kernel) -> KernelWrapper:
         """Compile a kernel function and return a callable object which invokes the kernel."""
 
 
 class NoJit(JitBase):
     """Not a JIT compiler: Used to explicitly disable JIT compilation on an AST."""
 
-    def compile(self, kernel: KernelFunction) -> KernelWrapper:
+    def compile(self, kernel: Kernel) -> KernelWrapper:
         raise JitError(
             "Just-in-time compilation of this kernel was explicitly disabled."
         )
diff --git a/src/pystencils/backend/jit/legacy_cpu.py b/src/pystencils/jit/legacy_cpu.py
similarity index 98%
rename from src/pystencils/backend/jit/legacy_cpu.py
rename to src/pystencils/jit/legacy_cpu.py
index 1acd1b22ad48ac0564d255314bd6603405421fdc..514e9b60e4a5ae83a234be9f3cd514fdc7a0e555 100644
--- a/src/pystencils/backend/jit/legacy_cpu.py
+++ b/src/pystencils/jit/legacy_cpu.py
@@ -61,7 +61,7 @@ import time
 import warnings
 
 
-from ..kernelfunction import KernelFunction
+from ..codegen import Kernel
 from .jit import JitBase, KernelWrapper
 from .cpu_extension_module import PsKernelExtensioNModule
 
@@ -71,7 +71,7 @@ from pystencils.utils import atomic_file_write, recursive_dict_update
 
 
 class CpuKernelWrapper(KernelWrapper):
-    def __init__(self, kfunc: KernelFunction, compiled_kernel: Callable[..., None]) -> None:
+    def __init__(self, kfunc: Kernel, compiled_kernel: Callable[..., None]) -> None:
         super().__init__(kfunc)
         self._compiled_kernel = compiled_kernel
 
@@ -86,7 +86,7 @@ class CpuKernelWrapper(KernelWrapper):
 class LegacyCpuJit(JitBase):
     """Wrapper around ``pystencils.cpu.cpujit``"""
 
-    def compile(self, kernel: KernelFunction) -> KernelWrapper:
+    def compile(self, kernel: Kernel) -> KernelWrapper:
         return compile_and_load(kernel)
 
 
@@ -436,7 +436,7 @@ def compile_module(code, code_hash, base_dir, compile_flags=None):
     return lib_file
 
 
-def compile_and_load(kernel: KernelFunction, custom_backend=None):
+def compile_and_load(kernel: Kernel, custom_backend=None):
     cache_config = get_cache_config()
 
     compiler_config = get_compiler_config()
diff --git a/src/pystencils/backend/jit/msvc_detection.py b/src/pystencils/jit/msvc_detection.py
similarity index 100%
rename from src/pystencils/backend/jit/msvc_detection.py
rename to src/pystencils/jit/msvc_detection.py
diff --git a/src/pystencils/kernel_decorator.py b/src/pystencils/kernel_decorator.py
index a3590d3a4bdcddb43aba31ddf943206ba7e47f84..4e18d7245ba89c3d891679e68f39208b7dae032d 100644
--- a/src/pystencils/kernel_decorator.py
+++ b/src/pystencils/kernel_decorator.py
@@ -7,7 +7,7 @@ import sympy as sp
 
 from .assignment import Assignment
 from .sympyextensions import SymbolCreator
-from pystencils.config import CreateKernelConfig
+from .codegen import CreateKernelConfig
 
 __all__ = ['kernel', 'kernel_config']
 
diff --git a/src/pystencils/kernel_wrapper.py b/src/pystencils/kernel_wrapper.py
index afce06d77a17e0eb067d84a02bc273ba0668fc55..5095332c18fa4526fc0b7fb37aad80bc6dc18452 100644
--- a/src/pystencils/kernel_wrapper.py
+++ b/src/pystencils/kernel_wrapper.py
@@ -1,3 +1,3 @@
-from .backend.jit import KernelWrapper as _KernelWrapper
+from .jit import KernelWrapper as _KernelWrapper
 
 KernelWrapper = _KernelWrapper
diff --git a/src/pystencils/kernelcreation.py b/src/pystencils/kernelcreation.py
index 096d644e121a6760ccd1472420e1a5fbbecda48f..97965f709fa092ff95f908a4dc721a6a76ec8e95 100644
--- a/src/pystencils/kernelcreation.py
+++ b/src/pystencils/kernelcreation.py
@@ -1,427 +1,17 @@
-from __future__ import annotations
+from .codegen import Target
+from .codegen import create_kernel as _create_kernel
 
-from typing import cast, Sequence
-from dataclasses import dataclass, replace
+from warnings import warn
 
-from .target import Target
-from .config import (
-    CreateKernelConfig,
-    OpenMpConfig,
-    VectorizationConfig,
-    AUTO
+warn(
+    "Importing anything from `pystencils.kernelcreation` is deprecated "
+    "and the module will be removed in pystencils 2.1. "
+    "Import from `pystencils` instead.",
+    FutureWarning,
 )
-from .backend import KernelFunction
-from .types import create_numeric_type, PsIntegerType, PsScalarType
-from .backend.ast import PsAstNode
-from .backend.ast.structural import PsBlock, PsLoop
-from .backend.kernelcreation import (
-    KernelCreationContext,
-    KernelAnalysis,
-    FreezeExpressions,
-    Typifier,
-)
-from .backend.constants import PsConstant
-from .backend.kernelcreation.iteration_space import (
-    create_sparse_iteration_space,
-    create_full_iteration_space,
-    FullIterationSpace,
-)
-from .backend.platforms import Platform, GenericCpu, GenericVectorCpu, GenericGpu
-from .backend.exceptions import VectorizationError
-
-from .backend.transformations import (
-    EliminateConstants,
-    LowerToC,
-    SelectFunctions,
-    CanonicalizeSymbols,
-    HoistLoopInvariantDeclarations,
-)
-from .backend.kernelfunction import (
-    create_cpu_kernel_function,
-    create_gpu_kernel_function,
-)
-
-from .simp import AssignmentCollection
-from sympy.codegen.ast import AssignmentBase
-
-
-__all__ = ["create_kernel"]
-
-
-def create_kernel(
-    assignments: AssignmentCollection | Sequence[AssignmentBase] | AssignmentBase,
-    config: CreateKernelConfig | None = None,
-    **kwargs,
-) -> KernelFunction:
-    """Create a kernel function from a set of assignments.
-
-    Args:
-        assignments: The kernel's sequence of assignments, expressed using SymPy
-        config: The configuration for the kernel translator
-        kwargs: If ``config`` is not set, it is created from the keyword arguments;
-            if it is set, its option will be overridden by any keyword arguments.
-
-    Returns:
-        The numerical kernel in pystencil's internal representation, ready to be
-        exported or compiled
-    """
-
-    if not config:
-        config = CreateKernelConfig()
-
-    if kwargs:
-        config = replace(config, **kwargs)
-
-    driver = DefaultKernelCreationDriver(config)
-    return driver(assignments)
-
-
-def get_driver(cfg: CreateKernelConfig, *, retain_intermediates: bool = False):
-    return DefaultKernelCreationDriver(cfg, retain_intermediates)
-
-
-class DefaultKernelCreationDriver:
-    def __init__(self, cfg: CreateKernelConfig, retain_intermediates: bool = False):
-        self._cfg = cfg
-
-        idx_dtype = create_numeric_type(self._cfg.index_dtype)
-        assert isinstance(idx_dtype, PsIntegerType)
-
-        self._ctx = KernelCreationContext(
-            default_dtype=create_numeric_type(self._cfg.default_dtype),
-            index_dtype=idx_dtype,
-        )
-
-        self._target = self._cfg.get_target()
-        self._platform = self._get_platform()
-
-        if retain_intermediates:
-            self._intermediates = CodegenIntermediates()
-        else:
-            self._intermediates = None
-
-    @property
-    def intermediates(self) -> CodegenIntermediates | None:
-        return self._intermediates
-
-    def __call__(
-        self,
-        assignments: AssignmentCollection | Sequence[AssignmentBase] | AssignmentBase,
-    ):
-        kernel_body = self.parse_kernel_body(
-            assignments
-        )
-
-        match self._platform:
-            case GenericCpu():
-                kernel_ast = self._platform.materialize_iteration_space(
-                    kernel_body, self._ctx.get_iteration_space()
-                )
-            case GenericGpu():
-                kernel_ast, gpu_threads = self._platform.materialize_iteration_space(
-                    kernel_body, self._ctx.get_iteration_space()
-                )
-
-        if self._intermediates is not None:
-            self._intermediates.materialized_ispace = kernel_ast.clone()
-
-        #   Fold and extract constants
-        elim_constants = EliminateConstants(self._ctx, extract_constant_exprs=True)
-        kernel_ast = cast(PsBlock, elim_constants(kernel_ast))
-
-        if self._intermediates is not None:
-            self._intermediates.constants_eliminated = kernel_ast.clone()
-
-        #   Target-Specific optimizations
-        if self._cfg.target.is_cpu():
-            kernel_ast = self._transform_for_cpu(kernel_ast)
-
-        #   Note: After this point, the AST may contain intrinsics, so type-dependent
-        #   transformations cannot be run any more
-
-        #   Lowering
-        lower_to_c = LowerToC(self._ctx)
-        kernel_ast = cast(PsBlock, lower_to_c(kernel_ast))
-
-        select_functions = SelectFunctions(self._platform)
-        kernel_ast = cast(PsBlock, select_functions(kernel_ast))
-
-        if self._intermediates is not None:
-            self._intermediates.lowered = kernel_ast.clone()
-
-        #   Late canonicalization pass: Canonicalize new symbols introduced by LowerToC
-
-        canonicalize = CanonicalizeSymbols(self._ctx, True)
-        kernel_ast = cast(PsBlock, canonicalize(kernel_ast))
-
-        if self._cfg.target.is_cpu():
-            return create_cpu_kernel_function(
-                self._ctx,
-                self._platform,
-                kernel_ast,
-                self._cfg.function_name,
-                self._cfg.target,
-                self._cfg.get_jit(),
-            )
-        else:
-            return create_gpu_kernel_function(
-                self._ctx,
-                self._platform,
-                kernel_ast,
-                gpu_threads,
-                self._cfg.function_name,
-                self._cfg.target,
-                self._cfg.get_jit(),
-            )
-
-    def parse_kernel_body(
-        self,
-        assignments: AssignmentCollection | Sequence[AssignmentBase] | AssignmentBase,
-    ) -> PsBlock:
-        if isinstance(assignments, AssignmentBase):
-            assignments = [assignments]
-
-        if not isinstance(assignments, AssignmentCollection):
-            assignments = AssignmentCollection(assignments)  # type: ignore
-
-        _ = _parse_simplification_hints(assignments)
-
-        analysis = KernelAnalysis(
-            self._ctx,
-            not self._cfg.skip_independence_check,
-            not self._cfg.allow_double_writes,
-        )
-        analysis(assignments)
-
-        if self._cfg.index_field is not None:
-            ispace = create_sparse_iteration_space(
-                self._ctx, assignments, index_field=self._cfg.index_field
-            )
-        else:
-            gls = self._cfg.ghost_layers
-            islice = self._cfg.iteration_slice
-
-            if gls is None and islice is None:
-                gls = AUTO
-
-            ispace = create_full_iteration_space(
-                self._ctx,
-                assignments,
-                ghost_layers=gls,
-                iteration_slice=islice,
-            )
-
-        self._ctx.set_iteration_space(ispace)
-
-        freeze = FreezeExpressions(self._ctx)
-        kernel_body = freeze(assignments)
-
-        typify = Typifier(self._ctx)
-        kernel_body = typify(kernel_body)
-
-        if self._intermediates is not None:
-            self._intermediates.parsed_body = kernel_body.clone()
-
-        return kernel_body
 
-    def _transform_for_cpu(self, kernel_ast: PsBlock):
-        canonicalize = CanonicalizeSymbols(self._ctx, True)
-        kernel_ast = cast(PsBlock, canonicalize(kernel_ast))
 
-        if self._intermediates is not None:
-            self._intermediates.cpu_canonicalize = kernel_ast.clone()
-
-        hoist_invariants = HoistLoopInvariantDeclarations(self._ctx)
-        kernel_ast = cast(PsBlock, hoist_invariants(kernel_ast))
-
-        if self._intermediates is not None:
-            self._intermediates.cpu_hoist_invariants = kernel_ast.clone()
-
-        cpu_cfg = self._cfg.cpu_optim
-
-        if cpu_cfg is None:
-            return kernel_ast
-
-        if cpu_cfg.loop_blocking:
-            raise NotImplementedError("Loop blocking not implemented yet.")
-
-        kernel_ast = self._vectorize(kernel_ast)
-
-        if cpu_cfg.openmp is not False:
-            from .backend.transformations import AddOpenMP
-
-            params = (
-                cpu_cfg.openmp
-                if isinstance(cpu_cfg.openmp, OpenMpConfig)
-                else OpenMpConfig()
-            )
-            add_omp = AddOpenMP(self._ctx, params)
-            kernel_ast = cast(PsBlock, add_omp(kernel_ast))
-
-            if self._intermediates is not None:
-                self._intermediates.cpu_openmp = kernel_ast.clone()
-
-        if cpu_cfg.use_cacheline_zeroing:
-            raise NotImplementedError("CL-zeroing not implemented yet")
-
-        return kernel_ast
-
-    def _vectorize(self, kernel_ast: PsBlock) -> PsBlock:
-        assert self._cfg.cpu_optim is not None
-        vec_config = self._cfg.cpu_optim.get_vectorization_config()
-        if vec_config is None:
-            return kernel_ast
-
-        from .backend.transformations import LoopVectorizer, SelectIntrinsics
-
-        assert isinstance(self._platform, GenericVectorCpu)
-
-        ispace = self._ctx.get_iteration_space()
-        if not isinstance(ispace, FullIterationSpace):
-            raise VectorizationError(
-                "Unable to vectorize kernel: The kernel is not using a dense iteration space."
-            )
-
-        inner_loop_coord = ispace.loop_order[-1]
-        inner_loop_dim = ispace.dimensions[inner_loop_coord]
-
-        #   Apply stride (TODO: and alignment) assumptions
-        if vec_config.assume_inner_stride_one:
-            for field in self._ctx.fields:
-                buf = self._ctx.get_buffer(field)
-                inner_stride = buf.strides[inner_loop_coord]
-                if isinstance(inner_stride, PsConstant):
-                    if inner_stride.value != 1:
-                        raise VectorizationError(
-                            f"Unable to apply assumption 'assume_inner_stride_one': "
-                            f"Field {field} has fixed stride {inner_stride} "
-                            f"set in the inner coordinate {inner_loop_coord}."
-                        )
-                else:
-                    buf.strides[inner_loop_coord] = PsConstant(1, buf.index_type)
-                    #   TODO: Communicate assumption to runtime system via a precondition
-
-        #   Call loop vectorizer
-        if vec_config.lanes is None:
-            lanes = VectorizationConfig.default_lanes(
-                self._target, cast(PsScalarType, self._ctx.default_dtype)
-            )
-        else:
-            lanes = vec_config.lanes
-
-        vectorizer = LoopVectorizer(self._ctx, lanes)
-
-        def loop_predicate(loop: PsLoop):
-            return loop.counter.symbol == inner_loop_dim.counter
-
-        kernel_ast = vectorizer.vectorize_select_loops(kernel_ast, loop_predicate)
-
-        if self._intermediates is not None:
-            self._intermediates.cpu_vectorize = kernel_ast.clone()
-
-        select_intrin = SelectIntrinsics(self._ctx, self._platform)
-        kernel_ast = cast(PsBlock, select_intrin(kernel_ast))
-
-        if self._intermediates is not None:
-            self._intermediates.cpu_select_intrins = kernel_ast.clone()
-
-        return kernel_ast
-
-    def _get_platform(self) -> Platform:
-        if Target._CPU in self._target:
-            if Target._X86 in self._target:
-                from .backend.platforms.x86 import X86VectorArch, X86VectorCpu
-
-                arch: X86VectorArch
-
-                if Target._SSE in self._target:
-                    arch = X86VectorArch.SSE
-                elif Target._AVX in self._target:
-                    arch = X86VectorArch.AVX
-                elif Target._AVX512 in self._target:
-                    if Target._FP16 in self._target:
-                        arch = X86VectorArch.AVX512_FP16
-                    else:
-                        arch = X86VectorArch.AVX512
-                else:
-                    assert False, "unreachable code"
-
-                return X86VectorCpu(self._ctx, arch)
-            elif self._target == Target.GenericCPU:
-                return GenericCpu(self._ctx)
-            else:
-                raise NotImplementedError(
-                    f"No platform is currently available for CPU target {self._target}"
-                )
-
-        elif Target._GPU in self._target:
-            match self._target:
-                case Target.SYCL:
-                    from .backend.platforms import SyclPlatform
-
-                    return SyclPlatform(self._ctx, self._cfg.gpu_indexing)
-                case Target.CUDA:
-                    from .backend.platforms import CudaPlatform
-
-                    return CudaPlatform(self._ctx, self._cfg.gpu_indexing)
-
-        raise NotImplementedError(
-            f"Code generation for target {self._target} not implemented"
-        )
-
-
-@dataclass
-class StageResult:
-    ast: PsAstNode
-    label: str
-
-
-class StageResultSlot:
-    def __init__(self, description: str | None = None):
-        self._description = description
-        self._name: str
-        self._lookup: str
-
-    def __set_name__(self, owner, name: str):
-        self._name = name
-        self._lookup = f"_{name}"
-
-    def __get__(self, obj, objtype=None) -> StageResult | None:
-        if obj is None:
-            return None
-
-        ast = getattr(obj, self._lookup, None)
-        if ast is not None:
-            descr = self._name if self._description is None else self._description
-            return StageResult(ast, descr)
-        else:
-            return None
-
-    def __set__(self, obj, val: PsAstNode | None):
-        setattr(obj, self._lookup, val)
-
-
-class CodegenIntermediates:
-    """Intermediate results produced by the code generator."""
-
-    parsed_body = StageResultSlot("Freeze & Type Deduction")
-    materialized_ispace = StageResultSlot("Iteration Space Materialization")
-    constants_eliminated = StageResultSlot("Constant Elimination")
-    cpu_canonicalize = StageResultSlot("CPU: Symbol Canonicalization")
-    cpu_hoist_invariants = StageResultSlot("CPU: Hoisting of Loop Invariants")
-    cpu_vectorize = StageResultSlot("CPU: Vectorization")
-    cpu_select_intrins = StageResultSlot("CPU: Intrinsics Selection")
-    cpu_openmp = StageResultSlot("CPU: OpenMP Instrumentation")
-    lowered = StageResultSlot("C Language Lowering")
-
-    @property
-    def available_stages(self) -> Sequence[StageResult]:
-        all_results: list[StageResult | None] = [
-            getattr(self, name)
-            for name, slot in CodegenIntermediates.__dict__.items()
-            if isinstance(slot, StageResultSlot)
-        ]
-        return tuple(filter(lambda r: r is not None, all_results))  # type: ignore
+create_kernel = _create_kernel
 
 
 def create_staggered_kernel(
@@ -430,13 +20,3 @@ def create_staggered_kernel(
     raise NotImplementedError(
         "Staggered kernels are not yet implemented for pystencils 2.0"
     )
-
-
-#   Internals
-
-
-def _parse_simplification_hints(ac: AssignmentCollection):
-    if "split_groups" in ac.simplification_hints:
-        raise NotImplementedError(
-            "Loop splitting was requested, but is not implemented yet"
-        )
diff --git a/src/pystencils/runhelper/db.py b/src/pystencils/runhelper/db.py
index dd413a5e405771822d36611d1068936b74ee334c..e199829584c65ea096db1fc6c8e0192e44805705 100644
--- a/src/pystencils/runhelper/db.py
+++ b/src/pystencils/runhelper/db.py
@@ -8,7 +8,7 @@ import six
 from blitzdb.backends.file.backend import serializer_classes
 from blitzdb.backends.file.utils import JsonEncoder
 
-from pystencils.backend.jit.legacy_cpu import get_compiler_config
+from pystencils.jit.legacy_cpu import get_compiler_config
 from pystencils import CreateKernelConfig, Target, Field
 
 import json
diff --git a/tests/_todo/test_vectorization.py b/tests/_todo/test_vectorization.py
index fd416ab4cd05c7b8891aae4da91cc9aeae425698..de71209ff28215ce28d3b625342d68b420e94f09 100644
--- a/tests/_todo/test_vectorization.py
+++ b/tests/_todo/test_vectorization.py
@@ -2,7 +2,6 @@ import numpy as np
 
 import pytest
 
-import pystencils.config
 import sympy as sp
 
 import pystencils as ps
@@ -141,7 +140,7 @@ def test_aligned_and_nt_stores(openmp, instruction_set=instruction_set):
            'assume_inner_stride_one': True}
     update_rule = [ps.Assignment(f.center(), 0.25 * (g[-1, 0] + g[1, 0] + g[0, -1] + g[0, 1]))]
     # Without the base pointer spec, the inner store is not aligned
-    config = pystencils.config.CreateKernelConfig(target=dh.default_target, cpu_vectorize_info=opt, cpu_openmp=openmp)
+    config = ps.CreateKernelConfig(target=dh.default_target, cpu_vectorize_info=opt, cpu_openmp=openmp)
     ast = ps.create_kernel(update_rule, config=config)
     if instruction_set in ['sse'] or instruction_set.startswith('avx'):
         assert 'stream' in ast.instruction_set
@@ -166,7 +165,7 @@ def test_nt_stores_symbolic_size(instruction_set=instruction_set):
     update_rule = [ps.Assignment(f.center(), 0.0), ps.Assignment(g.center(), 0.0)]
     opt = {'instruction_set': instruction_set, 'assume_aligned': True, 'nontemporal': True,
            'assume_inner_stride_one': True}
-    config = pystencils.config.CreateKernelConfig(target=Target.CPU, cpu_vectorize_info=opt)
+    config = ps.CreateKernelConfig(target=Target.CPU, cpu_vectorize_info=opt)
     ast = ps.create_kernel(update_rule, config=config)
     # ps.show_code(ast)
     ast.compile()
@@ -187,7 +186,7 @@ def test_inplace_update(instruction_set=instruction_set):
         f1 @= 2 * s.tmp0
         f2 @= 2 * s.tmp0
 
-    config = pystencils.config.CreateKernelConfig(cpu_vectorize_info={'instruction_set': instruction_set})
+    config = ps.CreateKernelConfig(cpu_vectorize_info={'instruction_set': instruction_set})
     ast = ps.create_kernel(update_rule, config=config)
     kernel = ast.compile()
     kernel(f=arr)
@@ -379,7 +378,7 @@ def test_issue40(*_):
     eq = [ps.Assignment(sp.Symbol('rho'), 1.0),
           ps.Assignment(src[0, 0](0), sp.Rational(4, 9) * sp.Symbol('rho'))]
 
-    config = pystencils.config.CreateKernelConfig(target=Target.CPU, cpu_vectorize_info=opt, data_type='float64')
+    config = ps.CreateKernelConfig(target=Target.CPU, cpu_vectorize_info=opt, data_type='float64')
     ast = ps.create_kernel(eq, config=config)
 
     code = ps.get_code_str(ast)
diff --git a/tests/_todo/test_vectorization_specific.py b/tests/_todo/test_vectorization_specific.py
index d1930a07aaf990997766d19e9263a4a637a86bef..0f2c68a4de3711a3f7920fb0b964c35705b45e4e 100644
--- a/tests/_todo/test_vectorization_specific.py
+++ b/tests/_todo/test_vectorization_specific.py
@@ -2,7 +2,6 @@ import pytest
 
 import numpy as np
 
-import pystencils.config
 import sympy as sp
 
 import pystencils as ps
@@ -30,7 +29,7 @@ def test_vectorisation_varying_arch(instruction_set):
         f1 @= 2 * s.tmp0
         f2 @= 2 * s.tmp0
 
-    config = pystencils.config.CreateKernelConfig(cpu_vectorize_info={'instruction_set': instruction_set})
+    config = ps.CreateKernelConfig(cpu_vectorize_info={'instruction_set': instruction_set})
     ast = ps.create_kernel(update_rule, config=config)
     kernel = ast.compile()
     kernel(f=arr)
@@ -49,7 +48,7 @@ def test_vectorized_abs(instruction_set, dtype):
     f, g = ps.fields(f=arr, g=arr)
     update_rule = [ps.Assignment(g.center(), sp.Abs(f.center()))]
 
-    config = pystencils.config.CreateKernelConfig(cpu_vectorize_info={'instruction_set': instruction_set})
+    config = ps.CreateKernelConfig(cpu_vectorize_info={'instruction_set': instruction_set})
     ast = ps.create_kernel(update_rule, config=config)
 
     func = ast.compile()
@@ -66,20 +65,20 @@ def test_strided(instruction_set, dtype):
     if 'storeS' not in get_vector_instruction_set(dtype, instruction_set) \
             and instruction_set not in ['avx512', 'avx512vl', 'rvv'] and not instruction_set.startswith('sve'):
         with pytest.warns(UserWarning) as warn:
-            config = pystencils.config.CreateKernelConfig(cpu_vectorize_info={'instruction_set': instruction_set},
+            config = ps.CreateKernelConfig(cpu_vectorize_info={'instruction_set': instruction_set},
                                                           default_number_float=dtype)
             ast = ps.create_kernel(update_rule, config=config)
             assert 'Could not vectorize loop' in warn[0].message.args[0]
     else:
         with pytest.warns(None) as warn:
-            config = pystencils.config.CreateKernelConfig(cpu_vectorize_info={'instruction_set': instruction_set},
+            config = ps.CreateKernelConfig(cpu_vectorize_info={'instruction_set': instruction_set},
                                                           default_number_float=dtype)
             ast = ps.create_kernel(update_rule, config=config)
             assert len(warn) == 0
 
     # ps.show_code(ast)
     func = ast.compile()
-    ref_config = pystencils.config.CreateKernelConfig(default_number_float=dtype)
+    ref_config = ps.CreateKernelConfig(default_number_float=dtype)
     ref_func = ps.create_kernel(update_rule, config=ref_config).compile()
 
     # For some reason other array creations fail on the emulated ppc pipeline
@@ -115,7 +114,7 @@ def test_alignment_and_correct_ghost_layers(gl_field, gl_kernel, instruction_set
     update_rule = ps.Assignment(dst[0, 0], src[0, 0])
     opt = {'instruction_set': instruction_set, 'assume_aligned': True,
            'nontemporal': True, 'assume_inner_stride_one': True}
-    config = pystencils.config.CreateKernelConfig(target=dh.default_target,
+    config = ps.CreateKernelConfig(target=dh.default_target,
                                                   cpu_vectorize_info=opt, ghost_layers=gl_kernel)
     ast = ps.create_kernel(update_rule, config=config)
     kernel = ast.compile()
@@ -152,7 +151,7 @@ def test_vectorization_other(instruction_set, function):
 @pytest.mark.parametrize('instruction_set', supported_instruction_sets)
 @pytest.mark.parametrize('field_layout', ('fzyx', 'zyxf'))
 def test_square_root(dtype, instruction_set, field_layout):
-    config = pystencils.config.CreateKernelConfig(data_type=dtype,
+    config = ps.CreateKernelConfig(data_type=dtype,
                                                   default_number_float=dtype,
                                                   cpu_vectorize_info={'instruction_set': instruction_set,
                                                                       'assume_inner_stride_one': True,
@@ -195,7 +194,7 @@ def test_square_root_2(dtype, instruction_set, padding):
 @pytest.mark.parametrize('instruction_set', supported_instruction_sets)
 @pytest.mark.parametrize('padding', (True, False))
 def test_pow(dtype, instruction_set, padding):
-    config = pystencils.config.CreateKernelConfig(data_type=dtype,
+    config = ps.CreateKernelConfig(data_type=dtype,
                                                   default_number_float=dtype,
                                                   cpu_vectorize_info={'instruction_set': instruction_set,
                                                                       'assume_inner_stride_one': True,
diff --git a/tests/frontend/test_simplifications.py b/tests/frontend/test_simplifications.py
index 5e1bcb8ed06145f61ba31bcb6dea85a2e7bdbf58..45cde724108fe7578d8ff2dc9b8a2509a9add728 100644
--- a/tests/frontend/test_simplifications.py
+++ b/tests/frontend/test_simplifications.py
@@ -1,7 +1,6 @@
 from sys import version_info as vs
 import pytest
 
-import pystencils.config
 import sympy as sp
 import pystencils as ps
 
@@ -188,7 +187,7 @@ def test_evaluate_constant_terms(target):
         src[0, 0]: -sp.cos(1) + dst[0, 0]
     })
 
-    config = pystencils.config.CreateKernelConfig(target=target)
+    config = ps.CreateKernelConfig(target=target)
     ast = ps.create_kernel(assignments, config=config)
     code = ps.get_code_str(ast)
     assert 'cos(' not in code and 'cosf(' not in code
diff --git a/tests/kernelcreation/test_domain_kernels.py b/tests/kernelcreation/test_domain_kernels.py
index d02bfd8e46e8fc8c8f19bcebffc0db52787ff1bd..da261faec49940df31d59f44651956e2012b113a 100644
--- a/tests/kernelcreation/test_domain_kernels.py
+++ b/tests/kernelcreation/test_domain_kernels.py
@@ -10,16 +10,14 @@ from pystencils import (
     AssignmentCollection,
     Target,
     CreateKernelConfig,
-    CpuOptimConfig,
-    VectorizationConfig,
 )
 from pystencils.assignment import assignment_from_stencil
 
-from pystencils.kernelcreation import create_kernel, KernelFunction
+from pystencils import create_kernel, Kernel
 from pystencils.backend.emission import emit_code
 
 
-def inspect_dp_kernel(kernel: KernelFunction, gen_config: CreateKernelConfig):
+def inspect_dp_kernel(kernel: Kernel, gen_config: CreateKernelConfig):
     code = emit_code(kernel)
 
     match gen_config.target:
diff --git a/tests/kernelcreation/test_index_kernels.py b/tests/kernelcreation/test_index_kernels.py
index 5093c43ff4f74343b0fcf3f45d34b1cfb6597d05..569c0ab6a0e582de895a66c656697fdf8a5909ee 100644
--- a/tests/kernelcreation/test_index_kernels.py
+++ b/tests/kernelcreation/test_index_kernels.py
@@ -2,7 +2,7 @@ import numpy as np
 import pytest
 
 from pystencils import Assignment, Field, FieldType, AssignmentCollection, Target
-from pystencils.kernelcreation import create_kernel, CreateKernelConfig
+from pystencils import create_kernel, CreateKernelConfig
 
 
 @pytest.mark.parametrize("target", [Target.CPU, Target.GPU])
diff --git a/tests/kernelcreation/test_iteration_slices.py b/tests/kernelcreation/test_iteration_slices.py
index fb7f37eba99e2625a0eeb050a49575f657342ca5..fee3544f88087917290a42ed76d8941577726759 100644
--- a/tests/kernelcreation/test_iteration_slices.py
+++ b/tests/kernelcreation/test_iteration_slices.py
@@ -19,7 +19,7 @@ from pystencils import (
 from pystencils.sympyextensions.integer_functions import int_rem
 from pystencils.simp import sympy_cse_on_assignment_list
 from pystencils.slicing import normalize_slice
-from pystencils.backend.jit.gpu_cupy import CupyKernelWrapper
+from pystencils.jit.gpu_cupy import CupyKernelWrapper
 
 
 def test_sliced_iteration():
diff --git a/tests/kernelcreation/test_sum_prod.py b/tests/kernelcreation/test_sum_prod.py
index 9cd638c00300b9e3ff93c81f9ac56f5a6e5890f6..9d61d3bc4bf56c92569b4dbece1446a9395b8222 100644
--- a/tests/kernelcreation/test_sum_prod.py
+++ b/tests/kernelcreation/test_sum_prod.py
@@ -10,7 +10,6 @@
 import pytest
 import numpy as np
 
-import pystencils.config
 import sympy as sp
 import sympy.abc
 
@@ -60,7 +59,7 @@ def test_product(dtype):
 
     assignments = ps.AssignmentCollection({x.center(): sum})
 
-    config = pystencils.config.CreateKernelConfig()
+    config = ps.CreateKernelConfig()
 
     ast = ps.create_kernel(assignments, config=config)
     code = ps.get_code_str(ast)
diff --git a/tests/nbackend/kernelcreation/test_context.py b/tests/nbackend/kernelcreation/test_context.py
index 384fc93158a9f7aa7ff9911b20382c0b79ed36ee..200c1e34e8ab3ac04fa119491805ef61111062c6 100644
--- a/tests/nbackend/kernelcreation/test_context.py
+++ b/tests/nbackend/kernelcreation/test_context.py
@@ -6,7 +6,7 @@ from pystencils import Field, TypedSymbol, FieldType, DynamicType
 from pystencils.backend.kernelcreation import KernelCreationContext
 from pystencils.backend.constants import PsConstant
 from pystencils.backend.memory import PsSymbol
-from pystencils.backend.properties import FieldShape, FieldStride
+from pystencils.codegen.properties import FieldShape, FieldStride
 from pystencils.backend.exceptions import KernelConstraintsError
 from pystencils.types.quick import SInt, Fp
 from pystencils.types import deconstify
diff --git a/tests/nbackend/kernelcreation/test_options.py b/tests/nbackend/kernelcreation/test_options.py
index 7fa7fc5131c657da7b9faa5fb983330e6fde8964..fefcc98fe62e956aeeba47543667e82bff758ec1 100644
--- a/tests/nbackend/kernelcreation/test_options.py
+++ b/tests/nbackend/kernelcreation/test_options.py
@@ -2,7 +2,7 @@ import pytest
 
 from pystencils.field import Field, FieldType
 from pystencils.types.quick import *
-from pystencils.config import (
+from pystencils.codegen.config import (
     CreateKernelConfig,
     PsOptionsError,
 )
diff --git a/tests/nbackend/test_code_printing.py b/tests/nbackend/test_code_printing.py
index ef4806314eb52c7389bf583027bb808c42049213..109cfdc1914f86a89e150c6a7dc4e9a7bc382cf9 100644
--- a/tests/nbackend/test_code_printing.py
+++ b/tests/nbackend/test_code_printing.py
@@ -1,11 +1,6 @@
-from pystencils import Target
-
 from pystencils.backend.ast.expressions import PsExpression
-from pystencils.backend.ast.structural import PsAssignment, PsLoop, PsBlock
-from pystencils.backend.kernelfunction import KernelFunction
-from pystencils.backend.memory import PsSymbol, PsBuffer
+from pystencils.backend.memory import PsSymbol
 from pystencils.backend.constants import PsConstant
-from pystencils.backend.literals import PsLiteral
 from pystencils.types.quick import Fp, SInt, UInt, Bool
 from pystencils.backend.emission import CAstPrinter
 
@@ -129,7 +124,7 @@ def test_relations_precedence():
 
 def test_ternary():
     from pystencils.backend.ast.expressions import PsTernary
-    from pystencils.backend.ast.expressions import PsNot, PsAnd, PsOr
+    from pystencils.backend.ast.expressions import PsAnd, PsOr
 
     p, q = [PsExpression.make(PsSymbol(x, Bool())) for x in "pq"]
     x, y, z = [PsExpression.make(PsSymbol(x, Fp(32))) for x in "xyz"]
diff --git a/tests/nbackend/test_cpujit.py b/tests/nbackend/test_cpujit.py
index 648112ef95bf5d6c3181f5c3c2527dd870220f0e..c053df9a9e0d381d5f92d129a3b9280a7d56f236 100644
--- a/tests/nbackend/test_cpujit.py
+++ b/tests/nbackend/test_cpujit.py
@@ -1,6 +1,6 @@
 import pytest
 
-from pystencils import Target
+from pystencils import Target, Kernel
 
 # from pystencils.backend.constraints import PsKernelParamsConstraint
 from pystencils.backend.memory import PsSymbol, PsBuffer
@@ -8,10 +8,9 @@ from pystencils.backend.constants import PsConstant
 
 from pystencils.backend.ast.expressions import PsBufferAcc, PsExpression
 from pystencils.backend.ast.structural import PsAssignment, PsBlock, PsLoop
-from pystencils.backend.kernelfunction import KernelFunction
 
 from pystencils.types.quick import SInt, Fp
-from pystencils.backend.jit import LegacyCpuJit
+from pystencils.jit import LegacyCpuJit
 
 import numpy as np
 
@@ -45,7 +44,7 @@ def test_pairwise_addition():
         PsBlock([update])
     )
 
-    func = KernelFunction(PsBlock([loop]), Target.CPU, "kernel", set())
+    func = Kernel(PsBlock([loop]), Target.CPU, "kernel", set())
 
     # sizes_constraint = PsKernelParamsConstraint(
     #     u.shape[0].eq(2 * v.shape[0]),
diff --git a/tests/nbackend/test_vectorization.py b/tests/nbackend/test_vectorization.py
index 55330c9ee8d5d675379418748aa085ab4ce3ae73..a4825669c0d930da1a5962d14e66a1cc0c457d8c 100644
--- a/tests/nbackend/test_vectorization.py
+++ b/tests/nbackend/test_vectorization.py
@@ -19,8 +19,8 @@ from pystencils.backend.transformations import (
     LowerToC,
 )
 from pystencils.backend.constants import PsConstant
-from pystencils.backend.kernelfunction import create_cpu_kernel_function
-from pystencils.backend.jit import LegacyCpuJit
+from pystencils.codegen.driver import create_cpu_kernel_function
+from pystencils.jit import LegacyCpuJit
 
 from pystencils import Target, fields, Assignment, Field
 from pystencils.field import create_numpy_array_with_layout