From 3d44a19e67699cc9f0b0d73426d7e76de985a742 Mon Sep 17 00:00:00 2001
From: Frederik Hennig <frederik.hennig@fau.de>
Date: Fri, 14 Mar 2025 18:41:36 +0000
Subject: [PATCH] Update docs, installation + contrib guide

---
 docs/source/backend/gpu_codegen.md       | 14 +++-----
 docs/source/backend/platforms.md         | 15 ++++++--
 docs/source/contributing/dev-workflow.md | 46 +++++++++++++++++++++---
 docs/source/installation.md              | 21 ++---------
 docs/source/user_manual/gpu_kernels.md   |  6 ++--
 pyproject.toml                           |  3 +-
 6 files changed, 67 insertions(+), 38 deletions(-)

diff --git a/docs/source/backend/gpu_codegen.md b/docs/source/backend/gpu_codegen.md
index 1082669e6..0415c12c6 100644
--- a/docs/source/backend/gpu_codegen.md
+++ b/docs/source/backend/gpu_codegen.md
@@ -2,23 +2,19 @@
 
 The code generation infrastructure for Nvidia and AMD GPUs using CUDA and HIP comprises the following components:
 
- - The {any}`CudaPlatform` at `backend.platforms` which performs materialization of a kernel's iteration
-   space by mapping GPU block and thread indices to iteration space points. To perform this task,
-   it depends on a {any}`ThreadMapping` instance which defines the nature of that mapping.
+ - The platforms {any}`CudaPlatform` and {any}`HipPlatform` at `backend.platforms`
+   which perform materialization of a kernel's iteration space
+   by mapping GPU block and thread indices to iteration space points.
+   To perform this task, they depend on a {any}`ThreadMapping` instance which defines the nature of that mapping.
    The platform also takes care of lowering mathematical functions to their CUDA runtime library implementation.
  - In the code generation driver, the strings are drawn by the `GpuIndexing` helper class.
    It provides both the {any}`ThreadMapping` for the codegen backend, as well as the launch configuration
    for the runtime system.
 
-:::{attention}
-
-Code generation for HIP through the `CudaPlatform` is experimental and not tested at the moment.
-:::
-
 ## The CUDA Platform and Thread Mappings
 
 ```{eval-rst}
-.. module:: pystencils.backend.platforms.cuda
+.. module:: pystencils.backend.platforms.generic_gpu
 
 .. autosummary::
     :toctree: generated
diff --git a/docs/source/backend/platforms.md b/docs/source/backend/platforms.md
index e7ffc6f15..2d2c33d86 100644
--- a/docs/source/backend/platforms.md
+++ b/docs/source/backend/platforms.md
@@ -26,7 +26,6 @@ targets in the future.
     Platform
     GenericCpu
     GenericVectorCpu
-    GenericGpu
 ```
 
 ## CPU Platforms
@@ -49,6 +48,18 @@ targets in the future.
     :nosignatures:
     :template: autosummary/entire_class.rst
 
+    GenericGpu
     CudaPlatform
-    SyclPlatform
+    HipPlatform
 ```
+
+## Experimental Platforms
+
+```{eval-rst}
+.. autosummary::
+    :toctree: generated
+    :nosignatures:
+    :template: autosummary/entire_class.rst
+
+    SyclPlatform
+```
\ No newline at end of file
diff --git a/docs/source/contributing/dev-workflow.md b/docs/source/contributing/dev-workflow.md
index 8daac8cbd..d92916130 100644
--- a/docs/source/contributing/dev-workflow.md
+++ b/docs/source/contributing/dev-workflow.md
@@ -48,16 +48,22 @@ git pull --set-upstream upstream master
 
 ## Set Up the Python Environment
 
+### Prerequisites
+
 To develop pystencils, you will need at least the following software installed on your machine:
 
 - Python 3.10 or later: Since pystencils minimal supported version is Python 3.10, we recommend that you work with Python 3.10 directly.
 - An up-to-date C++ compiler, used by pystencils to JIT-compile generated code
 - [Nox](https://nox.thea.codes/en/stable/), which we use for test automation.
   Nox will be used extensively in the instructions on testing below.
-- Optionally [CUDA](https://developer.nvidia.com/cuda-toolkit),
-  if you have an Nvidia or AMD GPU and plan to develop on pystencils' GPU capabilities
+- Optionally, for GPU development:
+  - At least CUDA 11 for Nvidia GPUs, or
+  - At least ROCm/HIP 6.1 for AMD GPUs.
+
+### Virtual Environment Setup
 
-Once you have these, set up a [virtual environment](https://docs.python.org/3/library/venv.html) for development.
+Once you have all the prerequisites,
+set up a [virtual environment](https://docs.python.org/3/library/venv.html) for development.
 This ensures that your system's installation of Python is kept clean, and isolates your development environment
 from outside influence.
 Use the following commands to create a virtual environment at `.venv` and perform an editable install of pystencils into it:
@@ -74,7 +80,39 @@ Setting `PIP_REQUIRE_VIRTUALENV` ensures that pip refuses to install packages gl
 Consider setting this variable globally in your shell's configuration file.
 :::
 
-You are now ready to go! Create a new git branch to work on, open up an IDE, and start coding.
+:::{admonition} Feature Groups
+The above installation instructions assume that you will be running all code checking
+and test tasks through `nox`.
+If you need or want to run them manually, you will need to add one or more
+of these feature groups to your installation:
+
+ - `doc`, which contains all dependencies required to build this documentation;
+ - `dev`, which adds `flake8` for code style checking,
+   `mypy` for static type checking,
+    and the `black` formatter;
+ - `testsuite`, which adds `pytest` plus plugins and some more dependencies required
+   for running the test suite.
+
+Depending on your development focus, you might also need to add some of the user feature
+groups listed in [the installation guide](#installation_guide).
+:::
+
+### Cupy for CUDA and HIP
+
+When developing for Nvidia or AMD GPUs, you will likely need an installation of [cupy](https://cupy.dev/).
+Since cupy has to be built specifically against the libraries of a given CUDA or ROCm version,
+it cannot be installed directly via dependency resolution from pystencils.
+For instructions on how to install Cupy, refer to their [installation manual](https://docs.cupy.dev/en/stable/install.html).
+
+### Test Your Setup
+
+To check whether your setup is complete, invoke the pystencils test suite:
+
+```bash
+nox -s "testsuite(cpu)"
+```
+
+If this finishes without errors, you are ready to go! Create a new git branch to work on, open up an IDE, and start coding.
 Make sure your IDE recognizes the virtual environment you created, though.
 
 ## Static Code Analysis
diff --git a/docs/source/installation.md b/docs/source/installation.md
index 8c344e760..5cb274c93 100644
--- a/docs/source/installation.md
+++ b/docs/source/installation.md
@@ -1,4 +1,4 @@
-(_installation)=
+(installation_guide)=
 # Setup and Installation
 
 ## Install pystencils
@@ -17,7 +17,7 @@ git clone -b v2.0-dev https://i10git.cs.fau.de/pycodegen/pystencils.git
 pip install -e pystencils
 ```
 
-### Feature Groups
+## Feature Groups
 
 In both cases, you can add a set of optional features to your installation by listing them
 in square brackets (e.g. `pip install -e pystencils[feature1, feature2]`).
@@ -33,22 +33,7 @@ The following feature sets are available:
 - `use_cython`: Install [Cython](https://cython.org/), which is used internally by pystencils
   to accelerate the setup of boundary conditions.
 
-:::{dropdown} For Developers
-
-If you are developing pystencils, we recommend you perform an editable install of your
-local clone of the repository, with all optional features:
-```bash
-pip install -e pystencils[alltrafos,interactive,use_cython,doc,testsuite]
-```
-
-This includes the additional feature groups `doc`, which contains all dependencies required
-to build this documentation, and `tests`, which adds `flake8` for code style checking,
-`mypy` for static type checking, and `pytest` plus plugins for running the test suite.
-
-For more information on developing pystencils, see the [](#contribution_guide).
-:::
-
-### For GPUs
+## For GPUs
 
 If you have an Nvidia graphics processor and CUDA installed, you can use pystencils to directly compile
 and execute kernels running on your GPU.
diff --git a/docs/source/user_manual/gpu_kernels.md b/docs/source/user_manual/gpu_kernels.md
index 2219ce042..14a29c41c 100644
--- a/docs/source/user_manual/gpu_kernels.md
+++ b/docs/source/user_manual/gpu_kernels.md
@@ -55,8 +55,8 @@ automatically select one or the other, depending on the current runtime environm
 
 :::{note}
 If `cupy` is not installed, `create_kernel` will raise an exception when using `Target.CurrentGPU`.
-When exporting kernels to be compiled externally in an environment where cupy is not available,
-the GPU target must therefore be set explicitly.
+You can still generate kernels for CUDA or HIP directly even without Cupy;
+you just won't be able to just-in-time compile and run them.
 :::
 
 Here is a snippet creating a kernel for the locally available GPU target:
@@ -218,7 +218,7 @@ assignments = [
 ```{code-cell} ipython3
 y = ps.DEFAULTS.spatial_counters[0]
 cfg = ps.CreateKernelConfig()
-cfg.target= ps.Target.CUDA
+cfg.target= ps.Target.CurrentGPU
 cfg.iteration_slice = ps.make_slice[:, y:]
 ```
 
diff --git a/pyproject.toml b/pyproject.toml
index 55b21cbbf..ae539b12c 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -29,7 +29,6 @@ classifiers = [
 "Source Code" = "https://i10git.cs.fau.de/pycodegen/pystencils"
 
 [project.optional-dependencies]
-gpu = ['cupy']
 alltrafos = ['islpy', 'py-cpuinfo']
 bench_db = ['blitzdb', 'pymongo', 'pandas']
 interactive = [
@@ -76,7 +75,7 @@ testsuite = [
     'matplotlib',
     'py-cpuinfo',
     'randomgen>=1.18',
-    'scipy'
+    'scipy',
 ]
 
 [build-system]
-- 
GitLab