From 8af40ae4a930afd3b91078639b897050781c6c8a Mon Sep 17 00:00:00 2001
From: Frederik Hennig <frederik.hennig@fau.de>
Date: Thu, 12 Dec 2024 11:06:52 +0100
Subject: [PATCH] Clarify some doc comments; clarify launch grid specification

---
 docs/source/reference/gpu_kernels.md | 19 +++++++++++++------
 src/pystencils/config.py             |  7 ++++++-
 src/pystencils/target.py             |  2 +-
 3 files changed, 20 insertions(+), 8 deletions(-)

diff --git a/docs/source/reference/gpu_kernels.md b/docs/source/reference/gpu_kernels.md
index c3fa70ec2..1045f80d4 100644
--- a/docs/source/reference/gpu_kernels.md
+++ b/docs/source/reference/gpu_kernels.md
@@ -78,19 +78,26 @@ kfunc(f=f_arr, g=g_arr)
 ### Modifying the Launch Grid
 
 The `kernel.compile()` invocation in the above code produces a {any}`CupyKernelWrapper` callable object.
-Its interface allows us to customize the GPU launch grid.
-We can manually set both the number of threads per block, and the number of blocks on the grid:
+This object holds the kernel's launch grid configuration
+(i.e. the number of thread blocks, and the number of threads per block).
+Pystencils specifies a default value for the block size and, if possible,
+the number of blocks is automatically inferred in order to cover the entire iteration space.
+In addition, the wrapper's interface allows us to customize the GPU launch grid,
+by manually setting both the number of threads per block, and the number of blocks on the grid:
 
 ```{code-cell} ipython3
 kfunc.block_size = (16, 8, 8)
 kfunc.num_blocks = (1, 2, 2)
 ```
 
-In most cases, the number of blocks is automatically inferred from the block size
-in order to cover the entire iteration space, so it does not need to be specified.
-Setting a launch grid that is larger than the iteration space is also possible,
-but will cause any threads working outside of the iteration bounds to idle.
+For most kernels, setting only the `block_size` is sufficient since pystencils will
+automatically compute the number of blocks;
+for exceptions to this, see [](#manual_launch_grids).
+If `num_blocks` is set manually and the launch grid thus specified is too small, only
+a part of the iteration space will be traversed by the kernel;
+similarly, if it is too large, it will cause any threads working outside of the iteration bounds to idle.
 
+(manual_launch_grids)=
 ### Manual Launch Grids and Non-Cuboid Iteration Patterns
 
 In some cases, it will be unavoidable to set the launch grid size manually;
diff --git a/src/pystencils/config.py b/src/pystencils/config.py
index c08ddc161..506f7fd78 100644
--- a/src/pystencils/config.py
+++ b/src/pystencils/config.py
@@ -33,7 +33,12 @@ class _AUTO_TYPE:
 
 
 AUTO = _AUTO_TYPE()
-"""Special value that can be passed to some options for invoking automatic behaviour."""
+"""Special value that can be passed to some options for invoking automatic behaviour.
+
+Currently, these options permit `AUTO`:
+
+- `ghost_layers <CreateKernelConfig.ghost_layers>`
+"""
 
 
 @dataclass
diff --git a/src/pystencils/target.py b/src/pystencils/target.py
index 7f26c4466..5d897dd7e 100644
--- a/src/pystencils/target.py
+++ b/src/pystencils/target.py
@@ -87,7 +87,7 @@ class Target(Flag):
     """
 
     GPU = CUDA
-    """Alias for backward compatibility."""
+    """Alias for `Target.CUDA`, for backward compatibility."""
 
     SYCL = _GPU | _SYCL
     """SYCL kernel target.
-- 
GitLab