Skip to content
Snippets Groups Projects
Commit 21c0ba7f authored by Frederik Hennig's avatar Frederik Hennig
Browse files

documentation on GPU invocation

parent 3ff729e9
Branches
No related tags found
1 merge request!24Extend Support for CUDA and HIP kernel invocations
import pystencilssfg
from pystencilssfg.config import SfgConfig
from os.path import splitext
class DocsPatchedGenerator(pystencilssfg.SourceFileGenerator):
"""Mockup wrapper around SourceFileGenerator for use in documentation
......@@ -30,21 +32,20 @@ class DocsPatchedGenerator(pystencilssfg.SourceFileGenerator):
self._finish_files()
header_code = self._emitter.dumps(self._header_file)
impl_code = (
None
if self._impl_file is None
else self._emitter.dumps(self._impl_file)
)
header_ext = splitext(self._header_file.name)[1]
mdcode = ":::::{tab-set}\n"
mdcode += "::::{tab-item} Generated Header (.hpp)\n"
mdcode += f"::::{{tab-item}} Generated Header ({header_ext})\n"
mdcode += ":::{code-block} C++\n\n"
mdcode += header_code
mdcode += "\n:::\n::::\n"
if impl_code:
mdcode += "::::{tab-item} Generated Implementation (.cpp)\n"
if self._impl_file is not None:
impl_code = self._emitter.dumps(self._impl_file)
impl_ext = splitext(self._impl_file.name)[1]
mdcode += f"::::{{tab-item}} Generated Implementation ({impl_ext})\n"
mdcode += ":::{code-block} C++\n\n"
mdcode += impl_code
mdcode += "\n:::\n::::\n"
......
......@@ -16,6 +16,9 @@ Composer API (``pystencilssfg.composer``)
.. autoclass:: SfgClassComposer
:members:
.. autoclass:: SfgGpuComposer
:members:
Custom Generators
=================
......
......@@ -344,6 +344,44 @@ cause them to be added to its signature.
We don't want to expose this complexity, but instead hide it by using appropriate data structures.
The next section explains how that is achieved in pystencils-sfg.
#### Invoking GPU Kernels
Pystencils also allows us to generate kernels for the CUDA and HIP GPU platforms.
First, we need to choose one of the two platforms by calling either
{any}`sfg.use_cuda <SfgGpuComposer.use_cuda>` or {any}`sfg.use_hip <SfgGpuComposer.use_hip>`.
After registering a GPU kernel,
you can render its invocation using {any}`sfg.gpu_invoke <SfgGpuComposer.gpu_invoke>`.
Here is a basic example:
```{code-cell} ipython3
:tags: [remove-cell]
f, g = ps.fields("f, g: double[2D]")
asm = ps.Assignment(f(0), g(0))
```
```{code-cell} ipython3
from pystencilssfg import SfgConfig
sfg_config = SfgConfig()
sfg_config.extensions.impl = "cu"
with SourceFileGenerator(sfg_config) as sfg:
# Activate CUDA
sfg.use_cuda()
# Register the GPU kernel
cfg = ps.CreateKernelConfig()
cfg.target = ps.Target.CUDA
khandle = sfg.kernels.create(asm, "gpu_kernel", cfg)
# Invoke it
sfg.function("kernel_wrapper")(
sfg.gpu_invoke(khandle)
)
```
#### Mapping Fields to Data Structures
Pystencils kernels operate on n-dimensional contiguous or strided arrays,
......
......@@ -9,6 +9,7 @@ from .basic_composer import (
)
from .mixin import SfgComposerMixIn
from .class_composer import SfgClassComposer
from .gpu_composer import SfgGpuComposer
__all__ = [
"SfgIComposer",
......@@ -20,4 +21,5 @@ __all__ = [
"ExprLike",
"SfgBasicComposer",
"SfgClassComposer",
"SfgGpuComposer",
]
......@@ -390,8 +390,8 @@ class SfgBasicComposer(SfgIComposer):
"""Use inside a function body to directly call a kernel.
When using `call`, the given kernel will simply be called as a function.
To invoke a GPU kernel on a specified launch grid, use `cuda_invoke`
or the interfaces of ``pystencilssfg.extensions.sycl`` instead.
To invoke a GPU kernel on a specified launch grid,
use `gpu_invoke <SfgGpuComposer.gpu_invoke>` instead.
Args:
kernel_handle: Handle to a kernel previously added to some kernel namespace.
......
......@@ -24,6 +24,50 @@ from ..lang.gpu import ProvidesGpuRuntimeAPI
class SfgGpuComposer(SfgComposerMixIn):
"""Composer mix-in providing methods to generate GPU kernel invocations.
.. function:: gpu_invoke(kernel_handle: SfgKernelHandle, **kwargs)
Invoke a GPU kernel with launch configuration parameters depending on its code generator configuration.
The overloads of this method are listed below.
They all (partially) mirror the CUDA and HIP ``kernel<<< Gs, Bs, Sm, St >>>()`` syntax;
for details on the launch configuration arguments,
refer to `Launch Configurations in CUDA`_
or `Launch Configurations in HIP`_.
.. function:: gpu_invoke(kernel_handle: SfgKernelHandle, *, grid_size: ExprLike, block_size: ExprLike, shared_memory_bytes: ExprLike = "0", stream: ExprLike | None = None, ) -> SfgCallTreeNode
:noindex:
Invoke a GPU kernel with a manual launch grid.
Requires that the kernel was generated
with `manual_launch_grid <pystencils.codegen.config.GpuOptions.manual_launch_grid>`
set to `True`.
.. function:: gpu_invoke(self, kernel_handle: SfgKernelHandle, *, shared_memory_bytes: ExprLike = "0", stream: ExprLike | None = None, ) -> SfgCallTreeNode
:noindex:
Invoke a GPU kernel with an automatic launch grid.
This signature accepts kernels generated with an indexing scheme that
causes the launch grid to be determined automatically,
such as `Blockwise4D <pystencils.codegen.config.GpuIndexingScheme.Blockwise4D>`.
.. function:: gpu_invoke(self, kernel_handle: SfgKernelHandle, *, block_size: ExprLike | None = None, shared_memory_bytes: ExprLike = "0", stream: ExprLike | None = None, ) -> SfgCallTreeNode
:noindex:
Invoke a GPU kernel with a dynamic launch grid.
This signature accepts kernels generated with an indexing scheme that permits a user-defined
block size, such as `Linear3D <pystencils.codegen.config.GpuIndexingScheme.Linear3D>`.
The grid size is calculated automatically by dividing the number of work items in each
dimension by the block size, rounding up.
.. _Launch Configurations in CUDA: https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#execution-configuration
.. _Launch Configurations in HIP: https://rocmdocs.amd.com/projects/HIP/en/latest/how-to/hip_cpp_language_extensions.html#calling-global-functions
""" # NOQA: E501
def __init__(self) -> None:
    """Initialize the GPU composer mix-in with no GPU runtime API selected yet."""
    # Holds the active GPU runtime API provider (presumably set when a platform
    # is selected via use_cuda()/use_hip() — TODO confirm); None until then.
    self._gpu_api_provider: ProvidesGpuRuntimeAPI | None = None
......@@ -63,12 +107,7 @@ class SfgGpuComposer(SfgComposerMixIn):
block_size: ExprLike,
shared_memory_bytes: ExprLike = "0",
stream: ExprLike | None = None,
) -> SfgCallTreeNode:
"""Invoke a CUDA kernel with a manual launch grid.
Requires that the kernel was generated with `manual_launch_grid <GpuOptions.manual_launch_grid>`
set to `True`.
"""
) -> SfgCallTreeNode: ...
@overload
def gpu_invoke(
......@@ -77,12 +116,7 @@ class SfgGpuComposer(SfgComposerMixIn):
*,
shared_memory_bytes: ExprLike = "0",
stream: ExprLike | None = None,
) -> SfgCallTreeNode:
"""Invoke a CUDA kernel with an automatic launch grid.
This signature accepts kernels generated with an indexing scheme that permits
the automatic inferrence of the launch grid, such as `Blockwise4D <IndexingScheme.Blockwise4D>`
"""
) -> SfgCallTreeNode: ...
@overload
def gpu_invoke(
......@@ -92,14 +126,7 @@ class SfgGpuComposer(SfgComposerMixIn):
block_size: ExprLike | None = None,
shared_memory_bytes: ExprLike = "0",
stream: ExprLike | None = None,
) -> SfgCallTreeNode:
"""Invoke a CUDA kernel with a dynamic launch grid.
This signature accepts kernels generated with an indexing scheme that permits a user-defined
blocks size, such as `Linear3D <IndexingScheme.Linear3D>`.
The grid size is calculated automatically by dividing the number of work items in each
dimension by the block size, rounding up.
"""
) -> SfgCallTreeNode: ...
def gpu_invoke(self, kernel_handle: SfgKernelHandle, **kwargs) -> SfgCallTreeNode:
assert isinstance(
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment