From c8c1a548ec3e25dbda9e11a1e9ffc09601f13dec Mon Sep 17 00:00:00 2001 From: Frederik Hennig <frederik.hennig@fau.de> Date: Sat, 15 Mar 2025 11:59:15 +0100 Subject: [PATCH] Use newly introduced `HIP` target. - remove `sfg.use_cuda` and `sfg.use_hip`; infer API from kernel target instead - Adapt test cases - Adapt composer user guide --- docs/source/usage/how_to_composer.md | 35 ++++++------- src/pystencilssfg/composer/gpu_composer.py | 49 +++++-------------- src/pystencilssfg/lang/gpu.py | 8 +++ tests/generator_scripts/source/CudaKernels.py | 8 +-- tests/generator_scripts/source/HipKernels.py | 8 +-- 5 files changed, 43 insertions(+), 65 deletions(-) diff --git a/docs/source/usage/how_to_composer.md b/docs/source/usage/how_to_composer.md index d21d46f..12a8435 100644 --- a/docs/source/usage/how_to_composer.md +++ b/docs/source/usage/how_to_composer.md @@ -346,20 +346,13 @@ The next section explains how that is achieved in pystencils-sfg. #### Invoking GPU Kernels -Pystencils also allows us to generate kernels for the CUDA and HIP GPU platforms. -First, we need to decide for one of the two systems by calling either -{any}`sfg.use_cuda <SfgGpuComposer.use_cuda>` or {any}`sfg.use_hip <SfgGpuComposer.use_hip>`. +Pystencils also allows us to generate kernels for the CUDA and HIP GPU programming models. +To generate a kernel targetting either of these, set the +{any}`target <pystencils.codegen.config.CreateKernelConfig.target>` +code generator option to either `Target.CUDA` or `Target.HIP`. After registering a GPU kernel, -you can render its invocation using {any}`sfg.gpu_invoke <SfgGpuComposer.gpu_invoke>`. - -Here is a basic example: - -```{code-cell} ipython3 -:tags: [remove-cell] - -f, g = ps.fields("f, g: double[2D]") -asm = ps.Assignment(f(0), g(0)) -``` +its invocation can be rendered using {any}`sfg.gpu_invoke <SfgGpuComposer.gpu_invoke>`. +Here is an example using CUDA: ```{code-cell} ipython3 from pystencilssfg import SfgConfig @@ -367,12 +360,14 @@ sfg_config = SfgConfig() sfg_config.extensions.impl = "cu" with SourceFileGenerator(sfg_config) as sfg: - # Activate CUDA - sfg.use_cuda() + # Configure the code generator to use CUDA + cfg = ps.CreateKernelConfig(target=ps.Target.CUDA) + + # Create fields, assemble assignments + f, g = ps.fields("f, g: double[2D]") + asm = ps.Assignment(f(0), g(0)) - # Register the GPU kernel - cfg = ps.CreateKernelConfig() - cfg.target = ps.Target.CUDA + # Register kernel khandle = sfg.kernels.create(asm, "gpu_kernel", cfg) # Invoke it @@ -442,8 +437,8 @@ with SourceFileGenerator() as sfg: :::{admonition} To Do - - Creating and calling kernels - - Invoking GPU kernels and the CUDA API Mirror + - Modifying GPU kernel launch configs + - GPU API Reflections - Defining classes, their fields constructors, and methods ::: diff --git a/src/pystencilssfg/composer/gpu_composer.py b/src/pystencilssfg/composer/gpu_composer.py index 757ab66..72628ab 100644 --- a/src/pystencilssfg/composer/gpu_composer.py +++ b/src/pystencilssfg/composer/gpu_composer.py @@ -2,7 +2,7 @@ from __future__ import annotations from typing import overload -from pystencils.codegen import GpuKernel +from pystencils.codegen import GpuKernel, Target from pystencils.codegen.gpu_indexing import ( ManualLaunchConfiguration, AutomaticLaunchConfiguration, @@ -20,7 +20,7 @@ from ..ir import ( SfgSequence, ) from ..lang import ExprLike, AugExpr -from ..lang.gpu import ProvidesGpuRuntimeAPI +from ..lang.gpu import CudaAPI, HipAPI, ProvidesGpuRuntimeAPI class SfgGpuComposer(SfgComposerMixIn): @@ -69,40 +69,6 @@ class SfgGpuComposer(SfgComposerMixIn): .. _Launch Configurations in HIP: https://rocmdocs.amd.com/projects/HIP/en/latest/how-to/hip_cpp_language_extensions.html#calling-global-functions """ # NOQA: E501 - def __init__(self) -> None: - self._gpu_api_provider: ProvidesGpuRuntimeAPI | None = None - - def use_cuda(self): - """Instruct the GPU composer to use the CUDA runtime API""" - from ..lang.gpu import CudaAPI - - self._gpu_api_provider = CudaAPI() - - def use_hip(self): - """Instruct the GPU composer to use the HIP runtime API""" - from ..lang.gpu import HipAPI - - self._gpu_api_provider = HipAPI() - - @property - def gpu_api(self) -> ProvidesGpuRuntimeAPI | None: - """GPU runtime API wrapper currently used by this GPU composer, - or `None` if none was selected.""" - return self._gpu_api_provider - - def get_gpu_api(self) -> ProvidesGpuRuntimeAPI: - """GPU runtime API provider currently used by this GPU composer. - - Raises: - AttributeError: If no runtime API was set yet (see `use_cuda`, `use_hip`) - """ - if self._gpu_api_provider is None: - raise AttributeError( - "No GPU API was selected - call `use_cuda()` or `use_hip()` first." - ) - - return self._gpu_api_provider - @overload def gpu_invoke( self, @@ -145,7 +111,16 @@ class SfgGpuComposer(SfgComposerMixIn): launch_config = ker.get_launch_configuration() - dim3 = self.get_gpu_api().dim3 + gpu_api: type[ProvidesGpuRuntimeAPI] + match ker.target: + case Target.CUDA: + gpu_api = CudaAPI + case Target.HIP: + gpu_api = HipAPI + case _: + assert False, "unexpected GPU target" + + dim3 = gpu_api.dim3 grid_size: ExprLike block_size: ExprLike diff --git a/src/pystencilssfg/lang/gpu.py b/src/pystencilssfg/lang/gpu.py index 0ca2a6d..c9736fb 100644 --- a/src/pystencilssfg/lang/gpu.py +++ b/src/pystencilssfg/lang/gpu.py @@ -55,6 +55,10 @@ class CudaAPI(ProvidesGpuRuntimeAPI): template = cpptype("cudaStream_t", "<cuda_runtime.h>") +cuda = CudaAPI +"""Reflection of the CUDA runtime API""" + + class HipAPI(ProvidesGpuRuntimeAPI): """Reflection of the HIP runtime API""" @@ -65,3 +69,7 @@ class HipAPI(ProvidesGpuRuntimeAPI): class stream_t(CppClass): template = cpptype("hipStream_t", "<hip/hip_runtime.h>") + + +hip = HipAPI +"""Reflection of the HIP runtime API""" diff --git a/tests/generator_scripts/source/CudaKernels.py b/tests/generator_scripts/source/CudaKernels.py index 8572782..e019e4f 100644 --- a/tests/generator_scripts/source/CudaKernels.py +++ b/tests/generator_scripts/source/CudaKernels.py @@ -1,5 +1,6 @@ from pystencilssfg import SourceFileGenerator from pystencilssfg.lang.cpp import std +from pystencilssfg.lang.gpu import cuda import pystencils as ps @@ -11,14 +12,13 @@ asm = ps.Assignment(dst(0), 2 * src(0)) with SourceFileGenerator() as sfg: - sfg.use_cuda() sfg.namespace("gen") base_config = ps.CreateKernelConfig(target=ps.Target.CUDA) - block_size = sfg.gpu_api.dim3().var("blockSize") - grid_size = sfg.gpu_api.dim3().var("gridSize") - stream = sfg.gpu_api.stream_t().var("stream") + block_size = cuda.dim3().var("blockSize") + grid_size = cuda.dim3().var("gridSize") + stream = cuda.stream_t().var("stream") with sfg.namespace("linear3d"): cfg = base_config.copy() diff --git a/tests/generator_scripts/source/HipKernels.py b/tests/generator_scripts/source/HipKernels.py index 32d9b1d..20d9df5 100644 --- a/tests/generator_scripts/source/HipKernels.py +++ b/tests/generator_scripts/source/HipKernels.py @@ -1,5 +1,6 @@ from pystencilssfg import SourceFileGenerator from pystencilssfg.lang.cpp import std +from pystencilssfg.lang.gpu import hip import pystencils as ps @@ -11,14 +12,13 @@ asm = ps.Assignment(dst(0), 2 * src(0)) with SourceFileGenerator() as sfg: - sfg.use_hip() sfg.namespace("gen") base_config = ps.CreateKernelConfig(target=ps.Target.HIP) - block_size = sfg.gpu_api.dim3().var("blockSize") - grid_size = sfg.gpu_api.dim3().var("gridSize") - stream = sfg.gpu_api.stream_t().var("stream") + block_size = hip.dim3().var("blockSize") + grid_size = hip.dim3().var("gridSize") + stream = hip.stream_t().var("stream") with sfg.namespace("linear3d"): cfg = base_config.copy() -- GitLab