Commit b1b8f513 authored by Frederik Hennig

first successful tests of cupy JIT; waiting for !393 to continue

parent f340af58
1 merge request: !384 Fundamental GPU Support
Pipeline #67193 failed
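For context, a minimal sketch of how the new cupy JIT path is exercised end to end, mirroring the updated test at the bottom of this diff (the field/assignment setup here is illustrative; it assumes cupy is installed and that Assignment and fields are available from the pystencils top-level package as usual):

import cupy as cp
from pystencils import fields, Assignment, AssignmentCollection, CreateKernelConfig, Target
from pystencils.kernelcreation import create_kernel

src, dst = fields("src, dst: [2D]")
asms = AssignmentCollection([Assignment(dst[0, 0], 2 * src[0, 0])])

# Target.CUDA makes CreateKernelConfig select CupyJit automatically (see the config changes below)
gen_config = CreateKernelConfig(target=Target.CUDA)
kernel = create_kernel(asms, gen_config).compile()  # compile() goes through CupyJit and yields a CupyKernelWrapper

src_arr = cp.ones((42, 42))
dst_arr = cp.zeros_like(src_arr)
kernel(src=src_arr, dst=dst_arr)  # launched on the GPU via cp.RawKernel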
@@ -5,10 +5,6 @@ class PsInternalCompilerError(Exception):
"""Indicates an internal error during kernel translation, most likely due to a bug inside pystencils."""
class PsOptionsError(Exception):
"""Indicates an option clash in the `CreateKernelConfig`."""
class PsInputError(Exception):
"""Indicates unsupported user input to the translation system"""
......
@@ -27,14 +27,9 @@ Both are available here through `LegacyCpuJit` and `LegacyGpuJit`.
"""
from .jit import JitBase, NoJit, LegacyCpuJit, LegacyGpuJit
from .gpu_cupy import CupyJit
no_jit = NoJit()
"""Disables just-in-time compilation for a kernel."""
__all__ = [
"JitBase",
"LegacyCpuJit",
"NoJit",
"no_jit",
"LegacyGpuJit",
]
__all__ = ["JitBase", "LegacyCpuJit", "NoJit", "no_jit", "LegacyGpuJit", "CupyJit"]
from typing import Callable, Any
from dataclasses import dataclass
try:
import cupy as cp
HAVE_CUPY = True
except ImportError:
HAVE_CUPY = False
from ...enums import Target
from ...kernel_wrapper import KernelWrapper
from ...types import PsType
from .jit import JitBase, JitError
@@ -26,7 +32,7 @@ class LaunchGrid:
block: tuple[int, int, int]
class CupyKernelWrapper:
class CupyKernelWrapper(KernelWrapper):
def __init__(
self,
kfunc: GpuKernelFunction,
@@ -34,14 +40,22 @@ class CupyKernelWrapper:
block_size: tuple[int, int, int],
):
self._kfunc = kfunc
self._kernel = raw_kernel
self._raw_kernel = raw_kernel
self._block_size = block_size
@property
def kernel_function(self) -> GpuKernelFunction:
return self._kfunc
@property
def raw_kernel(self) -> cp.RawKernel:
return self._raw_kernel
def __call__(self, **kwargs: Any) -> Any:
kernel_args, launch_grid = self._get_args(**kwargs)
device = self._get_device(kernel_args)
with cp.cuda.device(device):
self._kernel(launch_grid.grid, launch_grid.block, kernel_args)
with cp.cuda.Device(device):
self._raw_kernel(launch_grid.grid, launch_grid.block, kernel_args)
def _get_device(self, kernel_args):
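# collect the device ids of all cupy array arguments so the kernel is launched on the device that holds the data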
devices = set(a.device.id for a in kernel_args if type(a) is cp.ndarray)
@@ -62,6 +76,7 @@ class CupyKernelWrapper:
valuation[name] = arg
# Collect parameter values
# TODO: Check array sizes
arr: cp.ndarray
for kparam in self._kfunc.parameters:
@@ -81,7 +96,7 @@ class CupyKernelWrapper:
case FieldStrideParam(name, dtype, field, coord):
arr = kwargs[field.name]
add_arg(name, arr.strides[coord], dtype)
add_arg(name, arr.strides[coord] // arr.dtype.itemsize, dtype)
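# cupy (like numpy) reports strides in bytes, while the generated kernel indexes in elements, hence the division by itemsize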
case KernelParameter(name, dtype):
val: Any = kwargs[name]
@@ -119,11 +134,14 @@ class CupyJit(JitBase):
def __init__(self, default_block_size: tuple[int, int, int] = (128, 2, 1)):
# TODO: Fp16 headers
self._runtime_headers = {"<cstdint>", '"gpu_defines.h"'}
self._runtime_headers = {"<cstdint>"}
self._default_block_size = default_block_size
def compile(self, kfunc: KernelFunction) -> Callable[..., None]:
import cupy as cp
if not HAVE_CUPY:
raise JitError(
"`cupy` is not installed: just-in-time-compilation of CUDA kernels is unavailable."
)
if not isinstance(kfunc, GpuKernelFunction) or kfunc.target != Target.CUDA:
raise ValueError(
@@ -157,4 +175,4 @@ class CupyJit(JitBase):
def _kernel_code(self, kfunc: GpuKernelFunction) -> str:
kernel_code = emit_code(kfunc)
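# the extern "C" wrapper prevents C++ name mangling so that cp.RawKernel can locate the kernel symbol by name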
return f'extern "C" {{\n{kernel_code}\n}}\n'
return f'extern "C" {kernel_code}'
from __future__ import annotations
from typing import cast
from typing import cast, TYPE_CHECKING
from .context import KernelCreationContext
from ..platforms import GenericCpu
from ..ast.structural import PsBlock
from ...config import CpuOptimConfig, OpenMpConfig
if TYPE_CHECKING:
from ..platforms import GenericCpu
def optimize_cpu(
ctx: KernelCreationContext,
......
from __future__ import annotations
from abc import ABC
from typing import Callable, Sequence, Iterable
from typing import Callable, Sequence, Iterable, TYPE_CHECKING
from .ast.structural import PsBlock
from .ast.analysis import collect_required_headers, collect_undefined_symbols
@@ -12,11 +12,13 @@ from .platforms import Platform, GpuThreadsRange
from .constraints import KernelParamsConstraint
from ..types import PsType
from .jit import JitBase, no_jit
from ..enums import Target
from ..field import Field
if TYPE_CHECKING:
from .jit import JitBase
class KernelParameter:
__match_args__ = ("name", "dtype")
@@ -121,7 +123,7 @@ class KernelFunction:
parameters: Sequence[KernelParameter],
required_headers: set[str],
constraints: Sequence[KernelParamsConstraint],
jit: JitBase = no_jit,
jit: JitBase,
):
self._body: PsBlock = body
self._target = target
@@ -196,7 +198,7 @@ class GpuKernelFunction(KernelFunction):
parameters: Sequence[KernelParameter],
required_headers: set[str],
constraints: Sequence[KernelParamsConstraint],
jit: JitBase = no_jit,
jit: JitBase,
):
super().__init__(
body, target, name, parameters, required_headers, constraints, jit
......
@@ -45,7 +45,7 @@ class CudaPlatform(GenericGpu):
@property
def required_headers(self) -> set[str]:
return {"gpu_defines.h"}
return {'"gpu_defines.h"'}
def materialize_iteration_space(
self, body: PsBlock, ispace: IterationSpace
......
from __future__ import annotations
from typing import TYPE_CHECKING
from collections.abc import Collection
@@ -8,12 +9,17 @@ from dataclasses import dataclass
from .enums import Target
from .field import Field, FieldType
from .backend.jit import JitBase
from .backend.exceptions import PsOptionsError
from .types import PsIntegerType, PsNumericType, PsIeeeFloatType
from .defaults import DEFAULTS
if TYPE_CHECKING:
from .backend.jit import JitBase
class PsOptionsError(Exception):
"""Indicates an option clash in the `CreateKernelConfig`."""
@dataclass
class OpenMpConfig:
@@ -252,7 +258,9 @@ class CreateKernelConfig:
if self.gpu_indexing is not None:
if self.target != Target.SYCL:
raise PsOptionsError(f"`sycl_indexing` cannot be set for non-SYCL target {self.target}")
raise PsOptionsError(
f"`sycl_indexing` cannot be set for non-SYCL target {self.target}"
)
# Infer JIT
if self.jit is None:
@@ -260,8 +268,19 @@
from .backend.jit import LegacyCpuJit
self.jit = LegacyCpuJit()
elif self.target == Target.CUDA:
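# use the cupy-based JIT for CUDA kernels when cupy is importable; otherwise fall back to no_jit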
try:
from .backend.jit.gpu_cupy import CupyJit
self.jit = CupyJit()
except ImportError:
from .backend.jit import no_jit
self.jit = no_jit
elif self.target == Target.SYCL:
from .backend.jit import no_jit
self.jit = no_jit
else:
raise NotImplementedError(
......
#%%
import pytest
import sympy as sp
import numpy as np
from pystencils import fields, Field, AssignmentCollection
from pystencils import fields, Field, AssignmentCollection, Target, CreateKernelConfig
from pystencils.sympyextensions.astnodes import assignment_from_stencil
from pystencils.kernelcreation import create_kernel
def test_filter_kernel():
@pytest.mark.parametrize("target", (Target.GenericCPU, Target.CUDA))
def test_filter_kernel(target):
if target == Target.CUDA:
xp = pytest.importorskip("cupy")
else:
xp = np
weight = sp.Symbol("weight")
stencil = [
[1, 1, 1],
@@ -19,18 +27,19 @@ def test_filter_kernel():
asm = assignment_from_stencil(stencil, src, dst, normalization_factor=weight)
asms = AssignmentCollection([asm])
ast = create_kernel(asms)
gen_config = CreateKernelConfig(target=target)
ast = create_kernel(asms, gen_config)
kernel = ast.compile()
src_arr = np.ones((42, 42))
dst_arr = np.zeros_like(src_arr)
src_arr = xp.ones((42, 42))
dst_arr = xp.zeros_like(src_arr)
kernel(src=src_arr, dst=dst_arr, weight=2.0)
expected = np.zeros_like(src_arr)
expected = xp.zeros_like(src_arr)
expected[1:-1, 1:-1].fill(18.0)
np.testing.assert_allclose(dst_arr, expected)
xp.testing.assert_allclose(dst_arr, expected)
def test_filter_kernel_fixedsize():
@@ -59,3 +68,8 @@ def test_filter_kernel_fixedsize():
expected[1:-1, 1:-1].fill(18.0)
np.testing.assert_allclose(dst_arr, expected)
#%%
test_filter_kernel(Target.CUDA)
# %%