Commit b132e39a authored by Frederik Hennig

Clean up kernel creation code in `codegen.driver`. Fix and dry-test SYCL codegen.

parent c01fb82a
1 merge request: !453 Clean up kernel creation code in `codegen.driver`. Fix and dry-test SYCL codegen.
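
For orientation, a minimal sketch of the code path this commit fixes, mirroring the dry-run tests added below (there is no SYCL JIT, so the generated kernel is only inspected as source code):

import sympy as sp
from pystencils import Assignment, CreateKernelConfig, Target, create_kernel, fields

# Two 2D fields and a simple pointwise update, as in the new SYCL tests.
src, dst = fields("src, dst: [2D]")
asm = Assignment(dst.center(), sp.sin(src.center()) + sp.cos(src.center()))

# Target.SYCL now routes through the generic kernel path and the fixed SyclPlatform.
cfg = CreateKernelConfig(target=Target.SYCL)
kernel = create_kernel(asm, cfg)

# Dry test only: emit the C++ source and inspect it, e.g. for "sycl::id< 2 >".
print(kernel.get_c_code())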
......@@ -24,12 +24,13 @@ from ..extensions.cpp import CppMethodCall
from ..kernelcreation import KernelCreationContext, AstFactory
from ..constants import PsConstant
from .generic_gpu import GenericGpu
from ..exceptions import MaterializationError
from ...types import PsCustomType, PsIeeeFloatType, constify, PsIntegerType
from .platform import Platform
class SyclPlatform(GenericGpu):
class SyclPlatform(Platform):
def __init__(
self,
......
from __future__ import annotations
from typing import cast, Sequence, Iterable, Callable, TYPE_CHECKING
from typing import cast, Sequence, Callable, TYPE_CHECKING
from dataclasses import dataclass, replace
from .target import Target
......@@ -15,6 +15,7 @@ from .config import (
from .kernel import Kernel, GpuKernel
from .properties import PsSymbolProperty, FieldBasePtr
from .parameters import Parameter
from .functions import Lambda
from .gpu_indexing import GpuIndexing, GpuLaunchConfiguration
from ..field import Field
......@@ -23,6 +24,7 @@ from ..types import PsIntegerType, PsScalarType
from ..backend.memory import PsSymbol
from ..backend.ast import PsAstNode
from ..backend.ast.structural import PsBlock, PsLoop
from ..backend.ast.expressions import PsExpression
from ..backend.ast.analysis import collect_undefined_symbols, collect_required_headers
from ..backend.kernelcreation import (
KernelCreationContext,
......@@ -201,20 +203,20 @@ class DefaultKernelCreationDriver:
canonicalize = CanonicalizeSymbols(self._ctx, True)
kernel_ast = cast(PsBlock, canonicalize(kernel_ast))
if self._target.is_cpu():
return create_cpu_kernel_function(
self._ctx,
kernel_factory = KernelFactory(self._ctx)
if self._target.is_cpu() or self._target == Target.SYCL:
return kernel_factory.create_generic_kernel(
self._platform,
kernel_ast,
self._cfg.get_option("function_name"),
self._target,
self._cfg.get_jit(),
)
else:
elif self._target.is_gpu():
assert self._gpu_indexing is not None
return create_gpu_kernel_function(
self._ctx,
return kernel_factory.create_gpu_kernel(
self._platform,
kernel_ast,
self._cfg.get_option("function_name"),
......@@ -222,6 +224,8 @@ class DefaultKernelCreationDriver:
self._cfg.get_jit(),
self._gpu_indexing.get_launch_config_factory(),
)
else:
assert False, "unexpected target"
def parse_kernel_body(
self,
......@@ -432,23 +436,11 @@ class DefaultKernelCreationDriver:
f"No platform is currently available for CPU target {self._target}"
)
elif Target._GPU in self._target:
elif self._target.is_gpu():
gpu_opts = self._cfg.gpu
omit_range_check: bool = gpu_opts.get_option("omit_range_check")
match self._target:
case Target.SYCL:
from ..backend.platforms import SyclPlatform
auto_block_size: bool = self._cfg.sycl.get_option(
"automatic_block_size"
)
return SyclPlatform(
self._ctx,
omit_range_check=omit_range_check,
automatic_block_size=auto_block_size,
)
case Target.CUDA:
from ..backend.platforms import CudaPlatform
......@@ -463,32 +455,52 @@ class DefaultKernelCreationDriver:
omit_range_check=omit_range_check,
thread_mapping=thread_mapping,
)
elif self._target == Target.SYCL:
from ..backend.platforms import SyclPlatform
auto_block_size: bool = self._cfg.sycl.get_option("automatic_block_size")
omit_range_check = self._cfg.gpu.get_option("omit_range_check")
return SyclPlatform(
self._ctx,
omit_range_check=omit_range_check,
automatic_block_size=auto_block_size,
)
raise NotImplementedError(
f"Code generation for target {self._target} not implemented"
)
def create_cpu_kernel_function(
ctx: KernelCreationContext,
class KernelFactory:
"""Factory for wrapping up backend and IR objects into exportable kernels and function objects."""
def __init__(self, ctx: KernelCreationContext):
self._ctx = ctx
def create_lambda(self, expr: PsExpression) -> Lambda:
"""Create a Lambda from an expression."""
params = self._get_function_params(expr)
return Lambda(expr, params)
def create_generic_kernel(
self,
platform: Platform,
body: PsBlock,
function_name: str,
target_spec: Target,
jit: JitBase,
) -> Kernel:
undef_symbols = collect_undefined_symbols(body)
params = _get_function_params(ctx, undef_symbols)
req_headers = _get_headers(ctx, platform, body)
"""Create a kernel for a generic target"""
params = self._get_function_params(body)
req_headers = self._get_headers(platform, body)
kfunc = Kernel(body, target_spec, function_name, params, req_headers, jit)
kfunc.metadata.update(ctx.metadata)
kfunc.metadata.update(self._ctx.metadata)
return kfunc
def create_gpu_kernel_function(
ctx: KernelCreationContext,
def create_gpu_kernel(
self,
platform: Platform,
body: PsBlock,
function_name: str,
......@@ -496,10 +508,9 @@ def create_gpu_kernel_function(
jit: JitBase,
launch_config_factory: Callable[[], GpuLaunchConfiguration],
) -> GpuKernel:
undef_symbols = collect_undefined_symbols(body)
params = _get_function_params(ctx, undef_symbols)
req_headers = _get_headers(ctx, platform, body)
"""Create a kernel for a GPU target"""
params = self._get_function_params(body)
req_headers = self._get_headers(platform, body)
kfunc = GpuKernel(
body,
......@@ -510,18 +521,17 @@ def create_gpu_kernel_function(
jit,
launch_config_factory,
)
kfunc.metadata.update(ctx.metadata)
kfunc.metadata.update(self._ctx.metadata)
return kfunc
def _symbol_to_param(ctx: KernelCreationContext, symbol: PsSymbol):
def _symbol_to_param(self, symbol: PsSymbol):
from pystencils.backend.memory import BufferBasePtr, BackendPrivateProperty
props: set[PsSymbolProperty] = set()
for prop in symbol.properties:
match prop:
case BufferBasePtr(buf):
field = ctx.find_field(buf.name)
field = self._ctx.find_field(buf.name)
props.add(FieldBasePtr(field))
case BackendPrivateProperty():
pass
......@@ -530,21 +540,16 @@ def _symbol_to_param(ctx: KernelCreationContext, symbol: PsSymbol):
return Parameter(symbol.name, symbol.get_dtype(), props)
def _get_function_params(
ctx: KernelCreationContext, symbols: Iterable[PsSymbol]
) -> list[Parameter]:
params: list[Parameter] = [_symbol_to_param(ctx, s) for s in symbols]
def _get_function_params(self, ast: PsAstNode) -> list[Parameter]:
symbols = collect_undefined_symbols(ast)
params: list[Parameter] = [self._symbol_to_param(s) for s in symbols]
params.sort(key=lambda p: p.name)
return params
def _get_headers(
ctx: KernelCreationContext, platform: Platform, body: PsBlock
) -> set[str]:
def _get_headers(self, platform: Platform, body: PsBlock) -> set[str]:
req_headers = collect_required_headers(body)
req_headers |= platform.required_headers
req_headers |= ctx.required_headers
req_headers |= self._ctx.required_headers
return req_headers
......
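
The free functions create_cpu_kernel_function and create_gpu_kernel_function are thus folded into the new KernelFactory class. A hedged usage sketch follows, written as a helper function because ctx, platform, body, expr, and jit are placeholders for objects the driver normally assembles (KernelCreationContext, Platform, lowered PsBlock, PsExpression, JitBase):

from pystencils import Target
from pystencils.codegen.driver import KernelFactory

def build_outputs(ctx, platform, body, expr, jit):
    # One factory per kernel creation context.
    kfactory = KernelFactory(ctx)

    # CPU and SYCL kernels go through the generic path.
    kernel = kfactory.create_generic_kernel(platform, body, "my_kernel", Target.SYCL, jit)

    # GPU kernels additionally take a launch-configuration factory, obtained in the
    # driver from GpuIndexing.get_launch_config_factory():
    # gpu_kernel = kfactory.create_gpu_kernel(platform, body, "my_kernel", ..., jit, launch_config_factory)

    # Lambdas, previously built via Lambda.from_expression(ctx, expr), are created here as well.
    lam = kfactory.create_lambda(expr)

    return kernel, lam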
......@@ -4,21 +4,12 @@ from typing import Sequence, Any
from .parameters import Parameter
from ..types import PsType
from ..backend.kernelcreation import KernelCreationContext
from ..backend.ast.expressions import PsExpression
class Lambda:
"""A one-line function emitted by the code generator as an auxiliary object."""
@staticmethod
def from_expression(ctx: KernelCreationContext, expr: PsExpression):
from ..backend.ast.analysis import collect_undefined_symbols
from .driver import _get_function_params
params = _get_function_params(ctx, collect_undefined_symbols(expr))
return Lambda(expr, params)
def __init__(self, expr: PsExpression, params: Sequence[Parameter]):
self._expr = expr
self._params = tuple(params)
......
......@@ -228,8 +228,10 @@ class GpuIndexing:
self._manual_launch_grid = manual_launch_grid
from ..backend.kernelcreation import AstFactory
from .driver import KernelFactory
self._factory = AstFactory(self._ctx)
self._ast_factory = AstFactory(self._ctx)
self._kernel_factory = KernelFactory(self._ctx)
def get_thread_mapping(self) -> ThreadMapping:
"""Retrieve a thread mapping object for use by the backend"""
......@@ -267,7 +269,7 @@ class GpuIndexing:
num_work_items = cast(
_Dim3Lambda,
tuple(Lambda.from_expression(self._ctx, wit) for wit in work_items_expr),
tuple(self._kernel_factory.create_lambda(wit) for wit in work_items_expr),
)
def factory():
......@@ -305,15 +307,15 @@ class GpuIndexing:
raise ValueError(f"Iteration space rank is too large: {rank}")
block_size = (
Lambda.from_expression(self._ctx, work_items[0]),
Lambda.from_expression(self._ctx, self._factory.parse_index(1)),
Lambda.from_expression(self._ctx, self._factory.parse_index(1)),
self._kernel_factory.create_lambda(work_items[0]),
self._kernel_factory.create_lambda(self._ast_factory.parse_index(1)),
self._kernel_factory.create_lambda(self._ast_factory.parse_index(1)),
)
grid_size = tuple(
Lambda.from_expression(self._ctx, wit) for wit in work_items[1:]
self._kernel_factory.create_lambda(wit) for wit in work_items[1:]
) + tuple(
Lambda.from_expression(self._ctx, self._factory.parse_index(1))
self._kernel_factory.create_lambda(self._ast_factory.parse_index(1))
for _ in range(4 - rank)
)
......@@ -350,7 +352,7 @@ class GpuIndexing:
return tuple(ispace.actual_iterations(dim) for dim in dimensions)
case SparseIterationSpace():
return (self._factory.parse_index(ispace.index_list.shape[0]),)
return (self._ast_factory.parse_index(ispace.index_list.shape[0]),)
case _:
assert False, "unexpected iteration space"
......@@ -89,10 +89,13 @@ class Target(Flag):
GPU = CUDA
"""Alias for `Target.CUDA`, for backward compatibility."""
SYCL = _GPU | _SYCL
SYCL = _SYCL
"""SYCL kernel target.
Generate a function to be called within a SYCL parallel command.
.. note::
The SYCL target is experimental and not thoroughly tested yet.
"""
def is_automatic(self) -> bool:
......
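
As a hedged reading of the note above and of the dry-run tests in the new test file that follows: the sycl.automatic_block_size option appears to select which SYCL item type the generated function is written against.

from pystencils import CreateKernelConfig, Target

cfg = CreateKernelConfig(target=Target.SYCL)
# Default (automatic block size): the generated kernel indexes via sycl::id< 2 >,
# see test_sycl_kernel_static below.

cfg.sycl.automatic_block_size = False
# Manual block size: the kernel is written against sycl::nd_item< 2 >,
# see test_sycl_kernel_manual_block_size below.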
"""
Since we don't have a JIT compiler for SYCL, these tests can only
perform dry-dock testing.
If the SYCL target should ever become non-experimental, we need to
find a way to properly test SYCL kernels in execution.
These tests primarily check that the code generation driver runs
successfully for the SYCL target.
"""
import sympy as sp
from pystencils import (
create_kernel,
Target,
fields,
Assignment,
CreateKernelConfig,
)
def test_sycl_kernel_static():
src, dst = fields("src, dst: [2D]")
asm = Assignment(dst.center(), sp.sin(src.center()) + sp.cos(src.center()))
cfg = CreateKernelConfig(target=Target.SYCL)
kernel = create_kernel(asm, cfg)
code_string = kernel.get_c_code()
assert "sycl::id< 2 >" in code_string
assert "sycl::sin(" in code_string
assert "sycl::cos(" in code_string
def test_sycl_kernel_manual_block_size():
src, dst = fields("src, dst: [2D]")
asm = Assignment(dst.center(), sp.sin(src.center()) + sp.cos(src.center()))
cfg = CreateKernelConfig(target=Target.SYCL)
cfg.sycl.automatic_block_size = False
kernel = create_kernel(asm, cfg)
code_string = kernel.get_c_code()
assert "sycl::nd_item< 2 >" in code_string
......@@ -20,7 +20,7 @@ from pystencils.backend.transformations import (
LowerToC,
)
from pystencils.backend.constants import PsConstant
from pystencils.codegen.driver import create_cpu_kernel_function
from pystencils.codegen.driver import KernelFactory
from pystencils.jit import LegacyCpuJit
from pystencils import Target, fields, Assignment, Field
from pystencils.field import create_numpy_array_with_layout
......@@ -135,8 +135,8 @@ def create_vector_kernel(
lower = LowerToC(ctx)
loop_nest = lower(loop_nest)
func = create_cpu_kernel_function(
ctx,
kfactory = KernelFactory(ctx)
func = kfactory.create_generic_kernel(
platform,
PsBlock([loop_nest]),
"vector_kernel",
......