Skip to content
Snippets Groups Projects
Commit 5d03477a authored by Frederik Hennig's avatar Frederik Hennig
Browse files

fix dimension order in SYCL target

parent d447eb1b
No related branches found
No related tags found
1 merge request!384Fundamental GPU Support
Pipeline #66998 passed
......@@ -10,17 +10,23 @@ from ..ast.expressions import (
PsExpression,
PsSubscript,
)
from ..kernelcreation.context import KernelCreationContext
from ..constants import PsConstant
from .platform import Platform
from ..exceptions import MaterializationError
from ...types import PsType, PsCustomType, PsIeeeFloatType, constify
from ...config import SyclIndexingConfig
from ...config import GpuIndexingConfig
class SyclPlatform(Platform):
def __init__(self, indexing_cfg: SyclIndexingConfig):
self._cfg = indexing_cfg
def __init__(self, ctx: KernelCreationContext, indexing_cfg: GpuIndexingConfig | None):
super().__init__(ctx)
self._cfg = indexing_cfg if indexing_cfg is not None else GpuIndexingConfig()
if not self._cfg.sycl_automatic_block_size:
raise ValueError("The SYCL code generator supports only automatic block sizes at the moment.")
@property
def required_headers(self) -> set[str]:
......@@ -65,8 +71,17 @@ class SyclPlatform(Platform):
id_type = PsCustomType(f"sycl::id< {rank} >", const=True)
id_symbol = PsExpression.make(self._ctx.get_symbol("id", id_type))
# Determine loop order by permuting dimensions
archetype_field = ispace.archetype_field
if archetype_field is not None:
loop_order = archetype_field.layout
dimensions = [ispace.dimensions[coordinate] for coordinate in loop_order]
else:
dimensions = ispace.dimensions
unpackings = []
for i, dim in enumerate(ispace.dimensions):
for i, dim in enumerate(dimensions[::-1]):
coord = PsExpression.make(PsConstant(i, self._ctx.index_dtype))
work_item_idx = PsSubscript(id_symbol, coord)
......@@ -98,7 +113,7 @@ class SyclPlatform(Platform):
return body
def _id_type(self, rank: int):
if self._cfg.use_ndrange:
if not self._cfg.sycl_automatic_block_size:
return PsCustomType(f"sycl::nd_item< {rank} >", const=True)
else:
return PsCustomType(f"sycl::item< {rank} >", const=True)
......
......@@ -100,7 +100,7 @@ class VectorizationConfig:
@dataclass
class SyclIndexingConfig:
class GpuIndexingConfig:
"""Configure index translation behaviour inside kernels generated for `Target.SYCL`."""
omit_range_check: bool = False
......@@ -111,11 +111,14 @@ class SyclIndexingConfig:
This check can be discarded through this option, at your own peril.
"""
use_ndrange: bool = False
"""If set to `True` while generating for `Target.SYCL`, generate the kernel for execution with a ``sycl::ndrange``.
sycl_automatic_block_size: bool = True
"""If set to `True` while generating for `Target.SYCL`, let the SYCL runtime decide on the block size.
If `use_ndrange` is set, the kernel will receive an `nd_item` instead of an `item` from which the iteration counters
are derived.
If set to `True`, the kernel is generated for execution via ``parallel_for``-dispatch using
a flat `sycl::range`. In this case, the GPU block size will be inferred by the SYCL runtime.
If set to `False`, the kernel will receive an `nd_item` and has to be executed using ``parallel_for``
with an `nd_range`. This allows manual specification of the block size.
"""
......@@ -181,10 +184,10 @@ class CreateKernelConfig:
If this parameter is set while `target` is a non-CPU target, an error will be raised.
"""
sycl_indexing: None | SyclIndexingConfig = None
"""Configure index translation for SYCL kernels.
gpu_indexing: None | GpuIndexingConfig = None
"""Configure index translation for GPU kernels.
It this parameter is set while `target` is not `Target.SYCL`, an error will be raised.
It this parameter is set while `target` is not a GPU target, an error will be raised.
"""
def __post_init__(self):
......@@ -217,7 +220,7 @@ class CreateKernelConfig:
if self.cpu_optim.vectorize is not False and not self.target.is_vector_cpu():
raise PsOptionsError(f"Cannot enable auto-vectorization for non-vector CPU target {self.target}")
if self.sycl_indexing is not None:
if self.gpu_indexing is not None:
if self.target != Target.SYCL:
raise PsOptionsError(f"`sycl_indexing` cannot be set for non-SYCL target {self.target}")
......
......@@ -80,7 +80,7 @@ class Target(Flag):
GPU = GenericCUDA
"""Alias for backward compatibility."""
SYCL = _SYCL
SYCL = _GPU | _SYCL
"""SYCL kernel target.
Generate a function to be called within a SYCL parallel command.
......
......@@ -93,7 +93,7 @@ def create_kernel(
platform = GenericCpu(ctx)
case Target.SYCL:
from .backend.platforms import SyclPlatform
platform = SyclPlatform(ctx)
platform = SyclPlatform(ctx, config.gpu_indexing)
case _:
# TODO: CUDA/HIP platform
raise NotImplementedError("Target platform not implemented")
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please to comment