Skip to content
Snippets Groups Projects
Commit 5d03477a authored by Frederik Hennig's avatar Frederik Hennig
Browse files

fix dimension order in SYCL target

parent d447eb1b
No related branches found
No related tags found
1 merge request!384Fundamental GPU Support
Pipeline #66998 passed
...@@ -10,17 +10,23 @@ from ..ast.expressions import ( ...@@ -10,17 +10,23 @@ from ..ast.expressions import (
PsExpression, PsExpression,
PsSubscript, PsSubscript,
) )
from ..kernelcreation.context import KernelCreationContext
from ..constants import PsConstant from ..constants import PsConstant
from .platform import Platform from .platform import Platform
from ..exceptions import MaterializationError from ..exceptions import MaterializationError
from ...types import PsType, PsCustomType, PsIeeeFloatType, constify from ...types import PsType, PsCustomType, PsIeeeFloatType, constify
from ...config import SyclIndexingConfig from ...config import GpuIndexingConfig
class SyclPlatform(Platform): class SyclPlatform(Platform):
def __init__(self, indexing_cfg: SyclIndexingConfig): def __init__(self, ctx: KernelCreationContext, indexing_cfg: GpuIndexingConfig | None):
self._cfg = indexing_cfg super().__init__(ctx)
self._cfg = indexing_cfg if indexing_cfg is not None else GpuIndexingConfig()
if not self._cfg.sycl_automatic_block_size:
raise ValueError("The SYCL code generator supports only automatic block sizes at the moment.")
@property @property
def required_headers(self) -> set[str]: def required_headers(self) -> set[str]:
...@@ -65,8 +71,17 @@ class SyclPlatform(Platform): ...@@ -65,8 +71,17 @@ class SyclPlatform(Platform):
id_type = PsCustomType(f"sycl::id< {rank} >", const=True) id_type = PsCustomType(f"sycl::id< {rank} >", const=True)
id_symbol = PsExpression.make(self._ctx.get_symbol("id", id_type)) id_symbol = PsExpression.make(self._ctx.get_symbol("id", id_type))
# Determine loop order by permuting dimensions
archetype_field = ispace.archetype_field
if archetype_field is not None:
loop_order = archetype_field.layout
dimensions = [ispace.dimensions[coordinate] for coordinate in loop_order]
else:
dimensions = ispace.dimensions
unpackings = [] unpackings = []
for i, dim in enumerate(ispace.dimensions): for i, dim in enumerate(dimensions[::-1]):
coord = PsExpression.make(PsConstant(i, self._ctx.index_dtype)) coord = PsExpression.make(PsConstant(i, self._ctx.index_dtype))
work_item_idx = PsSubscript(id_symbol, coord) work_item_idx = PsSubscript(id_symbol, coord)
...@@ -98,7 +113,7 @@ class SyclPlatform(Platform): ...@@ -98,7 +113,7 @@ class SyclPlatform(Platform):
return body return body
def _id_type(self, rank: int): def _id_type(self, rank: int):
if self._cfg.use_ndrange: if not self._cfg.sycl_automatic_block_size:
return PsCustomType(f"sycl::nd_item< {rank} >", const=True) return PsCustomType(f"sycl::nd_item< {rank} >", const=True)
else: else:
return PsCustomType(f"sycl::item< {rank} >", const=True) return PsCustomType(f"sycl::item< {rank} >", const=True)
......
...@@ -100,7 +100,7 @@ class VectorizationConfig: ...@@ -100,7 +100,7 @@ class VectorizationConfig:
@dataclass @dataclass
class SyclIndexingConfig: class GpuIndexingConfig:
"""Configure index translation behaviour inside kernels generated for `Target.SYCL`.""" """Configure index translation behaviour inside kernels generated for `Target.SYCL`."""
omit_range_check: bool = False omit_range_check: bool = False
...@@ -111,11 +111,14 @@ class SyclIndexingConfig: ...@@ -111,11 +111,14 @@ class SyclIndexingConfig:
This check can be discarded through this option, at your own peril. This check can be discarded through this option, at your own peril.
""" """
use_ndrange: bool = False sycl_automatic_block_size: bool = True
"""If set to `True` while generating for `Target.SYCL`, generate the kernel for execution with a ``sycl::ndrange``. """If set to `True` while generating for `Target.SYCL`, let the SYCL runtime decide on the block size.
If `use_ndrange` is set, the kernel will receive an `nd_item` instead of an `item` from which the iteration counters If set to `True`, the kernel is generated for execution via ``parallel_for``-dispatch using
are derived. a flat `sycl::range`. In this case, the GPU block size will be inferred by the SYCL runtime.
If set to `False`, the kernel will receive an `nd_item` and has to be executed using ``parallel_for``
with an `nd_range`. This allows manual specification of the block size.
""" """
...@@ -181,10 +184,10 @@ class CreateKernelConfig: ...@@ -181,10 +184,10 @@ class CreateKernelConfig:
If this parameter is set while `target` is a non-CPU target, an error will be raised. If this parameter is set while `target` is a non-CPU target, an error will be raised.
""" """
sycl_indexing: None | SyclIndexingConfig = None gpu_indexing: None | GpuIndexingConfig = None
"""Configure index translation for SYCL kernels. """Configure index translation for GPU kernels.
It this parameter is set while `target` is not `Target.SYCL`, an error will be raised. It this parameter is set while `target` is not a GPU target, an error will be raised.
""" """
def __post_init__(self): def __post_init__(self):
...@@ -217,7 +220,7 @@ class CreateKernelConfig: ...@@ -217,7 +220,7 @@ class CreateKernelConfig:
if self.cpu_optim.vectorize is not False and not self.target.is_vector_cpu(): if self.cpu_optim.vectorize is not False and not self.target.is_vector_cpu():
raise PsOptionsError(f"Cannot enable auto-vectorization for non-vector CPU target {self.target}") raise PsOptionsError(f"Cannot enable auto-vectorization for non-vector CPU target {self.target}")
if self.sycl_indexing is not None: if self.gpu_indexing is not None:
if self.target != Target.SYCL: if self.target != Target.SYCL:
raise PsOptionsError(f"`sycl_indexing` cannot be set for non-SYCL target {self.target}") raise PsOptionsError(f"`sycl_indexing` cannot be set for non-SYCL target {self.target}")
......
...@@ -80,7 +80,7 @@ class Target(Flag): ...@@ -80,7 +80,7 @@ class Target(Flag):
GPU = GenericCUDA GPU = GenericCUDA
"""Alias for backward compatibility.""" """Alias for backward compatibility."""
SYCL = _SYCL SYCL = _GPU | _SYCL
"""SYCL kernel target. """SYCL kernel target.
Generate a function to be called within a SYCL parallel command. Generate a function to be called within a SYCL parallel command.
......
...@@ -93,7 +93,7 @@ def create_kernel( ...@@ -93,7 +93,7 @@ def create_kernel(
platform = GenericCpu(ctx) platform = GenericCpu(ctx)
case Target.SYCL: case Target.SYCL:
from .backend.platforms import SyclPlatform from .backend.platforms import SyclPlatform
platform = SyclPlatform(ctx) platform = SyclPlatform(ctx, config.gpu_indexing)
case _: case _:
# TODO: CUDA/HIP platform # TODO: CUDA/HIP platform
raise NotImplementedError("Target platform not implemented") raise NotImplementedError("Target platform not implemented")
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment