Skip to content
Snippets Groups Projects
Commit a3fd0c24 authored by Richard Angersbach
Browse files

Fix type for ceil_to_multiple in automatic launch config lambda

parent 3ad5b4a4
No related branches found
No related tags found
1 merge request: !454 "Optimization for GPU block size determination"
Pipeline #76541 failed
...@@ -11,6 +11,7 @@ from .parameters import Parameter ...@@ -11,6 +11,7 @@ from .parameters import Parameter
from .errors import CodegenError from .errors import CodegenError
from .config import GpuIndexingScheme from .config import GpuIndexingScheme
from .target import Target from .target import Target
from ..backend.constants import PsConstant
from ..backend.kernelcreation import ( from ..backend.kernelcreation import (
KernelCreationContext, KernelCreationContext,
...@@ -604,7 +605,9 @@ class GpuIndexing: ...@@ -604,7 +605,9 @@ class GpuIndexing:
# -> round block size in fastest moving dimension up to multiple of warp size # -> round block size in fastest moving dimension up to multiple of warp size
rounded_block_size: PsExpression rounded_block_size: PsExpression
if self._assume_warp_aligned_block_size: if self._assume_warp_aligned_block_size:
rounded_block_size = ceil_to_multiple(work_items[0], self._hw_props.warp_size) rounded_block_size = ceil_to_multiple(
work_items[0],
PsExpression.make(PsConstant(self._hw_props.warp_size, work_items[0].dtype)))
else: else:
rounded_block_size = work_items[0] rounded_block_size = work_items[0]
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment