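Fix argument type for ceil_to_multiple in automatic launch config lambda: pass the warp size as a typed PsConstant expression (matching the dtype of work_items[0]) instead of a raw value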

a3fd0c24 · Richard Angersbach · 3ad5b4a4 · a3fd0c24
Commit a3fd0c24 authored 4 months ago by Richard Angersbach
--- a/src/pystencils/codegen/gpu_indexing.py
+++ b/src/pystencils/codegen/gpu_indexing.py
@@ -11,6 +11,7 @@ from .parameters import Parameter
 from .errors import CodegenError
 from .config import GpuIndexingScheme
 from .target import Target
+from ..backend.constants import PsConstant
 from ..backend.kernelcreation import (
    KernelCreationContext,
@@ -604,7 +605,9 @@ class GpuIndexing:
        # -> round block size in fastest moving dimension up to multiple of warp size
        rounded_block_size: PsExpression
        if self._assume_warp_aligned_block_size:
-            rounded_block_size = ceil_to_multiple(work_items[0], self._hw_props.warp_size)
+            rounded_block_size = ceil_to_multiple(
+                work_items[0],
+                PsExpression.make(PsConstant(self._hw_props.warp_size, work_items[0].dtype)))
        else:
            rounded_block_size = work_items[0]