From f65cd9bf27082525cfb97cf3958bda75bae4719e Mon Sep 17 00:00:00 2001 From: zy69guqi <richard.angersbach@fau.de> Date: Tue, 18 Mar 2025 12:50:14 +0100 Subject: [PATCH] Create expression for ceil_to_multiple manually --- src/pystencils/codegen/gpu_indexing.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/src/pystencils/codegen/gpu_indexing.py b/src/pystencils/codegen/gpu_indexing.py index bcffbbcae..d5f0aead2 100644 --- a/src/pystencils/codegen/gpu_indexing.py +++ b/src/pystencils/codegen/gpu_indexing.py @@ -20,7 +20,7 @@ from ..backend.kernelcreation import ( ) from ..backend.platforms.cuda import ThreadMapping -from ..backend.ast.expressions import PsExpression +from ..backend.ast.expressions import PsExpression, PsIntDiv from math import prod from ..utils import ceil_to_multiple @@ -605,9 +605,10 @@ class GpuIndexing: # -> round block size in fastest moving dimension up to multiple of warp size rounded_block_size: PsExpression if self._assume_warp_aligned_block_size: - rounded_block_size = ceil_to_multiple( - work_items[0], - PsExpression.make(PsConstant(self._hw_props.warp_size, work_items[0].dtype))) + warp_size = self._ast_factory.parse_index(self._hw_props.warp_size) + rounded_block_size = self._ast_factory.parse_index( + PsIntDiv(work_items[0].clone() + warp_size.clone() - self._ast_factory.parse_index(1), + warp_size.clone()) * warp_size.clone()) else: rounded_block_size = work_items[0] -- GitLab