Fix indexing for AMD GPUs
All threads resolved!
All threads resolved!
Compare changes
+ 33
− 23
@@ -97,11 +97,14 @@ class BlockIndexing(AbstractIndexing):
@@ -97,11 +97,14 @@ class BlockIndexing(AbstractIndexing):
permute_block_size_dependent_on_layout: if True the block_size is permuted such that the fastest coordinate
compile_time_block_size: if True, the concrete block size is compiled in; otherwise the GPU variable 'blockDim' is used
@@ -110,17 +113,22 @@ class BlockIndexing(AbstractIndexing):
@@ -110,17 +113,22 @@ class BlockIndexing(AbstractIndexing):
@@ -178,32 +186,34 @@ class BlockIndexing(AbstractIndexing):
@@ -178,32 +186,34 @@ class BlockIndexing(AbstractIndexing):
def limit_block_size_by_register_restriction(block_size, required_registers_per_thread, device=None):
This is not done automatically, since required_registers_per_thread is not known before compilation.