diff --git a/pystencils/gpu/indexing.py b/pystencils/gpu/indexing.py
index c6138616405279ed7b93454192e9dd63946c2345..e8e2e99f5eee4ab5b8b5caae6386178d9916e958 100644
--- a/pystencils/gpu/indexing.py
+++ b/pystencils/gpu/indexing.py
@@ -105,7 +105,7 @@ class BlockIndexing(AbstractIndexing):
 
     def __init__(self, field, iteration_slice,
                  block_size=(16, 16, 1), permute_block_size_dependent_on_layout=True, compile_time_block_size=False,
-                 maximum_block_size=(1024, 1024, 64), device_number=0):
+                 maximum_block_size=(1024, 1024, 64), device_number=None):
         if field.spatial_dimensions > 3:
             raise NotImplementedError("This indexing scheme supports at most 3 spatial dimensions")
 
@@ -114,6 +114,7 @@ class BlockIndexing(AbstractIndexing):
 
         self._block_size = block_size
         if maximum_block_size == 'auto':
+            assert device_number is not None, 'If "maximum_block_size" is set to "auto", a device number must be specified'
             # Get device limits
             import cupy as cp
             # See https://github.com/cupy/cupy/issues/7676
@@ -186,27 +187,22 @@ class BlockIndexing(AbstractIndexing):
     def iteration_space(self, arr_shape):
         return _iteration_space(self._iterationSlice, arr_shape)
 
-    def limit_block_size_by_register_restriction(self, block_size, required_registers_per_thread, device_number):
+    def limit_block_size_by_register_restriction(self, block_size, required_registers_per_thread):
         """Shrinks the block_size if there are too many registers used per block.
         This is not done automatically, since the required_registers_per_thread are not known before compilation.
         They can be obtained by ``func.num_regs`` from a cupy function.
         Args:
             block_size: block size to be limited
             required_registers_per_thread: number of registers required per thread
-            device_number: device number of the used GPU. By default, the zeroth device is used.
         Returns: smaller block_size if too many registers are used.
         """
         import cupy as cp
 
-        if device_number != self._device_number:
-            warnings.warn(f"BlockIndexing was set up with device number: {self._device_number}, but for limiting"
-                          f"the GPU blocks to the hardware device number {device_number} was used.")
-
         # See https://github.com/cupy/cupy/issues/7676
         if cp.cuda.runtime.is_hip:
-            max_registers_per_block = cp.cuda.runtime.deviceGetAttribute(71, device_number)
+            max_registers_per_block = cp.cuda.runtime.deviceGetAttribute(71, self._device_number)
         else:
-            device = cp.cuda.Device(device_number)
+            device = cp.cuda.Device(self._device_number)
             da = device.attributes
             max_registers_per_block = da.get("MaxRegistersPerBlock")
 
diff --git a/pystencils_tests/test_gpu.py b/pystencils_tests/test_gpu.py
index aa86dcd31f955ef4fb64289fafeab804342e6e1e..b0af7950da352c633d73d7115bf1ea661f884937 100644
--- a/pystencils_tests/test_gpu.py
+++ b/pystencils_tests/test_gpu.py
@@ -163,11 +163,12 @@ def test_block_indexing():
     bi = BlockIndexing(f, make_slice[:, :, :], block_size=(32, 1, 1), permute_block_size_dependent_on_layout=False)
     assert bi.call_parameters((1, 16, 16))['block'] == (1, 16, 2)
 
-    bi = BlockIndexing(f, make_slice[:, :, :], block_size=(16, 8, 2), maximum_block_size="auto")
+    bi = BlockIndexing(f, make_slice[:, :, :], block_size=(16, 8, 2),
+                       maximum_block_size="auto", device_number=pystencils.GPU_DEVICE)
 
     # This function should be used if the number of required registers is known. It can be determined with func.num_regs
     registers_per_thread = 1000
-    blocks = bi.limit_block_size_by_register_restriction([1024, 1024, 1], registers_per_thread, pystencils.GPU_DEVICE)
+    blocks = bi.limit_block_size_by_register_restriction([1024, 1024, 1], registers_per_thread)
 
     if cp.cuda.runtime.is_hip:
         max_registers_per_block = cp.cuda.runtime.deviceGetAttribute(71, pystencils.GPU_DEVICE)