Commit bd47d369 authored by Michael Kuron

Address review comments

parent 22587461
Merge request !336: Remove pystencils.GPU_DEVICE
Pipeline #54320 failed
@@ -23,7 +23,8 @@ def create_data_handling(domain_size: Tuple[int, ...],
                          default_layout: str = 'SoA',
                          default_target: Target = Target.CPU,
                          parallel: bool = False,
-                         default_ghost_layers: int = 1) -> DataHandling:
+                         default_ghost_layers: int = 1,
+                         device_number: Union[int, None] = None) -> DataHandling:
     """Creates a data handling instance.

     Args:
@@ -34,6 +35,9 @@ def create_data_handling(domain_size: Tuple[int, ...],
         default_target: `Target`
         parallel: if True a parallel domain is created using walberla - each MPI process gets a part of the domain
         default_ghost_layers: default number of ghost layers if not overwritten in 'add_array'
+        device_number: If `default_target` is set to 'GPU' and `parallel` is False, a device number should be
+                       specified. If none is given, the device with the largest amount of memory is used. If multiple
+                       devices have the same amount of memory, the one with the lower number is used
     """
     if isinstance(default_target, str):
         new_target = Target[default_target.upper()]
@@ -69,7 +73,8 @@ def create_data_handling(domain_size: Tuple[int, ...],
                                   periodicity=periodicity,
                                   default_target=default_target,
                                   default_layout=default_layout,
-                                  default_ghost_layers=default_ghost_layers)
+                                  default_ghost_layers=default_ghost_layers,
+                                  device_number=device_number)


 __all__ = ['create_data_handling']
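
For illustration, a minimal usage sketch of the new keyword, assuming the top-level pystencils.create_data_handling entry point and the Target enum referenced above, and a machine with at least two CUDA devices (the argument values are arbitrary):

    import pystencils as ps

    # Pin all GPU arrays managed by this data handling to CUDA device 1.
    # Leaving device_number as None falls back to the device with the
    # largest amount of memory, as documented in the docstring above.
    dh = ps.create_data_handling(domain_size=(64, 64),
                                 default_target=ps.Target.GPU,
                                 parallel=False,
                                 device_number=1)
    src = dh.add_array('src', values_per_cell=1)
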
@@ -57,7 +57,7 @@ class SerialDataHandling(DataHandling):
         if not array_handler:
             try:
                 if device_number is None:
-                    import cupy
+                    import cupy.cuda.runtime
                     if cupy.cuda.runtime.getDeviceCount() > 0:
                         device_number = sorted(range(cupy.cuda.runtime.getDeviceCount()),
                                                key=lambda i: cupy.cuda.Device(i).mem_info[1], reverse=True)[0]
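
Read in isolation, the selection logic from this hunk amounts to the following sketch (pick_default_device is a hypothetical helper name; the cupy calls are the ones used in the diff):

    import cupy.cuda.runtime

    def pick_default_device():
        # No visible CUDA device: leave the choice undecided.
        if cupy.cuda.runtime.getDeviceCount() == 0:
            return None
        # mem_info is (free, total) in bytes; sort by total memory, largest first.
        # sorted() is stable, so among devices with equal memory the one with the
        # lowest device number keeps its position and wins the tie.
        return sorted(range(cupy.cuda.runtime.getDeviceCount()),
                      key=lambda i: cupy.cuda.Device(i).mem_info[1],
                      reverse=True)[0]
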
@@ -75,7 +75,7 @@ def make_python_function(kernel_function_node, argument_dict=None, custom_backen
         try:
             args, block_and_thread_numbers = cache[key]
             device = set(a.device.id for a in args if type(a) is cp.ndarray)
-            assert len(device) == 1
+            assert len(device) == 1, "All arrays used by a kernel need to be allocated on the same device"
             with cp.cuda.Device(device.pop()):
                 func(block_and_thread_numbers['grid'], block_and_thread_numbers['block'], args)
         except KeyError:
@@ -92,7 +92,7 @@ def make_python_function(kernel_function_node, argument_dict=None, custom_backen
             cache[key] = (args, block_and_thread_numbers)
             cache_values.append(kwargs)  # keep objects alive such that ids remain unique
             device = set(a.device.id for a in args if type(a) is cp.ndarray)
-            assert len(device) == 1
+            assert len(device) == 1, "All arrays used by a kernel need to be allocated on the same device"
             with cp.cuda.Device(device.pop()):
                 func(block_and_thread_numbers['grid'], block_and_thread_numbers['block'], args)
         # useful for debugging:
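
Stripped of the caching machinery, the invariant that the new assertion message documents looks roughly like this (launch_on_common_device and its parameters are hypothetical names for this sketch):

    import cupy as cp

    def launch_on_common_device(func, block_and_thread_numbers, args):
        # Collect the device ids of all cupy arrays passed to the kernel.
        device = set(a.device.id for a in args if type(a) is cp.ndarray)
        assert len(device) == 1, "All arrays used by a kernel need to be allocated on the same device"
        # Make that device current before launching, so the kernel runs where the data lives.
        with cp.cuda.Device(device.pop()):
            func(block_and_thread_numbers['grid'], block_and_thread_numbers['block'], args)
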
@@ -16,7 +16,7 @@ except ImportError:
     pytest = unittest.mock.MagicMock()

 try:
-    import cupy
+    import cupy.cuda.runtime
     device_numbers = range(cupy.cuda.runtime.getDeviceCount())
 except ImportError:
     device_numbers = []
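
A device_numbers range built this way is typically consumed by a pytest parametrization, so each test runs once per visible GPU and is collected as empty when cupy is missing; a hypothetical test sketch:

    import pytest

    @pytest.mark.parametrize('device_number', device_numbers)
    def test_device_is_selectable(device_number):
        import cupy as cp
        # Entering the Device context must make the requested device current.
        with cp.cuda.Device(device_number):
            assert cp.cuda.runtime.getDevice() == device_number
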