From ba7b20ac7e1021dc5fd7007637ab4597cc0dd453 Mon Sep 17 00:00:00 2001
From: Stephan Seitz <stephan.seitz@fau.de>
Date: Thu, 12 Dec 2019 15:00:41 +0100
Subject: [PATCH] Add 'cuda' compiler config (with preferred_block_size and
 always_autotune)

---
 pystencils/cpu/cpujit.py       | 12 +++++++++++-
 pystencils/gpucuda/cudajit.py  |  3 ++-
 pystencils/gpucuda/indexing.py |  9 +++++++--
 3 files changed, 20 insertions(+), 4 deletions(-)

diff --git a/pystencils/cpu/cpujit.py b/pystencils/cpu/cpujit.py
index 6376ffb85..80d8b96ff 100644
--- a/pystencils/cpu/cpujit.py
+++ b/pystencils/cpu/cpujit.py
@@ -175,9 +175,15 @@ def read_config():
         ('object_cache', os.path.join(user_cache_dir('pystencils'), 'objectcache')),
         ('clear_cache_on_start', False),
     ])
+    default_cuda_config = OrderedDict([
+        ('always_autotune', False),
+        ('preferred_block_size', (16, 16, 1)),
+    ])
 
     default_config = OrderedDict([('compiler', default_compiler_config),
-                                  ('cache', default_cache_config)])
+                                  ('cache', default_cache_config),
+                                  ('cuda', default_cuda_config)
+                                  ])
 
     config_path, config_exists = get_configuration_file_path()
     config = default_config.copy()
diff --git a/pystencils/gpucuda/cudajit.py b/pystencils/gpucuda/cudajit.py
index 638010609..c249a6756 100644
--- a/pystencils/gpucuda/cudajit.py
+++ b/pystencils/gpucuda/cudajit.py
@@ -4,6 +4,7 @@ import numpy as np
 
 import pystencils
 from pystencils.backends.cbackend import generate_c, get_headers
+from pystencils.cpu.cpujit import get_cuda_config
 from pystencils.data_types import StructType
 from pystencils.field import FieldType
 from pystencils.gpucuda.texture_utils import ndarray_to_tex
@@ -88,7 +89,7 @@ def make_python_function(kernel_function_node, argument_dict=None, custom_backen
                                tex.filter_mode, tex.use_normalized_coordinates, tex.read_as_integer)
             args = _build_numpy_argument_list(parameters, full_arguments)
             indexing = kernel_function_node.indexing
-            if kernel_function_node.do_cudaautotune:
+            if kernel_function_node.do_cudaautotune or get_cuda_config()['always_autotune']:
                 block_and_thread_numbers = (
                     indexing.autotune_call_parameters(partial(func, *args),
                                                       shape,
diff --git a/pystencils/gpucuda/indexing.py b/pystencils/gpucuda/indexing.py
index bf8b53027..0cd18e9ba 100644
--- a/pystencils/gpucuda/indexing.py
+++ b/pystencils/gpucuda/indexing.py
@@ -7,6 +7,7 @@ from sympy.core.cache import cacheit
 
 from pystencils.astnodes import Block, Conditional
 from pystencils.cache import disk_cache
+from pystencils.cpu.cpujit import get_cuda_config
 from pystencils.data_types import TypedSymbol, create_type
 from pystencils.integer_functions import div_ceil, div_floor
 from pystencils.slicing import normalize_slice
@@ -130,7 +131,7 @@ class AbstractIndexing(abc.ABC):
                     current_best = block_and_thread_numbers
             print(f'{current_best} is the best out of {self._autotune_block_sizes or self.AUTOTUNE_BLOCK_SIZES}')
-            self._block_size = current_best
+            self._block_size = current_best['block']
             return current_best
 
         return _autotune_call_parameters(self,
                                          call_shape,
@@ -157,7 +158,10 @@ class BlockIndexing(AbstractIndexing):
     AUTOTUNE_NUM_CALLS = 10
 
     def __init__(self, field, iteration_slice,
-                 block_size=(16, 16, 1), permute_block_size_dependent_on_layout=True, compile_time_block_size=False,
+                 block_size=tuple(get_cuda_config()['preferred_block_size']),
+
+                 permute_block_size_dependent_on_layout=True,
+                 compile_time_block_size=False,
                  maximum_block_size=(1024, 1024, 64)):
         if field.spatial_dimensions > 3:
             raise NotImplementedError("This indexing scheme supports at most 3 spatial dimensions")
@@ -304,6 +308,7 @@ class LineIndexing(AbstractIndexing):
         self._coordinates = coordinates
         self._iterationSlice = normalize_slice(iteration_slice, field.spatial_shape)
         self._symbolicShape = [e if isinstance(e, sp.Basic) else None for e in field.spatial_shape]
+        self._autotune_block_sizes = None
 
     @property
     def coordinates(self):
-- 
GitLab
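
Usage sketch (not part of the patch): the snippet below assumes the patched
pystencils is importable and that the user configuration file resolved by
get_configuration_file_path() is JSON, as for the existing 'compiler' and
'cache' sections; the override values shown are illustrative only.

    from pystencils.cpu.cpujit import get_cuda_config

    # New 'cuda' section introduced by this patch; values fall back to
    # default_cuda_config in read_config() unless the user config overrides them.
    cuda_config = get_cuda_config()
    print(cuda_config['preferred_block_size'])  # (16, 16, 1) by default
    print(cuda_config['always_autotune'])       # False by default

    # A user override would sit next to the existing 'compiler' and 'cache'
    # entries in the configuration file, for example:
    #
    #     "cuda": {
    #         "always_autotune": true,
    #         "preferred_block_size": [32, 8, 1]
    #     }
    #
    # JSON arrays are read back as Python lists, which is why BlockIndexing
    # wraps the configured block size in tuple(...) in the hunk above.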