Skip to content
Snippets Groups Projects
Commit d09e0966 authored by Martin Bauer's avatar Martin Bauer
Browse files

Added non-constant field-sizes vectorization support to outer interface

parent 4a8a2343
No related branches found
No related tags found
No related merge requests found
...@@ -86,12 +86,23 @@ Simplifications / Transformations: ...@@ -86,12 +86,23 @@ Simplifications / Transformations:
Field size information: Field size information:
- ``pdf_arr=None``: pass a numpy array here to create kernels with fixed size and create the loop nest according - ``pdf_arr=None``: pass a numpy array here to create kernels with fixed size and create the loop nest according
to layout of this array to layout of this array
- ``field_size=None``: create kernel for fixed field size - ``field_size=None``: create kernel for fixed field size
- ``field_layout='c'``: ``'c'`` or ``'numpy'`` for standard numpy layout, ``'reverse_numpy'`` or ``'f'`` for fortran - ``field_layout='c'``: ``'c'`` or ``'numpy'`` for standard numpy layout, ``'reverse_numpy'`` or ``'f'`` for fortran
layout, this does not apply when pdf_arr was given, then the same layout as pdf_arr is used layout, this does not apply when pdf_arr was given, then the same layout as pdf_arr is used
CPU:
- ``openmp=True``: Can be a boolean to turn multithreading on/off, or an integer
specifying the number of threads. If True is specified, OpenMP chooses the number of threads.
- ``vectorization=False``: controls manual vectorization using SIMD intrinsics. If True, default vectorization settings
are used. Alternatively, a dictionary with parameters for the vectorize function can be passed. For example
``{'instruction_set': 'avx', 'assume_aligned': True, 'nontemporal': True}``. Nontemporal stores are only used if
assume_aligned is also activated.
GPU: GPU:
- ``target='cpu'``: ``'cpu'`` or ``'gpu'``, last option requires a CUDA enabled graphics card - ``target='cpu'``: ``'cpu'`` or ``'gpu'``, last option requires a CUDA enabled graphics card
...@@ -100,6 +111,7 @@ GPU: ...@@ -100,6 +111,7 @@ GPU:
- ``gpu_indexing_params='block'``: parameters passed to init function of gpu indexing. - ``gpu_indexing_params='block'``: parameters passed to init function of gpu indexing.
For ``'block'`` indexing one can e.g. specify the block size ``{'block_size' : (128, 4, 1)}`` For ``'block'`` indexing one can e.g. specify the block size ``{'block_size' : (128, 4, 1)}``
Other: Other:
- ``openmp=True``: only applicable for cpu simulations. Can be a boolean to turn multi threading on/off, or an integer - ``openmp=True``: only applicable for cpu simulations. Can be a boolean to turn multi threading on/off, or an integer
......
...@@ -19,7 +19,7 @@ class LatticeBoltzmannStep: ...@@ -19,7 +19,7 @@ class LatticeBoltzmannStep:
velocity_data_name=None, density_data_name=None, density_data_index=None, velocity_data_name=None, density_data_name=None, density_data_index=None,
compute_velocity_in_every_step=False, compute_density_in_every_step=False, compute_velocity_in_every_step=False, compute_density_in_every_step=False,
velocity_input_array_name=None, time_step_order='stream_collide', flag_interface=None, velocity_input_array_name=None, time_step_order='stream_collide', flag_interface=None,
**method_parameters): alignment_if_vectorized=64, fixed_loop_sizes=True, **method_parameters):
# --- Parameter normalization --- # --- Parameter normalization ---
if data_handling is not None: if data_handling is not None:
...@@ -60,7 +60,7 @@ class LatticeBoltzmannStep: ...@@ -60,7 +60,7 @@ class LatticeBoltzmannStep:
alignment = False alignment = False
if optimization['target'] == 'cpu' and optimization['vectorization']: if optimization['target'] == 'cpu' and optimization['vectorization']:
alignment = 128 alignment = alignment_if_vectorized
self._data_handling.add_array(self._pdf_arr_name, values_per_cell=q, gpu=self._gpu, layout=layout, self._data_handling.add_array(self._pdf_arr_name, values_per_cell=q, gpu=self._gpu, layout=layout,
latex_name='src', dtype=field_dtype, alignment=alignment) latex_name='src', dtype=field_dtype, alignment=alignment)
...@@ -94,7 +94,8 @@ class LatticeBoltzmannStep: ...@@ -94,7 +94,8 @@ class LatticeBoltzmannStep:
# --- Kernel creation --- # --- Kernel creation ---
if lbm_kernel is None: if lbm_kernel is None:
switch_to_symbolic_relaxation_rates_for_omega_adapting_methods(method_parameters, self.kernel_params) switch_to_symbolic_relaxation_rates_for_omega_adapting_methods(method_parameters, self.kernel_params)
optimization['symbolic_field'] = data_handling.fields[self._pdf_arr_name] if fixed_loop_sizes:
optimization['symbolic_field'] = data_handling.fields[self._pdf_arr_name]
method_parameters['field_name'] = self._pdf_arr_name method_parameters['field_name'] = self._pdf_arr_name
method_parameters['temporary_field_name'] = self._tmp_arr_name method_parameters['temporary_field_name'] = self._tmp_arr_name
if time_step_order == 'stream_collide': if time_step_order == 'stream_collide':
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment