Skip to content
Snippets Groups Projects

Compare revisions

Changes are shown as if the source revision was being merged into the target revision. Learn more about comparing revisions.

Source

Select target project
No results found
Select Git revision
  • 66-absolute-access-is-probably-not-copied-correctly-after-_eval_subs
  • const_fix
  • fhennig/macos-and-arm
  • fhennig/random-numbers
  • fhennig/v2.0-deprecations
  • fma
  • gpu_bufferfield_fix
  • gpu_liveness_opts
  • holzer-master-patch-46757
  • hyteg
  • improved_comm
  • master
  • target_dh_refactoring
  • v2.0-dev
  • vectorization_sqrt_fix
  • zikeliml/124-rework-tutorials
  • zikeliml/Task-96-dotExporterForAST
  • last/Kerncraft
  • last/LLVM
  • last/OpenCL
  • release/0.2.1
  • release/0.2.10
  • release/0.2.11
  • release/0.2.12
  • release/0.2.13
  • release/0.2.14
  • release/0.2.15
  • release/0.2.2
  • release/0.2.3
  • release/0.2.4
  • release/0.2.6
  • release/0.2.7
  • release/0.2.8
  • release/0.2.9
  • release/0.3.0
  • release/0.3.1
  • release/0.3.2
  • release/0.3.3
  • release/0.3.4
  • release/0.4.0
  • release/0.4.1
  • release/0.4.2
  • release/0.4.3
  • release/0.4.4
  • release/1.0
  • release/1.0.1
  • release/1.1
  • release/1.1.1
  • release/1.2
  • release/1.3
  • release/1.3.1
  • release/1.3.2
  • release/1.3.3
  • release/1.3.4
  • release/1.3.5
  • release/1.3.6
  • release/1.3.7
  • release/2.0.dev0
58 results

Target

Select target project
  • anirudh.jonnalagadda/pystencils
  • hyteg/pystencils
  • jbadwaik/pystencils
  • jngrad/pystencils
  • itischler/pystencils
  • ob28imeq/pystencils
  • hoenig/pystencils
  • Bindgen/pystencils
  • hammer/pystencils
  • da15siwa/pystencils
  • holzer/pystencils
  • alexander.reinauer/pystencils
  • ec93ujoh/pystencils
  • Harke/pystencils
  • seitz/pystencils
  • pycodegen/pystencils
16 results
Select Git revision
  • VoF
  • abs
  • compare_fix
  • const_fix
  • gpu_liveness_opts
  • hyteg
  • improved_comm
  • jan_test
  • master
  • noFlux
  • philox-simd
  • target_dh_refactoring
  • test_martin
  • test_martin2
  • last/Kerncraft
  • last/LLVM
  • last/OpenCL
  • release/0.2.1
  • release/0.2.10
  • release/0.2.11
  • release/0.2.12
  • release/0.2.13
  • release/0.2.14
  • release/0.2.15
  • release/0.2.2
  • release/0.2.3
  • release/0.2.4
  • release/0.2.6
  • release/0.2.7
  • release/0.2.8
  • release/0.2.9
  • release/0.3.0
  • release/0.3.1
  • release/0.3.2
  • release/0.3.3
  • release/0.3.4
  • release/0.4.0
  • release/0.4.1
  • release/0.4.2
  • release/0.4.3
  • release/0.4.4
  • release/1.0
  • release/1.0.1
  • release/1.1
  • release/1.1.1
  • release/1.2
  • release/1.3
  • release/1.3.1
  • release/1.3.2
  • release/1.3.3
50 results
Show changes
Showing
with 239 additions and 110 deletions
......@@ -76,7 +76,7 @@ class Neumann(Boundary):
return hash("Neumann")
def __eq__(self, other):
return type(other) == Neumann
return type(other) is Neumann
class Dirichlet(Boundary):
......
......@@ -35,11 +35,11 @@ class FlagInterface:
>>> dh = create_data_handling((4, 5))
>>> fi = FlagInterface(dh, 'flag_field', np.uint8)
>>> assert dh.has_data('flag_field')
>>> fi.reserve_next_flag()
>>> int(fi.reserve_next_flag())
2
>>> fi.reserve_flag(4)
>>> int(fi.reserve_flag(4))
4
>>> fi.reserve_next_flag()
>>> int(fi.reserve_next_flag())
8
"""
......@@ -450,5 +450,6 @@ def create_boundary_kernel(field, index_field, stencil, boundary_functor, target
dir_symbol = TypedSymbol("dir", np.int32)
elements += [SympyAssignment(dir_symbol, index_field[0]('dir'))]
elements += boundary_functor(field, direction_symbol=dir_symbol, index_field=index_field)
config = CreateKernelConfig(index_fields=[index_field], target=target, **kernel_creation_args)
config = CreateKernelConfig(index_fields=[index_field], target=target, skip_independence_check=True,
**kernel_creation_args)
return create_kernel(elements, config=config)
......@@ -2,26 +2,22 @@ import warnings
import numpy as np
try:
# Try to import right away - assume compiled code is available
# compile with: python setup.py build_ext --inplace --use-cython
from pystencils.boundaries.createindexlistcython import create_boundary_neighbor_index_list_2d, \
create_boundary_neighbor_index_list_3d, create_boundary_cell_index_list_2d, create_boundary_cell_index_list_3d
import pyximport
pyximport.install(language_level=3)
cython_funcs_available = True
except ImportError:
try:
# If not, try development mode and import via pyximport
import pyximport
pyximport.install(language_level=3)
cython_funcs_available = True
except ImportError:
cython_funcs_available = False
if cython_funcs_available:
from pystencils.boundaries.createindexlistcython import create_boundary_neighbor_index_list_2d, \
create_boundary_neighbor_index_list_3d, create_boundary_cell_index_list_2d, \
create_boundary_cell_index_list_3d
cython_funcs_available = False
if cython_funcs_available:
from pystencils.boundaries.createindexlistcython import (
create_boundary_neighbor_index_list_2d,
create_boundary_neighbor_index_list_3d,
create_boundary_cell_index_list_2d,
create_boundary_cell_index_list_3d,
)
boundary_index_array_coordinate_names = ["x", "y", "z"]
direction_member_name = "dir"
......@@ -30,40 +26,59 @@ default_index_array_dtype = np.int32
def numpy_data_type_for_boundary_object(boundary_object, dim):
coordinate_names = boundary_index_array_coordinate_names[:dim]
return np.dtype([(name, default_index_array_dtype) for name in coordinate_names]
+ [(direction_member_name, default_index_array_dtype)]
+ [(i[0], i[1].numpy_dtype) for i in boundary_object.additional_data], align=True)
def _create_index_list_python(flag_field_arr, boundary_mask,
fluid_mask, stencil, single_link, inner_or_boundary=False, nr_of_ghost_layers=None):
return np.dtype(
[(name, default_index_array_dtype) for name in coordinate_names]
+ [(direction_member_name, default_index_array_dtype)]
+ [(i[0], i[1].numpy_dtype) for i in boundary_object.additional_data],
align=True,
)
def _create_index_list_python(
flag_field_arr,
boundary_mask,
fluid_mask,
stencil,
single_link,
inner_or_boundary=False,
nr_of_ghost_layers=None,
):
if inner_or_boundary and nr_of_ghost_layers is None:
raise ValueError("If inner_or_boundary is set True the number of ghost layers "
"around the inner domain has to be specified")
raise ValueError(
"If inner_or_boundary is set True the number of ghost layers "
"around the inner domain has to be specified"
)
if nr_of_ghost_layers is None:
nr_of_ghost_layers = 0
coordinate_names = boundary_index_array_coordinate_names[:len(flag_field_arr.shape)]
index_arr_dtype = np.dtype([(name, default_index_array_dtype) for name in coordinate_names]
+ [(direction_member_name, default_index_array_dtype)])
coordinate_names = boundary_index_array_coordinate_names[
: len(flag_field_arr.shape)
]
index_arr_dtype = np.dtype(
[(name, default_index_array_dtype) for name in coordinate_names]
+ [(direction_member_name, default_index_array_dtype)]
)
# boundary cells are extracted via np.where. To ensure continous memory access in the compute kernel these cells
# have to be sorted.
boundary_cells = np.transpose(np.nonzero(flag_field_arr == boundary_mask))
for i in range(len(flag_field_arr.shape)):
boundary_cells = boundary_cells[boundary_cells[:, i].argsort(kind='mergesort')]
boundary_cells = boundary_cells[boundary_cells[:, i].argsort(kind="mergesort")]
# First a set is created to save all fluid cells which are near boundary
fluid_cells = set()
for cell in boundary_cells:
cell = tuple(cell)
for dir_idx, direction in enumerate(stencil):
neighbor_cell = tuple([cell_i + dir_i for cell_i, dir_i in zip(cell, direction)])
neighbor_cell = tuple(
[cell_i + dir_i for cell_i, dir_i in zip(cell, direction)]
)
# prevent out ouf bounds access. If boundary cell is at the border, some stencil directions would be out.
if any(not 0 + nr_of_ghost_layers <= e < upper - nr_of_ghost_layers
for e, upper in zip(neighbor_cell, flag_field_arr.shape)):
if any(
not 0 + nr_of_ghost_layers <= e < upper - nr_of_ghost_layers
for e, upper in zip(neighbor_cell, flag_field_arr.shape)
):
continue
if flag_field_arr[neighbor_cell] & fluid_mask:
fluid_cells.add(neighbor_cell)
......@@ -83,9 +98,14 @@ def _create_index_list_python(flag_field_arr, boundary_mask,
cell = tuple(cell)
sum_cells = np.zeros(len(cell))
for dir_idx, direction in enumerate(stencil):
neighbor_cell = tuple([cell_i + dir_i for cell_i, dir_i in zip(cell, direction)])
neighbor_cell = tuple(
[cell_i + dir_i for cell_i, dir_i in zip(cell, direction)]
)
# prevent out ouf bounds access. If boundary cell is at the border, some stencil directions would be out.
if any(not 0 <= e < upper for e, upper in zip(neighbor_cell, flag_field_arr.shape)):
if any(
not 0 <= e < upper
for e, upper in zip(neighbor_cell, flag_field_arr.shape)
):
continue
if flag_field_arr[neighbor_cell] & checkmask:
if single_link:
......@@ -101,8 +121,15 @@ def _create_index_list_python(flag_field_arr, boundary_mask,
return np.array(result, dtype=index_arr_dtype)
def create_boundary_index_list(flag_field, stencil, boundary_mask, fluid_mask,
nr_of_ghost_layers=1, inner_or_boundary=True, single_link=False):
def create_boundary_index_list(
flag_field,
stencil,
boundary_mask,
fluid_mask,
nr_of_ghost_layers=1,
inner_or_boundary=True,
single_link=False,
):
"""Creates a numpy array storing links (connections) between domain cells and boundary cells.
Args:
......@@ -119,11 +146,20 @@ def create_boundary_index_list(flag_field, stencil, boundary_mask, fluid_mask,
"""
dim = len(flag_field.shape)
coordinate_names = boundary_index_array_coordinate_names[:dim]
index_arr_dtype = np.dtype([(name, default_index_array_dtype) for name in coordinate_names]
+ [(direction_member_name, default_index_array_dtype)])
index_arr_dtype = np.dtype(
[(name, default_index_array_dtype) for name in coordinate_names]
+ [(direction_member_name, default_index_array_dtype)]
)
stencil = np.array(stencil, dtype=default_index_array_dtype)
args = (flag_field, nr_of_ghost_layers, boundary_mask, fluid_mask, stencil, single_link)
args = (
flag_field,
nr_of_ghost_layers,
boundary_mask,
fluid_mask,
stencil,
single_link,
)
args_no_gl = (flag_field, boundary_mask, fluid_mask, stencil, single_link)
if cython_funcs_available:
......@@ -142,22 +178,42 @@ def create_boundary_index_list(flag_field, stencil, boundary_mask, fluid_mask,
return np.array(idx_list, dtype=index_arr_dtype)
else:
if flag_field.size > 1e6:
warnings.warn("Boundary setup may take very long! Consider installing cython to speed it up")
return _create_index_list_python(*args_no_gl, inner_or_boundary=inner_or_boundary,
nr_of_ghost_layers=nr_of_ghost_layers)
def create_boundary_index_array(flag_field, stencil, boundary_mask, fluid_mask, boundary_object,
nr_of_ghost_layers=1, inner_or_boundary=True, single_link=False):
idx_array = create_boundary_index_list(flag_field, stencil, boundary_mask, fluid_mask,
nr_of_ghost_layers, inner_or_boundary, single_link)
warnings.warn(
"Boundary setup may take very long! Consider installing cython to speed it up"
)
return _create_index_list_python(
*args_no_gl,
inner_or_boundary=inner_or_boundary,
nr_of_ghost_layers=nr_of_ghost_layers,
)
def create_boundary_index_array(
flag_field,
stencil,
boundary_mask,
fluid_mask,
boundary_object,
nr_of_ghost_layers=1,
inner_or_boundary=True,
single_link=False,
):
idx_array = create_boundary_index_list(
flag_field,
stencil,
boundary_mask,
fluid_mask,
nr_of_ghost_layers,
inner_or_boundary,
single_link,
)
dim = len(flag_field.shape)
if boundary_object.additional_data:
coordinate_names = boundary_index_array_coordinate_names[:dim]
index_arr_dtype = numpy_data_type_for_boundary_object(boundary_object, dim)
extended_idx_field = np.empty(len(idx_array), dtype=index_arr_dtype)
for prop in coordinate_names + ['dir']:
for prop in coordinate_names + ["dir"]:
extended_idx_field[prop] = idx_array[prop]
idx_array = extended_idx_field
......
# distutils: language=c
# Workaround for cython bug
# see https://stackoverflow.com/questions/8024805/cython-compiled-c-extension-importerror-dynamic-module-does-not-define-init-fu
WORKAROUND = "Something"
# cython: language_level=3str
import cython
......
File moved
......@@ -2,7 +2,7 @@ from copy import copy
from collections import defaultdict
from dataclasses import dataclass, field
from types import MappingProxyType
from typing import Union, Tuple, List, Dict, Callable, Any, DefaultDict
from typing import Union, Tuple, List, Dict, Callable, Any, DefaultDict, Iterable
from pystencils import Target, Backend, Field
from pystencils.typing.typed_sympy import BasicType
......@@ -78,6 +78,15 @@ class CreateKernelConfig:
"""
If OpenMP is active: whether multiple outer loops are permitted
"""
base_pointer_specification: Union[List[Iterable[str]], List[Iterable[int]]] = None
"""
Specification of how many and which intermediate pointers are created for a field access.
For example [ (0), (2,3,)] creates on base pointer for coordinates 2 and 3 and writes the offset for coordinate
zero directly in the field access. These specifications are defined dependent on the loop ordering.
This function translates more readable version into the specification above.
For more information see: `pystencils.transformations.create_intermediate_base_pointer`
"""
gpu_indexing: str = 'block'
"""
Either 'block' or 'line' , or custom indexing class, see `pystencils.gpu.AbstractIndexing`
......@@ -126,8 +135,9 @@ class CreateKernelConfig:
"""
skip_independence_check: bool = False
"""
Don't check that loop iterations are independent. This is needed e.g. for
periodicity kernel, that access the field outside the iteration bounds. Use with care!
By default the assignment list is checked for read/write independence. This means fields are only written at
locations where they are read. Doing so guarantees thread safety. In some cases e.g. for
periodicity kernel, this can not be assured and does the check needs to be deactivated. Use with care!
"""
class DataTypeFactory:
......
from pystencils.cpu.cpujit import make_python_function
from pystencils.cpu.kernelcreation import add_openmp, create_indexed_kernel, create_kernel
from pystencils.cpu.kernelcreation import add_openmp, create_indexed_kernel, create_kernel, add_pragmas
__all__ = ['create_kernel', 'create_indexed_kernel', 'add_openmp', 'make_python_function']
__all__ = ['create_kernel', 'create_indexed_kernel', 'add_openmp', 'add_pragmas', 'make_python_function']
......@@ -57,12 +57,14 @@ import tempfile
import textwrap
import time
import warnings
import pathlib
import numpy as np
from pystencils import FieldType
from pystencils.astnodes import LoopOverCoordinate
from pystencils.backends.cbackend import generate_c, get_headers, CFunction
from pystencils.backends.cbackend import generate_c, get_headers
from pystencils.backends.simd_instruction_sets import get_supported_instruction_sets
from pystencils.cpu.msvc_detection import get_environment
from pystencils.include import get_pystencils_include_path
from pystencils.kernel_wrapper import KernelWrapper
......@@ -122,15 +124,15 @@ def get_configuration_file_path():
# 1) Read path from environment variable if found
if 'PYSTENCILS_CONFIG' in os.environ:
return os.environ['PYSTENCILS_CONFIG'], True
return os.environ['PYSTENCILS_CONFIG']
# 2) Look in current directory for pystencils.json
elif os.path.exists("pystencils.json"):
return "pystencils.json", True
return "pystencils.json"
# 3) Try ~/.pystencils.json
elif os.path.exists(config_path_in_home):
return config_path_in_home, True
return config_path_in_home
else:
return config_path_in_home, False
return config_path_in_home
def create_folder(path, is_file):
......@@ -172,7 +174,11 @@ def read_config():
('restrict_qualifier', '__restrict__')
])
if platform.machine() == 'arm64':
default_compiler_config['flags'] = default_compiler_config['flags'].replace('-march=native ', '')
if 'sme' in get_supported_instruction_sets():
flag = '-march=armv8.7-a+sme '
else:
flag = ''
default_compiler_config['flags'] = default_compiler_config['flags'].replace('-march=native ', flag)
for libomp in ['/opt/local/lib/libomp/libomp.dylib', '/usr/local/lib/libomp.dylib',
'/opt/homebrew/lib/libomp.dylib']:
if os.path.exists(libomp):
......@@ -190,16 +196,22 @@ def read_config():
default_config = OrderedDict([('compiler', default_compiler_config),
('cache', default_cache_config)])
config_path, config_exists = get_configuration_file_path()
from fasteners import InterProcessLock
config_path = pathlib.Path(get_configuration_file_path())
config_path.parent.mkdir(parents=True, exist_ok=True)
config = default_config.copy()
if config_exists:
with open(config_path, 'r') as json_config_file:
loaded_config = json.load(json_config_file)
config = recursive_dict_update(config, loaded_config)
else:
create_folder(config_path, True)
with open(config_path, 'w') as f:
json.dump(config, f, indent=4)
lockfile = config_path.with_suffix(config_path.suffix + ".lock")
with InterProcessLock(lockfile):
if config_path.exists():
with open(config_path, 'r') as json_config_file:
loaded_config = json.load(json_config_file)
config = recursive_dict_update(config, loaded_config)
else:
with open(config_path, 'w') as f:
json.dump(config, f, indent=4)
if config['cache']['object_cache'] is not False:
config['cache']['object_cache'] = os.path.expanduser(config['cache']['object_cache']).format(pid=os.getpid())
......@@ -447,8 +459,6 @@ def create_function_boilerplate_code(parameter_info, name, ast_node, insert_chec
parameters.append(f"buffer_{field.name}.strides[{param.symbol.coordinate}] / {item_size}")
elif param.is_field_shape:
parameters.append(f"buffer_{param.field_name}.shape[{param.symbol.coordinate}]")
elif type(param.symbol) is CFunction:
continue
else:
extract_function, target_type = type_mapping[param.symbol.dtype.numpy_dtype.type]
pre_call_code += template_extract_scalar.format(extract_function=extract_function,
......@@ -617,7 +627,12 @@ def compile_and_load(ast, custom_backend=None):
cache_config = get_cache_config()
compiler_config = get_compiler_config()
function_prefix = '__declspec(dllexport)' if compiler_config['os'].lower() == 'windows' else ''
if compiler_config['os'].lower() == 'windows':
function_prefix = '__declspec(dllexport)'
elif ast.instruction_set and 'function_prefix' in ast.instruction_set:
function_prefix = ast.instruction_set['function_prefix']
else:
function_prefix = ''
code = ExtensionModuleCode(custom_backend=custom_backend)
code.add_function(ast, ast.function_name)
......
from typing import Union
import sympy as sp
import pystencils.astnodes as ast
from pystencils.simp.assignment_collection import AssignmentCollection
from pystencils.config import CreateKernelConfig
from pystencils.enums import Target, Backend
from pystencils.astnodes import Block, KernelFunction, LoopOverCoordinate, SympyAssignment
......@@ -13,12 +10,13 @@ from pystencils.typing.transformations import add_types
from pystencils.field import Field, FieldType
from pystencils.node_collection import NodeCollection
from pystencils.transformations import (
filtered_tree_iteration, get_base_buffer_index, get_optimal_loop_ordering, make_loop_over_domain,
filtered_tree_iteration, iterate_loops_by_depth, get_base_buffer_index, get_optimal_loop_ordering,
make_loop_over_domain, add_outer_loop_over_indexed_elements,
move_constants_before_loop, parse_base_pointer_info, resolve_buffer_accesses,
resolve_field_accesses, split_inner_loop)
def create_kernel(assignments: Union[AssignmentCollection, NodeCollection],
def create_kernel(assignments: NodeCollection,
config: CreateKernelConfig) -> KernelFunction:
"""Creates an abstract syntax tree for a kernel function, by taking a list of update rules.
......@@ -56,6 +54,8 @@ def create_kernel(assignments: Union[AssignmentCollection, NodeCollection],
loop_order = get_optimal_loop_ordering(fields_without_buffers)
loop_node, ghost_layer_info = make_loop_over_domain(body, iteration_slice=iteration_slice,
ghost_layers=ghost_layers, loop_order=loop_order)
loop_node = add_outer_loop_over_indexed_elements(loop_node)
ast_node = KernelFunction(loop_node, Target.CPU, Backend.C, compile_function=make_python_function,
ghost_layers=ghost_layer_info, function_name=function_name, assignments=assignments)
......@@ -76,7 +76,9 @@ def create_kernel(assignments: Union[AssignmentCollection, NodeCollection],
typed_split_groups = [[type_symbol(s) for s in split_group] for split_group in split_groups]
split_inner_loop(ast_node, typed_split_groups)
base_pointer_spec = [['spatialInner0'], ['spatialInner1']] if len(loop_order) >= 2 else [['spatialInner0']]
base_pointer_spec = config.base_pointer_specification
if base_pointer_spec is None:
base_pointer_spec = []
base_pointer_info = {field.name: parse_base_pointer_info(base_pointer_spec, loop_order,
field.spatial_dimensions, field.index_dimensions)
for field in fields_without_buffers}
......@@ -94,7 +96,7 @@ def create_kernel(assignments: Union[AssignmentCollection, NodeCollection],
return ast_node
def create_indexed_kernel(assignments: Union[AssignmentCollection, NodeCollection],
def create_indexed_kernel(assignments: NodeCollection,
config: CreateKernelConfig) -> KernelFunction:
"""
Similar to :func:`create_kernel`, but here not all cells of a field are updated but only cells with
......@@ -115,21 +117,24 @@ def create_indexed_kernel(assignments: Union[AssignmentCollection, NodeCollectio
fields_written = assignments.bound_fields
fields_read = assignments.rhs_fields
all_fields = fields_read.union(fields_written)
# extract the index fields based on the name. The original index field might have been modified
index_fields = [idx_field for idx_field in index_fields if idx_field.name in [f.name for f in all_fields]]
non_index_fields = [f for f in all_fields if f not in index_fields]
spatial_coordinates = {f.spatial_dimensions for f in non_index_fields}
assert len(spatial_coordinates) == 1, f"Non-index fields do not have the same number of spatial coordinates " \
f"Non index fields are {non_index_fields}, spatial coordinates are " \
f"{spatial_coordinates}"
spatial_coordinates = list(spatial_coordinates)[0]
assignments = assignments.all_assignments
assignments = add_types(assignments, config)
all_fields = fields_read.union(fields_written)
for index_field in index_fields:
index_field.field_type = FieldType.INDEXED
assert FieldType.is_indexed(index_field)
assert index_field.spatial_dimensions == 1, "Index fields have to be 1D"
non_index_fields = [f for f in all_fields if f not in index_fields]
spatial_coordinates = {f.spatial_dimensions for f in non_index_fields}
assert len(spatial_coordinates) == 1, "Non-index fields do not have the same number of spatial coordinates"
spatial_coordinates = list(spatial_coordinates)[0]
def get_coordinate_symbol_assignment(name):
for idx_field in index_fields:
assert isinstance(idx_field.dtype, StructType), "Index fields have to have a struct data type"
......@@ -211,3 +216,18 @@ def add_openmp(ast_node, schedule="static", num_threads=True, collapse=None, ass
if collapse:
prefix += f" collapse({collapse})"
loop_to_parallelize.prefix_lines.append(prefix)
def add_pragmas(ast_node, pragma_lines, nesting_depth=-1):
"""Prepends given pragma lines to all loops of specified nesting depth.
Args:
ast_node: pystencils abstract syntax tree
pragma_lines: Iterable of strings containing the pragma lines
nesting_depth: Nesting depth of the loops the pragmas should be applied to.
Outermost loop has depth 0.
A depth of -1 indicates the innermost loops.
"""
loop_nodes = iterate_loops_by_depth(ast_node, nesting_depth)
for n in loop_nodes:
n.prefix_lines += list(pragma_lines)
......@@ -130,7 +130,7 @@ def vectorize(kernel_ast: ast.KernelFunction, instruction_set: str = 'best',
if nontemporal and 'cachelineZero' in vector_is:
kernel_ast.use_all_written_field_sizes = True
strided = 'storeS' in vector_is and 'loadS' in vector_is
keep_loop_stop = '{loop_stop}' in vector_is['storeA' if assume_aligned else 'storeU']
keep_loop_stop = '{loop_stop}' in vector_is['storeA' if assume_aligned and 'storeA' in vector_is else 'storeU']
vectorize_inner_loops_and_adapt_load_stores(kernel_ast, assume_aligned, nontemporal,
strided, keep_loop_stop, assume_sufficient_line_padding,
default_float_type)
......@@ -140,12 +140,15 @@ def vectorize_inner_loops_and_adapt_load_stores(ast_node, assume_aligned, nontem
strided, keep_loop_stop, assume_sufficient_line_padding,
default_float_type):
"""Goes over all innermost loops, changes increment to vector width and replaces field accesses by vector type."""
vector_width = ast_node.instruction_set['width']
all_loops = filtered_tree_iteration(ast_node, ast.LoopOverCoordinate, stop_type=ast.SympyAssignment)
all_loops = list(filtered_tree_iteration(ast_node, ast.LoopOverCoordinate, stop_type=ast.SympyAssignment))
inner_loops = [loop for loop in all_loops if loop.is_innermost_loop]
zero_loop_counters = {loop.loop_counter_symbol: 0 for loop in all_loops}
vector_is = ast_node.instruction_set
assert vector_is, "The ast needs to hold information about the instruction_set for the vectorisation"
vector_width = vector_is['width']
vector_int_width = vector_is['intwidth']
for loop_node in inner_loops:
loop_range = loop_node.stop - loop_node.start
......@@ -174,8 +177,15 @@ def vectorize_inner_loops_and_adapt_load_stores(ast_node, assume_aligned, nontem
for indexed in loop_node.atoms(sp.Indexed):
base, index = indexed.args
if loop_counter_symbol in index.atoms(sp.Symbol):
if 'loadA' not in vector_is and 'storeA' not in vector_is and 'maskStoreA' not in vector_is:
# don't need to generate the alignment check when there are no aligned load/store instructions
aligned_access = False
else:
if not isinstance(vector_width, int):
raise NotImplementedError('Access alignment cannot be statically determined for sizeless '
'vector ISAs')
aligned_access = (index - loop_counter_symbol).subs(zero_loop_counters) % vector_width == 0
loop_counter_is_offset = loop_counter_symbol not in (index - loop_counter_symbol).atoms()
aligned_access = (index - loop_counter_symbol).subs(zero_loop_counters) == 0
stride = sp.simplify(index.subs({loop_counter_symbol: loop_counter_symbol + 1}) - index)
if not loop_counter_is_offset and (not strided or loop_counter_symbol in stride.atoms()):
successful = False
......@@ -204,7 +214,6 @@ def vectorize_inner_loops_and_adapt_load_stores(ast_node, assume_aligned, nontem
loop_node.step = vector_width
loop_node.subs(substitutions)
vector_int_width = ast_node.instruction_set['intwidth']
arg_1 = CastFunc(loop_counter_symbol, VectorType(loop_counter_symbol.dtype, vector_int_width))
arg_2 = CastFunc(tuple(range(vector_int_width if type(vector_int_width) is int else 2)),
VectorType(loop_counter_symbol.dtype, vector_int_width))
......@@ -223,7 +232,7 @@ def vectorize_inner_loops_and_adapt_load_stores(ast_node, assume_aligned, nontem
substitutions.update({s[0]: s[1] for s in zip(rng.result_symbols, new_result_symbols)})
rng._symbols_defined = set(new_result_symbols)
fast_subs(loop_node, substitutions, skip=lambda e: isinstance(e, RNGBase))
insert_vector_casts(loop_node, ast_node.instruction_set, default_float_type)
insert_vector_casts(loop_node, vector_is, default_float_type)
def mask_conditionals(loop_body):
......@@ -257,6 +266,27 @@ def insert_vector_casts(ast_node, instruction_set, default_float_type='double'):
handled_functions = (sp.Add, sp.Mul, vec_any, vec_all, DivFunc, sp.Abs)
def is_scalar(expr) -> bool:
if hasattr(expr, "dtype"):
if type(expr.dtype) is VectorType:
return False
# Else branch: If expr is a CastFunc, then whether the expression
# is scalar is determined by the argument (remember: vector casts
# are not inserted yet). Therefore, we must recurse into the args of
# expr below. Otherwise, this expression is atomic and in that case
# it is assumed to be scalar below.
if isinstance(expr, ast.ResolvedFieldAccess):
# expr.field is not in expr.args
return is_scalar(expr.field)
elif isinstance(expr, (vec_any, vec_all)):
return True
if not hasattr(expr, "args"):
return True
return all(is_scalar(arg) for arg in expr.args)
# TODO Vectorization Revamp: get rid of default_type
def visit_expr(expr, default_type='double', force_vectorize=False):
if isinstance(expr, VectorMemoryAccess):
......@@ -265,7 +295,7 @@ def insert_vector_casts(ast_node, instruction_set, default_float_type='double'):
elif isinstance(expr, CastFunc):
cast_type = expr.args[1]
arg = visit_expr(expr.args[0], default_type, force_vectorize)
assert cast_type in [BasicType('float32'), BasicType('float64')],\
assert cast_type in [BasicType('float32'), BasicType('float64')], \
f'Vectorization cannot vectorize type {cast_type}'
return expr.func(arg, VectorType(cast_type, instruction_set['width']))
elif expr.func is sp.Abs and 'abs' not in instruction_set:
......@@ -369,15 +399,15 @@ def insert_vector_casts(ast_node, instruction_set, default_float_type='double'):
# If either side contains a vectorized subexpression, both sides
# must be fully vectorized.
lhs_type = get_type_of_expression(assignment.lhs)
rhs_type = get_type_of_expression(subs_expr)
lhs_vectorized = type(lhs_type) is VectorType
rhs_vectorized = type(rhs_type) is VectorType
lhs_scalar = is_scalar(assignment.lhs)
rhs_scalar = is_scalar(subs_expr)
assignment.rhs = visit_expr(subs_expr, default_type, force_vectorize=lhs_vectorized or rhs_vectorized)
assignment.rhs = visit_expr(subs_expr, default_type, force_vectorize=not (lhs_scalar and rhs_scalar))
if isinstance(assignment.lhs, TypedSymbol):
if rhs_vectorized and not lhs_vectorized:
if lhs_scalar and not rhs_scalar:
lhs_type = get_type_of_expression(assignment.lhs)
rhs_type = get_type_of_expression(assignment.rhs)
new_lhs_type = VectorType(lhs_type, rhs_type.width)
new_lhs = TypedSymbol(assignment.lhs.name, new_lhs_type)
substitution_dict[assignment.lhs] = new_lhs
......
File moved