Skip to content
Snippets Groups Projects

Compare revisions

Changes are shown as if the source revision was being merged into the target revision. Learn more about comparing revisions.

Source

Select target project
No results found

Target

Select target project
No results found
Show changes
Commits on Source (44)
Showing
with 691 additions and 189 deletions
...@@ -3,6 +3,7 @@ import pytest ...@@ -3,6 +3,7 @@ import pytest
import tempfile import tempfile
import runpy import runpy
import sys import sys
import warnings
# Trigger config file reading / creation once - to avoid race conditions when multiple instances are creating it # Trigger config file reading / creation once - to avoid race conditions when multiple instances are creating it
# at the same time # at the same time
from pystencils.cpu import cpujit from pystencils.cpu import cpujit
...@@ -128,8 +129,11 @@ class IPyNbFile(pytest.File): ...@@ -128,8 +129,11 @@ class IPyNbFile(pytest.File):
exporter.exclude_input_prompt = True exporter.exclude_input_prompt = True
notebook_contents = self.fspath.open() notebook_contents = self.fspath.open()
notebook = nbformat.read(notebook_contents, 4)
code, _ = exporter.from_notebook_node(notebook) with warnings.catch_warnings():
warnings.filterwarnings("ignore", "IPython.core.inputsplitter is deprecated")
notebook = nbformat.read(notebook_contents, 4)
code, _ = exporter.from_notebook_node(notebook)
yield IPyNbTest(self.name, self, code) yield IPyNbTest(self.name, self, code)
def teardown(self): def teardown(self):
......
...@@ -310,6 +310,7 @@ class Block(Node): ...@@ -310,6 +310,7 @@ class Block(Node):
def insert_before(self, new_node, insert_before): def insert_before(self, new_node, insert_before):
new_node.parent = self new_node.parent = self
assert self._nodes.count(insert_before) == 1
idx = self._nodes.index(insert_before) idx = self._nodes.index(insert_before)
# move all assignment (definitions to the top) # move all assignment (definitions to the top)
...@@ -337,6 +338,7 @@ class Block(Node): ...@@ -337,6 +338,7 @@ class Block(Node):
return tmp return tmp
def replace(self, child, replacements): def replace(self, child, replacements):
assert self._nodes.count(child) == 1
idx = self._nodes.index(child) idx = self._nodes.index(child)
del self._nodes[idx] del self._nodes[idx]
if type(replacements) is list: if type(replacements) is list:
...@@ -516,12 +518,13 @@ class LoopOverCoordinate(Node): ...@@ -516,12 +518,13 @@ class LoopOverCoordinate(Node):
class SympyAssignment(Node): class SympyAssignment(Node):
def __init__(self, lhs_symbol, rhs_expr, is_const=True): def __init__(self, lhs_symbol, rhs_expr, is_const=True, use_auto=False):
super(SympyAssignment, self).__init__(parent=None) super(SympyAssignment, self).__init__(parent=None)
self._lhs_symbol = lhs_symbol self._lhs_symbol = lhs_symbol
self.rhs = sp.sympify(rhs_expr) self.rhs = sp.sympify(rhs_expr)
self._is_const = is_const self._is_const = is_const
self._is_declaration = self.__is_declaration() self._is_declaration = self.__is_declaration()
self.use_auto = use_auto
def __is_declaration(self): def __is_declaration(self):
if isinstance(self._lhs_symbol, cast_func): if isinstance(self._lhs_symbol, cast_func):
......
...@@ -4,16 +4,17 @@ from typing import Set ...@@ -4,16 +4,17 @@ from typing import Set
import numpy as np import numpy as np
import sympy as sp import sympy as sp
from sympy.core import S from sympy.core import S
from sympy.logic.boolalg import BooleanFalse, BooleanTrue
from sympy.printing.ccode import C89CodePrinter from sympy.printing.ccode import C89CodePrinter
from pystencils.astnodes import KernelFunction, Node from pystencils.astnodes import KernelFunction, Node
from pystencils.cpu.vectorization import vec_all, vec_any from pystencils.cpu.vectorization import vec_all, vec_any
from pystencils.data_types import ( from pystencils.data_types import (
PointerType, VectorType, address_of, cast_func, create_type, get_type_of_expression, PointerType, VectorType, address_of, cast_func, create_type, get_type_of_expression, reinterpret_cast_func,
reinterpret_cast_func, vector_memory_access) vector_memory_access)
from pystencils.fast_approximation import fast_division, fast_inv_sqrt, fast_sqrt from pystencils.fast_approximation import fast_division, fast_inv_sqrt, fast_sqrt
from pystencils.integer_functions import ( from pystencils.integer_functions import (
bit_shift_left, bit_shift_right, bitwise_and, bitwise_or, bitwise_xor, bit_shift_left, bit_shift_right, bitwise_and, bitwise_or, bitwise_xor, int_div, int_power_of_2, modulo_ceil)
int_div, int_power_of_2, modulo_ceil)
try: try:
from sympy.printing.ccode import C99CodePrinter as CCodePrinter from sympy.printing.ccode import C99CodePrinter as CCodePrinter
...@@ -32,9 +33,9 @@ def generate_c(ast_node: Node, ...@@ -32,9 +33,9 @@ def generate_c(ast_node: Node,
with_globals=True) -> str: with_globals=True) -> str:
"""Prints an abstract syntax tree node as C or CUDA code. """Prints an abstract syntax tree node as C or CUDA code.
This function does not need to distinguish between C, C++ or CUDA code, it just prints 'C-like' code as encoded This function does not need to distinguish for most AST nodes between C, C++ or CUDA code, it just prints 'C-like'
in the abstract syntax tree (AST). The AST is built differently for C or CUDA by calling different create_kernel code as encoded in the abstract syntax tree (AST). The AST is built differently for C or CUDA by calling different
functions. create_kernel functions.
Args: Args:
ast_node: ast_node:
...@@ -229,11 +230,15 @@ class CBackend: ...@@ -229,11 +230,15 @@ class CBackend:
def _print_SympyAssignment(self, node): def _print_SympyAssignment(self, node):
if node.is_declaration: if node.is_declaration:
if node.is_const: if node.use_auto:
prefix = 'const ' data_type = 'auto '
else: else:
prefix = '' if node.is_const:
data_type = prefix + self._print(node.lhs.dtype).replace(' const', '') + " " prefix = 'const '
else:
prefix = ''
data_type = prefix + self._print(node.lhs.dtype).replace(' const', '') + " "
return "%s%s = %s;" % (data_type, return "%s%s = %s;" % (data_type,
self.sympy_printer.doprint(node.lhs), self.sympy_printer.doprint(node.lhs),
self.sympy_printer.doprint(node.rhs)) self.sympy_printer.doprint(node.rhs))
...@@ -292,6 +297,10 @@ class CBackend: ...@@ -292,6 +297,10 @@ class CBackend:
return "" return ""
def _print_Conditional(self, node): def _print_Conditional(self, node):
if type(node.condition_expr) is BooleanTrue:
return self._print_Block(node.true_block)
elif type(node.condition_expr) is BooleanFalse:
return self._print_Block(node.false_block)
cond_type = get_type_of_expression(node.condition_expr) cond_type = get_type_of_expression(node.condition_expr)
if isinstance(cond_type, VectorType): if isinstance(cond_type, VectorType):
raise ValueError("Problem with Conditional inside vectorized loop - use vec_any or vec_all") raise ValueError("Problem with Conditional inside vectorized loop - use vec_any or vec_all")
...@@ -381,7 +390,8 @@ class CustomSympyPrinter(CCodePrinter): ...@@ -381,7 +390,8 @@ class CustomSympyPrinter(CCodePrinter):
elif expr.func == int_div: elif expr.func == int_div:
return "((%s) / (%s))" % (self._print(expr.args[0]), self._print(expr.args[1])) return "((%s) / (%s))" % (self._print(expr.args[0]), self._print(expr.args[1]))
else: else:
return super(CustomSympyPrinter, self)._print_Function(expr) arg_str = ', '.join(self._print(a) for a in expr.args)
return f'{expr.name}({arg_str})'
def _typed_number(self, number, dtype): def _typed_number(self, number, dtype):
res = self._print(number) res = self._print(number)
......
...@@ -34,6 +34,7 @@ def create_kernel(assignments: AssignmentOrAstNodeList, function_name: str = "ke ...@@ -34,6 +34,7 @@ def create_kernel(assignments: AssignmentOrAstNodeList, function_name: str = "ke
transformation :func:`pystencils.transformation.split_inner_loop` transformation :func:`pystencils.transformation.split_inner_loop`
iteration_slice: if not None, iteration is done only over this slice of the field iteration_slice: if not None, iteration is done only over this slice of the field
ghost_layers: a sequence of pairs for each coordinate with lower and upper nr of ghost layers ghost_layers: a sequence of pairs for each coordinate with lower and upper nr of ghost layers
that should be excluded from the iteration.
if None, the number of ghost layers is determined automatically and assumed to be equal for a if None, the number of ghost layers is determined automatically and assumed to be equal for a
all dimensions all dimensions
skip_independence_check: don't check that loop iterations are independent. This is needed e.g. for skip_independence_check: don't check that loop iterations are independent. This is needed e.g. for
......
...@@ -2,18 +2,17 @@ import warnings ...@@ -2,18 +2,17 @@ import warnings
from typing import Container, Union from typing import Container, Union
import sympy as sp import sympy as sp
from sympy.logic.boolalg import BooleanFunction
import pystencils.astnodes as ast import pystencils.astnodes as ast
from pystencils.backends.simd_instruction_sets import get_vector_instruction_set from pystencils.backends.simd_instruction_sets import get_vector_instruction_set
from pystencils.data_types import ( from pystencils.data_types import (
PointerType, TypedSymbol, VectorType, cast_func, collate_types, get_type_of_expression, PointerType, TypedSymbol, VectorType, cast_func, collate_types, get_type_of_expression, vector_memory_access)
vector_memory_access)
from pystencils.fast_approximation import fast_division, fast_inv_sqrt, fast_sqrt from pystencils.fast_approximation import fast_division, fast_inv_sqrt, fast_sqrt
from pystencils.field import Field from pystencils.field import Field
from pystencils.integer_functions import modulo_ceil, modulo_floor from pystencils.integer_functions import modulo_ceil, modulo_floor
from pystencils.sympyextensions import fast_subs from pystencils.sympyextensions import fast_subs
from pystencils.transformations import ( from pystencils.transformations import cut_loop, filtered_tree_iteration, replace_inner_stride_with_one
cut_loop, filtered_tree_iteration, replace_inner_stride_with_one)
# noinspection PyPep8Naming # noinspection PyPep8Naming
...@@ -177,7 +176,7 @@ def insert_vector_casts(ast_node): ...@@ -177,7 +176,7 @@ def insert_vector_casts(ast_node):
visit_expr(expr.args[4])) visit_expr(expr.args[4]))
elif isinstance(expr, cast_func): elif isinstance(expr, cast_func):
return expr return expr
elif expr.func in handled_functions or isinstance(expr, sp.Rel) or isinstance(expr, sp.boolalg.BooleanFunction): elif expr.func in handled_functions or isinstance(expr, sp.Rel) or isinstance(expr, BooleanFunction):
new_args = [visit_expr(a) for a in expr.args] new_args = [visit_expr(a) for a in expr.args]
arg_types = [get_type_of_expression(a) for a in new_args] arg_types = [get_type_of_expression(a) for a in new_args]
if not any(type(t) is VectorType for t in arg_types): if not any(type(t) is VectorType for t in arg_types):
......
...@@ -4,14 +4,14 @@ from functools import partial ...@@ -4,14 +4,14 @@ from functools import partial
from typing import Tuple from typing import Tuple
import numpy as np import numpy as np
import pystencils
import sympy as sp import sympy as sp
import sympy.codegen.ast import sympy.codegen.ast
from sympy.core.cache import cacheit
from sympy.logic.boolalg import Boolean, BooleanFunction
import pystencils
from pystencils.cache import memorycache, memorycache_if_hashable from pystencils.cache import memorycache, memorycache_if_hashable
from pystencils.utils import all_equal from pystencils.utils import all_equal
from sympy.core.cache import cacheit
from sympy.logic.boolalg import Boolean
try: try:
import llvmlite.ir as ir import llvmlite.ir as ir
...@@ -541,14 +541,20 @@ def get_type_of_expression(expr, ...@@ -541,14 +541,20 @@ def get_type_of_expression(expr,
elif isinstance(expr, sp.Indexed): elif isinstance(expr, sp.Indexed):
typed_symbol = expr.base.label typed_symbol = expr.base.label
return typed_symbol.dtype.base_type return typed_symbol.dtype.base_type
elif isinstance(expr, (sp.boolalg.Boolean, sp.boolalg.BooleanFunction)): elif isinstance(expr, (Boolean, BooleanFunction)):
# if any arg is of vector type return a vector boolean, else return a normal scalar boolean # if any arg is of vector type return a vector boolean, else return a normal scalar boolean
result = create_type("bool") result = create_type("bool")
vec_args = [get_type(a) for a in expr.args if isinstance(get_type(a), VectorType)] vec_args = [get_type(a) for a in expr.args if isinstance(get_type(a), VectorType)]
if vec_args: if vec_args:
result = VectorType(result, width=vec_args[0].width) result = VectorType(result, width=vec_args[0].width)
return result return result
elif isinstance(expr, (sp.Pow, sp.Sum, sp.Product)): elif isinstance(expr, sp.Pow):
base_type = get_type(expr.args[0])
if expr.exp.is_integer:
return base_type
else:
return collate_types([create_type(default_float_type), base_type])
elif isinstance(expr, (sp.Sum, sp.Product)):
return get_type(expr.args[0]) return get_type(expr.args[0])
elif isinstance(expr, sp.Expr): elif isinstance(expr, sp.Expr):
expr: sp.Expr expr: sp.Expr
...@@ -821,3 +827,6 @@ class TypedImaginaryUnit(TypedSymbol): ...@@ -821,3 +827,6 @@ class TypedImaginaryUnit(TypedSymbol):
__xnew__ = staticmethod(__new_stage2__) __xnew__ = staticmethod(__new_stage2__)
__xnew_cached_ = staticmethod(cacheit(__new_stage2__)) __xnew_cached_ = staticmethod(cacheit(__new_stage2__))
def __getnewargs__(self):
return (self.dtype,)
...@@ -109,11 +109,14 @@ class ParallelDataHandling(DataHandling): ...@@ -109,11 +109,14 @@ class ParallelDataHandling(DataHandling):
if hasattr(values_per_cell, '__len__'): if hasattr(values_per_cell, '__len__'):
raise NotImplementedError("Parallel data handling does not support multiple index dimensions") raise NotImplementedError("Parallel data handling does not support multiple index dimensions")
self._fieldInformation[name] = {'ghost_layers': ghost_layers, self._fieldInformation[name] = {
'values_per_cell': values_per_cell, 'ghost_layers': ghost_layers,
'layout': layout, 'values_per_cell': values_per_cell,
'dtype': dtype, 'layout': layout,
'alignment': alignment} 'dtype': dtype,
'alignment': alignment,
'field_type': field_type,
}
layout_map = {'fzyx': wlb.field.Layout.fzyx, 'zyxf': wlb.field.Layout.zyxf, layout_map = {'fzyx': wlb.field.Layout.fzyx, 'zyxf': wlb.field.Layout.zyxf,
'f': wlb.field.Layout.fzyx, 'f': wlb.field.Layout.fzyx,
......
...@@ -100,6 +100,7 @@ class SerialDataHandling(DataHandling): ...@@ -100,6 +100,7 @@ class SerialDataHandling(DataHandling):
'layout': layout, 'layout': layout,
'dtype': dtype, 'dtype': dtype,
'alignment': alignment, 'alignment': alignment,
'field_type': field_type,
} }
index_dimensions = len(values_per_cell) index_dimensions = len(values_per_cell)
......
import warnings import warnings
from collections import defaultdict from collections import defaultdict
import itertools
import numpy as np import numpy as np
import sympy as sp import sympy as sp
from pystencils.field import Field from pystencils.field import Field
from pystencils.stencil import direction_string_to_offset
from pystencils.sympyextensions import multidimensional_sum, prod from pystencils.sympyextensions import multidimensional_sum, prod
from pystencils.utils import LinearEquationSystem, fully_contains from pystencils.utils import LinearEquationSystem, fully_contains
...@@ -228,3 +230,120 @@ class FiniteDifferenceStencilDerivation: ...@@ -228,3 +230,120 @@ class FiniteDifferenceStencilDerivation:
def __repr__(self): def __repr__(self):
return "Finite difference stencil of accuracy {}, isotropic error: {}".format(self.accuracy, return "Finite difference stencil of accuracy {}, isotropic error: {}".format(self.accuracy,
self.is_isotropic) self.is_isotropic)
class FiniteDifferenceStaggeredStencilDerivation:
"""Derives a finite difference stencil for application at a staggered position
Args:
neighbor: the neighbor direction string or vector at whose staggered position to calculate the derivative
dim: how many dimensions (2 or 3)
derivative: a tuple of directions over which to perform derivatives
"""
def __init__(self, neighbor, dim, derivative=tuple()):
if type(neighbor) is str:
neighbor = direction_string_to_offset(neighbor)
if dim == 2:
assert neighbor[dim:] == 0
assert derivative is tuple() or max(derivative) < dim
neighbor = sp.Matrix(neighbor[:dim])
pos = neighbor / 2
def unitvec(i):
"""return the `i`-th unit vector in three dimensions"""
a = np.zeros(dim, dtype=int)
a[i] = 1
return a
def flipped(a, i):
"""return `a` with its `i`-th element's sign flipped"""
a = a.copy()
a[i] *= -1
return a
# determine the points to use, coordinates are relative to position
points = []
if np.linalg.norm(neighbor, 1) == 1:
main_points = [neighbor / 2, neighbor / -2]
elif np.linalg.norm(neighbor, 1) == 2:
nonzero_indices = [i for i, v in enumerate(neighbor) if v != 0 and i < dim]
main_points = [neighbor / 2, neighbor / -2, flipped(neighbor / 2, nonzero_indices[0]),
flipped(neighbor / -2, nonzero_indices[0])]
else:
main_points = [neighbor.multiply_elementwise(sp.Matrix(c) / 2)
for c in itertools.product([-1, 1], repeat=3)]
points += main_points
zero_indices = [i for i, v in enumerate(neighbor) if v == 0 and i < dim]
for i in zero_indices:
points += [point + sp.Matrix(unitvec(i)) for point in main_points]
points += [point - sp.Matrix(unitvec(i)) for point in main_points]
points_tuple = tuple([tuple(p) for p in points])
self._stencil = points_tuple
# determine the stencil weights
if len(derivative) == 0:
weights = None
else:
derivation = FiniteDifferenceStencilDerivation(derivative, points_tuple).get_stencil()
if not derivation.accuracy:
raise Exception('the requested derivative cannot be performed with the available neighbors')
weights = derivation.weights
# if the weights are underdefined, we can choose the free symbols to find the sparsest stencil
free_weights = set(itertools.chain(*[w.free_symbols for w in weights]))
if len(free_weights) > 0:
zero_counts = defaultdict(list)
for values in itertools.product([-1, -sp.Rational(1, 2), 0, 1, sp.Rational(1, 2)],
repeat=len(free_weights)):
subs = {free_weight: value for free_weight, value in zip(free_weights, values)}
weights = [w.subs(subs) for w in derivation.weights]
if not all(a == 0 for a in weights):
zero_count = sum([1 for w in weights if w == 0])
zero_counts[zero_count].append(weights)
best = zero_counts[max(zero_counts.keys())]
if len(best) > 1: # if there are multiple, pick the one that contains a nonzero center weight
center = [tuple(p + pos) for p in points].index((0, 0, 0))
best = [b for b in best if b[center] != 0]
if len(best) > 1:
raise NotImplementedError("more than one suitable set of weights found, don't know how to proceed")
weights = best[0]
assert weights
points_tuple = tuple([tuple(p + pos) for p in points])
self._points = points_tuple
self._weights = weights
@property
def points(self):
"""return the points of the stencil"""
return self._points
@property
def stencil(self):
"""return the points of the stencil relative to the staggered position specified by neighbor"""
return self._stencil
@property
def weights(self):
"""return the weights of the stencil"""
assert self._weights is not None
return self._weights
def visualize(self):
if self._weights is None:
ws = None
else:
ws = np.array([w for w in self.weights if w != 0], dtype=float)
pts = np.array([p for i, p in enumerate(self.points) if self.weights[i] != 0], dtype=int)
from pystencils.stencil import plot
plot(pts, data=ws)
def apply(self, field):
if field.index_dimensions == 0:
return sum([field.__getitem__(point) * weight for point, weight in zip(self.points, self.weights)])
else:
total = field.neighbor_vector(self.points[0]) * self.weights[0]
for point, weight in zip(self.points[1:], self.weights[1:]):
total += field.neighbor_vector(point) * weight
return total
...@@ -15,7 +15,7 @@ import pystencils ...@@ -15,7 +15,7 @@ import pystencils
from pystencils.alignedarray import aligned_empty from pystencils.alignedarray import aligned_empty
from pystencils.data_types import StructType, TypedSymbol, create_type from pystencils.data_types import StructType, TypedSymbol, create_type
from pystencils.kernelparameters import FieldShapeSymbol, FieldStrideSymbol from pystencils.kernelparameters import FieldShapeSymbol, FieldStrideSymbol
from pystencils.stencil import direction_string_to_offset, offset_to_direction_string from pystencils.stencil import direction_string_to_offset, offset_to_direction_string, inverse_direction
from pystencils.sympyextensions import is_integer_sequence from pystencils.sympyextensions import is_integer_sequence
__all__ = ['Field', 'fields', 'FieldType', 'AbstractField'] __all__ = ['Field', 'fields', 'FieldType', 'AbstractField']
...@@ -34,6 +34,8 @@ class FieldType(Enum): ...@@ -34,6 +34,8 @@ class FieldType(Enum):
CUSTOM = 3 CUSTOM = 3
# staggered field # staggered field
STAGGERED = 4 STAGGERED = 4
# staggered field that reverses sign when accessed via opposite direction
STAGGERED_FLUX = 5
@staticmethod @staticmethod
def is_generic(field): def is_generic(field):
...@@ -58,7 +60,12 @@ class FieldType(Enum): ...@@ -58,7 +60,12 @@ class FieldType(Enum):
@staticmethod @staticmethod
def is_staggered(field): def is_staggered(field):
assert isinstance(field, Field) assert isinstance(field, Field)
return field.field_type == FieldType.STAGGERED return field.field_type == FieldType.STAGGERED or field.field_type == FieldType.STAGGERED_FLUX
@staticmethod
def is_staggered_flux(field):
assert isinstance(field, Field)
return field.field_type == FieldType.STAGGERED_FLUX
def fields(description=None, index_dimensions=0, layout=None, field_type=FieldType.GENERIC, **kwargs): def fields(description=None, index_dimensions=0, layout=None, field_type=FieldType.GENERIC, **kwargs):
...@@ -83,7 +90,7 @@ def fields(description=None, index_dimensions=0, layout=None, field_type=FieldTy ...@@ -83,7 +90,7 @@ def fields(description=None, index_dimensions=0, layout=None, field_type=FieldTy
Format string can be left out, field names are taken from keyword arguments. Format string can be left out, field names are taken from keyword arguments.
>>> fields(f1=arr_s, f2=arr_s) >>> fields(f1=arr_s, f2=arr_s)
[f1, f2] [f1: double[20,20], f2: double[20,20]]
The keyword names ``index_dimension`` and ``layout`` have special meaning, don't use them for field names The keyword names ``index_dimension`` and ``layout`` have special meaning, don't use them for field names
>>> f = fields(f=arr_v, index_dimensions=1) >>> f = fields(f=arr_v, index_dimensions=1)
...@@ -392,7 +399,19 @@ class Field(AbstractField): ...@@ -392,7 +399,19 @@ class Field(AbstractField):
return self.dtype.numpy_dtype.itemsize return self.dtype.numpy_dtype.itemsize
def __repr__(self): def __repr__(self):
return self._field_name if any(isinstance(s, sp.Symbol) for s in self.spatial_shape):
spatial_shape_str = f'{self.spatial_dimensions}d'
else:
spatial_shape_str = ','.join(str(i) for i in self.spatial_shape)
index_shape_str = ','.join(str(i) for i in self.index_shape)
if self.index_shape:
return f'{self._field_name}({index_shape_str}): {self.dtype}[{spatial_shape_str}]'
else:
return f'{self._field_name}: {self.dtype}[{spatial_shape_str}]'
def __str__(self):
return self.name
def neighbor(self, coord_id, offset): def neighbor(self, coord_id, offset):
offset_list = [0] * self.spatial_dimensions offset_list = [0] * self.spatial_dimensions
...@@ -407,19 +426,37 @@ class Field(AbstractField): ...@@ -407,19 +426,37 @@ class Field(AbstractField):
index_shape = self.index_shape index_shape = self.index_shape
if len(index_shape) == 0: if len(index_shape) == 0:
return sp.Matrix([self.center]) return sp.Matrix([self.center])
if len(index_shape) == 1: elif len(index_shape) == 1:
return sp.Matrix([self(i) for i in range(index_shape[0])]) return sp.Matrix([self(i) for i in range(index_shape[0])])
elif len(index_shape) == 2: elif len(index_shape) == 2:
def cb(*args): return sp.Matrix([[self(i, j) for j in range(index_shape[1])] for i in range(index_shape[0])])
r = self.__call__(*args) elif len(index_shape) == 3:
return r return sp.Matrix([[[self(i, j, k) for k in range(index_shape[2])]
return sp.Matrix(*index_shape, cb) for j in range(index_shape[1])] for i in range(index_shape[0])])
else:
raise NotImplementedError("center_vector is not implemented for more than 3 index dimensions")
@property @property
def center(self): def center(self):
center = tuple([0] * self.spatial_dimensions) center = tuple([0] * self.spatial_dimensions)
return Field.Access(self, center) return Field.Access(self, center)
def neighbor_vector(self, offset):
"""Like neighbor, but returns the entire vector/tensor stored at offset."""
if self.spatial_dimensions == 2 and len(offset) == 3:
assert offset[2] == 0
offset = offset[:2]
if self.index_dimensions == 0:
return sp.Matrix([self.__getitem__(offset)])
elif self.index_dimensions == 1:
return sp.Matrix([self.__getitem__(offset)(i) for i in range(self.index_shape[0])])
elif self.index_dimensions == 2:
return sp.Matrix([[self.__getitem__(offset)(i, k) for k in range(self.index_shape[1])]
for i in range(self.index_shape[0])])
else:
raise NotImplementedError("neighbor_vector is not implemented for more than 2 index dimensions")
def __getitem__(self, offset): def __getitem__(self, offset):
if type(offset) is np.ndarray: if type(offset) is np.ndarray:
offset = tuple(offset) offset = tuple(offset)
...@@ -466,6 +503,7 @@ class Field(AbstractField): ...@@ -466,6 +503,7 @@ class Field(AbstractField):
""" """
assert FieldType.is_staggered(self) assert FieldType.is_staggered(self)
offset_orig = offset
if type(offset) is np.ndarray: if type(offset) is np.ndarray:
offset = tuple(offset) offset = tuple(offset)
if type(offset) is str: if type(offset) is str:
...@@ -477,20 +515,29 @@ class Field(AbstractField): ...@@ -477,20 +515,29 @@ class Field(AbstractField):
raise ValueError("Wrong number of spatial indices: " raise ValueError("Wrong number of spatial indices: "
"Got %d, expected %d" % (len(offset), self.spatial_dimensions)) "Got %d, expected %d" % (len(offset), self.spatial_dimensions))
offset = list(offset) prefactor = 1
neighbor = [0] * len(offset) neighbor_vec = [0] * len(offset)
for i, o in enumerate(offset): for i in range(self.spatial_dimensions):
if (o + sp.Rational(1, 2)).is_Integer: if (offset[i] + sp.Rational(1, 2)).is_Integer:
offset[i] += sp.Rational(1, 2) neighbor_vec[i] = sp.sign(offset[i])
neighbor[i] = 1 neighbor = offset_to_direction_string(neighbor_vec)
neighbor = offset_to_direction_string(neighbor) if neighbor not in self.staggered_stencil:
neighbor_vec = inverse_direction(neighbor_vec)
neighbor = offset_to_direction_string(neighbor_vec)
if FieldType.is_staggered_flux(self):
prefactor = -1
if neighbor not in self.staggered_stencil:
raise ValueError("{} is not a valid neighbor for the {} stencil".format(offset_orig,
self.staggered_stencil_name))
offset = tuple(sp.Matrix(offset) - sp.Rational(1, 2) * sp.Matrix(neighbor_vec))
idx = self.staggered_stencil.index(neighbor) idx = self.staggered_stencil.index(neighbor)
offset = tuple(offset)
if self.index_dimensions == 1: # this field stores a scalar value at each staggered position if self.index_dimensions == 1: # this field stores a scalar value at each staggered position
if index is not None: if index is not None:
raise ValueError("Cannot specify an index for a scalar staggered field") raise ValueError("Cannot specify an index for a scalar staggered field")
return Field.Access(self, offset, (idx,)) return prefactor * Field.Access(self, offset, (idx,))
else: # this field stores a vector or tensor at each staggered position else: # this field stores a vector or tensor at each staggered position
if index is None: if index is None:
raise ValueError("Wrong number of indices: " raise ValueError("Wrong number of indices: "
...@@ -503,34 +550,58 @@ class Field(AbstractField): ...@@ -503,34 +550,58 @@ class Field(AbstractField):
raise ValueError("Wrong number of indices: " raise ValueError("Wrong number of indices: "
"Got %d, expected %d" % (len(index), self.index_dimensions - 1)) "Got %d, expected %d" % (len(index), self.index_dimensions - 1))
return Field.Access(self, offset, (idx, *index)) return prefactor * Field.Access(self, offset, (idx, *index))
def staggered_vector_access(self, offset):
"""Like staggered_access, but returns the entire vector/tensor stored at offset."""
assert FieldType.is_staggered(self)
if self.index_dimensions == 1:
return sp.Matrix([self.staggered_access(offset)])
elif self.index_dimensions == 2:
return sp.Matrix([self.staggered_access(offset, i) for i in range(self.index_shape[1])])
elif self.index_dimensions == 3:
return sp.Matrix([[self.staggered_access(offset, (i, k)) for k in range(self.index_shape[2])]
for i in range(self.index_shape[1])])
else:
raise NotImplementedError("staggered_vector_access is not implemented for more than 3 index dimensions")
@property @property
def staggered_stencil(self): def staggered_stencil(self):
assert FieldType.is_staggered(self) assert FieldType.is_staggered(self)
stencils = { stencils = {
2: { 2: {
2: ["E", "N"], # D2Q5 2: ["W", "S"], # D2Q5
4: ["E", "N", "NE", "SE"] # D2Q9 4: ["W", "S", "SW", "NW"] # D2Q9
}, },
3: { 3: {
3: ["E", "N", "T"], # D3Q7 3: ["W", "S", "B"], # D3Q7
7: ["E", "N", "T", "TNE", "BNE", "TSE", "BSE "], # D3Q15 7: ["W", "S", "B", "BSW", "TSW", "BNW", "TNW"], # D3Q15
9: ["E", "N", "T", "NE", "SE", "TE", "BE", "TN", "BN"], # D3Q19 9: ["W", "S", "B", "SW", "NW", "BW", "TW", "BS", "TS"], # D3Q19
13: ["E", "N", "T", "NE", "SE", "TE", "BE", "TN", "BN", "TNE", "BNE", "TSE", "BSE"] # D3Q27 13: ["W", "S", "B", "SW", "NW", "BW", "TW", "BS", "TS", "BSW", "TSW", "BNW", "TNW"] # D3Q27
} }
} }
if not self.index_shape[0] in stencils[self.spatial_dimensions]: if not self.index_shape[0] in stencils[self.spatial_dimensions]:
raise ValueError("No known stencil has {} staggered points".format(self.index_shape[0])) raise ValueError("No known stencil has {} staggered points".format(self.index_shape[0]))
return stencils[self.spatial_dimensions][self.index_shape[0]] return stencils[self.spatial_dimensions][self.index_shape[0]]
@property
def staggered_stencil_name(self):
assert FieldType.is_staggered(self)
return "D%dQ%d" % (self.spatial_dimensions, self.index_shape[0] * 2 + 1)
def __call__(self, *args, **kwargs): def __call__(self, *args, **kwargs):
center = tuple([0] * self.spatial_dimensions) center = tuple([0] * self.spatial_dimensions)
return Field.Access(self, center)(*args, **kwargs) return Field.Access(self, center)(*args, **kwargs)
def hashable_contents(self): def hashable_contents(self):
dth = hash(self._dtype) return (self._layout,
return self._layout, self.shape, self.strides, dth, self.field_type, self._field_name, self.latex_name self.shape,
self.strides,
self.field_type,
self._field_name,
self.latex_name,
self._dtype)
def __hash__(self): def __hash__(self):
return hash(self.hashable_contents()) return hash(self.hashable_contents())
...@@ -774,7 +845,7 @@ class Field(AbstractField): ...@@ -774,7 +845,7 @@ class Field(AbstractField):
assert FieldType.is_staggered(self._field) assert FieldType.is_staggered(self._field)
neighbor = self._field.staggered_stencil[index] neighbor = self._field.staggered_stencil[index]
neighbor = direction_string_to_offset(neighbor, self._field.spatial_dimensions) neighbor = direction_string_to_offset(neighbor, self._field.spatial_dimensions)
return [(o - sp.Rational(neighbor[i], 2)) for i, o in enumerate(offsets)] return [(o - sp.Rational(int(neighbor[i]), 2)) for i, o in enumerate(offsets)]
def _latex(self, _): def _latex(self, _):
n = self._field.latex_name if self._field.latex_name else self._field.name n = self._field.latex_name if self._field.latex_name else self._field.name
......
import itertools
from types import MappingProxyType from types import MappingProxyType
from itertools import combinations
import sympy as sp import sympy as sp
from pystencils.assignment import Assignment from pystencils.assignment import Assignment
from pystencils.astnodes import Block, Conditional, LoopOverCoordinate, SympyAssignment from pystencils.astnodes import Block, Conditional, LoopOverCoordinate, SympyAssignment
from pystencils.cpu.vectorization import vectorize from pystencils.cpu.vectorization import vectorize
from pystencils.field import Field, FieldType
from pystencils.gpucuda.indexing import indexing_creator_from_params from pystencils.gpucuda.indexing import indexing_creator_from_params
from pystencils.simp.assignment_collection import AssignmentCollection from pystencils.simp.assignment_collection import AssignmentCollection
from pystencils.stencil import direction_string_to_offset, inverse_direction_string
from pystencils.transformations import ( from pystencils.transformations import (
loop_blocking, move_constants_before_loop, remove_conditionals_in_staggered_kernel) loop_blocking, move_constants_before_loop, remove_conditionals_in_staggered_kernel)
...@@ -23,7 +25,9 @@ def create_kernel(assignments, ...@@ -23,7 +25,9 @@ def create_kernel(assignments,
cpu_blocking=None, cpu_blocking=None,
gpu_indexing='block', gpu_indexing='block',
gpu_indexing_params=MappingProxyType({}), gpu_indexing_params=MappingProxyType({}),
use_textures_for_interpolation=True): use_textures_for_interpolation=True,
cpu_prepend_optimizations=[],
use_auto_for_assignments=False):
""" """
Creates abstract syntax tree (AST) of kernel, using a list of update equations. Creates abstract syntax tree (AST) of kernel, using a list of update equations.
...@@ -34,9 +38,9 @@ def create_kernel(assignments, ...@@ -34,9 +38,9 @@ def create_kernel(assignments,
to type to type
iteration_slice: rectangular subset to iterate over, if not specified the complete non-ghost layer \ iteration_slice: rectangular subset to iterate over, if not specified the complete non-ghost layer \
part of the field is iterated over part of the field is iterated over
ghost_layers: if left to default, the number of necessary ghost layers is determined automatically ghost_layers: a single integer specifies the ghost layer count at all borders, can also be a sequence of
a single integer specifies the ghost layer count at all borders, can also be a sequence of pairs ``[(x_lower_gl, x_upper_gl), .... ]``. These layers are excluded from the iteration.
pairs ``[(x_lower_gl, x_upper_gl), .... ]`` If left to default, the number of ghost layers is determined automatically.
skip_independence_check: don't check that loop iterations are independent. This is needed e.g. for skip_independence_check: don't check that loop iterations are independent. This is needed e.g. for
periodicity kernel, that access the field outside the iteration bounds. Use with care! periodicity kernel, that access the field outside the iteration bounds. Use with care!
cpu_openmp: True or number of threads for OpenMP parallelization, False for no OpenMP cpu_openmp: True or number of threads for OpenMP parallelization, False for no OpenMP
...@@ -47,6 +51,7 @@ def create_kernel(assignments, ...@@ -47,6 +51,7 @@ def create_kernel(assignments,
gpu_indexing: either 'block' or 'line' , or custom indexing class, see `AbstractIndexing` gpu_indexing: either 'block' or 'line' , or custom indexing class, see `AbstractIndexing`
gpu_indexing_params: dict with indexing parameters (constructor parameters of indexing class) gpu_indexing_params: dict with indexing parameters (constructor parameters of indexing class)
e.g. for 'block' one can specify '{'block_size': (20, 20, 10) }' e.g. for 'block' one can specify '{'block_size': (20, 20, 10) }'
cpu_prepend_optimizations: list of extra optimizations to perform first on the AST
Returns: Returns:
abstract syntax tree (AST) object, that can either be printed as source code with `show_code` or abstract syntax tree (AST) object, that can either be printed as source code with `show_code` or
...@@ -84,6 +89,8 @@ def create_kernel(assignments, ...@@ -84,6 +89,8 @@ def create_kernel(assignments,
ast = create_kernel(assignments, type_info=data_type, split_groups=split_groups, ast = create_kernel(assignments, type_info=data_type, split_groups=split_groups,
iteration_slice=iteration_slice, ghost_layers=ghost_layers, iteration_slice=iteration_slice, ghost_layers=ghost_layers,
skip_independence_check=skip_independence_check) skip_independence_check=skip_independence_check)
for optimization in cpu_prepend_optimizations:
optimization(ast)
omp_collapse = None omp_collapse = None
if cpu_blocking: if cpu_blocking:
omp_collapse = loop_blocking(ast, cpu_blocking) omp_collapse = loop_blocking(ast, cpu_blocking)
...@@ -96,12 +103,10 @@ def create_kernel(assignments, ...@@ -96,12 +103,10 @@ def create_kernel(assignments,
vectorize(ast, **cpu_vectorize_info) vectorize(ast, **cpu_vectorize_info)
else: else:
raise ValueError("Invalid value for cpu_vectorize_info") raise ValueError("Invalid value for cpu_vectorize_info")
return ast
elif target == 'llvm': elif target == 'llvm':
from pystencils.llvm import create_kernel from pystencils.llvm import create_kernel
ast = create_kernel(assignments, type_info=data_type, split_groups=split_groups, ast = create_kernel(assignments, type_info=data_type, split_groups=split_groups,
iteration_slice=iteration_slice, ghost_layers=ghost_layers) iteration_slice=iteration_slice, ghost_layers=ghost_layers)
return ast
elif target == 'gpu': elif target == 'gpu':
from pystencils.gpucuda import create_cuda_kernel from pystencils.gpucuda import create_cuda_kernel
ast = create_cuda_kernel(assignments, type_info=data_type, ast = create_cuda_kernel(assignments, type_info=data_type,
...@@ -109,10 +114,15 @@ def create_kernel(assignments, ...@@ -109,10 +114,15 @@ def create_kernel(assignments,
iteration_slice=iteration_slice, ghost_layers=ghost_layers, iteration_slice=iteration_slice, ghost_layers=ghost_layers,
skip_independence_check=skip_independence_check, skip_independence_check=skip_independence_check,
use_textures_for_interpolation=use_textures_for_interpolation) use_textures_for_interpolation=use_textures_for_interpolation)
return ast
else: else:
raise ValueError("Unknown target %s. Has to be one of 'cpu', 'gpu' or 'llvm' " % (target,)) raise ValueError("Unknown target %s. Has to be one of 'cpu', 'gpu' or 'llvm' " % (target,))
if use_auto_for_assignments:
for a in ast.atoms(SympyAssignment):
a.use_auto = True
return ast
def create_indexed_kernel(assignments, def create_indexed_kernel(assignments,
index_fields, index_fields,
...@@ -186,104 +196,132 @@ def create_indexed_kernel(assignments, ...@@ -186,104 +196,132 @@ def create_indexed_kernel(assignments,
raise ValueError("Unknown target %s. Has to be either 'cpu' or 'gpu'" % (target,)) raise ValueError("Unknown target %s. Has to be either 'cpu' or 'gpu'" % (target,))
def create_staggered_kernel(staggered_field, expressions, subexpressions=(), target='cpu', def create_staggered_kernel(assignments, target='cpu', gpu_exclusive_conditions=False, **kwargs):
gpu_exclusive_conditions=False, **kwargs):
"""Kernel that updates a staggered field. """Kernel that updates a staggered field.
.. image:: /img/staggered_grid.svg .. image:: /img/staggered_grid.svg
For a staggered field, the first index coordinate defines the location of the staggered value.
Further index coordinates can be used to store vectors/tensors at each point.
Args: Args:
staggered_field: field where the first index coordinate defines the location of the staggered value assignments: a sequence of assignments or an AssignmentCollection.
can have 1 or 2 index coordinates, in case of two index coordinates at every staggered location Assignments to staggered field are processed specially, while subexpressions and assignments to
a vector is stored, expressions parameter has to be a sequence of sequences then regular fields are passed through to `create_kernel`. Multiple different staggered fields can be
where e.g. ``f[0,0](0)`` is interpreted as value at the left cell boundary, ``f[1,0](0)`` the right cell used, but they all need to use the same stencil (i.e. the same number of staggered points) and
boundary and ``f[0,0](1)`` the southern cell boundary etc. shape.
expressions: sequence of expressions of length dim, defining how the west, southern, (bottom) cell boundary target: 'cpu', 'llvm' or 'gpu'
should be updated. gpu_exclusive_conditions: disable the use of multiple conditionals inside the loop. The outer layers are then
subexpressions: optional sequence of Assignments, that define subexpressions used in the main expressions handled in an else branch.
target: 'cpu' or 'gpu' kwargs: passed directly to create_kernel, iteration_slice and ghost_layers parameters are not allowed
gpu_exclusive_conditions: if/else construct to have only one code block for each of 2**dim code paths
kwargs: passed directly to create_kernel, iteration slice and ghost_layers parameters are not allowed
Returns: Returns:
AST, see `create_kernel` AST, see `create_kernel`
""" """
assert 'iteration_slice' not in kwargs and 'ghost_layers' not in kwargs assert 'iteration_slice' not in kwargs and 'ghost_layers' not in kwargs
assert staggered_field.index_dimensions in (1, 2), 'Staggered field must have one or two index dimensions'
if isinstance(assignments, AssignmentCollection):
subexpressions = assignments.subexpressions + [a for a in assignments.main_assignments
if not hasattr(a, 'lhs')
or type(a.lhs) is not Field.Access
or not FieldType.is_staggered(a.lhs.field)]
assignments = [a for a in assignments.main_assignments if hasattr(a, 'lhs')
and type(a.lhs) is Field.Access
and FieldType.is_staggered(a.lhs.field)]
else:
subexpressions = [a for a in assignments if not hasattr(a, 'lhs')
or type(a.lhs) is not Field.Access
or not FieldType.is_staggered(a.lhs.field)]
assignments = [a for a in assignments if hasattr(a, 'lhs')
and type(a.lhs) is Field.Access
and FieldType.is_staggered(a.lhs.field)]
if len(set([tuple(a.lhs.field.staggered_stencil) for a in assignments])) != 1:
raise ValueError("All assignments need to be made to staggered fields with the same stencil")
if len(set([a.lhs.field.shape for a in assignments])) != 1:
raise ValueError("All assignments need to be made to staggered fields with the same shape")
staggered_field = assignments[0].lhs.field
stencil = staggered_field.staggered_stencil
dim = staggered_field.spatial_dimensions dim = staggered_field.spatial_dimensions
shape = staggered_field.shape
counters = [LoopOverCoordinate.get_loop_counter_symbol(i) for i in range(dim)] counters = [LoopOverCoordinate.get_loop_counter_symbol(i) for i in range(dim)]
conditions = [counters[i] < staggered_field.shape[i] - 1 for i in range(dim)]
assert len(expressions) == dim
if staggered_field.index_dimensions == 2:
assert all(len(sublist) == len(expressions[0]) for sublist in expressions), \
"If staggered field has two index dimensions expressions has to be a sequence of sequences of all the " \
"same length."
final_assignments = [] final_assignments = []
last_conditional = None
def add(condition, dimensions, as_else_block=False):
nonlocal last_conditional
if staggered_field.index_dimensions == 1:
assignments = [Assignment(staggered_field(d), expressions[d]) for d in dimensions]
a_coll = AssignmentCollection(assignments, list(subexpressions))
a_coll = a_coll.new_filtered([staggered_field(d) for d in dimensions])
elif staggered_field.index_dimensions == 2:
assert staggered_field.has_fixed_index_shape
assignments = [Assignment(staggered_field(d, i), expr)
for d in dimensions
for i, expr in enumerate(expressions[d])]
a_coll = AssignmentCollection(assignments, list(subexpressions))
a_coll = a_coll.new_filtered([staggered_field(d, i) for i in range(staggered_field.index_shape[1])
for d in dimensions])
sp_assignments = [SympyAssignment(a.lhs, a.rhs) for a in a_coll.all_assignments]
if as_else_block and last_conditional:
new_cond = Conditional(condition, Block(sp_assignments))
last_conditional.false_block = Block([new_cond])
last_conditional = new_cond
else:
last_conditional = Conditional(condition, Block(sp_assignments))
final_assignments.append(last_conditional)
if target == 'cpu' or not gpu_exclusive_conditions: # find out whether any of the ghost layers is not needed
for d in range(dim): common_exclusions = set(["E", "W", "N", "S", "T", "B"][:2 * dim])
cond = sp.And(*[conditions[i] for i in range(dim) if d != i]) for direction in stencil:
add(cond, [d]) exclusions = set(["E", "W", "N", "S", "T", "B"][:2 * dim])
elif target == 'gpu': for elementary_direction in direction:
full_conditions = [sp.And(*[conditions[i] for i in range(dim) if d != i]) for d in range(dim)] exclusions.remove(inverse_direction_string(elementary_direction))
for include in itertools.product(*[[1, 0]] * dim): common_exclusions.intersection_update(exclusions)
case_conditions = sp.And(*[c if value else sp.Not(c) for c, value in zip(full_conditions, include)]) ghost_layers = [[0, 0] for d in range(dim)]
dimensions_to_include = [i for i in range(dim) if include[i]] for direction in common_exclusions:
if dimensions_to_include: direction = direction_string_to_offset(direction)
add(case_conditions, dimensions_to_include, True) for d, s in enumerate(direction):
if s == 1:
ghost_layers[d][1] = 1
elif s == -1:
ghost_layers[d][0] = 1
ghost_layers = [(1, 0)] * dim def condition(direction):
"""exclude those staggered points that correspond to fluxes between ghost cells"""
exclusions = set(["E", "W", "N", "S", "T", "B"][:2 * dim])
blocking = kwargs.get('cpu_blocking', None) for elementary_direction in direction:
if blocking: exclusions.remove(inverse_direction_string(elementary_direction))
del kwargs['cpu_blocking'] conditions = []
for e in exclusions:
if e in common_exclusions:
continue
offset = direction_string_to_offset(e)
for i, o in enumerate(offset):
if o == 1:
conditions.append(counters[i] < shape[i] - 1)
elif o == -1:
conditions.append(counters[i] > 0)
return sp.And(*conditions)
cpu_vectorize_info = kwargs.get('cpu_vectorize_info', None) if gpu_exclusive_conditions:
if cpu_vectorize_info: outer_assignment = None
del kwargs['cpu_vectorize_info'] conditions = {direction: condition(direction) for direction in stencil}
openmp = kwargs.get('cpu_openmp', None) for num_conditions in range(len(stencil)):
if openmp: for combination in combinations(conditions.values(), num_conditions):
del kwargs['cpu_openmp'] for assignment in assignments:
direction = stencil[assignment.lhs.index[0]]
if conditions[direction] in combination:
assignment = SympyAssignment(assignment.lhs, assignment.rhs)
outer_assignment = Conditional(sp.And(*combination), Block([assignment]), outer_assignment)
ast = create_kernel(final_assignments, ghost_layers=ghost_layers, target=target, **kwargs) inner_assignment = []
for assignment in assignments:
direction = stencil[assignment.lhs.index[0]]
inner_assignment.append(SympyAssignment(assignment.lhs, assignment.rhs))
last_conditional = Conditional(sp.And(*[condition(d) for d in stencil]),
Block(inner_assignment), outer_assignment)
final_assignments = [s for s in subexpressions if not hasattr(s, 'lhs')] + \
[SympyAssignment(s.lhs, s.rhs) for s in subexpressions if hasattr(s, 'lhs')] + \
[last_conditional]
if target == 'cpu': if target == 'cpu':
remove_conditionals_in_staggered_kernel(ast) from pystencils.cpu import create_kernel as create_kernel_cpu
move_constants_before_loop(ast) ast = create_kernel_cpu(final_assignments, ghost_layers=ghost_layers, **kwargs)
omp_collapse = None else:
if blocking: ast = create_kernel(final_assignments, ghost_layers=ghost_layers, target=target, **kwargs)
omp_collapse = loop_blocking(ast, blocking) return ast
if openmp:
from pystencils.cpu import add_openmp for assignment in assignments:
add_openmp(ast, num_threads=openmp, collapse=omp_collapse, assume_single_outer_loop=False) direction = stencil[assignment.lhs.index[0]]
if cpu_vectorize_info is True: sp_assignments = [s for s in subexpressions if not hasattr(s, 'lhs')] + \
vectorize(ast) [SympyAssignment(s.lhs, s.rhs) for s in subexpressions if hasattr(s, 'lhs')] + \
elif isinstance(cpu_vectorize_info, dict): [SympyAssignment(assignment.lhs, assignment.rhs)]
vectorize(ast, **cpu_vectorize_info) last_conditional = Conditional(condition(direction), Block(sp_assignments))
final_assignments.append(last_conditional)
remove_start_conditional = any([gl[0] == 0 for gl in ghost_layers])
prepend_optimizations = [lambda ast: remove_conditionals_in_staggered_kernel(ast, remove_start_conditional),
move_constants_before_loop]
ast = create_kernel(final_assignments, ghost_layers=ghost_layers, target=target,
cpu_prepend_optimizations=prepend_optimizations, **kwargs)
return ast return ast
...@@ -16,6 +16,11 @@ def inverse_direction(direction): ...@@ -16,6 +16,11 @@ def inverse_direction(direction):
return tuple([-i for i in direction]) return tuple([-i for i in direction])
def inverse_direction_string(direction):
"""Returns inverse of given direction string"""
return offset_to_direction_string(inverse_direction(direction_string_to_offset(direction)))
def is_valid(stencil, max_neighborhood=None): def is_valid(stencil, max_neighborhood=None):
""" """
Tests if a nested sequence is a valid stencil i.e. all the inner sequences have the same length. Tests if a nested sequence is a valid stencil i.e. all the inner sequences have the same length.
......
...@@ -8,14 +8,14 @@ from types import MappingProxyType ...@@ -8,14 +8,14 @@ from types import MappingProxyType
import numpy as np import numpy as np
import sympy as sp import sympy as sp
from sympy.core.numbers import ImaginaryUnit from sympy.core.numbers import ImaginaryUnit
from sympy.logic.boolalg import Boolean from sympy.logic.boolalg import Boolean, BooleanFunction
import pystencils.astnodes as ast import pystencils.astnodes as ast
import pystencils.integer_functions import pystencils.integer_functions
from pystencils.assignment import Assignment from pystencils.assignment import Assignment
from pystencils.data_types import ( from pystencils.data_types import (
PointerType, StructType, TypedImaginaryUnit, TypedSymbol, cast_func, collate_types, create_type, PointerType, StructType, TypedImaginaryUnit, TypedSymbol, cast_func, collate_types, create_type, get_base_type,
get_base_type, get_type_of_expression, pointer_arithmetic_func, reinterpret_cast_func) get_type_of_expression, pointer_arithmetic_func, reinterpret_cast_func)
from pystencils.field import AbstractField, Field, FieldType from pystencils.field import AbstractField, Field, FieldType
from pystencils.kernelparameters import FieldPointerSymbol from pystencils.kernelparameters import FieldPointerSymbol
from pystencils.simp.assignment_collection import AssignmentCollection from pystencils.simp.assignment_collection import AssignmentCollection
...@@ -630,13 +630,18 @@ def move_constants_before_loop(ast_node): ...@@ -630,13 +630,18 @@ def move_constants_before_loop(ast_node):
else: else:
target.insert_before(child, child_to_insert_before) target.insert_before(child, child_to_insert_before)
elif exists_already and exists_already.rhs == child.rhs: elif exists_already and exists_already.rhs == child.rhs:
pass if target.args.index(exists_already) > target.args.index(child_to_insert_before):
assert target.args.count(exists_already) == 1
assert target.args.count(child_to_insert_before) == 1
target.args.remove(exists_already)
target.insert_before(exists_already, child_to_insert_before)
else: else:
# this variable already exists in outer block, but with different rhs # this variable already exists in outer block, but with different rhs
# -> symbol has to be renamed # -> symbol has to be renamed
assert isinstance(child.lhs, TypedSymbol) assert isinstance(child.lhs, TypedSymbol)
new_symbol = TypedSymbol(sp.Dummy().name, child.lhs.dtype) new_symbol = TypedSymbol(sp.Dummy().name, child.lhs.dtype)
target.insert_before(ast.SympyAssignment(new_symbol, child.rhs), child_to_insert_before) target.insert_before(ast.SympyAssignment(new_symbol, child.rhs, is_const=child.is_const),
child_to_insert_before)
substitute_variables[child.lhs] = new_symbol substitute_variables[child.lhs] = new_symbol
...@@ -846,7 +851,7 @@ class KernelConstraintsCheck: ...@@ -846,7 +851,7 @@ class KernelConstraintsCheck:
return cast_func( return cast_func(
self.process_expression(rhs.args[0], type_constants=False), self.process_expression(rhs.args[0], type_constants=False),
rhs.dtype) rhs.dtype)
elif isinstance(rhs, sp.boolalg.BooleanFunction) or \ elif isinstance(rhs, BooleanFunction) or \
type(rhs) in pystencils.integer_functions.__dict__.values(): type(rhs) in pystencils.integer_functions.__dict__.values():
new_args = [self.process_expression(a, type_constants) for a in rhs.args] new_args = [self.process_expression(a, type_constants) for a in rhs.args]
types_of_expressions = [get_type_of_expression(a) for a in new_args] types_of_expressions = [get_type_of_expression(a) for a in new_args]
...@@ -1025,7 +1030,7 @@ def insert_casts(node): ...@@ -1025,7 +1030,7 @@ def insert_casts(node):
types = [get_type_of_expression(arg) for arg in args] types = [get_type_of_expression(arg) for arg in args]
assert len(types) > 0 assert len(types) > 0
# Never ever, ever collate to float type for boolean functions! # Never ever, ever collate to float type for boolean functions!
target = collate_types(types, forbid_collation_to_float=isinstance(node.func, sp.boolalg.BooleanFunction)) target = collate_types(types, forbid_collation_to_float=isinstance(node.func, BooleanFunction))
zipped = list(zip(args, types)) zipped = list(zip(args, types))
if target.func is PointerType: if target.func is PointerType:
assert node.func is sp.Add assert node.func is sp.Add
...@@ -1064,15 +1069,19 @@ def insert_casts(node): ...@@ -1064,15 +1069,19 @@ def insert_casts(node):
return node.func(*args) return node.func(*args)
def remove_conditionals_in_staggered_kernel(function_node: ast.KernelFunction) -> None: def remove_conditionals_in_staggered_kernel(function_node: ast.KernelFunction, include_first=True) -> None:
"""Removes conditionals of a kernel that iterates over staggered positions by splitting the loops at last element""" """Removes conditionals of a kernel that iterates over staggered positions by splitting the loops at last or
first and last element"""
all_inner_loops = [l for l in function_node.atoms(ast.LoopOverCoordinate) if l.is_innermost_loop] all_inner_loops = [l for l in function_node.atoms(ast.LoopOverCoordinate) if l.is_innermost_loop]
assert len(all_inner_loops) == 1, "Transformation works only on kernels with exactly one inner loop" assert len(all_inner_loops) == 1, "Transformation works only on kernels with exactly one inner loop"
inner_loop = all_inner_loops.pop() inner_loop = all_inner_loops.pop()
for loop in parents_of_type(inner_loop, ast.LoopOverCoordinate, include_current=True): for loop in parents_of_type(inner_loop, ast.LoopOverCoordinate, include_current=True):
cut_loop(loop, [loop.stop - 1]) if include_first:
cut_loop(loop, [loop.start + 1, loop.stop - 1])
else:
cut_loop(loop, [loop.stop - 1])
simplify_conditionals(function_node.body, loop_counter_simplification=True) simplify_conditionals(function_node.body, loop_counter_simplification=True)
cleanup_blocks(function_node.body) cleanup_blocks(function_node.body)
......
...@@ -11,8 +11,9 @@ def test_blocking_staggered(): ...@@ -11,8 +11,9 @@ def test_blocking_staggered():
f[0, 0, 0] - f[0, -1, 0], f[0, 0, 0] - f[0, -1, 0],
f[0, 0, 0] - f[0, 0, -1], f[0, 0, 0] - f[0, 0, -1],
] ]
kernel = ps.create_staggered_kernel(stag, terms, cpu_blocking=(3, 16, 8)).compile() assignments = [ps.Assignment(stag.staggered_access(d), terms[i]) for i, d in enumerate(stag.staggered_stencil)]
reference_kernel = ps.create_staggered_kernel(stag, terms).compile() kernel = ps.create_staggered_kernel(assignments, cpu_blocking=(3, 16, 8)).compile()
reference_kernel = ps.create_staggered_kernel(assignments).compile()
print(ps.show_code(kernel.ast)) print(ps.show_code(kernel.ast))
f_arr = np.random.rand(80, 33, 19) f_arr = np.random.rand(80, 33, 19)
......
...@@ -32,8 +32,7 @@ def test_cuda_but_not_c(): ...@@ -32,8 +32,7 @@ def test_cuda_but_not_c():
}) })
ast = pystencils.create_kernel(assignments, 'cpu') ast = pystencils.create_kernel(assignments, 'cpu')
code = str(pystencils.show_code(ast)) print(pystencils.show_code(ast))
assert "Not supported" in code
def test_cuda_unknown(): def test_cuda_unknown():
...@@ -46,4 +45,3 @@ def test_cuda_unknown(): ...@@ -46,4 +45,3 @@ def test_cuda_unknown():
ast = pystencils.create_kernel(assignments, 'gpu') ast = pystencils.create_kernel(assignments, 'gpu')
code = str(pystencils.show_code(ast)) code = str(pystencils.show_code(ast))
print(code) print(code)
assert "Not supported in CUDA" in code
%% Cell type:code id: tags: %% Cell type:code id: tags:
``` python ``` python
from pystencils.session import * from pystencils.session import *
from pystencils.fd.derivation import * from pystencils.fd.derivation import *
``` ```
%% Cell type:markdown id: tags: %% Cell type:markdown id: tags:
# 2D standard stencils # 2D standard stencils
%% Cell type:code id: tags: %% Cell type:code id: tags:
``` python ``` python
stencil = [(-1, 0), (1, 0), (0, -1), (0, 1), (0, 0)] stencil = [(-1, 0), (1, 0), (0, -1), (0, 1), (0, 0)]
standard_2d_00 = FiniteDifferenceStencilDerivation((0,0), stencil) standard_2d_00 = FiniteDifferenceStencilDerivation((0,0), stencil)
f = ps.fields("f: [2D]") f = ps.fields("f: [2D]")
standard_2d_00_res = standard_2d_00.get_stencil() standard_2d_00_res = standard_2d_00.get_stencil()
res = standard_2d_00_res.apply(f.center) res = standard_2d_00_res.apply(f.center)
expected = f[-1, 0] - 2 * f[0, 0] + f[1, 0] expected = f[-1, 0] - 2 * f[0, 0] + f[1, 0]
assert res == expected assert res == expected
``` ```
%% Cell type:code id: tags: %% Cell type:code id: tags:
``` python ``` python
assert standard_2d_00_res.accuracy == 2 assert standard_2d_00_res.accuracy == 2
assert not standard_2d_00_res.is_isotropic assert not standard_2d_00_res.is_isotropic
standard_2d_00_res standard_2d_00_res
``` ```
%% Output %% Output
Finite difference stencil of accuracy 2, isotropic error: False Finite difference stencil of accuracy 2, isotropic error: False
%% Cell type:code id: tags: %% Cell type:code id: tags:
``` python ``` python
standard_2d_00.get_stencil().as_matrix() standard_2d_00.get_stencil().as_matrix()
``` ```
%% Output %% Output
---------------------------------------------------------------------------
AttributeError Traceback (most recent call last) $$\left[\begin{matrix}0 & 0 & 0\\1 & -2 & 1\\0 & 0 & 0\end{matrix}\right]$$
<ipython-input-4-ea41cd8e50a0> in <module> ⎡0 0 0⎤
----> 1 standard_2d_00.get_stencil().as_matrix() ⎢ ⎥
⎢1 -2 1⎥
~/pystencils/pystencils/pystencils/fd/derivation.py in as_matrix(self) ⎢ ⎥
185 for direction, weight in zip(self.stencil, self.weights): ⎣0 0 0⎦
186 result[max_offset - direction[1], max_offset + direction[0]] = weight
--> 187 result.tomatrix().tomatrix()
188 print("test")
189 if dim == 3:
~/anaconda3/envs/pystencils/lib/python3.7/site-packages/sympy/matrices/matrices.py in __getattr__(self, attr)
2139 else:
2140 raise AttributeError(
-> 2141 "%s has no attribute %s." % (self.__class__.__name__, attr))
2142
2143 def __len__(self):
AttributeError: MutableDenseMatrix has no attribute tomatrix.
%% Cell type:markdown id: tags: %% Cell type:markdown id: tags:
# 2D isotropic stencils # 2D isotropic stencils
## second x-derivative ## second x-derivative
%% Cell type:code id: tags: %% Cell type:code id: tags:
``` python ``` python
stencil = [(i, j) for i in (-1, 0, 1) for j in (-1, 0, 1)] stencil = [(i, j) for i in (-1, 0, 1) for j in (-1, 0, 1)]
isotropic_2d_00 = FiniteDifferenceStencilDerivation((0,0), stencil) isotropic_2d_00 = FiniteDifferenceStencilDerivation((0,0), stencil)
isotropic_2d_00_res = isotropic_2d_00.get_stencil(isotropic=True) isotropic_2d_00_res = isotropic_2d_00.get_stencil(isotropic=True)
assert isotropic_2d_00_res.is_isotropic assert isotropic_2d_00_res.is_isotropic
assert isotropic_2d_00_res.accuracy == 2 assert isotropic_2d_00_res.accuracy == 2
isotropic_2d_00_res isotropic_2d_00_res
``` ```
%% Output
Finite difference stencil of accuracy 2, isotropic error: True
%% Cell type:code id: tags: %% Cell type:code id: tags:
``` python ``` python
isotropic_2d_00_res.as_matrix() isotropic_2d_00_res.as_matrix()
``` ```
%% Output
$$\left[\begin{matrix}\frac{1}{12} & - \frac{1}{6} & \frac{1}{12}\\\frac{5}{6} & - \frac{5}{3} & \frac{5}{6}\\\frac{1}{12} & - \frac{1}{6} & \frac{1}{12}\end{matrix}\right]$$
⎡1/12 -1/6 1/12⎤
⎢ ⎥
⎢5/6 -5/3 5/6 ⎥
⎢ ⎥
⎣1/12 -1/6 1/12⎦
%% Cell type:code id: tags: %% Cell type:code id: tags:
``` python ``` python
plt.figure(figsize=(2,2)) plt.figure(figsize=(2,2))
isotropic_2d_00_res.visualize() isotropic_2d_00_res.visualize()
``` ```
%% Output
%% Cell type:code id: tags: %% Cell type:code id: tags:
``` python ``` python
expected_result = sp.Matrix([[1, -2, 1], [10, -20, 10], [1, -2, 1]]) / 12 expected_result = sp.Matrix([[1, -2, 1], [10, -20, 10], [1, -2, 1]]) / 12
assert expected_result == isotropic_2d_00_res.as_matrix() assert expected_result == isotropic_2d_00_res.as_matrix()
``` ```
%% Cell type:code id: tags: %% Cell type:code id: tags:
``` python ``` python
type(isotropic_2d_00_res.as_matrix()) type(isotropic_2d_00_res.as_matrix())
``` ```
%% Output
sympy.matrices.dense.MutableDenseMatrix
%% Cell type:code id: tags: %% Cell type:code id: tags:
``` python ``` python
type(expected_result) type(expected_result)
``` ```
%% Output
sympy.matrices.dense.MutableDenseMatrix
%% Cell type:markdown id: tags: %% Cell type:markdown id: tags:
## Isotropic laplacian ## Isotropic laplacian
%% Cell type:code id: tags: %% Cell type:code id: tags:
``` python ``` python
isotropic_2d_11 = FiniteDifferenceStencilDerivation((1,1), stencil) isotropic_2d_11 = FiniteDifferenceStencilDerivation((1,1), stencil)
isotropic_2d_11_res = isotropic_2d_11.get_stencil(isotropic=True) isotropic_2d_11_res = isotropic_2d_11.get_stencil(isotropic=True)
iso_laplacian = isotropic_2d_00_res.as_matrix() + isotropic_2d_11_res.as_matrix() iso_laplacian = isotropic_2d_00_res.as_matrix() + isotropic_2d_11_res.as_matrix()
iso_laplacian iso_laplacian
``` ```
%% Output
$$\left[\begin{matrix}\frac{1}{6} & \frac{2}{3} & \frac{1}{6}\\\frac{2}{3} & - \frac{10}{3} & \frac{2}{3}\\\frac{1}{6} & \frac{2}{3} & \frac{1}{6}\end{matrix}\right]$$
⎡1/6 2/3 1/6⎤
⎢ ⎥
⎢2/3 -10/3 2/3⎥
⎢ ⎥
⎣1/6 2/3 1/6⎦
%% Cell type:code id: tags: %% Cell type:code id: tags:
``` python ``` python
expected_result = sp.Matrix([[1, 4, 1], [4, -20, 4], [1, 4, 1]]) / 6 expected_result = sp.Matrix([[1, 4, 1], [4, -20, 4], [1, 4, 1]]) / 6
assert iso_laplacian == expected_result assert iso_laplacian == expected_result
``` ```
%% Cell type:markdown id: tags:
# stencils for staggered fields
%% Cell type:code id: tags:
``` python
half = sp.Rational(1, 2)
fd_points_ex = (
(half, 0),
(-half, 0),
(half, 1),
(half, -1),
(-half, 1),
(-half, -1)
)
assert set(fd_points_ex) == set(FiniteDifferenceStaggeredStencilDerivation("E", 2).stencil)
fd_points_ey = (
(0, half),
(0, -half),
(-1,-half),
(-1, half),
(1, -half),
(1, half)
)
assert set(fd_points_ey) == set(FiniteDifferenceStaggeredStencilDerivation("N",2).stencil)
fd_points_c = (
(half, half),
(-half, -half),
(half, -half),
(-half, half)
)
assert set(fd_points_c) == set(FiniteDifferenceStaggeredStencilDerivation("NE",2).stencil)
assert len(FiniteDifferenceStaggeredStencilDerivation("E",3).points) == 10
assert len(FiniteDifferenceStaggeredStencilDerivation("NE",3).points) == 12
assert len(FiniteDifferenceStaggeredStencilDerivation("TNE",3).points) == 8
```
%% Cell type:code id: tags:
``` python
c = ps.fields("c: [2D]")
c3 = ps.fields("c3: [3D]")
assert FiniteDifferenceStaggeredStencilDerivation("E", 2, (0,)).apply(c) == c[1, 0] - c[0, 0]
assert FiniteDifferenceStaggeredStencilDerivation("W", 2, (0,)).apply(c) == c[0, 0] - c[-1, 0]
assert FiniteDifferenceStaggeredStencilDerivation("N", 2, (1,)).apply(c) == c[0, 1] - c[0, 0]
assert FiniteDifferenceStaggeredStencilDerivation("S", 2, (1,)).apply(c) == c[0, 0] - c[0, -1]
assert FiniteDifferenceStaggeredStencilDerivation("E", 3, (0,)).apply(c3) == c3[1, 0, 0] - c3[0, 0, 0]
assert FiniteDifferenceStaggeredStencilDerivation("W", 3, (0,)).apply(c3) == c3[0, 0, 0] - c3[-1, 0, 0]
assert FiniteDifferenceStaggeredStencilDerivation("N", 3, (1,)).apply(c3) == c3[0, 1, 0] - c3[0, 0, 0]
assert FiniteDifferenceStaggeredStencilDerivation("S", 3, (1,)).apply(c3) == c3[0, 0, 0] - c3[0, -1, 0]
assert FiniteDifferenceStaggeredStencilDerivation("T", 3, (2,)).apply(c3) == c3[0, 0, 1] - c3[0, 0, 0]
assert FiniteDifferenceStaggeredStencilDerivation("B", 3, (2,)).apply(c3) == c3[0, 0, 0] - c3[0, 0, -1]
assert FiniteDifferenceStaggeredStencilDerivation("NE", 2, (0,)).apply(c) + \
FiniteDifferenceStaggeredStencilDerivation("NE", 2, (1,)).apply(c) == c[1, 1] - c[0, 0]
assert FiniteDifferenceStaggeredStencilDerivation("NE", 3, (0,)).apply(c3) + \
FiniteDifferenceStaggeredStencilDerivation("NE", 3, (1,)).apply(c3) == c3[1, 1, 0] - c3[0, 0, 0]
```
%% Cell type:code id: tags:
``` python
d = FiniteDifferenceStaggeredStencilDerivation("NE", 2, (0, 1))
assert d.apply(c) == c[0,0] + c[1,1] - c[1,0] - c[0,1]
d.visualize()
```
%% Output
%% Cell type:code id: tags:
``` python
v3 = ps.fields("v(3): [3D]")
assert FiniteDifferenceStaggeredStencilDerivation("E", 3, (0,)).apply(v3) == \
sp.Matrix([v3[1,0,0](i) - v3[0,0,0](i) for i in range(*v3.index_shape)])
```
%% Cell type:code id: tags:
``` python
```
......
...@@ -28,6 +28,9 @@ def test_field_basic(): ...@@ -28,6 +28,9 @@ def test_field_basic():
assert neighbor.offsets == (-1, 1) assert neighbor.offsets == (-1, 1)
assert '_' in neighbor._latex('dummy') assert '_' in neighbor._latex('dummy')
f = Field.create_fixed_size('f', (8, 8, 2, 2, 2), index_dimensions=3)
assert f.center_vector == sp.Matrix([[[f(i, j, k) for k in range(2)] for j in range(2)] for i in range(2)])
f = Field.create_generic('f', spatial_dimensions=5, index_dimensions=2) f = Field.create_generic('f', spatial_dimensions=5, index_dimensions=2)
field_access = f[1, -1, 2, -3, 0](1, 0) field_access = f[1, -1, 2, -3, 0](1, 0)
assert field_access.offsets == (1, -1, 2, -3, 0) assert field_access.offsets == (1, -1, 2, -3, 0)
...@@ -135,17 +138,30 @@ def test_staggered(): ...@@ -135,17 +138,30 @@ def test_staggered():
j1, j2, j3 = ps.fields('j1(2), j2(2,2), j3(2,2,2) : double[2D]', field_type=FieldType.STAGGERED) j1, j2, j3 = ps.fields('j1(2), j2(2,2), j3(2,2,2) : double[2D]', field_type=FieldType.STAGGERED)
assert j1[0, 1](1) == j1.staggered_access((0, sp.Rational(1, 2))) assert j1[0, 1](1) == j1.staggered_access((0, sp.Rational(1, 2)))
assert j1[1, 1](1) == j1.staggered_access((1, sp.Rational(1, 2)))
assert j1[0, 2](1) == j1.staggered_access((0, sp.Rational(3, 2)))
assert j1[0, 1](1) == j1.staggered_access("N") assert j1[0, 1](1) == j1.staggered_access("N")
assert j1[0, 0](1) == j1.staggered_access("S") assert j1[0, 0](1) == j1.staggered_access("S")
assert j1.staggered_vector_access("N") == sp.Matrix([j1.staggered_access("N")])
assert j2[0, 1](1, 1) == j2.staggered_access((0, sp.Rational(1, 2)), 1) assert j2[0, 1](1, 1) == j2.staggered_access((0, sp.Rational(1, 2)), 1)
assert j2[0, 1](1, 1) == j2.staggered_access("N", 1) assert j2[0, 1](1, 1) == j2.staggered_access("N", 1)
assert j2.staggered_vector_access("N") == sp.Matrix([j2.staggered_access("N", 0), j2.staggered_access("N", 1)])
assert j3[0, 1](1, 1, 1) == j3.staggered_access((0, sp.Rational(1, 2)), (1, 1)) assert j3[0, 1](1, 1, 1) == j3.staggered_access((0, sp.Rational(1, 2)), (1, 1))
assert j3[0, 1](1, 1, 1) == j3.staggered_access("N", (1, 1)) assert j3[0, 1](1, 1, 1) == j3.staggered_access("N", (1, 1))
assert j3.staggered_vector_access("N") == sp.Matrix([[j3.staggered_access("N", (i, j))
for j in range(2)] for i in range(2)])
# D2Q9 # D2Q9
k = ps.fields('k(4) : double[2D]', field_type=FieldType.STAGGERED) k = ps.fields('k(4) : double[2D]', field_type=FieldType.STAGGERED)
assert k[1, 1](2) == k.staggered_access("NE") assert k[1, 1](2) == k.staggered_access("NE")
assert k[0, 0](2) == k.staggered_access("SW") assert k[0, 0](2) == k.staggered_access("SW")
assert k[0, 0](3) == k.staggered_access("NW")
# sign reversed when using as flux field
r = ps.fields('r(2) : double[2D]', field_type=FieldType.STAGGERED_FLUX)
assert r[0, 0](0) == r.staggered_access("W")
assert -r[1, 0](0) == r.staggered_access("E")
...@@ -55,7 +55,8 @@ def test_staggered_iteration(): ...@@ -55,7 +55,8 @@ def test_staggered_iteration():
for d in range(dim): for d in range(dim):
expressions.append(sum(f[o] for o in offsets_in_plane(d, 0, dim)) - expressions.append(sum(f[o] for o in offsets_in_plane(d, 0, dim)) -
sum(f[o] for o in offsets_in_plane(d, -1, dim))) sum(f[o] for o in offsets_in_plane(d, -1, dim)))
func_optimized = create_staggered_kernel(s, expressions).compile() assignments = [ps.Assignment(s.staggered_access(d), expressions[i]) for i, d in enumerate(s.staggered_stencil)]
func_optimized = create_staggered_kernel(assignments).compile()
assert not func_optimized.ast.atoms(Conditional), "Loop cutting optimization did not work" assert not func_optimized.ast.atoms(Conditional), "Loop cutting optimization did not work"
func(f=f_arr, s=s_arr_ref) func(f=f_arr, s=s_arr_ref)
...@@ -111,8 +112,10 @@ def test_staggered_gpu(): ...@@ -111,8 +112,10 @@ def test_staggered_gpu():
s = ps.fields("s({dim}): double[{dim}D]".format(dim=dim), field_type=FieldType.STAGGERED) s = ps.fields("s({dim}): double[{dim}D]".format(dim=dim), field_type=FieldType.STAGGERED)
expressions = [(f[0, 0] + f[-1, 0]) / 2, expressions = [(f[0, 0] + f[-1, 0]) / 2,
(f[0, 0] + f[0, -1]) / 2] (f[0, 0] + f[0, -1]) / 2]
kernel_ast = ps.create_staggered_kernel(s, expressions, target='gpu', gpu_exclusive_conditions=True) assignments = [ps.Assignment(s.staggered_access(d), expressions[i]) for i, d in enumerate(s.staggered_stencil)]
kernel_ast = ps.create_staggered_kernel(assignments, target='gpu', gpu_exclusive_conditions=True)
assert len(kernel_ast.atoms(Conditional)) == 4 assert len(kernel_ast.atoms(Conditional)) == 4
kernel_ast = ps.create_staggered_kernel(s, expressions, target='gpu', gpu_exclusive_conditions=False) assignments = [ps.Assignment(s.staggered_access(d), expressions[i]) for i, d in enumerate(s.staggered_stencil)]
kernel_ast = ps.create_staggered_kernel(assignments, target='gpu', gpu_exclusive_conditions=False)
assert len(kernel_ast.atoms(Conditional)) == 3 assert len(kernel_ast.atoms(Conditional)) == 3
import pystencils as ps
import numpy as np
import sympy as sp
class TestStaggeredDiffusion:
def _run(self, num_neighbors):
L = (40, 40)
D = 0.066
dt = 1
T = 100
dh = ps.create_data_handling(L, periodicity=True, default_target='cpu')
c = dh.add_array('c', values_per_cell=1)
j = dh.add_array('j', values_per_cell=num_neighbors, field_type=ps.FieldType.STAGGERED_FLUX)
x_staggered = - c[-1, 0] + c[0, 0]
y_staggered = - c[0, -1] + c[0, 0]
xy_staggered = - c[-1, -1] + c[0, 0]
xY_staggered = - c[-1, 1] + c[0, 0]
jj = j.staggered_access
divergence = -1 * D / (1 + sp.sqrt(2) if j.index_shape[0] == 4 else 1) * \
sum([jj(d) / sp.Matrix(ps.stencil.direction_string_to_offset(d)).norm() for d in j.staggered_stencil
+ [ps.stencil.inverse_direction_string(d) for d in j.staggered_stencil]])
update = [ps.Assignment(c.center, c.center + dt * divergence)]
flux = [ps.Assignment(j.staggered_access("W"), x_staggered),
ps.Assignment(j.staggered_access("S"), y_staggered)]
if j.index_shape[0] == 4:
flux += [ps.Assignment(j.staggered_access("SW"), xy_staggered),
ps.Assignment(j.staggered_access("NW"), xY_staggered)]
staggered_kernel = ps.create_staggered_kernel(flux, target=dh.default_target).compile()
div_kernel = ps.create_kernel(update, target=dh.default_target).compile()
def time_loop(steps):
sync = dh.synchronization_function([c.name])
dh.all_to_gpu()
for i in range(steps):
sync()
dh.run_kernel(staggered_kernel)
dh.run_kernel(div_kernel)
dh.all_to_cpu()
def init():
dh.fill(c.name, np.nan, ghost_layers=True, inner_ghost_layers=True)
dh.fill(c.name, 0)
dh.fill(j.name, np.nan, ghost_layers=True, inner_ghost_layers=True)
dh.cpu_arrays[c.name][L[0] // 2:L[0] // 2 + 2, L[1] // 2:L[1] // 2 + 2] = 1.0
init()
time_loop(T)
reference = np.empty(L)
for x in range(L[0]):
for y in range(L[1]):
r = np.array([x, y]) - L[0] / 2 + 0.5
reference[x, y] = (4 * np.pi * D * T)**(-dh.dim / 2) * np.exp(-np.dot(r, r) / (4 * D * T)) * (2**dh.dim)
assert np.abs(dh.gather_array(c.name) - reference).max() < 5e-4
def test_diffusion_2(self):
self._run(2)
def test_diffusion_4(self):
self._run(4)
def test_staggered_subexpressions():
dh = ps.create_data_handling((10, 10), periodicity=True, default_target='cpu')
j = dh.add_array('j', values_per_cell=2, field_type=ps.FieldType.STAGGERED)
c = sp.symbols("c")
assignments = [ps.Assignment(j.staggered_access("W"), c),
ps.Assignment(c, 1)]
ps.create_staggered_kernel(assignments, target=dh.default_target).compile()
def test_staggered_loop_cutting():
dh = ps.create_data_handling((4, 4), periodicity=True, default_target='cpu')
j = dh.add_array('j', values_per_cell=4, field_type=ps.FieldType.STAGGERED)
assignments = [ps.Assignment(j.staggered_access("SW"), 1)]
ast = ps.create_staggered_kernel(assignments, target=dh.default_target)
assert not ast.atoms(ps.astnodes.Conditional)
import sympy as sp import sympy as sp
import numpy as np
import pystencils as ps
from pystencils import data_types from pystencils import data_types
from pystencils.data_types import * from pystencils.data_types import TypedSymbol, get_type_of_expression, VectorType, collate_types, create_type
from pystencils.kernelparameters import FieldShapeSymbol
def test_parsing(): def test_parsing():
...@@ -25,7 +25,6 @@ def test_collation(): ...@@ -25,7 +25,6 @@ def test_collation():
def test_dtype_of_constants(): def test_dtype_of_constants():
# Some come constants are neither of type Integer,Float,Rational and don't have args # Some come constants are neither of type Integer,Float,Rational and don't have args
# >>> isinstance(pi, Integer) # >>> isinstance(pi, Integer)
# False # False
...@@ -39,13 +38,25 @@ def test_dtype_of_constants(): ...@@ -39,13 +38,25 @@ def test_dtype_of_constants():
def test_assumptions(): def test_assumptions():
x = ps.fields('x: float32[3d]')
x = pystencils.fields('x: float32[3d]')
assert x.shape[0].is_nonnegative assert x.shape[0].is_nonnegative
assert (2 * x.shape[0]).is_nonnegative assert (2 * x.shape[0]).is_nonnegative
assert (2 * x.shape[0]).is_integer assert (2 * x.shape[0]).is_integer
assert(TypedSymbol('a', create_type('uint64'))).is_nonnegative assert (TypedSymbol('a', create_type('uint64'))).is_nonnegative
assert (TypedSymbol('a', create_type('uint64'))).is_positive is None assert (TypedSymbol('a', create_type('uint64'))).is_positive is None
assert (TypedSymbol('a', create_type('uint64')) + 1).is_positive assert (TypedSymbol('a', create_type('uint64')) + 1).is_positive
assert (x.shape[0] + 1).is_real assert (x.shape[0] + 1).is_real
def test_sqrt_of_integer():
"""Regression test for bug where sqrt(3) was classified as integer"""
f = ps.fields("f: [1D]")
tmp = sp.symbols("tmp")
assignments = [ps.Assignment(tmp, sp.sqrt(3)),
ps.Assignment(f[0], tmp)]
arr = np.array([1], dtype=np.float64)
kernel = ps.create_kernel(assignments).compile()
kernel(f=arr)
assert 1.7 < arr[0] < 1.8