Skip to content
Snippets Groups Projects

Compare revisions

Changes are shown as if the source revision was being merged into the target revision. Learn more about comparing revisions.

Source

Select target project
No results found

Target

Select target project
No results found
Show changes
Commits on Source (44)
Showing
with 785 additions and 197 deletions
......@@ -3,6 +3,7 @@ import pytest
import tempfile
import runpy
import sys
import warnings
# Trigger config file reading / creation once - to avoid race conditions when multiple instances are creating it
# at the same time
from pystencils.cpu import cpujit
......@@ -128,8 +129,11 @@ class IPyNbFile(pytest.File):
exporter.exclude_input_prompt = True
notebook_contents = self.fspath.open()
notebook = nbformat.read(notebook_contents, 4)
code, _ = exporter.from_notebook_node(notebook)
with warnings.catch_warnings():
warnings.filterwarnings("ignore", "IPython.core.inputsplitter is deprecated")
notebook = nbformat.read(notebook_contents, 4)
code, _ = exporter.from_notebook_node(notebook)
yield IPyNbTest(self.name, self, code)
def teardown(self):
......
......@@ -310,6 +310,7 @@ class Block(Node):
def insert_before(self, new_node, insert_before):
new_node.parent = self
assert self._nodes.count(insert_before) == 1
idx = self._nodes.index(insert_before)
# move all assignment (definitions to the top)
......@@ -337,6 +338,7 @@ class Block(Node):
return tmp
def replace(self, child, replacements):
assert self._nodes.count(child) == 1
idx = self._nodes.index(child)
del self._nodes[idx]
if type(replacements) is list:
......@@ -516,12 +518,13 @@ class LoopOverCoordinate(Node):
class SympyAssignment(Node):
def __init__(self, lhs_symbol, rhs_expr, is_const=True):
def __init__(self, lhs_symbol, rhs_expr, is_const=True, use_auto=False):
super(SympyAssignment, self).__init__(parent=None)
self._lhs_symbol = lhs_symbol
self.rhs = sp.sympify(rhs_expr)
self._is_const = is_const
self._is_declaration = self.__is_declaration()
self.use_auto = use_auto
def __is_declaration(self):
if isinstance(self._lhs_symbol, cast_func):
......
......@@ -4,16 +4,17 @@ from typing import Set
import numpy as np
import sympy as sp
from sympy.core import S
from sympy.logic.boolalg import BooleanFalse, BooleanTrue
from sympy.printing.ccode import C89CodePrinter
from pystencils.astnodes import KernelFunction, Node
from pystencils.cpu.vectorization import vec_all, vec_any
from pystencils.data_types import (
PointerType, VectorType, address_of, cast_func, create_type, get_type_of_expression,
reinterpret_cast_func, vector_memory_access)
PointerType, VectorType, address_of, cast_func, create_type, get_type_of_expression, reinterpret_cast_func,
vector_memory_access)
from pystencils.fast_approximation import fast_division, fast_inv_sqrt, fast_sqrt
from pystencils.integer_functions import (
bit_shift_left, bit_shift_right, bitwise_and, bitwise_or, bitwise_xor,
int_div, int_power_of_2, modulo_ceil)
bit_shift_left, bit_shift_right, bitwise_and, bitwise_or, bitwise_xor, int_div, int_power_of_2, modulo_ceil)
try:
from sympy.printing.ccode import C99CodePrinter as CCodePrinter
......@@ -32,9 +33,9 @@ def generate_c(ast_node: Node,
with_globals=True) -> str:
"""Prints an abstract syntax tree node as C or CUDA code.
This function does not need to distinguish between C, C++ or CUDA code, it just prints 'C-like' code as encoded
in the abstract syntax tree (AST). The AST is built differently for C or CUDA by calling different create_kernel
functions.
This function does not need to distinguish for most AST nodes between C, C++ or CUDA code, it just prints 'C-like'
code as encoded in the abstract syntax tree (AST). The AST is built differently for C or CUDA by calling different
create_kernel functions.
Args:
ast_node:
......@@ -229,11 +230,15 @@ class CBackend:
def _print_SympyAssignment(self, node):
if node.is_declaration:
if node.is_const:
prefix = 'const '
if node.use_auto:
data_type = 'auto '
else:
prefix = ''
data_type = prefix + self._print(node.lhs.dtype).replace(' const', '') + " "
if node.is_const:
prefix = 'const '
else:
prefix = ''
data_type = prefix + self._print(node.lhs.dtype).replace(' const', '') + " "
return "%s%s = %s;" % (data_type,
self.sympy_printer.doprint(node.lhs),
self.sympy_printer.doprint(node.rhs))
......@@ -292,6 +297,10 @@ class CBackend:
return ""
def _print_Conditional(self, node):
if type(node.condition_expr) is BooleanTrue:
return self._print_Block(node.true_block)
elif type(node.condition_expr) is BooleanFalse:
return self._print_Block(node.false_block)
cond_type = get_type_of_expression(node.condition_expr)
if isinstance(cond_type, VectorType):
raise ValueError("Problem with Conditional inside vectorized loop - use vec_any or vec_all")
......@@ -381,7 +390,8 @@ class CustomSympyPrinter(CCodePrinter):
elif expr.func == int_div:
return "((%s) / (%s))" % (self._print(expr.args[0]), self._print(expr.args[1]))
else:
return super(CustomSympyPrinter, self)._print_Function(expr)
arg_str = ', '.join(self._print(a) for a in expr.args)
return f'{expr.name}({arg_str})'
def _typed_number(self, number, dtype):
res = self._print(number)
......
......@@ -34,6 +34,7 @@ def create_kernel(assignments: AssignmentOrAstNodeList, function_name: str = "ke
transformation :func:`pystencils.transformation.split_inner_loop`
iteration_slice: if not None, iteration is done only over this slice of the field
ghost_layers: a sequence of pairs for each coordinate with lower and upper nr of ghost layers
that should be excluded from the iteration.
if None, the number of ghost layers is determined automatically and assumed to be equal for a
all dimensions
skip_independence_check: don't check that loop iterations are independent. This is needed e.g. for
......
......@@ -2,18 +2,17 @@ import warnings
from typing import Container, Union
import sympy as sp
from sympy.logic.boolalg import BooleanFunction
import pystencils.astnodes as ast
from pystencils.backends.simd_instruction_sets import get_vector_instruction_set
from pystencils.data_types import (
PointerType, TypedSymbol, VectorType, cast_func, collate_types, get_type_of_expression,
vector_memory_access)
PointerType, TypedSymbol, VectorType, cast_func, collate_types, get_type_of_expression, vector_memory_access)
from pystencils.fast_approximation import fast_division, fast_inv_sqrt, fast_sqrt
from pystencils.field import Field
from pystencils.integer_functions import modulo_ceil, modulo_floor
from pystencils.sympyextensions import fast_subs
from pystencils.transformations import (
cut_loop, filtered_tree_iteration, replace_inner_stride_with_one)
from pystencils.transformations import cut_loop, filtered_tree_iteration, replace_inner_stride_with_one
# noinspection PyPep8Naming
......@@ -177,7 +176,7 @@ def insert_vector_casts(ast_node):
visit_expr(expr.args[4]))
elif isinstance(expr, cast_func):
return expr
elif expr.func in handled_functions or isinstance(expr, sp.Rel) or isinstance(expr, sp.boolalg.BooleanFunction):
elif expr.func in handled_functions or isinstance(expr, sp.Rel) or isinstance(expr, BooleanFunction):
new_args = [visit_expr(a) for a in expr.args]
arg_types = [get_type_of_expression(a) for a in new_args]
if not any(type(t) is VectorType for t in arg_types):
......
......@@ -4,14 +4,14 @@ from functools import partial
from typing import Tuple
import numpy as np
import pystencils
import sympy as sp
import sympy.codegen.ast
from sympy.core.cache import cacheit
from sympy.logic.boolalg import Boolean, BooleanFunction
import pystencils
from pystencils.cache import memorycache, memorycache_if_hashable
from pystencils.utils import all_equal
from sympy.core.cache import cacheit
from sympy.logic.boolalg import Boolean
try:
import llvmlite.ir as ir
......@@ -541,14 +541,20 @@ def get_type_of_expression(expr,
elif isinstance(expr, sp.Indexed):
typed_symbol = expr.base.label
return typed_symbol.dtype.base_type
elif isinstance(expr, (sp.boolalg.Boolean, sp.boolalg.BooleanFunction)):
elif isinstance(expr, (Boolean, BooleanFunction)):
# if any arg is of vector type return a vector boolean, else return a normal scalar boolean
result = create_type("bool")
vec_args = [get_type(a) for a in expr.args if isinstance(get_type(a), VectorType)]
if vec_args:
result = VectorType(result, width=vec_args[0].width)
return result
elif isinstance(expr, (sp.Pow, sp.Sum, sp.Product)):
elif isinstance(expr, sp.Pow):
base_type = get_type(expr.args[0])
if expr.exp.is_integer:
return base_type
else:
return collate_types([create_type(default_float_type), base_type])
elif isinstance(expr, (sp.Sum, sp.Product)):
return get_type(expr.args[0])
elif isinstance(expr, sp.Expr):
expr: sp.Expr
......@@ -821,3 +827,6 @@ class TypedImaginaryUnit(TypedSymbol):
__xnew__ = staticmethod(__new_stage2__)
__xnew_cached_ = staticmethod(cacheit(__new_stage2__))
def __getnewargs__(self):
return (self.dtype,)
......@@ -109,11 +109,14 @@ class ParallelDataHandling(DataHandling):
if hasattr(values_per_cell, '__len__'):
raise NotImplementedError("Parallel data handling does not support multiple index dimensions")
self._fieldInformation[name] = {'ghost_layers': ghost_layers,
'values_per_cell': values_per_cell,
'layout': layout,
'dtype': dtype,
'alignment': alignment}
self._fieldInformation[name] = {
'ghost_layers': ghost_layers,
'values_per_cell': values_per_cell,
'layout': layout,
'dtype': dtype,
'alignment': alignment,
'field_type': field_type,
}
layout_map = {'fzyx': wlb.field.Layout.fzyx, 'zyxf': wlb.field.Layout.zyxf,
'f': wlb.field.Layout.fzyx,
......
......@@ -100,6 +100,7 @@ class SerialDataHandling(DataHandling):
'layout': layout,
'dtype': dtype,
'alignment': alignment,
'field_type': field_type,
}
index_dimensions = len(values_per_cell)
......
import warnings
from collections import defaultdict
import itertools
import numpy as np
import sympy as sp
from pystencils.field import Field
from pystencils.stencil import direction_string_to_offset
from pystencils.sympyextensions import multidimensional_sum, prod
from pystencils.utils import LinearEquationSystem, fully_contains
......@@ -228,3 +230,120 @@ class FiniteDifferenceStencilDerivation:
def __repr__(self):
return "Finite difference stencil of accuracy {}, isotropic error: {}".format(self.accuracy,
self.is_isotropic)
class FiniteDifferenceStaggeredStencilDerivation:
"""Derives a finite difference stencil for application at a staggered position
Args:
neighbor: the neighbor direction string or vector at whose staggered position to calculate the derivative
dim: how many dimensions (2 or 3)
derivative: a tuple of directions over which to perform derivatives
"""
def __init__(self, neighbor, dim, derivative=tuple()):
if type(neighbor) is str:
neighbor = direction_string_to_offset(neighbor)
if dim == 2:
assert neighbor[dim:] == 0
assert derivative is tuple() or max(derivative) < dim
neighbor = sp.Matrix(neighbor[:dim])
pos = neighbor / 2
def unitvec(i):
"""return the `i`-th unit vector in three dimensions"""
a = np.zeros(dim, dtype=int)
a[i] = 1
return a
def flipped(a, i):
"""return `a` with its `i`-th element's sign flipped"""
a = a.copy()
a[i] *= -1
return a
# determine the points to use, coordinates are relative to position
points = []
if np.linalg.norm(neighbor, 1) == 1:
main_points = [neighbor / 2, neighbor / -2]
elif np.linalg.norm(neighbor, 1) == 2:
nonzero_indices = [i for i, v in enumerate(neighbor) if v != 0 and i < dim]
main_points = [neighbor / 2, neighbor / -2, flipped(neighbor / 2, nonzero_indices[0]),
flipped(neighbor / -2, nonzero_indices[0])]
else:
main_points = [neighbor.multiply_elementwise(sp.Matrix(c) / 2)
for c in itertools.product([-1, 1], repeat=3)]
points += main_points
zero_indices = [i for i, v in enumerate(neighbor) if v == 0 and i < dim]
for i in zero_indices:
points += [point + sp.Matrix(unitvec(i)) for point in main_points]
points += [point - sp.Matrix(unitvec(i)) for point in main_points]
points_tuple = tuple([tuple(p) for p in points])
self._stencil = points_tuple
# determine the stencil weights
if len(derivative) == 0:
weights = None
else:
derivation = FiniteDifferenceStencilDerivation(derivative, points_tuple).get_stencil()
if not derivation.accuracy:
raise Exception('the requested derivative cannot be performed with the available neighbors')
weights = derivation.weights
# if the weights are underdefined, we can choose the free symbols to find the sparsest stencil
free_weights = set(itertools.chain(*[w.free_symbols for w in weights]))
if len(free_weights) > 0:
zero_counts = defaultdict(list)
for values in itertools.product([-1, -sp.Rational(1, 2), 0, 1, sp.Rational(1, 2)],
repeat=len(free_weights)):
subs = {free_weight: value for free_weight, value in zip(free_weights, values)}
weights = [w.subs(subs) for w in derivation.weights]
if not all(a == 0 for a in weights):
zero_count = sum([1 for w in weights if w == 0])
zero_counts[zero_count].append(weights)
best = zero_counts[max(zero_counts.keys())]
if len(best) > 1: # if there are multiple, pick the one that contains a nonzero center weight
center = [tuple(p + pos) for p in points].index((0, 0, 0))
best = [b for b in best if b[center] != 0]
if len(best) > 1:
raise NotImplementedError("more than one suitable set of weights found, don't know how to proceed")
weights = best[0]
assert weights
points_tuple = tuple([tuple(p + pos) for p in points])
self._points = points_tuple
self._weights = weights
@property
def points(self):
"""return the points of the stencil"""
return self._points
@property
def stencil(self):
"""return the points of the stencil relative to the staggered position specified by neighbor"""
return self._stencil
@property
def weights(self):
"""return the weights of the stencil"""
assert self._weights is not None
return self._weights
def visualize(self):
if self._weights is None:
ws = None
else:
ws = np.array([w for w in self.weights if w != 0], dtype=float)
pts = np.array([p for i, p in enumerate(self.points) if self.weights[i] != 0], dtype=int)
from pystencils.stencil import plot
plot(pts, data=ws)
def apply(self, field):
if field.index_dimensions == 0:
return sum([field.__getitem__(point) * weight for point, weight in zip(self.points, self.weights)])
else:
total = field.neighbor_vector(self.points[0]) * self.weights[0]
for point, weight in zip(self.points[1:], self.weights[1:]):
total += field.neighbor_vector(point) * weight
return total
......@@ -15,7 +15,7 @@ import pystencils
from pystencils.alignedarray import aligned_empty
from pystencils.data_types import StructType, TypedSymbol, create_type
from pystencils.kernelparameters import FieldShapeSymbol, FieldStrideSymbol
from pystencils.stencil import direction_string_to_offset, offset_to_direction_string
from pystencils.stencil import direction_string_to_offset, offset_to_direction_string, inverse_direction
from pystencils.sympyextensions import is_integer_sequence
__all__ = ['Field', 'fields', 'FieldType', 'AbstractField']
......@@ -34,6 +34,8 @@ class FieldType(Enum):
CUSTOM = 3
# staggered field
STAGGERED = 4
# staggered field that reverses sign when accessed via opposite direction
STAGGERED_FLUX = 5
@staticmethod
def is_generic(field):
......@@ -58,7 +60,12 @@ class FieldType(Enum):
@staticmethod
def is_staggered(field):
assert isinstance(field, Field)
return field.field_type == FieldType.STAGGERED
return field.field_type == FieldType.STAGGERED or field.field_type == FieldType.STAGGERED_FLUX
@staticmethod
def is_staggered_flux(field):
assert isinstance(field, Field)
return field.field_type == FieldType.STAGGERED_FLUX
def fields(description=None, index_dimensions=0, layout=None, field_type=FieldType.GENERIC, **kwargs):
......@@ -83,7 +90,7 @@ def fields(description=None, index_dimensions=0, layout=None, field_type=FieldTy
Format string can be left out, field names are taken from keyword arguments.
>>> fields(f1=arr_s, f2=arr_s)
[f1, f2]
[f1: double[20,20], f2: double[20,20]]
The keyword names ``index_dimension`` and ``layout`` have special meaning, don't use them for field names
>>> f = fields(f=arr_v, index_dimensions=1)
......@@ -392,7 +399,19 @@ class Field(AbstractField):
return self.dtype.numpy_dtype.itemsize
def __repr__(self):
return self._field_name
if any(isinstance(s, sp.Symbol) for s in self.spatial_shape):
spatial_shape_str = f'{self.spatial_dimensions}d'
else:
spatial_shape_str = ','.join(str(i) for i in self.spatial_shape)
index_shape_str = ','.join(str(i) for i in self.index_shape)
if self.index_shape:
return f'{self._field_name}({index_shape_str}): {self.dtype}[{spatial_shape_str}]'
else:
return f'{self._field_name}: {self.dtype}[{spatial_shape_str}]'
def __str__(self):
return self.name
def neighbor(self, coord_id, offset):
offset_list = [0] * self.spatial_dimensions
......@@ -407,19 +426,37 @@ class Field(AbstractField):
index_shape = self.index_shape
if len(index_shape) == 0:
return sp.Matrix([self.center])
if len(index_shape) == 1:
elif len(index_shape) == 1:
return sp.Matrix([self(i) for i in range(index_shape[0])])
elif len(index_shape) == 2:
def cb(*args):
r = self.__call__(*args)
return r
return sp.Matrix(*index_shape, cb)
return sp.Matrix([[self(i, j) for j in range(index_shape[1])] for i in range(index_shape[0])])
elif len(index_shape) == 3:
return sp.Matrix([[[self(i, j, k) for k in range(index_shape[2])]
for j in range(index_shape[1])] for i in range(index_shape[0])])
else:
raise NotImplementedError("center_vector is not implemented for more than 3 index dimensions")
@property
def center(self):
center = tuple([0] * self.spatial_dimensions)
return Field.Access(self, center)
def neighbor_vector(self, offset):
"""Like neighbor, but returns the entire vector/tensor stored at offset."""
if self.spatial_dimensions == 2 and len(offset) == 3:
assert offset[2] == 0
offset = offset[:2]
if self.index_dimensions == 0:
return sp.Matrix([self.__getitem__(offset)])
elif self.index_dimensions == 1:
return sp.Matrix([self.__getitem__(offset)(i) for i in range(self.index_shape[0])])
elif self.index_dimensions == 2:
return sp.Matrix([[self.__getitem__(offset)(i, k) for k in range(self.index_shape[1])]
for i in range(self.index_shape[0])])
else:
raise NotImplementedError("neighbor_vector is not implemented for more than 2 index dimensions")
def __getitem__(self, offset):
if type(offset) is np.ndarray:
offset = tuple(offset)
......@@ -466,6 +503,7 @@ class Field(AbstractField):
"""
assert FieldType.is_staggered(self)
offset_orig = offset
if type(offset) is np.ndarray:
offset = tuple(offset)
if type(offset) is str:
......@@ -477,20 +515,29 @@ class Field(AbstractField):
raise ValueError("Wrong number of spatial indices: "
"Got %d, expected %d" % (len(offset), self.spatial_dimensions))
offset = list(offset)
neighbor = [0] * len(offset)
for i, o in enumerate(offset):
if (o + sp.Rational(1, 2)).is_Integer:
offset[i] += sp.Rational(1, 2)
neighbor[i] = 1
neighbor = offset_to_direction_string(neighbor)
prefactor = 1
neighbor_vec = [0] * len(offset)
for i in range(self.spatial_dimensions):
if (offset[i] + sp.Rational(1, 2)).is_Integer:
neighbor_vec[i] = sp.sign(offset[i])
neighbor = offset_to_direction_string(neighbor_vec)
if neighbor not in self.staggered_stencil:
neighbor_vec = inverse_direction(neighbor_vec)
neighbor = offset_to_direction_string(neighbor_vec)
if FieldType.is_staggered_flux(self):
prefactor = -1
if neighbor not in self.staggered_stencil:
raise ValueError("{} is not a valid neighbor for the {} stencil".format(offset_orig,
self.staggered_stencil_name))
offset = tuple(sp.Matrix(offset) - sp.Rational(1, 2) * sp.Matrix(neighbor_vec))
idx = self.staggered_stencil.index(neighbor)
offset = tuple(offset)
if self.index_dimensions == 1: # this field stores a scalar value at each staggered position
if index is not None:
raise ValueError("Cannot specify an index for a scalar staggered field")
return Field.Access(self, offset, (idx,))
return prefactor * Field.Access(self, offset, (idx,))
else: # this field stores a vector or tensor at each staggered position
if index is None:
raise ValueError("Wrong number of indices: "
......@@ -503,34 +550,58 @@ class Field(AbstractField):
raise ValueError("Wrong number of indices: "
"Got %d, expected %d" % (len(index), self.index_dimensions - 1))
return Field.Access(self, offset, (idx, *index))
return prefactor * Field.Access(self, offset, (idx, *index))
def staggered_vector_access(self, offset):
"""Like staggered_access, but returns the entire vector/tensor stored at offset."""
assert FieldType.is_staggered(self)
if self.index_dimensions == 1:
return sp.Matrix([self.staggered_access(offset)])
elif self.index_dimensions == 2:
return sp.Matrix([self.staggered_access(offset, i) for i in range(self.index_shape[1])])
elif self.index_dimensions == 3:
return sp.Matrix([[self.staggered_access(offset, (i, k)) for k in range(self.index_shape[2])]
for i in range(self.index_shape[1])])
else:
raise NotImplementedError("staggered_vector_access is not implemented for more than 3 index dimensions")
@property
def staggered_stencil(self):
assert FieldType.is_staggered(self)
stencils = {
2: {
2: ["E", "N"], # D2Q5
4: ["E", "N", "NE", "SE"] # D2Q9
2: ["W", "S"], # D2Q5
4: ["W", "S", "SW", "NW"] # D2Q9
},
3: {
3: ["E", "N", "T"], # D3Q7
7: ["E", "N", "T", "TNE", "BNE", "TSE", "BSE "], # D3Q15
9: ["E", "N", "T", "NE", "SE", "TE", "BE", "TN", "BN"], # D3Q19
13: ["E", "N", "T", "NE", "SE", "TE", "BE", "TN", "BN", "TNE", "BNE", "TSE", "BSE"] # D3Q27
3: ["W", "S", "B"], # D3Q7
7: ["W", "S", "B", "BSW", "TSW", "BNW", "TNW"], # D3Q15
9: ["W", "S", "B", "SW", "NW", "BW", "TW", "BS", "TS"], # D3Q19
13: ["W", "S", "B", "SW", "NW", "BW", "TW", "BS", "TS", "BSW", "TSW", "BNW", "TNW"] # D3Q27
}
}
if not self.index_shape[0] in stencils[self.spatial_dimensions]:
raise ValueError("No known stencil has {} staggered points".format(self.index_shape[0]))
return stencils[self.spatial_dimensions][self.index_shape[0]]
@property
def staggered_stencil_name(self):
assert FieldType.is_staggered(self)
return "D%dQ%d" % (self.spatial_dimensions, self.index_shape[0] * 2 + 1)
def __call__(self, *args, **kwargs):
center = tuple([0] * self.spatial_dimensions)
return Field.Access(self, center)(*args, **kwargs)
def hashable_contents(self):
dth = hash(self._dtype)
return self._layout, self.shape, self.strides, dth, self.field_type, self._field_name, self.latex_name
return (self._layout,
self.shape,
self.strides,
self.field_type,
self._field_name,
self.latex_name,
self._dtype)
def __hash__(self):
return hash(self.hashable_contents())
......@@ -774,7 +845,7 @@ class Field(AbstractField):
assert FieldType.is_staggered(self._field)
neighbor = self._field.staggered_stencil[index]
neighbor = direction_string_to_offset(neighbor, self._field.spatial_dimensions)
return [(o - sp.Rational(neighbor[i], 2)) for i, o in enumerate(offsets)]
return [(o - sp.Rational(int(neighbor[i]), 2)) for i, o in enumerate(offsets)]
def _latex(self, _):
n = self._field.latex_name if self._field.latex_name else self._field.name
......
import itertools
from types import MappingProxyType
from itertools import combinations
import sympy as sp
from pystencils.assignment import Assignment
from pystencils.astnodes import Block, Conditional, LoopOverCoordinate, SympyAssignment
from pystencils.cpu.vectorization import vectorize
from pystencils.field import Field, FieldType
from pystencils.gpucuda.indexing import indexing_creator_from_params
from pystencils.simp.assignment_collection import AssignmentCollection
from pystencils.stencil import direction_string_to_offset, inverse_direction_string
from pystencils.transformations import (
loop_blocking, move_constants_before_loop, remove_conditionals_in_staggered_kernel)
......@@ -23,7 +25,9 @@ def create_kernel(assignments,
cpu_blocking=None,
gpu_indexing='block',
gpu_indexing_params=MappingProxyType({}),
use_textures_for_interpolation=True):
use_textures_for_interpolation=True,
cpu_prepend_optimizations=[],
use_auto_for_assignments=False):
"""
Creates abstract syntax tree (AST) of kernel, using a list of update equations.
......@@ -34,9 +38,9 @@ def create_kernel(assignments,
to type
iteration_slice: rectangular subset to iterate over, if not specified the complete non-ghost layer \
part of the field is iterated over
ghost_layers: if left to default, the number of necessary ghost layers is determined automatically
a single integer specifies the ghost layer count at all borders, can also be a sequence of
pairs ``[(x_lower_gl, x_upper_gl), .... ]``
ghost_layers: a single integer specifies the ghost layer count at all borders, can also be a sequence of
pairs ``[(x_lower_gl, x_upper_gl), .... ]``. These layers are excluded from the iteration.
If left to default, the number of ghost layers is determined automatically.
skip_independence_check: don't check that loop iterations are independent. This is needed e.g. for
periodicity kernel, that access the field outside the iteration bounds. Use with care!
cpu_openmp: True or number of threads for OpenMP parallelization, False for no OpenMP
......@@ -47,6 +51,7 @@ def create_kernel(assignments,
gpu_indexing: either 'block' or 'line' , or custom indexing class, see `AbstractIndexing`
gpu_indexing_params: dict with indexing parameters (constructor parameters of indexing class)
e.g. for 'block' one can specify '{'block_size': (20, 20, 10) }'
cpu_prepend_optimizations: list of extra optimizations to perform first on the AST
Returns:
abstract syntax tree (AST) object, that can either be printed as source code with `show_code` or
......@@ -84,6 +89,8 @@ def create_kernel(assignments,
ast = create_kernel(assignments, type_info=data_type, split_groups=split_groups,
iteration_slice=iteration_slice, ghost_layers=ghost_layers,
skip_independence_check=skip_independence_check)
for optimization in cpu_prepend_optimizations:
optimization(ast)
omp_collapse = None
if cpu_blocking:
omp_collapse = loop_blocking(ast, cpu_blocking)
......@@ -96,12 +103,10 @@ def create_kernel(assignments,
vectorize(ast, **cpu_vectorize_info)
else:
raise ValueError("Invalid value for cpu_vectorize_info")
return ast
elif target == 'llvm':
from pystencils.llvm import create_kernel
ast = create_kernel(assignments, type_info=data_type, split_groups=split_groups,
iteration_slice=iteration_slice, ghost_layers=ghost_layers)
return ast
elif target == 'gpu':
from pystencils.gpucuda import create_cuda_kernel
ast = create_cuda_kernel(assignments, type_info=data_type,
......@@ -109,10 +114,15 @@ def create_kernel(assignments,
iteration_slice=iteration_slice, ghost_layers=ghost_layers,
skip_independence_check=skip_independence_check,
use_textures_for_interpolation=use_textures_for_interpolation)
return ast
else:
raise ValueError("Unknown target %s. Has to be one of 'cpu', 'gpu' or 'llvm' " % (target,))
if use_auto_for_assignments:
for a in ast.atoms(SympyAssignment):
a.use_auto = True
return ast
def create_indexed_kernel(assignments,
index_fields,
......@@ -186,104 +196,132 @@ def create_indexed_kernel(assignments,
raise ValueError("Unknown target %s. Has to be either 'cpu' or 'gpu'" % (target,))
def create_staggered_kernel(staggered_field, expressions, subexpressions=(), target='cpu',
gpu_exclusive_conditions=False, **kwargs):
def create_staggered_kernel(assignments, target='cpu', gpu_exclusive_conditions=False, **kwargs):
"""Kernel that updates a staggered field.
.. image:: /img/staggered_grid.svg
For a staggered field, the first index coordinate defines the location of the staggered value.
Further index coordinates can be used to store vectors/tensors at each point.
Args:
staggered_field: field where the first index coordinate defines the location of the staggered value
can have 1 or 2 index coordinates, in case of two index coordinates at every staggered location
a vector is stored, expressions parameter has to be a sequence of sequences then
where e.g. ``f[0,0](0)`` is interpreted as value at the left cell boundary, ``f[1,0](0)`` the right cell
boundary and ``f[0,0](1)`` the southern cell boundary etc.
expressions: sequence of expressions of length dim, defining how the west, southern, (bottom) cell boundary
should be updated.
subexpressions: optional sequence of Assignments, that define subexpressions used in the main expressions
target: 'cpu' or 'gpu'
gpu_exclusive_conditions: if/else construct to have only one code block for each of 2**dim code paths
kwargs: passed directly to create_kernel, iteration slice and ghost_layers parameters are not allowed
assignments: a sequence of assignments or an AssignmentCollection.
Assignments to staggered field are processed specially, while subexpressions and assignments to
regular fields are passed through to `create_kernel`. Multiple different staggered fields can be
used, but they all need to use the same stencil (i.e. the same number of staggered points) and
shape.
target: 'cpu', 'llvm' or 'gpu'
gpu_exclusive_conditions: disable the use of multiple conditionals inside the loop. The outer layers are then
handled in an else branch.
kwargs: passed directly to create_kernel, iteration_slice and ghost_layers parameters are not allowed
Returns:
AST, see `create_kernel`
"""
assert 'iteration_slice' not in kwargs and 'ghost_layers' not in kwargs
assert staggered_field.index_dimensions in (1, 2), 'Staggered field must have one or two index dimensions'
if isinstance(assignments, AssignmentCollection):
subexpressions = assignments.subexpressions + [a for a in assignments.main_assignments
if not hasattr(a, 'lhs')
or type(a.lhs) is not Field.Access
or not FieldType.is_staggered(a.lhs.field)]
assignments = [a for a in assignments.main_assignments if hasattr(a, 'lhs')
and type(a.lhs) is Field.Access
and FieldType.is_staggered(a.lhs.field)]
else:
subexpressions = [a for a in assignments if not hasattr(a, 'lhs')
or type(a.lhs) is not Field.Access
or not FieldType.is_staggered(a.lhs.field)]
assignments = [a for a in assignments if hasattr(a, 'lhs')
and type(a.lhs) is Field.Access
and FieldType.is_staggered(a.lhs.field)]
if len(set([tuple(a.lhs.field.staggered_stencil) for a in assignments])) != 1:
raise ValueError("All assignments need to be made to staggered fields with the same stencil")
if len(set([a.lhs.field.shape for a in assignments])) != 1:
raise ValueError("All assignments need to be made to staggered fields with the same shape")
staggered_field = assignments[0].lhs.field
stencil = staggered_field.staggered_stencil
dim = staggered_field.spatial_dimensions
shape = staggered_field.shape
counters = [LoopOverCoordinate.get_loop_counter_symbol(i) for i in range(dim)]
conditions = [counters[i] < staggered_field.shape[i] - 1 for i in range(dim)]
assert len(expressions) == dim
if staggered_field.index_dimensions == 2:
assert all(len(sublist) == len(expressions[0]) for sublist in expressions), \
"If staggered field has two index dimensions expressions has to be a sequence of sequences of all the " \
"same length."
final_assignments = []
last_conditional = None
def add(condition, dimensions, as_else_block=False):
nonlocal last_conditional
if staggered_field.index_dimensions == 1:
assignments = [Assignment(staggered_field(d), expressions[d]) for d in dimensions]
a_coll = AssignmentCollection(assignments, list(subexpressions))
a_coll = a_coll.new_filtered([staggered_field(d) for d in dimensions])
elif staggered_field.index_dimensions == 2:
assert staggered_field.has_fixed_index_shape
assignments = [Assignment(staggered_field(d, i), expr)
for d in dimensions
for i, expr in enumerate(expressions[d])]
a_coll = AssignmentCollection(assignments, list(subexpressions))
a_coll = a_coll.new_filtered([staggered_field(d, i) for i in range(staggered_field.index_shape[1])
for d in dimensions])
sp_assignments = [SympyAssignment(a.lhs, a.rhs) for a in a_coll.all_assignments]
if as_else_block and last_conditional:
new_cond = Conditional(condition, Block(sp_assignments))
last_conditional.false_block = Block([new_cond])
last_conditional = new_cond
else:
last_conditional = Conditional(condition, Block(sp_assignments))
final_assignments.append(last_conditional)
if target == 'cpu' or not gpu_exclusive_conditions:
for d in range(dim):
cond = sp.And(*[conditions[i] for i in range(dim) if d != i])
add(cond, [d])
elif target == 'gpu':
full_conditions = [sp.And(*[conditions[i] for i in range(dim) if d != i]) for d in range(dim)]
for include in itertools.product(*[[1, 0]] * dim):
case_conditions = sp.And(*[c if value else sp.Not(c) for c, value in zip(full_conditions, include)])
dimensions_to_include = [i for i in range(dim) if include[i]]
if dimensions_to_include:
add(case_conditions, dimensions_to_include, True)
# find out whether any of the ghost layers is not needed
common_exclusions = set(["E", "W", "N", "S", "T", "B"][:2 * dim])
for direction in stencil:
exclusions = set(["E", "W", "N", "S", "T", "B"][:2 * dim])
for elementary_direction in direction:
exclusions.remove(inverse_direction_string(elementary_direction))
common_exclusions.intersection_update(exclusions)
ghost_layers = [[0, 0] for d in range(dim)]
for direction in common_exclusions:
direction = direction_string_to_offset(direction)
for d, s in enumerate(direction):
if s == 1:
ghost_layers[d][1] = 1
elif s == -1:
ghost_layers[d][0] = 1
ghost_layers = [(1, 0)] * dim
def condition(direction):
"""exclude those staggered points that correspond to fluxes between ghost cells"""
exclusions = set(["E", "W", "N", "S", "T", "B"][:2 * dim])
blocking = kwargs.get('cpu_blocking', None)
if blocking:
del kwargs['cpu_blocking']
for elementary_direction in direction:
exclusions.remove(inverse_direction_string(elementary_direction))
conditions = []
for e in exclusions:
if e in common_exclusions:
continue
offset = direction_string_to_offset(e)
for i, o in enumerate(offset):
if o == 1:
conditions.append(counters[i] < shape[i] - 1)
elif o == -1:
conditions.append(counters[i] > 0)
return sp.And(*conditions)
cpu_vectorize_info = kwargs.get('cpu_vectorize_info', None)
if cpu_vectorize_info:
del kwargs['cpu_vectorize_info']
openmp = kwargs.get('cpu_openmp', None)
if openmp:
del kwargs['cpu_openmp']
if gpu_exclusive_conditions:
outer_assignment = None
conditions = {direction: condition(direction) for direction in stencil}
for num_conditions in range(len(stencil)):
for combination in combinations(conditions.values(), num_conditions):
for assignment in assignments:
direction = stencil[assignment.lhs.index[0]]
if conditions[direction] in combination:
assignment = SympyAssignment(assignment.lhs, assignment.rhs)
outer_assignment = Conditional(sp.And(*combination), Block([assignment]), outer_assignment)
ast = create_kernel(final_assignments, ghost_layers=ghost_layers, target=target, **kwargs)
inner_assignment = []
for assignment in assignments:
direction = stencil[assignment.lhs.index[0]]
inner_assignment.append(SympyAssignment(assignment.lhs, assignment.rhs))
last_conditional = Conditional(sp.And(*[condition(d) for d in stencil]),
Block(inner_assignment), outer_assignment)
final_assignments = [s for s in subexpressions if not hasattr(s, 'lhs')] + \
[SympyAssignment(s.lhs, s.rhs) for s in subexpressions if hasattr(s, 'lhs')] + \
[last_conditional]
if target == 'cpu':
remove_conditionals_in_staggered_kernel(ast)
move_constants_before_loop(ast)
omp_collapse = None
if blocking:
omp_collapse = loop_blocking(ast, blocking)
if openmp:
from pystencils.cpu import add_openmp
add_openmp(ast, num_threads=openmp, collapse=omp_collapse, assume_single_outer_loop=False)
if cpu_vectorize_info is True:
vectorize(ast)
elif isinstance(cpu_vectorize_info, dict):
vectorize(ast, **cpu_vectorize_info)
if target == 'cpu':
from pystencils.cpu import create_kernel as create_kernel_cpu
ast = create_kernel_cpu(final_assignments, ghost_layers=ghost_layers, **kwargs)
else:
ast = create_kernel(final_assignments, ghost_layers=ghost_layers, target=target, **kwargs)
return ast
for assignment in assignments:
direction = stencil[assignment.lhs.index[0]]
sp_assignments = [s for s in subexpressions if not hasattr(s, 'lhs')] + \
[SympyAssignment(s.lhs, s.rhs) for s in subexpressions if hasattr(s, 'lhs')] + \
[SympyAssignment(assignment.lhs, assignment.rhs)]
last_conditional = Conditional(condition(direction), Block(sp_assignments))
final_assignments.append(last_conditional)
remove_start_conditional = any([gl[0] == 0 for gl in ghost_layers])
prepend_optimizations = [lambda ast: remove_conditionals_in_staggered_kernel(ast, remove_start_conditional),
move_constants_before_loop]
ast = create_kernel(final_assignments, ghost_layers=ghost_layers, target=target,
cpu_prepend_optimizations=prepend_optimizations, **kwargs)
return ast
......@@ -16,6 +16,11 @@ def inverse_direction(direction):
return tuple([-i for i in direction])
def inverse_direction_string(direction):
"""Returns inverse of given direction string"""
return offset_to_direction_string(inverse_direction(direction_string_to_offset(direction)))
def is_valid(stencil, max_neighborhood=None):
"""
Tests if a nested sequence is a valid stencil i.e. all the inner sequences have the same length.
......
......@@ -8,14 +8,14 @@ from types import MappingProxyType
import numpy as np
import sympy as sp
from sympy.core.numbers import ImaginaryUnit
from sympy.logic.boolalg import Boolean
from sympy.logic.boolalg import Boolean, BooleanFunction
import pystencils.astnodes as ast
import pystencils.integer_functions
from pystencils.assignment import Assignment
from pystencils.data_types import (
PointerType, StructType, TypedImaginaryUnit, TypedSymbol, cast_func, collate_types, create_type,
get_base_type, get_type_of_expression, pointer_arithmetic_func, reinterpret_cast_func)
PointerType, StructType, TypedImaginaryUnit, TypedSymbol, cast_func, collate_types, create_type, get_base_type,
get_type_of_expression, pointer_arithmetic_func, reinterpret_cast_func)
from pystencils.field import AbstractField, Field, FieldType
from pystencils.kernelparameters import FieldPointerSymbol
from pystencils.simp.assignment_collection import AssignmentCollection
......@@ -630,13 +630,18 @@ def move_constants_before_loop(ast_node):
else:
target.insert_before(child, child_to_insert_before)
elif exists_already and exists_already.rhs == child.rhs:
pass
if target.args.index(exists_already) > target.args.index(child_to_insert_before):
assert target.args.count(exists_already) == 1
assert target.args.count(child_to_insert_before) == 1
target.args.remove(exists_already)
target.insert_before(exists_already, child_to_insert_before)
else:
# this variable already exists in outer block, but with different rhs
# -> symbol has to be renamed
assert isinstance(child.lhs, TypedSymbol)
new_symbol = TypedSymbol(sp.Dummy().name, child.lhs.dtype)
target.insert_before(ast.SympyAssignment(new_symbol, child.rhs), child_to_insert_before)
target.insert_before(ast.SympyAssignment(new_symbol, child.rhs, is_const=child.is_const),
child_to_insert_before)
substitute_variables[child.lhs] = new_symbol
......@@ -846,7 +851,7 @@ class KernelConstraintsCheck:
return cast_func(
self.process_expression(rhs.args[0], type_constants=False),
rhs.dtype)
elif isinstance(rhs, sp.boolalg.BooleanFunction) or \
elif isinstance(rhs, BooleanFunction) or \
type(rhs) in pystencils.integer_functions.__dict__.values():
new_args = [self.process_expression(a, type_constants) for a in rhs.args]
types_of_expressions = [get_type_of_expression(a) for a in new_args]
......@@ -1025,7 +1030,7 @@ def insert_casts(node):
types = [get_type_of_expression(arg) for arg in args]
assert len(types) > 0
# Never ever, ever collate to float type for boolean functions!
target = collate_types(types, forbid_collation_to_float=isinstance(node.func, sp.boolalg.BooleanFunction))
target = collate_types(types, forbid_collation_to_float=isinstance(node.func, BooleanFunction))
zipped = list(zip(args, types))
if target.func is PointerType:
assert node.func is sp.Add
......@@ -1064,15 +1069,19 @@ def insert_casts(node):
return node.func(*args)
def remove_conditionals_in_staggered_kernel(function_node: ast.KernelFunction) -> None:
"""Removes conditionals of a kernel that iterates over staggered positions by splitting the loops at last element"""
def remove_conditionals_in_staggered_kernel(function_node: ast.KernelFunction, include_first=True) -> None:
"""Removes conditionals of a kernel that iterates over staggered positions by splitting the loops at last or
first and last element"""
all_inner_loops = [l for l in function_node.atoms(ast.LoopOverCoordinate) if l.is_innermost_loop]
assert len(all_inner_loops) == 1, "Transformation works only on kernels with exactly one inner loop"
inner_loop = all_inner_loops.pop()
for loop in parents_of_type(inner_loop, ast.LoopOverCoordinate, include_current=True):
cut_loop(loop, [loop.stop - 1])
if include_first:
cut_loop(loop, [loop.start + 1, loop.stop - 1])
else:
cut_loop(loop, [loop.stop - 1])
simplify_conditionals(function_node.body, loop_counter_simplification=True)
cleanup_blocks(function_node.body)
......
......@@ -11,8 +11,9 @@ def test_blocking_staggered():
f[0, 0, 0] - f[0, -1, 0],
f[0, 0, 0] - f[0, 0, -1],
]
kernel = ps.create_staggered_kernel(stag, terms, cpu_blocking=(3, 16, 8)).compile()
reference_kernel = ps.create_staggered_kernel(stag, terms).compile()
assignments = [ps.Assignment(stag.staggered_access(d), terms[i]) for i, d in enumerate(stag.staggered_stencil)]
kernel = ps.create_staggered_kernel(assignments, cpu_blocking=(3, 16, 8)).compile()
reference_kernel = ps.create_staggered_kernel(assignments).compile()
print(ps.show_code(kernel.ast))
f_arr = np.random.rand(80, 33, 19)
......
......@@ -32,8 +32,7 @@ def test_cuda_but_not_c():
})
ast = pystencils.create_kernel(assignments, 'cpu')
code = str(pystencils.show_code(ast))
assert "Not supported" in code
print(pystencils.show_code(ast))
def test_cuda_unknown():
......@@ -46,4 +45,3 @@ def test_cuda_unknown():
ast = pystencils.create_kernel(assignments, 'gpu')
code = str(pystencils.show_code(ast))
print(code)
assert "Not supported in CUDA" in code
......@@ -28,6 +28,9 @@ def test_field_basic():
assert neighbor.offsets == (-1, 1)
assert '_' in neighbor._latex('dummy')
f = Field.create_fixed_size('f', (8, 8, 2, 2, 2), index_dimensions=3)
assert f.center_vector == sp.Matrix([[[f(i, j, k) for k in range(2)] for j in range(2)] for i in range(2)])
f = Field.create_generic('f', spatial_dimensions=5, index_dimensions=2)
field_access = f[1, -1, 2, -3, 0](1, 0)
assert field_access.offsets == (1, -1, 2, -3, 0)
......@@ -135,17 +138,30 @@ def test_staggered():
j1, j2, j3 = ps.fields('j1(2), j2(2,2), j3(2,2,2) : double[2D]', field_type=FieldType.STAGGERED)
assert j1[0, 1](1) == j1.staggered_access((0, sp.Rational(1, 2)))
assert j1[1, 1](1) == j1.staggered_access((1, sp.Rational(1, 2)))
assert j1[0, 2](1) == j1.staggered_access((0, sp.Rational(3, 2)))
assert j1[0, 1](1) == j1.staggered_access("N")
assert j1[0, 0](1) == j1.staggered_access("S")
assert j1.staggered_vector_access("N") == sp.Matrix([j1.staggered_access("N")])
assert j2[0, 1](1, 1) == j2.staggered_access((0, sp.Rational(1, 2)), 1)
assert j2[0, 1](1, 1) == j2.staggered_access("N", 1)
assert j2.staggered_vector_access("N") == sp.Matrix([j2.staggered_access("N", 0), j2.staggered_access("N", 1)])
assert j3[0, 1](1, 1, 1) == j3.staggered_access((0, sp.Rational(1, 2)), (1, 1))
assert j3[0, 1](1, 1, 1) == j3.staggered_access("N", (1, 1))
assert j3.staggered_vector_access("N") == sp.Matrix([[j3.staggered_access("N", (i, j))
for j in range(2)] for i in range(2)])
# D2Q9
k = ps.fields('k(4) : double[2D]', field_type=FieldType.STAGGERED)
assert k[1, 1](2) == k.staggered_access("NE")
assert k[0, 0](2) == k.staggered_access("SW")
assert k[0, 0](3) == k.staggered_access("NW")
# sign reversed when using as flux field
r = ps.fields('r(2) : double[2D]', field_type=FieldType.STAGGERED_FLUX)
assert r[0, 0](0) == r.staggered_access("W")
assert -r[1, 0](0) == r.staggered_access("E")
......@@ -55,7 +55,8 @@ def test_staggered_iteration():
for d in range(dim):
expressions.append(sum(f[o] for o in offsets_in_plane(d, 0, dim)) -
sum(f[o] for o in offsets_in_plane(d, -1, dim)))
func_optimized = create_staggered_kernel(s, expressions).compile()
assignments = [ps.Assignment(s.staggered_access(d), expressions[i]) for i, d in enumerate(s.staggered_stencil)]
func_optimized = create_staggered_kernel(assignments).compile()
assert not func_optimized.ast.atoms(Conditional), "Loop cutting optimization did not work"
func(f=f_arr, s=s_arr_ref)
......@@ -111,8 +112,10 @@ def test_staggered_gpu():
s = ps.fields("s({dim}): double[{dim}D]".format(dim=dim), field_type=FieldType.STAGGERED)
expressions = [(f[0, 0] + f[-1, 0]) / 2,
(f[0, 0] + f[0, -1]) / 2]
kernel_ast = ps.create_staggered_kernel(s, expressions, target='gpu', gpu_exclusive_conditions=True)
assignments = [ps.Assignment(s.staggered_access(d), expressions[i]) for i, d in enumerate(s.staggered_stencil)]
kernel_ast = ps.create_staggered_kernel(assignments, target='gpu', gpu_exclusive_conditions=True)
assert len(kernel_ast.atoms(Conditional)) == 4
kernel_ast = ps.create_staggered_kernel(s, expressions, target='gpu', gpu_exclusive_conditions=False)
assignments = [ps.Assignment(s.staggered_access(d), expressions[i]) for i, d in enumerate(s.staggered_stencil)]
kernel_ast = ps.create_staggered_kernel(assignments, target='gpu', gpu_exclusive_conditions=False)
assert len(kernel_ast.atoms(Conditional)) == 3
import pystencils as ps
import numpy as np
import sympy as sp
class TestStaggeredDiffusion:
def _run(self, num_neighbors):
L = (40, 40)
D = 0.066
dt = 1
T = 100
dh = ps.create_data_handling(L, periodicity=True, default_target='cpu')
c = dh.add_array('c', values_per_cell=1)
j = dh.add_array('j', values_per_cell=num_neighbors, field_type=ps.FieldType.STAGGERED_FLUX)
x_staggered = - c[-1, 0] + c[0, 0]
y_staggered = - c[0, -1] + c[0, 0]
xy_staggered = - c[-1, -1] + c[0, 0]
xY_staggered = - c[-1, 1] + c[0, 0]
jj = j.staggered_access
divergence = -1 * D / (1 + sp.sqrt(2) if j.index_shape[0] == 4 else 1) * \
sum([jj(d) / sp.Matrix(ps.stencil.direction_string_to_offset(d)).norm() for d in j.staggered_stencil
+ [ps.stencil.inverse_direction_string(d) for d in j.staggered_stencil]])
update = [ps.Assignment(c.center, c.center + dt * divergence)]
flux = [ps.Assignment(j.staggered_access("W"), x_staggered),
ps.Assignment(j.staggered_access("S"), y_staggered)]
if j.index_shape[0] == 4:
flux += [ps.Assignment(j.staggered_access("SW"), xy_staggered),
ps.Assignment(j.staggered_access("NW"), xY_staggered)]
staggered_kernel = ps.create_staggered_kernel(flux, target=dh.default_target).compile()
div_kernel = ps.create_kernel(update, target=dh.default_target).compile()
def time_loop(steps):
sync = dh.synchronization_function([c.name])
dh.all_to_gpu()
for i in range(steps):
sync()
dh.run_kernel(staggered_kernel)
dh.run_kernel(div_kernel)
dh.all_to_cpu()
def init():
dh.fill(c.name, np.nan, ghost_layers=True, inner_ghost_layers=True)
dh.fill(c.name, 0)
dh.fill(j.name, np.nan, ghost_layers=True, inner_ghost_layers=True)
dh.cpu_arrays[c.name][L[0] // 2:L[0] // 2 + 2, L[1] // 2:L[1] // 2 + 2] = 1.0
init()
time_loop(T)
reference = np.empty(L)
for x in range(L[0]):
for y in range(L[1]):
r = np.array([x, y]) - L[0] / 2 + 0.5
reference[x, y] = (4 * np.pi * D * T)**(-dh.dim / 2) * np.exp(-np.dot(r, r) / (4 * D * T)) * (2**dh.dim)
assert np.abs(dh.gather_array(c.name) - reference).max() < 5e-4
def test_diffusion_2(self):
self._run(2)
def test_diffusion_4(self):
self._run(4)
def test_staggered_subexpressions():
dh = ps.create_data_handling((10, 10), periodicity=True, default_target='cpu')
j = dh.add_array('j', values_per_cell=2, field_type=ps.FieldType.STAGGERED)
c = sp.symbols("c")
assignments = [ps.Assignment(j.staggered_access("W"), c),
ps.Assignment(c, 1)]
ps.create_staggered_kernel(assignments, target=dh.default_target).compile()
def test_staggered_loop_cutting():
dh = ps.create_data_handling((4, 4), periodicity=True, default_target='cpu')
j = dh.add_array('j', values_per_cell=4, field_type=ps.FieldType.STAGGERED)
assignments = [ps.Assignment(j.staggered_access("SW"), 1)]
ast = ps.create_staggered_kernel(assignments, target=dh.default_target)
assert not ast.atoms(ps.astnodes.Conditional)
import sympy as sp
import numpy as np
import pystencils as ps
from pystencils import data_types
from pystencils.data_types import *
from pystencils.kernelparameters import FieldShapeSymbol
from pystencils.data_types import TypedSymbol, get_type_of_expression, VectorType, collate_types, create_type
def test_parsing():
......@@ -25,7 +25,6 @@ def test_collation():
def test_dtype_of_constants():
# Some come constants are neither of type Integer,Float,Rational and don't have args
# >>> isinstance(pi, Integer)
# False
......@@ -39,13 +38,25 @@ def test_dtype_of_constants():
def test_assumptions():
x = pystencils.fields('x: float32[3d]')
x = ps.fields('x: float32[3d]')
assert x.shape[0].is_nonnegative
assert (2 * x.shape[0]).is_nonnegative
assert (2 * x.shape[0]).is_integer
assert(TypedSymbol('a', create_type('uint64'))).is_nonnegative
assert (TypedSymbol('a', create_type('uint64'))).is_nonnegative
assert (TypedSymbol('a', create_type('uint64'))).is_positive is None
assert (TypedSymbol('a', create_type('uint64')) + 1).is_positive
assert (x.shape[0] + 1).is_real
def test_sqrt_of_integer():
"""Regression test for bug where sqrt(3) was classified as integer"""
f = ps.fields("f: [1D]")
tmp = sp.symbols("tmp")
assignments = [ps.Assignment(tmp, sp.sqrt(3)),
ps.Assignment(f[0], tmp)]
arr = np.array([1], dtype=np.float64)
kernel = ps.create_kernel(assignments).compile()
kernel(f=arr)
assert 1.7 < arr[0] < 1.8