Michael Kuron · 23a8f33d · 68da7457 · b3f9ef62 · b3deafd5 · df3177af
--- a/pystencils/kernelcreation.py

+ 104

− 3
+++ b/pystencils/kernelcreation.py

+ 104

− 3
 @@ -6,8 +6,10 @@ import sympy as sp
 from pystencils.assignment import Assignment
 from pystencils.astnodes import Block, Conditional, LoopOverCoordinate, SympyAssignment
 from pystencils.cpu.vectorization import vectorize
+from pystencils.field import Field
 from pystencils.gpucuda.indexing import indexing_creator_from_params
 from pystencils.simp.assignment_collection import AssignmentCollection
+from pystencils.stencil import direction_string_to_offset, inverse_direction_string
 from pystencils.transformations import (
    loop_blocking, move_constants_before_loop, remove_conditionals_in_staggered_kernel)

 @@ -23,7 +25,8 @@ def create_kernel(assignments,
                  cpu_blocking=None,
                  gpu_indexing='block',
                  gpu_indexing_params=MappingProxyType({}),
-                  use_textures_for_interpolation=True):
+                  use_textures_for_interpolation=True,
+                  cpu_prepend_optimizations=[]):
    """
    Creates abstract syntax tree (AST) of kernel, using a list of update equations.

 @@ -47,6 +50,7 @@ def create_kernel(assignments,
        gpu_indexing: either 'block' or 'line' , or custom indexing class, see `AbstractIndexing`
        gpu_indexing_params: dict with indexing parameters (constructor parameters of indexing class)
                             e.g. for 'block' one can specify '{'block_size': (20, 20, 10) }'
+        cpu_prepend_optimizations: list of extra optimizations to perform first on the AST

    Returns:
        abstract syntax tree (AST) object, that can either be printed as source code with `show_code` or
 @@ -84,6 +88,8 @@ def create_kernel(assignments,
        ast = create_kernel(assignments, type_info=data_type, split_groups=split_groups,
                            iteration_slice=iteration_slice, ghost_layers=ghost_layers,
                            skip_independence_check=skip_independence_check)
+        for optimization in cpu_prepend_optimizations:
+            optimization(ast)
        omp_collapse = None
        if cpu_blocking:
            omp_collapse = loop_blocking(ast, cpu_blocking)
 @@ -186,8 +192,18 @@ def create_indexed_kernel(assignments,
        raise ValueError("Unknown target %s. Has to be either 'cpu' or 'gpu'" % (target,))


-def create_staggered_kernel(staggered_field, expressions, subexpressions=(), target='cpu',
-                            gpu_exclusive_conditions=False, **kwargs):
+def create_staggered_kernel(*args, **kwargs):
+    """Kernel that updates a staggered field. Dispatches to either create_staggered_kernel_1 or
+       create_staggered_kernel_2 depending on the argument types.
+    """
+    if 'staggered_field' in kwargs or type(args[0]) is Field:
+        return create_staggered_kernel_1(*args, **kwargs)
+    else:
+        return create_staggered_kernel_2(*args, **kwargs)
+
+
+def create_staggered_kernel_1(staggered_field, expressions, subexpressions=(), target='cpu',
+                              gpu_exclusive_conditions=False, **kwargs):
    """Kernel that updates a staggered field.

    .. image:: /img/staggered_grid.svg
 @@ -287,3 +303,88 @@ def create_staggered_kernel(staggered_field, expressions, subexpressions=(), tar
        elif isinstance(cpu_vectorize_info, dict):
            vectorize(ast, **cpu_vectorize_info)
    return ast
+
+
+def create_staggered_kernel_2(assignments, gpu_exclusive_conditions=False, **kwargs):
+    """Kernel that updates a staggered field.
+
+    .. image:: /img/staggered_grid.svg
+
+    For a staggered field, the first index coordinate defines the location of the staggered value.
+    Further index coordinates can be used to store vectors/tensors at each point.
+
+    Args:
+        assignments: a sequence of assignments or an AssignmentCollection with one item for each staggered grid point.
+                     When storing vectors/tensors, the number of items expected is multiplied with the number of
+                     components.
+        gpu_exclusive_conditions: whether to use nested conditionals instead of multiple conditionals
+        kwargs: passed directly to create_kernel, iteration_slice and ghost_layers parameters are not allowed
+
+    Returns:
+        AST, see `create_kernel`
+    """
+    assert 'iteration_slice' not in kwargs and 'ghost_layers' not in kwargs
+
+    subexpressions = ()
+    if isinstance(assignments, AssignmentCollection):
+        assignments = assignments.main_assignments
+        subexpressions = assignments.subexpressions
+    if len(set([a.lhs.field for a in assignments])) != 1:
+        raise ValueError("All assignments need to be made to the same staggered field")
+    staggered_field = assignments[0].lhs.field
+    dim = staggered_field.spatial_dimensions
+    points = staggered_field.index_shape[0]
+    values_per_point = sp.Mul(*staggered_field.index_shape[1:])
+    assert len(assignments) == points * values_per_point
+
+    counters = [LoopOverCoordinate.get_loop_counter_symbol(i) for i in range(dim)]
+
+    final_assignments = []
+
+    # find out whether any of the ghost layers is not needed
+    common_exclusions = set(["E", "W", "N", "S", "T", "B"][:2 * dim])
+    for direction in staggered_field.staggered_stencil:
+        exclusions = set(["E", "W", "N", "S", "T", "B"][:2 * dim])
+        for elementary_direction in direction:
+            exclusions.remove(inverse_direction_string(elementary_direction))
+        common_exclusions.intersection_update(exclusions)
+    ghost_layers = [[1, 1] for d in range(dim)]
+    for direction in common_exclusions:
+        direction = direction_string_to_offset(direction)
+        for d, s in enumerate(direction):
+            if s == 1:
+                ghost_layers[d][0] = 0
+            elif s == -1:
+                ghost_layers[d][1] = 0
+
+    def condition(direction):
+        """exclude those staggered points that correspond to fluxes between ghost cells"""
+        exclusions = set(["E", "W", "N", "S", "T", "B"][:2 * dim])
+
+        for elementary_direction in direction:
+            exclusions.remove(inverse_direction_string(elementary_direction))
+        conditions = []
+        for e in exclusions:
+            if e in common_exclusions:
+                continue
+            offset = direction_string_to_offset(e)
+            for i, o in enumerate(offset):
+                if o == 1:
+                    conditions.append(counters[i] < staggered_field.shape[i] - 1)
+                elif o == -1:
+                    conditions.append(counters[i] > 0)
+        return sp.And(*conditions)
+
+    if gpu_exclusive_conditions:
+        raise NotImplementedError('gpu_exclusive_conditions is not implemented yet')
+
+    for d, direction in zip(range(points), staggered_field.staggered_stencil):
+        sp_assignments = [SympyAssignment(assignments[d].lhs, assignments[d].rhs)] + \
+                         [SympyAssignment(s.lhs, s.rhs) for s in subexpressions]
+        last_conditional = Conditional(condition(direction), Block(sp_assignments))
+        final_assignments.append(last_conditional)
+
+    prepend_optimizations = [remove_conditionals_in_staggered_kernel, move_constants_before_loop]
+    ast = create_kernel(final_assignments, ghost_layers=ghost_layers, cpu_prepend_optimizations=prepend_optimizations,
+                        **kwargs)
+    return ast