Reimplement create_staggered_kernel

Merged Michael Kuron requested to merge staggered_kernel into master
3 files changed: +153 -81
-import itertools
 from types import MappingProxyType
 
 import sympy as sp
 
@@ -6,8 +5,10 @@ import sympy as sp
 from pystencils.assignment import Assignment
 from pystencils.astnodes import Block, Conditional, LoopOverCoordinate, SympyAssignment
 from pystencils.cpu.vectorization import vectorize
+from pystencils.field import Field, FieldType
 from pystencils.gpucuda.indexing import indexing_creator_from_params
 from pystencils.simp.assignment_collection import AssignmentCollection
+from pystencils.stencil import direction_string_to_offset, inverse_direction_string
 from pystencils.transformations import (
     loop_blocking, move_constants_before_loop, remove_conditionals_in_staggered_kernel)
@@ -23,7 +24,8 @@ def create_kernel(assignments,
                   cpu_blocking=None,
                   gpu_indexing='block',
                   gpu_indexing_params=MappingProxyType({}),
-                  use_textures_for_interpolation=True):
+                  use_textures_for_interpolation=True,
+                  cpu_prepend_optimizations=[]):
     """
     Creates abstract syntax tree (AST) of kernel, using a list of update equations.
 
@@ -47,6 +49,7 @@ def create_kernel(assignments,
         gpu_indexing: either 'block' or 'line' , or custom indexing class, see `AbstractIndexing`
         gpu_indexing_params: dict with indexing parameters (constructor parameters of indexing class)
                              e.g. for 'block' one can specify '{'block_size': (20, 20, 10) }'
+        cpu_prepend_optimizations: list of extra optimizations to perform first on the AST
 
     Returns:
         abstract syntax tree (AST) object, that can either be printed as source code with `show_code` or
@@ -84,6 +87,8 @@ def create_kernel(assignments,
         ast = create_kernel(assignments, type_info=data_type, split_groups=split_groups,
                             iteration_slice=iteration_slice, ghost_layers=ghost_layers,
                             skip_independence_check=skip_independence_check)
+        for optimization in cpu_prepend_optimizations:
+            optimization(ast)
         omp_collapse = None
         if cpu_blocking:
             omp_collapse = loop_blocking(ast, cpu_blocking)
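
Note: a minimal sketch of how the new cpu_prepend_optimizations hook could be used from user code. The smooth_ast pass below is hypothetical; only the parameter itself is introduced by this MR:

    import pystencils as ps

    def smooth_ast(ast):
        # hypothetical user-defined pass; it receives the kernel AST before
        # blocking, OpenMP and vectorization are applied
        pass

    f, g = ps.fields("f, g: double[2D]")
    update = [ps.Assignment(g[0, 0], (f[1, 0] + f[-1, 0] + f[0, 1] + f[0, -1]) / 4)]
    ast = ps.create_kernel(update, target='cpu', cpu_prepend_optimizations=[smooth_ast])
    kernel = ast.compile()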
@@ -186,104 +191,99 @@ def create_indexed_kernel(assignments,
         raise ValueError("Unknown target %s. Has to be either 'cpu' or 'gpu'" % (target,))
 
 
-def create_staggered_kernel(staggered_field, expressions, subexpressions=(), target='cpu',
-                            gpu_exclusive_conditions=False, **kwargs):
+def create_staggered_kernel(assignments, gpu_exclusive_conditions=False, **kwargs):
     """Kernel that updates a staggered field.
 
     .. image:: /img/staggered_grid.svg
 
+    For a staggered field, the first index coordinate defines the location of the staggered value.
+    Further index coordinates can be used to store vectors/tensors at each point.
+
     Args:
-        staggered_field: field where the first index coordinate defines the location of the staggered value
-                can have 1 or 2 index coordinates, in case of two index coordinates at every staggered location
-                a vector is stored, expressions parameter has to be a sequence of sequences then
-                where e.g. ``f[0,0](0)`` is interpreted as value at the left cell boundary, ``f[1,0](0)`` the right cell
-                boundary and ``f[0,0](1)`` the southern cell boundary etc.
-        expressions: sequence of expressions of length dim, defining how the west, southern, (bottom) cell boundary
-                     should be updated.
-        subexpressions: optional sequence of Assignments, that define subexpressions used in the main expressions
-        target: 'cpu' or 'gpu'
-        gpu_exclusive_conditions: if/else construct to have only one code block for each of 2**dim code paths
-        kwargs: passed directly to create_kernel, iteration slice and ghost_layers parameters are not allowed
+        assignments: a sequence of assignments or an AssignmentCollection.
+                     Assignments to staggered field are processed specially, while subexpressions and assignments to
+                     regular fields are passed through to `create_kernel`. Multiple different staggered fields can be
+                     used, but they all need to use the same stencil (i.e. the same number of staggered points) and
+                     shape.
+        gpu_exclusive_conditions: whether to use nested conditionals instead of multiple conditionals
+        kwargs: passed directly to create_kernel, iteration_slice and ghost_layers parameters are not allowed
 
     Returns:
         AST, see `create_kernel`
     """
     assert 'iteration_slice' not in kwargs and 'ghost_layers' not in kwargs
-    assert staggered_field.index_dimensions in (1, 2), 'Staggered field must have one or two index dimensions'
 
+    subexpressions = ()
+    if isinstance(assignments, AssignmentCollection):
+        subexpressions = assignments.subexpressions + [a for a in assignments.main_assignments
+                                                       if type(a.lhs) is not Field.Access
+                                                       and not FieldType.is_staggered(a.lhs.field)]
+        assignments = [a for a in assignments.main_assignments if type(a.lhs) is Field.Access
+                       and FieldType.is_staggered(a.lhs.field)]
+    else:
+        subexpressions = [a for a in assignments if type(a.lhs) is not Field.Access
+                          and not FieldType.is_staggered(a.lhs.field)]
+        assignments = [a for a in assignments if type(a.lhs) is Field.Access
+                       and FieldType.is_staggered(a.lhs.field)]
+    if len(set([tuple(a.lhs.field.staggered_stencil) for a in assignments])) != 1:
+        raise ValueError("All assignments need to be made to staggered fields with the same stencil")
+    if len(set([a.lhs.field.shape for a in assignments])) != 1:
+        raise ValueError("All assignments need to be made to staggered fields with the same shape")
+
+    staggered_field = assignments[0].lhs.field
+    stencil = staggered_field.staggered_stencil
     dim = staggered_field.spatial_dimensions
+    points = staggered_field.index_shape[0]
+    shape = staggered_field.shape
 
     counters = [LoopOverCoordinate.get_loop_counter_symbol(i) for i in range(dim)]
-    conditions = [counters[i] < staggered_field.shape[i] - 1 for i in range(dim)]
-    assert len(expressions) == dim
-    if staggered_field.index_dimensions == 2:
-        assert all(len(sublist) == len(expressions[0]) for sublist in expressions), \
-            "If staggered field has two index dimensions expressions has to be a sequence of sequences of all the " \
-            "same length."
 
     final_assignments = []
-    last_conditional = None
 
-    def add(condition, dimensions, as_else_block=False):
-        nonlocal last_conditional
-        if staggered_field.index_dimensions == 1:
-            assignments = [Assignment(staggered_field(d), expressions[d]) for d in dimensions]
-            a_coll = AssignmentCollection(assignments, list(subexpressions))
-            a_coll = a_coll.new_filtered([staggered_field(d) for d in dimensions])
-        elif staggered_field.index_dimensions == 2:
-            assert staggered_field.has_fixed_index_shape
-            assignments = [Assignment(staggered_field(d, i), expr)
-                           for d in dimensions
-                           for i, expr in enumerate(expressions[d])]
-            a_coll = AssignmentCollection(assignments, list(subexpressions))
-            a_coll = a_coll.new_filtered([staggered_field(d, i) for i in range(staggered_field.index_shape[1])
-                                          for d in dimensions])
-        sp_assignments = [SympyAssignment(a.lhs, a.rhs) for a in a_coll.all_assignments]
-        if as_else_block and last_conditional:
-            new_cond = Conditional(condition, Block(sp_assignments))
-            last_conditional.false_block = Block([new_cond])
-            last_conditional = new_cond
-        else:
-            last_conditional = Conditional(condition, Block(sp_assignments))
-            final_assignments.append(last_conditional)
+    # find out whether any of the ghost layers is not needed
+    common_exclusions = set(["E", "W", "N", "S", "T", "B"][:2 * dim])
+    for direction in stencil:
+        exclusions = set(["E", "W", "N", "S", "T", "B"][:2 * dim])
+        for elementary_direction in direction:
+            exclusions.remove(inverse_direction_string(elementary_direction))
+        common_exclusions.intersection_update(exclusions)
+    ghost_layers = [[1, 1] for d in range(dim)]
+    for direction in common_exclusions:
+        direction = direction_string_to_offset(direction)
+        for d, s in enumerate(direction):
+            if s == 1:
+                ghost_layers[d][0] = 0
+            elif s == -1:
+                ghost_layers[d][1] = 0
 
-    if target == 'cpu' or not gpu_exclusive_conditions:
-        for d in range(dim):
-            cond = sp.And(*[conditions[i] for i in range(dim) if d != i])
-            add(cond, [d])
-    elif target == 'gpu':
-        full_conditions = [sp.And(*[conditions[i] for i in range(dim) if d != i]) for d in range(dim)]
-        for include in itertools.product(*[[1, 0]] * dim):
-            case_conditions = sp.And(*[c if value else sp.Not(c) for c, value in zip(full_conditions, include)])
-            dimensions_to_include = [i for i in range(dim) if include[i]]
-            if dimensions_to_include:
-                add(case_conditions, dimensions_to_include, True)
+    def condition(direction):
+        """exclude those staggered points that correspond to fluxes between ghost cells"""
+        exclusions = set(["E", "W", "N", "S", "T", "B"][:2 * dim])
+        for elementary_direction in direction:
+            exclusions.remove(inverse_direction_string(elementary_direction))
+        conditions = []
+        for e in exclusions:
+            if e in common_exclusions:
+                continue
+            offset = direction_string_to_offset(e)
+            for i, o in enumerate(offset):
+                if o == 1:
+                    conditions.append(counters[i] < shape[i] - 1)
+                elif o == -1:
+                    conditions.append(counters[i] > 0)
+        return sp.And(*conditions)
 
-    ghost_layers = [(1, 0)] * dim
+    if gpu_exclusive_conditions:
+        raise NotImplementedError('gpu_exclusive_conditions is not implemented yet')
 
-    blocking = kwargs.get('cpu_blocking', None)
-    if blocking:
-        del kwargs['cpu_blocking']
-    cpu_vectorize_info = kwargs.get('cpu_vectorize_info', None)
-    if cpu_vectorize_info:
-        del kwargs['cpu_vectorize_info']
-    openmp = kwargs.get('cpu_openmp', None)
-    if openmp:
-        del kwargs['cpu_openmp']
-    ast = create_kernel(final_assignments, ghost_layers=ghost_layers, target=target, **kwargs)
+    for d, direction in zip(range(points), stencil):
+        sp_assignments = [SympyAssignment(assignments[d].lhs, assignments[d].rhs)] + \
+                         [SympyAssignment(s.lhs, s.rhs) for s in subexpressions]
+        last_conditional = Conditional(condition(direction), Block(sp_assignments))
+        final_assignments.append(last_conditional)
 
-    if target == 'cpu':
-        remove_conditionals_in_staggered_kernel(ast)
-        move_constants_before_loop(ast)
-        omp_collapse = None
-        if blocking:
-            omp_collapse = loop_blocking(ast, blocking)
-        if openmp:
-            from pystencils.cpu import add_openmp
-            add_openmp(ast, num_threads=openmp, collapse=omp_collapse, assume_single_outer_loop=False)
-        if cpu_vectorize_info is True:
-            vectorize(ast)
-        elif isinstance(cpu_vectorize_info, dict):
-            vectorize(ast, **cpu_vectorize_info)
+    prepend_optimizations = [remove_conditionals_in_staggered_kernel, move_constants_before_loop]
+    ast = create_kernel(final_assignments, ghost_layers=ghost_layers, cpu_prepend_optimizations=prepend_optimizations,
+                        **kwargs)
     return ast
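
Note: a minimal usage sketch of the reworked interface. The field names and flux expressions are made up for illustration; it assumes a staggered field created with FieldType.STAGGERED (whose first index coordinate enumerates the staggered points) and the Field.staggered_access helper from the staggered-field support this MR builds on:

    import pystencils as ps
    from pystencils import Assignment, FieldType
    from pystencils.kernelcreation import create_staggered_kernel

    c = ps.fields("c: double[2D]")  # cell-centered scalar
    # staggered field with one value per lower cell face (W and S in 2D)
    j = ps.Field.create_generic("j", spatial_dimensions=2, index_shape=(2,),
                                field_type=FieldType.STAGGERED)

    # one assignment per staggered point, in the order of j.staggered_stencil;
    # the ghost-layer conditionals are added by create_staggered_kernel itself
    flux = [Assignment(j.staggered_access("W"), c[0, 0] - c[-1, 0]),
            Assignment(j.staggered_access("S"), c[0, 0] - c[0, -1])]
    ast = create_staggered_kernel(flux)
    kernel = ast.compile()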