Skip to content
Snippets Groups Projects
Commit d7332d59 authored by Martin Bauer
Browse files

OpenMP support for staggered kernels

parent 5c9ab9a8
Branches
Tags
No related merge requests found
......@@ -3,7 +3,7 @@ from functools import partial
from pystencils.astnodes import SympyAssignment, Block, LoopOverCoordinate, KernelFunction
from pystencils.transformations import resolve_buffer_accesses, resolve_field_accesses, make_loop_over_domain, \
add_types, get_optimal_loop_ordering, parse_base_pointer_info, move_constants_before_loop, \
split_inner_loop, get_base_buffer_index
split_inner_loop, get_base_buffer_index, filtered_tree_iteration
from pystencils.data_types import TypedSymbol, BasicType, StructType, create_type
from pystencils.field import Field, FieldType
import pystencils.astnodes as ast
......@@ -152,7 +152,7 @@ def create_indexed_kernel(assignments: AssignmentOrAstNodeList, index_fields, fu
return ast_node
def add_openmp(ast_node, schedule="static", num_threads=True, collapse=None):
def add_openmp(ast_node, schedule="static", num_threads=True, collapse=None, assume_single_outer_loop=True):
"""Parallelize the outer loop with OpenMP.
Args:
......@@ -160,6 +160,8 @@ def add_openmp(ast_node, schedule="static", num_threads=True, collapse=None):
schedule: OpenMP scheduling policy e.g. 'static' or 'dynamic'
num_threads: explicitly specify number of threads
collapse: number of nested loops to include in parallel region (see OpenMP collapse)
assume_single_outer_loop: if True, an exception is raised if multiple outer loops are detected; for all but
optimized staggered kernels, the single-outer-loop assumption should hold
"""
if not num_threads:
return
......@@ -170,31 +172,34 @@ def add_openmp(ast_node, schedule="static", num_threads=True, collapse=None):
wrapper_block = ast.PragmaBlock('#pragma omp parallel' + threads_clause, body.take_child_nodes())
body.append(wrapper_block)
outer_loops = [l for l in body.atoms(ast.LoopOverCoordinate) if l.is_outermost_loop]
outer_loops = [l for l in filtered_tree_iteration(body, LoopOverCoordinate, stop_type=SympyAssignment)
if l.is_outermost_loop]
assert outer_loops, "No outer loop found"
assert len(outer_loops) <= 1, "More than one outer loop found. Not clear where to put OpenMP pragma."
loop_to_parallelize = outer_loops[0]
try:
loop_range = int(loop_to_parallelize.stop - loop_to_parallelize.start)
except TypeError:
loop_range = None
if num_threads is None:
import multiprocessing
num_threads = multiprocessing.cpu_count()
if loop_range is not None and loop_range < num_threads and not collapse:
contained_loops = [l for l in loop_to_parallelize.body.args if isinstance(l, LoopOverCoordinate)]
if len(contained_loops) == 1:
contained_loop = contained_loops[0]
try:
contained_loop_range = int(contained_loop.stop - contained_loop.start)
if contained_loop_range > loop_range:
loop_to_parallelize = contained_loop
except TypeError:
pass
prefix = "#pragma omp for schedule(%s)" % (schedule,)
if collapse:
prefix += " collapse(%d)" % (collapse, )
loop_to_parallelize.prefix_lines.append(prefix)
if assume_single_outer_loop and len(outer_loops) > 1:
raise ValueError("More than one outer loop found, only one outer loop expected")
for loop_to_parallelize in outer_loops:
try:
loop_range = int(loop_to_parallelize.stop - loop_to_parallelize.start)
except TypeError:
loop_range = None
if num_threads is None:
import multiprocessing
num_threads = multiprocessing.cpu_count()
if loop_range is not None and loop_range < num_threads and not collapse:
contained_loops = [l for l in loop_to_parallelize.body.args if isinstance(l, LoopOverCoordinate)]
if len(contained_loops) == 1:
contained_loop = contained_loops[0]
try:
contained_loop_range = int(contained_loop.stop - contained_loop.start)
if contained_loop_range > loop_range:
loop_to_parallelize = contained_loop
except TypeError:
pass
prefix = "#pragma omp for schedule(%s)" % (schedule,)
if collapse:
prefix += " collapse(%d)" % (collapse, )
loop_to_parallelize.prefix_lines.append(prefix)
......@@ -245,13 +245,21 @@ def create_staggered_kernel(staggered_field, expressions, subexpressions=(), tar
cpu_vectorize_info = kwargs.get('cpu_vectorize_info', None)
if cpu_vectorize_info:
del kwargs['cpu_vectorize_info']
openmp = kwargs.get('cpu_openmp', None)
if openmp:
del kwargs['cpu_openmp']
ast = create_kernel(final_assignments, ghost_layers=ghost_layers, target=target, **kwargs)
if target == 'cpu':
remove_conditionals_in_staggered_kernel(ast)
move_constants_before_loop(ast)
omp_collapse = None
if blocking:
loop_blocking(ast, blocking)
omp_collapse = loop_blocking(ast, blocking)
if openmp:
from pystencils.cpu import add_openmp
add_openmp(ast, num_threads=openmp, collapse=omp_collapse, assume_single_outer_loop=False)
if cpu_vectorize_info is True:
vectorize(ast)
elif isinstance(cpu_vectorize_info, dict):
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment