Skip to content
Snippets Groups Projects
Commit 4954fb73 authored by Stephan Seitz
Browse files

Apply default optimizations in `create_kernel` routines

parent 66d064ae
No related branches found
No related tags found
No related merge requests found
...@@ -8,6 +8,7 @@ from pystencils.astnodes import Block, KernelFunction, LoopOverCoordinate, Sympy ...@@ -8,6 +8,7 @@ from pystencils.astnodes import Block, KernelFunction, LoopOverCoordinate, Sympy
from pystencils.cpu.cpujit import make_python_function from pystencils.cpu.cpujit import make_python_function
from pystencils.data_types import BasicType, StructType, TypedSymbol, create_type from pystencils.data_types import BasicType, StructType, TypedSymbol, create_type
from pystencils.field import Field, FieldType from pystencils.field import Field, FieldType
from pystencils.optimizations import optims_pystencils_cpu
from pystencils.transformations import ( from pystencils.transformations import (
add_types, filtered_tree_iteration, get_base_buffer_index, get_optimal_loop_ordering, add_types, filtered_tree_iteration, get_base_buffer_index, get_optimal_loop_ordering,
make_loop_over_domain, move_constants_before_loop, parse_base_pointer_info, make_loop_over_domain, move_constants_before_loop, parse_base_pointer_info,
...@@ -18,7 +19,7 @@ AssignmentOrAstNodeList = List[Union[Assignment, ast.Node]] ...@@ -18,7 +19,7 @@ AssignmentOrAstNodeList = List[Union[Assignment, ast.Node]]
def create_kernel(assignments: AssignmentOrAstNodeList, function_name: str = "kernel", type_info='double', def create_kernel(assignments: AssignmentOrAstNodeList, function_name: str = "kernel", type_info='double',
split_groups=(), iteration_slice=None, ghost_layers=None, split_groups=(), iteration_slice=None, ghost_layers=None,
skip_independence_check=False) -> KernelFunction: skip_independence_check=False, optimizations=optims_pystencils_cpu) -> KernelFunction:
"""Creates an abstract syntax tree for a kernel function, by taking a list of update rules. """Creates an abstract syntax tree for a kernel function, by taking a list of update rules.
Loops are created according to the field accesses in the equations. Loops are created according to the field accesses in the equations.
...@@ -54,6 +55,10 @@ def create_kernel(assignments: AssignmentOrAstNodeList, function_name: str = "ke ...@@ -54,6 +55,10 @@ def create_kernel(assignments: AssignmentOrAstNodeList, function_name: str = "ke
else: else:
raise ValueError("Term has to be field access or symbol") raise ValueError("Term has to be field access or symbol")
if optimizations is None:
optimizations = optims_pystencils_cpu
assignments = [Assignment(a.lhs, sp.codegen.rewriting.optimize(a.rhs, optimizations)) for a in assignments]
fields_read, fields_written, assignments = add_types(assignments, type_info, not skip_independence_check) fields_read, fields_written, assignments = add_types(assignments, type_info, not skip_independence_check)
all_fields = fields_read.union(fields_written) all_fields = fields_read.union(fields_written)
read_only_fields = set([f.name for f in fields_read - fields_written]) read_only_fields = set([f.name for f in fields_read - fields_written])
...@@ -89,8 +94,12 @@ def create_kernel(assignments: AssignmentOrAstNodeList, function_name: str = "ke ...@@ -89,8 +94,12 @@ def create_kernel(assignments: AssignmentOrAstNodeList, function_name: str = "ke
return ast_node return ast_node
def create_indexed_kernel(assignments: AssignmentOrAstNodeList, index_fields, function_name="kernel", def create_indexed_kernel(assignments: AssignmentOrAstNodeList,
type_info=None, coordinate_names=('x', 'y', 'z')) -> KernelFunction: index_fields,
function_name="kernel",
type_info=None,
coordinate_names=('x', 'y', 'z'),
optimizations=optims_pystencils_cpu) -> KernelFunction:
""" """
Similar to :func:`create_kernel`, but here not all cells of a field are updated but only cells with Similar to :func:`create_kernel`, but here not all cells of a field are updated but only cells with
coordinates which are stored in an index field. This traversal method can e.g. be used for boundary handling. coordinates which are stored in an index field. This traversal method can e.g. be used for boundary handling.
...@@ -107,6 +116,11 @@ def create_indexed_kernel(assignments: AssignmentOrAstNodeList, index_fields, fu ...@@ -107,6 +116,11 @@ def create_indexed_kernel(assignments: AssignmentOrAstNodeList, index_fields, fu
function_name: see documentation of :func:`create_kernel` function_name: see documentation of :func:`create_kernel`
coordinate_names: name of the coordinate fields in the struct data type coordinate_names: name of the coordinate fields in the struct data type
""" """
if optimizations is None:
optimizations = optims_pystencils_cpu
assignments = [Assignment(a.lhs, sp.codegen.rewriting.optimize(a.rhs, optimizations)) for a in assignments]
fields_read, fields_written, assignments = add_types(assignments, type_info, check_independence_condition=False) fields_read, fields_written, assignments = add_types(assignments, type_info, check_independence_condition=False)
all_fields = fields_read.union(fields_written) all_fields = fields_read.union(fields_written)
......
import sympy as sp
from pystencils import Assignment
from pystencils.astnodes import Block, KernelFunction, LoopOverCoordinate, SympyAssignment from pystencils.astnodes import Block, KernelFunction, LoopOverCoordinate, SympyAssignment
from pystencils.data_types import BasicType, StructType, TypedSymbol from pystencils.data_types import BasicType, StructType, TypedSymbol
from pystencils.field import Field, FieldType from pystencils.field import Field, FieldType
from pystencils.gpucuda.cudajit import make_python_function from pystencils.gpucuda.cudajit import make_python_function
from pystencils.gpucuda.indexing import BlockIndexing from pystencils.gpucuda.indexing import BlockIndexing
from pystencils.optimizations import optims_pystencils_gpu
from pystencils.transformations import ( from pystencils.transformations import (
add_types, get_base_buffer_index, get_common_shape, parse_base_pointer_info, add_types, get_base_buffer_index, get_common_shape, parse_base_pointer_info,
resolve_buffer_accesses, resolve_field_accesses, unify_shape_symbols) resolve_buffer_accesses, resolve_field_accesses, unify_shape_symbols)
def create_cuda_kernel(assignments, function_name="kernel", type_info=None, indexing_creator=BlockIndexing, def create_cuda_kernel(assignments, function_name="kernel", type_info=None, indexing_creator=BlockIndexing,
iteration_slice=None, ghost_layers=None, skip_independence_check=False): iteration_slice=None, ghost_layers=None, skip_independence_check=False, optimizations=None):
if optimizations is None:
optimizations = optims_pystencils_gpu
assignments = [Assignment(a.lhs, sp.codegen.rewriting.optimize(a.rhs, optimizations)) for a in assignments]
fields_read, fields_written, assignments = add_types(assignments, type_info, not skip_independence_check) fields_read, fields_written, assignments = add_types(assignments, type_info, not skip_independence_check)
all_fields = fields_read.union(fields_written) all_fields = fields_read.union(fields_written)
read_only_fields = set([f.name for f in fields_read - fields_written]) read_only_fields = set([f.name for f in fields_read - fields_written])
...@@ -87,7 +96,11 @@ def create_cuda_kernel(assignments, function_name="kernel", type_info=None, inde ...@@ -87,7 +96,11 @@ def create_cuda_kernel(assignments, function_name="kernel", type_info=None, inde
def created_indexed_cuda_kernel(assignments, index_fields, function_name="kernel", type_info=None, def created_indexed_cuda_kernel(assignments, index_fields, function_name="kernel", type_info=None,
coordinate_names=('x', 'y', 'z'), indexing_creator=BlockIndexing): coordinate_names=('x', 'y', 'z'), indexing_creator=BlockIndexing, optimizations=None):
if optimizations is None:
optimizations = optims_pystencils_gpu
assignments = [Assignment(a.lhs, sp.codegen.rewriting.optimize(a.rhs, optimizations)) for a in assignments]
fields_read, fields_written, assignments = add_types(assignments, type_info, check_independence_condition=False) fields_read, fields_written, assignments = add_types(assignments, type_info, check_independence_condition=False)
all_fields = fields_read.union(fields_written) all_fields = fields_read.union(fields_written)
read_only_fields = set([f.name for f in fields_read - fields_written]) read_only_fields = set([f.name for f in fields_read - fields_written])
......
...@@ -15,7 +15,7 @@ from pystencils.transformations import ( ...@@ -15,7 +15,7 @@ from pystencils.transformations import (
def create_kernel(assignments, target='cpu', data_type="double", iteration_slice=None, ghost_layers=None, def create_kernel(assignments, target='cpu', data_type="double", iteration_slice=None, ghost_layers=None,
skip_independence_check=False, skip_independence_check=False,
cpu_openmp=False, cpu_vectorize_info=None, cpu_blocking=None, cpu_openmp=False, cpu_vectorize_info=None, cpu_blocking=None,
gpu_indexing='block', gpu_indexing_params=MappingProxyType({})): gpu_indexing='block', gpu_indexing_params=MappingProxyType({}), optimizations=None):
""" """
Creates abstract syntax tree (AST) of kernel, using a list of update equations. Creates abstract syntax tree (AST) of kernel, using a list of update equations.
...@@ -75,7 +75,7 @@ def create_kernel(assignments, target='cpu', data_type="double", iteration_slice ...@@ -75,7 +75,7 @@ def create_kernel(assignments, target='cpu', data_type="double", iteration_slice
from pystencils.cpu import add_openmp from pystencils.cpu import add_openmp
ast = create_kernel(assignments, type_info=data_type, split_groups=split_groups, ast = create_kernel(assignments, type_info=data_type, split_groups=split_groups,
iteration_slice=iteration_slice, ghost_layers=ghost_layers, iteration_slice=iteration_slice, ghost_layers=ghost_layers,
skip_independence_check=skip_independence_check) skip_independence_check=skip_independence_check, optimizations=optimizations)
omp_collapse = None omp_collapse = None
if cpu_blocking: if cpu_blocking:
omp_collapse = loop_blocking(ast, cpu_blocking) omp_collapse = loop_blocking(ast, cpu_blocking)
...@@ -92,21 +92,22 @@ def create_kernel(assignments, target='cpu', data_type="double", iteration_slice ...@@ -92,21 +92,22 @@ def create_kernel(assignments, target='cpu', data_type="double", iteration_slice
elif target == 'llvm': elif target == 'llvm':
from pystencils.llvm import create_kernel from pystencils.llvm import create_kernel
ast = create_kernel(assignments, type_info=data_type, split_groups=split_groups, ast = create_kernel(assignments, type_info=data_type, split_groups=split_groups,
iteration_slice=iteration_slice, ghost_layers=ghost_layers) iteration_slice=iteration_slice, ghost_layers=ghost_layers, optimizations=optimizations)
return ast return ast
elif target == 'gpu': elif target == 'gpu':
from pystencils.gpucuda import create_cuda_kernel from pystencils.gpucuda import create_cuda_kernel
ast = create_cuda_kernel(assignments, type_info=data_type, ast = create_cuda_kernel(assignments, type_info=data_type,
indexing_creator=indexing_creator_from_params(gpu_indexing, gpu_indexing_params), indexing_creator=indexing_creator_from_params(gpu_indexing, gpu_indexing_params),
iteration_slice=iteration_slice, ghost_layers=ghost_layers, iteration_slice=iteration_slice, ghost_layers=ghost_layers,
skip_independence_check=skip_independence_check) skip_independence_check=skip_independence_check, optimizations=optimizations)
return ast return ast
else: else:
raise ValueError("Unknown target %s. Has to be one of 'cpu', 'gpu' or 'llvm' " % (target,)) raise ValueError("Unknown target %s. Has to be one of 'cpu', 'gpu' or 'llvm' " % (target,))
def create_indexed_kernel(assignments, index_fields, target='cpu', data_type="double", coordinate_names=('x', 'y', 'z'), def create_indexed_kernel(assignments, index_fields, target='cpu', data_type="double", coordinate_names=('x', 'y', 'z'),
cpu_openmp=True, gpu_indexing='block', gpu_indexing_params=MappingProxyType({})): cpu_openmp=True, gpu_indexing='block', gpu_indexing_params=MappingProxyType({}),
optimizations=None):
""" """
Similar to :func:`create_kernel`, but here not all cells of a field are updated but only cells with Similar to :func:`create_kernel`, but here not all cells of a field are updated but only cells with
coordinates which are stored in an index field. This traversal method can e.g. be used for boundary handling. coordinates which are stored in an index field. This traversal method can e.g. be used for boundary handling.
...@@ -150,7 +151,7 @@ def create_indexed_kernel(assignments, index_fields, target='cpu', data_type="do ...@@ -150,7 +151,7 @@ def create_indexed_kernel(assignments, index_fields, target='cpu', data_type="do
from pystencils.cpu import create_indexed_kernel from pystencils.cpu import create_indexed_kernel
from pystencils.cpu import add_openmp from pystencils.cpu import add_openmp
ast = create_indexed_kernel(assignments, index_fields=index_fields, type_info=data_type, ast = create_indexed_kernel(assignments, index_fields=index_fields, type_info=data_type,
coordinate_names=coordinate_names) coordinate_names=coordinate_names, optimizations=optimizations)
if cpu_openmp: if cpu_openmp:
add_openmp(ast, num_threads=cpu_openmp) add_openmp(ast, num_threads=cpu_openmp)
return ast return ast
...@@ -160,14 +161,15 @@ def create_indexed_kernel(assignments, index_fields, target='cpu', data_type="do ...@@ -160,14 +161,15 @@ def create_indexed_kernel(assignments, index_fields, target='cpu', data_type="do
from pystencils.gpucuda import created_indexed_cuda_kernel from pystencils.gpucuda import created_indexed_cuda_kernel
idx_creator = indexing_creator_from_params(gpu_indexing, gpu_indexing_params) idx_creator = indexing_creator_from_params(gpu_indexing, gpu_indexing_params)
ast = created_indexed_cuda_kernel(assignments, index_fields, type_info=data_type, ast = created_indexed_cuda_kernel(assignments, index_fields, type_info=data_type,
coordinate_names=coordinate_names, indexing_creator=idx_creator) coordinate_names=coordinate_names, indexing_creator=idx_creator,
optimizations=optimizations)
return ast return ast
else: else:
raise ValueError("Unknown target %s. Has to be either 'cpu' or 'gpu'" % (target,)) raise ValueError("Unknown target %s. Has to be either 'cpu' or 'gpu'" % (target,))
def create_staggered_kernel(staggered_field, expressions, subexpressions=(), target='cpu', def create_staggered_kernel(staggered_field, expressions, subexpressions=(), target='cpu',
gpu_exclusive_conditions=False, **kwargs): gpu_exclusive_conditions=False, optimizations=None, **kwargs):
"""Kernel that updates a staggered field. """Kernel that updates a staggered field.
.. image:: /img/staggered_grid.svg .. image:: /img/staggered_grid.svg
...@@ -251,7 +253,12 @@ def create_staggered_kernel(staggered_field, expressions, subexpressions=(), tar ...@@ -251,7 +253,12 @@ def create_staggered_kernel(staggered_field, expressions, subexpressions=(), tar
if openmp: if openmp:
del kwargs['cpu_openmp'] del kwargs['cpu_openmp']
ast = create_kernel(final_assignments, ghost_layers=ghost_layers, target=target, **kwargs) ast = create_kernel(final_assignments,
ghost_layers=ghost_layers,
target=target,
optimizations=optimizations,
**kwargs)
if target == 'cpu': if target == 'cpu':
remove_conditionals_in_staggered_kernel(ast) remove_conditionals_in_staggered_kernel(ast)
......
...@@ -3,7 +3,7 @@ from pystencils.transformations import insert_casts ...@@ -3,7 +3,7 @@ from pystencils.transformations import insert_casts
def create_kernel(assignments, function_name="kernel", type_info=None, split_groups=(), def create_kernel(assignments, function_name="kernel", type_info=None, split_groups=(),
iteration_slice=None, ghost_layers=None): iteration_slice=None, ghost_layers=None, optimizations=None):
""" """
Creates an abstract syntax tree for a kernel function, by taking a list of update rules. Creates an abstract syntax tree for a kernel function, by taking a list of update rules.
...@@ -26,7 +26,13 @@ def create_kernel(assignments, function_name="kernel", type_info=None, split_gro ...@@ -26,7 +26,13 @@ def create_kernel(assignments, function_name="kernel", type_info=None, split_gro
:return: :class:`pystencils.ast.KernelFunction` node :return: :class:`pystencils.ast.KernelFunction` node
""" """
from pystencils.cpu import create_kernel from pystencils.cpu import create_kernel
code = create_kernel(assignments, function_name, type_info, split_groups, iteration_slice, ghost_layers) code = create_kernel(assignments,
function_name,
type_info,
split_groups,
iteration_slice,
ghost_layers,
optimizations=optimizations)
code.body = insert_casts(code.body) code.body = insert_casts(code.body)
code._compile_function = make_python_function code._compile_function = make_python_function
code._backend = 'llvm' code._backend = 'llvm'
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment