diff --git a/pystencils/astnodes.py b/pystencils/astnodes.py index 0c49160c8ba6beb4e9aa58707ec2697d0f47663a..f50ef65a938f48c9102b97b8969fe99019b59b70 100644 --- a/pystencils/astnodes.py +++ b/pystencils/astnodes.py @@ -303,7 +303,7 @@ class SkipIteration(Node): class Block(Node): - def __init__(self, nodes: List[Node]): + def __init__(self, nodes: Union[Node, List[Node]]): super(Block, self).__init__() if not isinstance(nodes, list): nodes = [nodes] diff --git a/pystencils/cpu/kernelcreation.py b/pystencils/cpu/kernelcreation.py index c93d5ed72c7d2c965420d2a176b99e2d7e0121f9..d9c9198642d9c9b52930794012413512c43edf16 100644 --- a/pystencils/cpu/kernelcreation.py +++ b/pystencils/cpu/kernelcreation.py @@ -1,9 +1,6 @@ -from typing import Union - import sympy as sp import pystencils.astnodes as ast -from pystencils.simp.assignment_collection import AssignmentCollection from pystencils.config import CreateKernelConfig from pystencils.enums import Target, Backend from pystencils.astnodes import Block, KernelFunction, LoopOverCoordinate, SympyAssignment @@ -18,7 +15,7 @@ from pystencils.transformations import ( resolve_field_accesses, split_inner_loop) -def create_kernel(assignments: Union[NodeCollection], +def create_kernel(assignments: NodeCollection, config: CreateKernelConfig) -> KernelFunction: """Creates an abstract syntax tree for a kernel function, by taking a list of update rules. @@ -94,7 +91,7 @@ def create_kernel(assignments: Union[NodeCollection], return ast_node -def create_indexed_kernel(assignments: Union[AssignmentCollection, NodeCollection], +def create_indexed_kernel(assignments: NodeCollection, config: CreateKernelConfig) -> KernelFunction: """ Similar to :func:`create_kernel`, but here not all cells of a field are updated but only cells with @@ -115,21 +112,24 @@ def create_indexed_kernel(assignments: Union[AssignmentCollection, NodeCollectio fields_written = assignments.bound_fields fields_read = assignments.rhs_fields + all_fields = fields_read.union(fields_written) + # extract the index fields based on the name. The original index field might have been modified + index_fields = [idx_field for idx_field in index_fields if idx_field.name in [f.name for f in all_fields]] + non_index_fields = [f for f in all_fields if f not in index_fields] + spatial_coordinates = {f.spatial_dimensions for f in non_index_fields} + assert len(spatial_coordinates) == 1, f"Non-index fields do not have the same number of spatial coordinates " \ + f"Non index fields are {non_index_fields}, spatial coordinates are " \ + f"{spatial_coordinates}" + spatial_coordinates = list(spatial_coordinates)[0] + assignments = assignments.all_assignments assignments = add_types(assignments, config) - all_fields = fields_read.union(fields_written) - for index_field in index_fields: index_field.field_type = FieldType.INDEXED assert FieldType.is_indexed(index_field) assert index_field.spatial_dimensions == 1, "Index fields have to be 1D" - non_index_fields = [f for f in all_fields if f not in index_fields] - spatial_coordinates = {f.spatial_dimensions for f in non_index_fields} - assert len(spatial_coordinates) == 1, "Non-index fields do not have the same number of spatial coordinates" - spatial_coordinates = list(spatial_coordinates)[0] - def get_coordinate_symbol_assignment(name): for idx_field in index_fields: assert isinstance(idx_field.dtype, StructType), "Index fields have to have a struct data type" diff --git a/pystencils/cpu/vectorization.py b/pystencils/cpu/vectorization.py index ae4c0e38894043326548efb63d08c6d79103486c..67cb91221d3212cacb6328bd48291a2bb75a6d24 100644 --- a/pystencils/cpu/vectorization.py +++ b/pystencils/cpu/vectorization.py @@ -257,6 +257,27 @@ def insert_vector_casts(ast_node, instruction_set, default_float_type='double'): handled_functions = (sp.Add, sp.Mul, vec_any, vec_all, DivFunc, sp.Abs) + def is_scalar(expr) -> bool: + if hasattr(expr, "dtype"): + if type(expr.dtype) is VectorType: + return False + # Else branch: If expr is a CastFunc, then whether the expression + # is scalar is determined by the argument (remember: vector casts + # are not inserted yet). Therefore, we must recurse into the args of + # expr below. Otherwise, this expression is atomic and in that case + # it is assumed to be scalar below. + + if isinstance(expr, ast.ResolvedFieldAccess): + # expr.field is not in expr.args + return is_scalar(expr.field) + elif isinstance(expr, (vec_any, vec_all)): + return True + + if not hasattr(expr, "args"): + return True + + return all(is_scalar(arg) for arg in expr.args) + # TODO Vectorization Revamp: get rid of default_type def visit_expr(expr, default_type='double', force_vectorize=False): if isinstance(expr, VectorMemoryAccess): @@ -369,15 +390,15 @@ def insert_vector_casts(ast_node, instruction_set, default_float_type='double'): # If either side contains a vectorized subexpression, both sides # must be fully vectorized. - lhs_type = get_type_of_expression(assignment.lhs) - rhs_type = get_type_of_expression(subs_expr) - lhs_vectorized = type(lhs_type) is VectorType - rhs_vectorized = type(rhs_type) is VectorType + lhs_scalar = is_scalar(assignment.lhs) + rhs_scalar = is_scalar(subs_expr) - assignment.rhs = visit_expr(subs_expr, default_type, force_vectorize=lhs_vectorized or rhs_vectorized) + assignment.rhs = visit_expr(subs_expr, default_type, force_vectorize=not (lhs_scalar and rhs_scalar)) if isinstance(assignment.lhs, TypedSymbol): - if rhs_vectorized and not lhs_vectorized: + if lhs_scalar and not rhs_scalar: + lhs_type = get_type_of_expression(assignment.lhs) + rhs_type = get_type_of_expression(assignment.rhs) new_lhs_type = VectorType(lhs_type, rhs_type.width) new_lhs = TypedSymbol(assignment.lhs.name, new_lhs_type) substitution_dict[assignment.lhs] = new_lhs diff --git a/pystencils/gpu/indexing.py b/pystencils/gpu/indexing.py index 27cb39e454913db4faab579dc366a879c1eb04b7..a52cf2a3ff536836c243f1da5dae81f708210f6a 100644 --- a/pystencils/gpu/indexing.py +++ b/pystencils/gpu/indexing.py @@ -398,7 +398,8 @@ def _loop_ctr_assignments(loop_counter_symbols, coordinates, iteration_space): loop_ctr_assignments = [] for loop_counter, coordinate, iter_slice in zip(loop_counter_symbols, coordinates, iteration_space): if isinstance(iter_slice, slice) and iter_slice.step > 1: - loop_ctr_assignments.append(SympyAssignment(loop_counter, coordinate * iter_slice.step)) + offset = (iter_slice.step * iter_slice.start) - iter_slice.start + loop_ctr_assignments.append(SympyAssignment(loop_counter, coordinate * iter_slice.step - offset)) elif iter_slice.start == iter_slice.stop: loop_ctr_assignments.append(SympyAssignment(loop_counter, 0)) else: diff --git a/pystencils/gpu/kernelcreation.py b/pystencils/gpu/kernelcreation.py index c0d6e71d05e3a2250249224e3a12e3daa30978d0..7f420ca85570828b07065c4d305549d659261a2c 100644 --- a/pystencils/gpu/kernelcreation.py +++ b/pystencils/gpu/kernelcreation.py @@ -1,5 +1,3 @@ -from typing import Union - from pystencils.astnodes import Block, KernelFunction, LoopOverCoordinate, SympyAssignment from pystencils.config import CreateKernelConfig from pystencils.typing import StructType, TypedSymbol @@ -9,15 +7,13 @@ from pystencils.enums import Target, Backend from pystencils.gpu.gpujit import make_python_function from pystencils.node_collection import NodeCollection from pystencils.gpu.indexing import indexing_creator_from_params -from pystencils.simp.assignment_collection import AssignmentCollection from pystencils.slicing import normalize_slice from pystencils.transformations import ( get_base_buffer_index, get_common_field, parse_base_pointer_info, resolve_buffer_accesses, resolve_field_accesses, unify_shape_symbols) -def create_cuda_kernel(assignments: Union[AssignmentCollection, NodeCollection], - config: CreateKernelConfig): +def create_cuda_kernel(assignments: NodeCollection, config: CreateKernelConfig): function_name = config.function_name indexing_creator = indexing_creator_from_params(config.gpu_indexing, config.gpu_indexing_params) @@ -114,33 +110,34 @@ def create_cuda_kernel(assignments: Union[AssignmentCollection, NodeCollection], return ast -def created_indexed_cuda_kernel(assignments: Union[AssignmentCollection, NodeCollection], - config: CreateKernelConfig): +def created_indexed_cuda_kernel(assignments: NodeCollection, config: CreateKernelConfig): index_fields = config.index_fields function_name = config.function_name coordinate_names = config.coordinate_names indexing_creator = indexing_creator_from_params(config.gpu_indexing, config.gpu_indexing_params) - fields_written = assignments.bound_fields fields_read = assignments.rhs_fields - assignments = assignments.all_assignments - - assignments = add_types(assignments, config) all_fields = fields_read.union(fields_written) read_only_fields = set([f.name for f in fields_read - fields_written]) + # extract the index fields based on the name. The original index field might have been modified + index_fields = [idx_field for idx_field in index_fields if idx_field.name in [f.name for f in all_fields]] + non_index_fields = [f for f in all_fields if f not in index_fields] + spatial_coordinates = {f.spatial_dimensions for f in non_index_fields} + assert len(spatial_coordinates) == 1, f"Non-index fields do not have the same number of spatial coordinates " \ + f"Non index fields are {non_index_fields}, spatial coordinates are " \ + f"{spatial_coordinates}" + spatial_coordinates = list(spatial_coordinates)[0] + + assignments = assignments.all_assignments + assignments = add_types(assignments, config) for index_field in index_fields: index_field.field_type = FieldType.INDEXED assert FieldType.is_indexed(index_field) assert index_field.spatial_dimensions == 1, "Index fields have to be 1D" - non_index_fields = [f for f in all_fields if f not in index_fields] - spatial_coordinates = {f.spatial_dimensions for f in non_index_fields} - assert len(spatial_coordinates) == 1, "Non-index fields do not have the same number of spatial coordinates" - spatial_coordinates = list(spatial_coordinates)[0] - def get_coordinate_symbol_assignment(name): for ind_f in index_fields: assert isinstance(ind_f.dtype, StructType), "Index fields have to have a struct data type" diff --git a/pystencils/node_collection.py b/pystencils/node_collection.py index 352406566e47c9279bd486410d34f6b6b9bfff53..e0af05fd055bf8df0443c7f9401e352138f6b303 100644 --- a/pystencils/node_collection.py +++ b/pystencils/node_collection.py @@ -1,34 +1,42 @@ -from collections.abc import Iterable from typing import Any, Dict, List, Union, Optional, Set import sympy import sympy as sp -from sympy.codegen.ast import Assignment, AddAugmentedAssignment from sympy.codegen.rewriting import ReplaceOptim, optimize -from pystencils.astnodes import Block, Node, SympyAssignment +from pystencils.assignment import Assignment, AddAugmentedAssignment +import pystencils.astnodes as ast from pystencils.backends.cbackend import CustomCodeNode from pystencils.functions import DivFunc from pystencils.simp import AssignmentCollection class NodeCollection: - def __init__(self, assignments: List[Union[Node, Assignment]], + def __init__(self, assignments: List[Union[ast.Node, Assignment]], simplification_hints: Optional[Dict[str, Any]] = None, bound_fields: Set[sp.Symbol] = None, rhs_fields: Set[sp.Symbol] = None): - nodes = list() - assignments = [assignments, ] if not isinstance(assignments, Iterable) else assignments - for assignment in assignments: - if isinstance(assignment, Assignment): - nodes.append(SympyAssignment(assignment.lhs, assignment.rhs)) - elif isinstance(assignment, AddAugmentedAssignment): - nodes.append(SympyAssignment(assignment.lhs, assignment.lhs + assignment.rhs)) - elif isinstance(assignment, Node): - nodes.append(assignment) + + def visit(obj): + if isinstance(obj, (list, tuple)): + return [visit(e) for e in obj] + if isinstance(obj, Assignment): + return ast.SympyAssignment(obj.lhs, obj.rhs) + elif isinstance(obj, AddAugmentedAssignment): + return ast.SympyAssignment(obj.lhs, obj.lhs + obj.rhs) + elif isinstance(obj, ast.SympyAssignment): + return obj + elif isinstance(obj, ast.Conditional): + true_block = visit(obj.true_block) + false_block = None if obj.false_block is None else visit(obj.false_block) + return ast.Conditional(obj.condition_expr, true_block=true_block, false_block=false_block) + elif isinstance(obj, ast.Block): + return ast.Block([visit(e) for e in obj.args]) + elif isinstance(obj, ast.Node) and not isinstance(obj, ast.LoopOverCoordinate): + return obj else: - raise ValueError(f"Unknown node in the AssignmentCollection: {assignment}") + raise ValueError("Invalid object in the List of Assignments " + str(type(obj))) - self.all_assignments = nodes + self.all_assignments = visit(assignments) self.simplification_hints = simplification_hints if simplification_hints else {} self.bound_fields = bound_fields if bound_fields else {} self.rhs_fields = rhs_fields if rhs_fields else {} @@ -57,13 +65,13 @@ class NodeCollection: def visitor(node): if isinstance(node, CustomCodeNode): return node - elif isinstance(node, Block): + elif isinstance(node, ast.Block): return node.func([visitor(child) for child in node.args]) - elif isinstance(node, SympyAssignment): + elif isinstance(node, ast.SympyAssignment): new_lhs = visitor(node.lhs) new_rhs = visitor(node.rhs) return node.func(new_lhs, new_rhs, node.is_const, node.use_auto) - elif isinstance(node, Node): + elif isinstance(node, ast.Node): return node.func(*[visitor(child) for child in node.args]) elif isinstance(node, sympy.Basic): return optimize(node, sympy_optimisations) diff --git a/pystencils_tests/test_modulo.py b/pystencils_tests/test_modulo.py index 5a32acf5c3763e1f427e136ab12034b8c817c426..66a3772c477808b08101cae930f4847fc55b4732 100644 --- a/pystencils_tests/test_modulo.py +++ b/pystencils_tests/test_modulo.py @@ -1,16 +1,23 @@ import pytest +import numpy as np import sympy as sp import pystencils as ps from pystencils.astnodes import LoopOverCoordinate, Conditional, Block, SympyAssignment +SLICE_LIST = [False, + ps.make_slice[1:-1:2, 1:-1:2], + ps.make_slice[2:-1:2, 4:-1:7], + ps.make_slice[4:-1:2, 5:-1:2], + ps.make_slice[3:-1:4, 7:-1:3]] + @pytest.mark.parametrize('target', [ps.Target.CPU, ps.Target.GPU]) -@pytest.mark.parametrize('iteration_slice', [False, True]) +@pytest.mark.parametrize('iteration_slice', SLICE_LIST) def test_mod(target, iteration_slice): if target == ps.Target.GPU: pytest.importorskip("cupy") - dh = ps.create_data_handling(domain_size=(5, 5), periodicity=True, default_target=target) + dh = ps.create_data_handling(domain_size=(51, 51), periodicity=True, default_target=target) loop_ctrs = [LoopOverCoordinate.get_loop_counter_symbol(i) for i in range(dh.dim)] cond = [sp.Eq(sp.Mod(loop_ctrs[i], 2), 1) for i in range(dh.dim)] @@ -20,7 +27,6 @@ def test_mod(target, iteration_slice): eq_list = [SympyAssignment(field.center, 1.0)] if iteration_slice: - iteration_slice = ps.make_slice[1:-1:2, 1:-1:2] config = ps.CreateKernelConfig(target=dh.default_target, iteration_slice=iteration_slice) assign = eq_list else: @@ -41,9 +47,5 @@ def test_mod(target, iteration_slice): result = dh.gather_array(field.name, ghost_layers=True) - for x in range(result.shape[0]): - for y in range(result.shape[1]): - if x % 2 == 1 and y % 2 == 1: - assert result[x, y] == 1.0 - else: - assert result[x, y] == 0.0 + assert np.all(result[iteration_slice] == 1.0) + diff --git a/pystencils_tests/test_vectorization.py b/pystencils_tests/test_vectorization.py index 83320a91f60677e05ab484f7e83b428df47adc64..3ab4bd39090520812042a9434ef6d37e20d13e99 100644 --- a/pystencils_tests/test_vectorization.py +++ b/pystencils_tests/test_vectorization.py @@ -20,7 +20,7 @@ else: # TODO: Skip tests if no instruction set is available and check all codes if they are really vectorised ! -def test_vector_type_propagation(instruction_set=instruction_set): +def test_vector_type_propagation1(instruction_set=instruction_set): a, b, c, d, e = sp.symbols("a b c d e") arr = np.ones((2 ** 2 + 2, 2 ** 3 + 2)) arr *= 10.0 @@ -41,6 +41,49 @@ def test_vector_type_propagation(instruction_set=instruction_set): np.testing.assert_equal(dst[1:-1, 1:-1], 2 * 10.0 + 3) +def test_vector_type_propagation2(instruction_set=instruction_set): + data_type = 'float32' + assume_aligned = True + assume_inner_stride_one = True + assume_sufficient_line_padding = True + + domain_size = (24, 24) + dh = ps.create_data_handling(domain_size, default_target=Target.CPU) + + # fields + g = dh.add_array("g", values_per_cell=1, dtype=data_type, alignment=True, ghost_layers=0) + f = dh.add_array("f", values_per_cell=1, dtype=data_type, alignment=True, ghost_layers=0) + dh.fill("g", 1.0, ghost_layers=True) + dh.fill("f", 0.5, ghost_layers=True) + + collision = [ + ps.Assignment(sp.Symbol("b"), sp.sqrt(sp.Symbol("a") * (1 - (1 - f.center)**2))), + ps.Assignment(g.center, sp.Symbol("b")) + ] + + config = ps.CreateKernelConfig( + target=ps.Target.CPU, + data_type=data_type, + default_number_float=data_type, + cpu_vectorize_info={ + 'assume_aligned': assume_aligned, + 'assume_inner_stride_one': assume_inner_stride_one, + 'assume_sufficient_line_padding': assume_sufficient_line_padding, + }, + ghost_layers=0 + ) + + ast = ps.create_kernel(collision, config=config) + print(ast) + code = ps.get_code_str(ast) + print(code) + + kernel = ast.compile() + dh.run_kernel(kernel, a=4) + g_arr = dh.cpu_arrays['g'] + np.testing.assert_allclose(g_arr, np.full_like(g_arr, np.sqrt(3))) + + def test_vectorize_moved_constants1(instruction_set=instruction_set): opt = {'instruction_set': instruction_set, 'assume_inner_stride_one': True}