diff --git a/pystencils/backends/x86_instruction_sets.py b/pystencils/backends/x86_instruction_sets.py index f72b48266195dd1a30149325e5949723a6b9ac7e..db3dc362cf52266d90980b11d2d5c877bb491ba4 100644 --- a/pystencils/backends/x86_instruction_sets.py +++ b/pystencils/backends/x86_instruction_sets.py @@ -115,8 +115,8 @@ def get_vector_instruction_set_x86(data_type='double', instruction_set='avx'): result[intrinsic_id] = pre + "_" + name + "_" + suf + mask_suffix + arg_string result['dataTypePrefix'] = { - 'double': "_" + pre + 'd', - 'float': "_" + pre, + 'double': "_" + pre[0:2] + pre[3:] + 'd', + 'float': "_" + pre[0:2] + pre[3:], } bit_width = result['width'] * (64 if data_type == 'double' else 32) diff --git a/pystencils/cpu/vectorization.py b/pystencils/cpu/vectorization.py index 812a6163465295911f8b252a2a4eab0af7ec2417..47a529a4eaa2f86947f3031b05d22d00e45d5f86 100644 --- a/pystencils/cpu/vectorization.py +++ b/pystencils/cpu/vectorization.py @@ -7,8 +7,8 @@ from sympy.logic.boolalg import BooleanFunction, BooleanAtom import pystencils.astnodes as ast from pystencils.backends.simd_instruction_sets import get_supported_instruction_sets, get_vector_instruction_set -from pystencils.typing import ( - PointerType, TypedSymbol, VectorType, CastFunc, collate_types, get_type_of_expression, VectorMemoryAccess) +from pystencils.typing import ( BasicType, PointerType, TypedSymbol, VectorType, CastFunc, collate_types, + get_type_of_expression, VectorMemoryAccess) from pystencils.fast_approximation import fast_division, fast_inv_sqrt, fast_sqrt from pystencils.functions import DivFunc from pystencils.field import Field @@ -252,7 +252,11 @@ def insert_vector_casts(ast_node, default_float_type='double'): if isinstance(expr, VectorMemoryAccess): return VectorMemoryAccess(*expr.args[0:4], visit_expr(expr.args[4], default_type), *expr.args[5:]) elif isinstance(expr, CastFunc): - return expr # TODO here, since CastFunc might not be vector??? 
+            cast_type = expr.args[1] +            arg = visit_expr(expr.args[0], default_type) +            assert cast_type in [BasicType('float32'), BasicType('float64')], \ +                f'Vectorization cannot vectorize type {cast_type}' +            return expr.func(arg, VectorType(cast_type)) elif expr.func is sp.Abs and 'abs' not in ast_node.instruction_set: new_arg = visit_expr(expr.args[0], default_type) base_type = get_type_of_expression(expr.args[0]).base_type if type(expr.args[0]) is VectorMemoryAccess \ @@ -311,13 +315,18 @@ def insert_vector_casts(ast_node, default_float_type='double'): elif isinstance(expr, (sp.Number, TypedSymbol, BooleanAtom)): return expr else: -        raise NotImplementedError(f'Should I raise or should I return now? {expr}') +        # TODO better error string +        raise NotImplementedError(f'Should I raise or should I return now? {type(expr)} {expr}') def visit_node(node, substitution_dict, default_type='double'): substitution_dict = substitution_dict.copy() for arg in node.args: if isinstance(arg, ast.SympyAssignment): +            # TODO only if not remainder loop (? 
if no VectorAccess then remainder loop) assignment = arg + # If there is a remainder loop we do not vectorise it, thus lhs will indicate this + if isinstance(assignment.lhs, ast.ResolvedFieldAccess): + continue subs_expr = fast_subs(assignment.rhs, substitution_dict, skip=lambda e: isinstance(e, ast.ResolvedFieldAccess)) assignment.rhs = visit_expr(subs_expr, default_type) diff --git a/pystencils/typing/leaf_typing.py b/pystencils/typing/leaf_typing.py index 20f92eabdf8afe88e039d165539d00335dcd95af..aa23de65d8ba9329cedb36acf047ac81e5f414f1 100644 --- a/pystencils/typing/leaf_typing.py +++ b/pystencils/typing/leaf_typing.py @@ -21,6 +21,7 @@ from pystencils.typing.types import BasicType, create_type, PointerType from pystencils.typing.utilities import get_type_of_expression, collate_types from pystencils.typing.cast_functions import CastFunc, BooleanCastFunc from pystencils.typing.typed_sympy import TypedSymbol +from pystencils.fast_approximation import fast_sqrt, fast_division, fast_inv_sqrt from pystencils.utils import ContextVar @@ -215,6 +216,12 @@ class TypeAdder: return new_func, collated_type else: return CastFunc(new_func, collated_type), collated_type + elif isinstance(expr, (fast_sqrt, fast_division, fast_inv_sqrt)): + args_types = [self.figure_out_type(arg) for arg in expr.args] + collated_type = BasicType('float32') + new_args = [a if t.dtype_eq(collated_type) else CastFunc(a, collated_type) for a, t in args_types] + new_func = expr.func(*new_args) if new_args else expr + return CastFunc(new_func, collated_type), collated_type elif isinstance(expr, (sp.Add, sp.Mul, sp.Abs, sp.Min, sp.Max, DivFunc, sp.UnevaluatedExpr)): args_types = [self.figure_out_type(arg) for arg in expr.args] collated_type = collate_types([t for _, t in args_types]) diff --git a/pystencils_tests/test_vectorization.py b/pystencils_tests/test_vectorization.py index a7a335c7592f87df4524276dffd18b03c8f0a1c8..9c9a99c3458797255cd3f1b8f41eb9e0e46af4ca 100644 --- 
a/pystencils_tests/test_vectorization.py +++ b/pystencils_tests/test_vectorization.py @@ -4,7 +4,7 @@ import pystencils.config import sympy as sp import pystencils as ps -from pystencils.backends.simd_instruction_sets import get_supported_instruction_sets +from pystencils.backends.simd_instruction_sets import get_supported_instruction_sets, get_vector_instruction_set from pystencils.cpu.vectorization import vectorize from pystencils.fast_approximation import insert_fast_sqrts, insert_fast_divisions from pystencils.enums import Target @@ -13,10 +13,25 @@ from pystencils.transformations import replace_inner_stride_with_one supported_instruction_sets = get_supported_instruction_sets() if supported_instruction_sets: instruction_set = supported_instruction_sets[-1] + instructions = get_vector_instruction_set(instruction_set=instruction_set) else: instruction_set = None +# CI: +# FAILED pystencils_tests/test_vectorization.py::test_vectorised_pow - NotImple... +# FAILED pystencils_tests/test_vectorization.py::test_inplace_update - NotImple... 
+# FAILED pystencils_tests/test_vectorization.py::test_vectorization_fixed_size +# FAILED pystencils_tests/test_vectorization.py::test_vectorised_fast_approximations +# FAILED pystencils_tests/test_vectorization.py::test_vectorization_variable_size + +# Jan: +# test_aligned_and_nt_stores +# test_aligned_and_nt_stores_openmp +# test_hardware_query +# test_vectorised_fast_approximations + +# TODO: Skip tests if no instruction set is available def test_vector_type_propagation(instruction_set=instruction_set): a, b, c, d, e = sp.symbols("a b c d e") arr = np.ones((2 ** 2 + 2, 2 ** 3 + 2)) @@ -118,6 +133,10 @@ def test_vectorization_fixed_size(instruction_set=instruction_set): ast = ps.create_kernel(update_rule) vectorize(ast, instruction_set=instruction_set) + code = ps.get_code_str(ast) + add_instruction = instructions["+"][:instructions["+"].find("(")] + assert add_instruction in code + # print(code) func = ast.compile() dst = np.zeros_like(arr)