Skip to content
Snippets Groups Projects

Compare revisions

Changes are shown as if the source revision was being merged into the target revision. Learn more about comparing revisions.

Source

Select target project
No results found

Target

Select target project
No results found
Show changes
Commits on Source (6)
...@@ -5,12 +5,12 @@ from typing import Any, List, Optional, Sequence, Set, Union ...@@ -5,12 +5,12 @@ from typing import Any, List, Optional, Sequence, Set, Union
import sympy as sp import sympy as sp
import pystencils from pystencils.assignment import Assignment
from pystencils.typing.utilities import create_type, get_next_parent_of_type
from pystencils.enums import Target, Backend from pystencils.enums import Target, Backend
from pystencils.field import Field from pystencils.field import Field
from pystencils.typing.typed_sympy import FieldPointerSymbol, FieldShapeSymbol, FieldStrideSymbol, TypedSymbol
from pystencils.sympyextensions import fast_subs from pystencils.sympyextensions import fast_subs
from pystencils.typing import (create_type, get_next_parent_of_type,
FieldPointerSymbol, FieldShapeSymbol, FieldStrideSymbol, TypedSymbol, CFunction)
NodeOrExpr = Union['Node', sp.Expr] NodeOrExpr = Union['Node', sp.Expr]
...@@ -270,6 +270,9 @@ class KernelFunction(Node): ...@@ -270,6 +270,9 @@ class KernelFunction(Node):
parameters = [self.Parameter(symbol, get_fields(symbol)) for symbol in argument_symbols] parameters = [self.Parameter(symbol, get_fields(symbol)) for symbol in argument_symbols]
if hasattr(self, 'indexing'): if hasattr(self, 'indexing'):
parameters += [self.Parameter(s, []) for s in self.indexing.symbolic_parameters()] parameters += [self.Parameter(s, []) for s in self.indexing.symbolic_parameters()]
# Exclude parameters of type CFunction. These parameters will result in a C function call that will be handled
# by including a respective header file in the compute kernel. Hence, they are not free parameters.
parameters = [p for p in parameters if not isinstance(p.symbol, CFunction)]
parameters.sort(key=lambda p: p.symbol.name) parameters.sort(key=lambda p: p.symbol.name)
return parameters return parameters
...@@ -387,7 +390,7 @@ class Block(Node): ...@@ -387,7 +390,7 @@ class Block(Node):
def symbols_defined(self): def symbols_defined(self):
result = set() result = set()
for a in self.args: for a in self.args:
if isinstance(a, pystencils.Assignment): if isinstance(a, Assignment):
result.update(a.free_symbols) result.update(a.free_symbols)
else: else:
result.update(a.symbols_defined) result.update(a.symbols_defined)
...@@ -398,7 +401,7 @@ class Block(Node): ...@@ -398,7 +401,7 @@ class Block(Node):
result = set() result = set()
defined_symbols = set() defined_symbols = set()
for a in self.args: for a in self.args:
if isinstance(a, pystencils.Assignment): if isinstance(a, Assignment):
result.update(a.free_symbols) result.update(a.free_symbols)
defined_symbols.update({a.lhs}) defined_symbols.update({a.lhs})
else: else:
......
from pystencils.typing import CFunction
def get_argument_string(function_shortcut, first=''): def get_argument_string(function_shortcut, first=''):
args = function_shortcut[function_shortcut.index('[') + 1: -1] args = function_shortcut[function_shortcut.index('[') + 1: -1]
arg_string = "(" arg_string = "("
...@@ -66,10 +69,10 @@ def get_vector_instruction_set_arm(data_type='double', instruction_set='neon'): ...@@ -66,10 +69,10 @@ def get_vector_instruction_set_arm(data_type='double', instruction_set='neon'):
if instruction_set.startswith('sve') or instruction_set == 'sme': if instruction_set.startswith('sve') or instruction_set == 'sme':
base_names['stream'] = 'stnt1[0, 1]' base_names['stream'] = 'stnt1[0, 1]'
prefix = 'sv' prefix = 'sv'
suffix = f'_f{bits[data_type]}' suffix = f'_f{bits[data_type]}'
elif instruction_set == 'neon': elif instruction_set == 'neon':
prefix = 'v' prefix = 'v'
suffix = f'q_f{bits[data_type]}' suffix = f'q_f{bits[data_type]}'
if instruction_set in ['sve', 'sve2', 'sme']: if instruction_set in ['sve', 'sve2', 'sme']:
predicate = f'{prefix}whilelt_b{bits[data_type]}_u64({{loop_counter}}, {{loop_stop}})' predicate = f'{prefix}whilelt_b{bits[data_type]}_u64({{loop_counter}}, {{loop_stop}})'
...@@ -91,7 +94,6 @@ def get_vector_instruction_set_arm(data_type='double', instruction_set='neon'): ...@@ -91,7 +94,6 @@ def get_vector_instruction_set_arm(data_type='double', instruction_set='neon'):
result[intrinsic_id] = prefix + name + suffix + undef + arg_string result[intrinsic_id] = prefix + name + suffix + undef + arg_string
if instruction_set in ['sve', 'sve2', 'sme']: if instruction_set in ['sve', 'sve2', 'sme']:
from pystencils.backends.cbackend import CFunction
result['width'] = CFunction(width, "int") result['width'] = CFunction(width, "int")
result['intwidth'] = CFunction(intwidth, "int") result['intwidth'] = CFunction(intwidth, "int")
else: else:
...@@ -134,7 +136,7 @@ def get_vector_instruction_set_arm(data_type='double', instruction_set='neon'): ...@@ -134,7 +136,7 @@ def get_vector_instruction_set_arm(data_type='double', instruction_set='neon'):
result['maskStoreS'] = result['storeS'].replace(predicate, '{3}') result['maskStoreS'] = result['storeS'].replace(predicate, '{3}')
if instruction_set.startswith('sve2') and instruction_set not in ('sve256', 'sve2048'): if instruction_set.startswith('sve2') and instruction_set not in ('sve256', 'sve2048'):
result['maskStreamS'] = result['streamS'].replace(predicate, '{3}') result['maskStreamS'] = result['streamS'].replace(predicate, '{3}')
result['streamFence'] = '__dmb(15)' result['streamFence'] = '__dmb(15)'
if instruction_set == 'sme': if instruction_set == 'sme':
......
...@@ -6,7 +6,6 @@ from typing import Set ...@@ -6,7 +6,6 @@ from typing import Set
import numpy as np import numpy as np
import sympy as sp import sympy as sp
from sympy.core import S from sympy.core import S
from sympy.core.cache import cacheit
from sympy.logic.boolalg import BooleanFalse, BooleanTrue from sympy.logic.boolalg import BooleanFalse, BooleanTrue
from sympy.functions.elementary.trigonometric import TrigonometricFunction, InverseTrigonometricFunction from sympy.functions.elementary.trigonometric import TrigonometricFunction, InverseTrigonometricFunction
from sympy.functions.elementary.hyperbolic import HyperbolicFunction from sympy.functions.elementary.hyperbolic import HyperbolicFunction
...@@ -15,7 +14,7 @@ from pystencils.astnodes import KernelFunction, LoopOverCoordinate, Node ...@@ -15,7 +14,7 @@ from pystencils.astnodes import KernelFunction, LoopOverCoordinate, Node
from pystencils.cpu.vectorization import vec_all, vec_any, CachelineSize from pystencils.cpu.vectorization import vec_all, vec_any, CachelineSize
from pystencils.typing import ( from pystencils.typing import (
PointerType, VectorType, CastFunc, create_type, get_type_of_expression, PointerType, VectorType, CastFunc, create_type, get_type_of_expression,
ReinterpretCastFunc, VectorMemoryAccess, BasicType, TypedSymbol) ReinterpretCastFunc, VectorMemoryAccess, BasicType, TypedSymbol, CFunction)
from pystencils.enums import Backend from pystencils.enums import Backend
from pystencils.fast_approximation import fast_division, fast_inv_sqrt, fast_sqrt from pystencils.fast_approximation import fast_division, fast_inv_sqrt, fast_sqrt
from pystencils.functions import DivFunc, AddressOf from pystencils.functions import DivFunc, AddressOf
...@@ -166,23 +165,6 @@ class PrintNode(CustomCodeNode): ...@@ -166,23 +165,6 @@ class PrintNode(CustomCodeNode):
self.headers.append("<iostream>") self.headers.append("<iostream>")
class CFunction(TypedSymbol):
    """Typed symbol created from a ``function`` argument and a ``dtype``.

    NOTE(review): ``function`` is presumably the text of a C function call
    (the instruction-set modules pass their ``width``/``intwidth`` values
    together with ``"int"``) -- confirm against those callers.
    """

    def __new__(cls, function, dtype):
        # Instances are always created through the cached constructor below.
        return CFunction.__xnew_cached_(cls, function, dtype)

    def __new_stage2__(cls, function, dtype):
        # Second construction stage: hand over to the parent's ``__xnew__``.
        return super(CFunction, cls).__xnew__(cls, function, dtype)

    # Both aliases wrap ``__new_stage2__`` and therefore have to be assigned
    # after it is defined; the second one goes through sympy's ``cacheit``.
    __xnew__ = staticmethod(__new_stage2__)
    __xnew_cached_ = staticmethod(cacheit(__new_stage2__))

    def __getnewargs__(self):
        # Positional constructor arguments (name, dtype) for pickle/copy.
        return self.name, self.dtype

    def __getnewargs_ex__(self):
        # Same arguments in the ``(args, kwargs)`` form; no keyword arguments.
        return (self.name, self.dtype), {}
# ------------------------------------------- Printer ------------------------------------------------------------------ # ------------------------------------------- Printer ------------------------------------------------------------------
...@@ -634,7 +616,7 @@ class VectorizedCustomSympyPrinter(CustomSympyPrinter): ...@@ -634,7 +616,7 @@ class VectorizedCustomSympyPrinter(CustomSympyPrinter):
return None return None
def _print_Abs(self, expr): def _print_Abs(self, expr):
if 'abs' in self.instruction_set and isinstance(expr.args[0], VectorMemoryAccess): if isinstance(get_type_of_expression(expr), (VectorType, VectorMemoryAccess)):
return self.instruction_set['abs'].format(self._print(expr.args[0]), **self._kwargs) return self.instruction_set['abs'].format(self._print(expr.args[0]), **self._kwargs)
return super()._print_Abs(expr) return super()._print_Abs(expr)
......
from pystencils.typing import CFunction
def get_argument_string(function_shortcut, last=''): def get_argument_string(function_shortcut, last=''):
args = function_shortcut[function_shortcut.index('[') + 1: -1] args = function_shortcut[function_shortcut.index('[') + 1: -1]
arg_string = "(" arg_string = "("
...@@ -78,7 +81,6 @@ def get_vector_instruction_set_riscv(data_type='double', instruction_set='rvv'): ...@@ -78,7 +81,6 @@ def get_vector_instruction_set_riscv(data_type='double', instruction_set='rvv'):
result[intrinsic_id] = prefix + name + suffix2 + arg_string result[intrinsic_id] = prefix + name + suffix2 + arg_string
from pystencils.backends.cbackend import CFunction
result['width'] = CFunction(width, "int") result['width'] = CFunction(width, "int")
result['intwidth'] = CFunction(intwidth, "int") result['intwidth'] = CFunction(intwidth, "int")
......
...@@ -62,7 +62,7 @@ import numpy as np ...@@ -62,7 +62,7 @@ import numpy as np
from pystencils import FieldType from pystencils import FieldType
from pystencils.astnodes import LoopOverCoordinate from pystencils.astnodes import LoopOverCoordinate
from pystencils.backends.cbackend import generate_c, get_headers, CFunction from pystencils.backends.cbackend import generate_c, get_headers
from pystencils.cpu.msvc_detection import get_environment from pystencils.cpu.msvc_detection import get_environment
from pystencils.include import get_pystencils_include_path from pystencils.include import get_pystencils_include_path
from pystencils.kernel_wrapper import KernelWrapper from pystencils.kernel_wrapper import KernelWrapper
...@@ -447,8 +447,6 @@ def create_function_boilerplate_code(parameter_info, name, ast_node, insert_chec ...@@ -447,8 +447,6 @@ def create_function_boilerplate_code(parameter_info, name, ast_node, insert_chec
parameters.append(f"buffer_{field.name}.strides[{param.symbol.coordinate}] / {item_size}") parameters.append(f"buffer_{field.name}.strides[{param.symbol.coordinate}] / {item_size}")
elif param.is_field_shape: elif param.is_field_shape:
parameters.append(f"buffer_{param.field_name}.shape[{param.symbol.coordinate}]") parameters.append(f"buffer_{param.field_name}.shape[{param.symbol.coordinate}]")
elif type(param.symbol) is CFunction:
continue
else: else:
extract_function, target_type = type_mapping[param.symbol.dtype.numpy_dtype.type] extract_function, target_type = type_mapping[param.symbol.dtype.numpy_dtype.type]
pre_call_code += template_extract_scalar.format(extract_function=extract_function, pre_call_code += template_extract_scalar.format(extract_function=extract_function,
......
...@@ -72,9 +72,11 @@ def create_cuda_kernel(assignments: NodeCollection, config: CreateKernelConfig): ...@@ -72,9 +72,11 @@ def create_cuda_kernel(assignments: NodeCollection, config: CreateKernelConfig):
if len(indexed_elements) > 0: if len(indexed_elements) > 0:
common_indexed_element = get_common_indexed_element(indexed_elements) common_indexed_element = get_common_indexed_element(indexed_elements)
index = common_indexed_element.indices[0].atoms(TypedSymbol)
assert len(index) == 1, "index expressions must only contain one symbol representing the index"
indexing = indexing_creator(iteration_space=(slice(0, common_indexed_element.shape[0], 1), *iteration_space), indexing = indexing_creator(iteration_space=(slice(0, common_indexed_element.shape[0], 1), *iteration_space),
data_layout=common_field.layout) data_layout=common_field.layout)
extended_ctrs = [common_indexed_element.indices[0], *loop_counter_symbols] extended_ctrs = [index.pop(), *loop_counter_symbols]
loop_counter_assignments = indexing.get_loop_ctr_assignments(extended_ctrs) loop_counter_assignments = indexing.get_loop_ctr_assignments(extended_ctrs)
else: else:
indexing = indexing_creator(iteration_space=iteration_space, data_layout=common_field.layout) indexing = indexing_creator(iteration_space=iteration_space, data_layout=common_field.layout)
......
...@@ -276,8 +276,10 @@ def add_outer_loop_over_indexed_elements(loop_node: ast.Block) -> ast.Block: ...@@ -276,8 +276,10 @@ def add_outer_loop_over_indexed_elements(loop_node: ast.Block) -> ast.Block:
if len(indexed_elements) == 0: if len(indexed_elements) == 0:
return loop_node return loop_node
reference_element = get_common_indexed_element(indexed_elements) reference_element = get_common_indexed_element(indexed_elements)
index = reference_element.indices[0].atoms(TypedSymbol)
assert len(index) == 1, "index expressions must only contain one symbol representing the index"
new_loop = ast.LoopOverCoordinate(loop_node, 0, 0, new_loop = ast.LoopOverCoordinate(loop_node, 0, 0,
reference_element.shape[0], 1, custom_loop_ctr=reference_element.indices[0]) reference_element.shape[0], 1, custom_loop_ctr=index.pop())
return ast.Block([new_loop]) return ast.Block([new_loop])
......
...@@ -3,14 +3,14 @@ from pystencils.typing.cast_functions import (CastFunc, BooleanCastFunc, VectorM ...@@ -3,14 +3,14 @@ from pystencils.typing.cast_functions import (CastFunc, BooleanCastFunc, VectorM
from pystencils.typing.types import (is_supported_type, numpy_name_to_c, AbstractType, BasicType, VectorType, from pystencils.typing.types import (is_supported_type, numpy_name_to_c, AbstractType, BasicType, VectorType,
PointerType, StructType, create_type) PointerType, StructType, create_type)
from pystencils.typing.typed_sympy import (assumptions_from_dtype, TypedSymbol, FieldStrideSymbol, FieldShapeSymbol, from pystencils.typing.typed_sympy import (assumptions_from_dtype, TypedSymbol, FieldStrideSymbol, FieldShapeSymbol,
FieldPointerSymbol) FieldPointerSymbol, CFunction)
from pystencils.typing.utilities import (typed_symbols, get_base_type, result_type, collate_types, from pystencils.typing.utilities import (typed_symbols, get_base_type, result_type, collate_types,
get_type_of_expression, get_next_parent_of_type, parents_of_type) get_type_of_expression, get_next_parent_of_type, parents_of_type)
__all__ = ['CastFunc', 'BooleanCastFunc', 'VectorMemoryAccess', 'ReinterpretCastFunc', 'PointerArithmeticFunc', __all__ = ['CastFunc', 'BooleanCastFunc', 'VectorMemoryAccess', 'ReinterpretCastFunc', 'PointerArithmeticFunc',
'is_supported_type', 'numpy_name_to_c', 'AbstractType', 'BasicType', 'is_supported_type', 'numpy_name_to_c', 'AbstractType', 'BasicType',
'VectorType', 'PointerType', 'StructType', 'create_type', 'VectorType', 'PointerType', 'StructType', 'create_type', 'assumptions_from_dtype',
'assumptions_from_dtype', 'TypedSymbol', 'FieldStrideSymbol', 'FieldShapeSymbol', 'FieldPointerSymbol', 'TypedSymbol', 'FieldStrideSymbol', 'FieldShapeSymbol', 'FieldPointerSymbol', 'CFunction',
'typed_symbols', 'get_base_type', 'result_type', 'collate_types', 'typed_symbols', 'get_base_type', 'result_type', 'collate_types',
'get_type_of_expression', 'get_next_parent_of_type', 'parents_of_type'] 'get_type_of_expression', 'get_next_parent_of_type', 'parents_of_type']
...@@ -178,3 +178,20 @@ class FieldPointerSymbol(TypedSymbol): ...@@ -178,3 +178,20 @@ class FieldPointerSymbol(TypedSymbol):
__xnew__ = staticmethod(__new_stage2__) __xnew__ = staticmethod(__new_stage2__)
__xnew_cached_ = staticmethod(cacheit(__new_stage2__)) __xnew_cached_ = staticmethod(cacheit(__new_stage2__))
class CFunction(TypedSymbol):
    """Typed symbol created from a ``function`` argument and a ``dtype``.

    Uses the same ``__xnew__`` / ``__xnew_cached_`` construction pattern as
    ``FieldPointerSymbol`` directly above.
    NOTE(review): ``function`` is presumably the text of a C function call
    (the instruction-set modules pass their ``width``/``intwidth`` values
    together with ``"int"``) -- confirm against those callers.
    """

    def __new__(cls, function, dtype):
        # Instances are always created through the cached constructor below.
        return CFunction.__xnew_cached_(cls, function, dtype)

    def __new_stage2__(cls, function, dtype):
        # Second construction stage: hand over to the parent's ``__xnew__``.
        return super(CFunction, cls).__xnew__(cls, function, dtype)

    # Both aliases wrap ``__new_stage2__`` and therefore have to be assigned
    # after it is defined; the second one goes through ``cacheit``.
    __xnew__ = staticmethod(__new_stage2__)
    __xnew_cached_ = staticmethod(cacheit(__new_stage2__))

    def __getnewargs__(self):
        # Positional constructor arguments (name, dtype) for pickle/copy.
        return self.name, self.dtype

    def __getnewargs_ex__(self):
        # Same arguments in the ``(args, kwargs)`` form; no keyword arguments.
        return (self.name, self.dtype), {}
...@@ -64,15 +64,15 @@ def test_indexed_domain_kernel(index_size, array_size, target, dtype): ...@@ -64,15 +64,15 @@ def test_indexed_domain_kernel(index_size, array_size, target, dtype):
src = sp.IndexedBase(TypedSymbol(f"_data_{f.name}", dtype=const_pointer_type), shape=index_src) src = sp.IndexedBase(TypedSymbol(f"_data_{f.name}", dtype=const_pointer_type), shape=index_src)
dst = sp.IndexedBase(TypedSymbol(f"_data_{g.name}", dtype=pointer_type), shape=index_dst) dst = sp.IndexedBase(TypedSymbol(f"_data_{g.name}", dtype=pointer_type), shape=index_dst)
update_rule = [ps.Assignment(FieldPointerSymbol("f", dtype, const=True), src[index]), update_rule = [ps.Assignment(FieldPointerSymbol("f", dtype, const=True), src[index + 1]),
ps.Assignment(FieldPointerSymbol("g", dtype, const=False), dst[index]), ps.Assignment(FieldPointerSymbol("g", dtype, const=False), dst[index + 1]),
ps.Assignment(g.center, f.center)] ps.Assignment(g.center, f.center)]
ast = ps.create_kernel(update_rule, target=target) ast = ps.create_kernel(update_rule, target=target)
code = ps.get_code_str(ast) code = ps.get_code_str(ast)
assert f"const {dtype.c_name} * RESTRICT _data_f = (({dtype.c_name} * RESTRICT const)(_data_f[index]));" in code assert f"const {dtype.c_name} * RESTRICT _data_f = (({dtype.c_name} * RESTRICT const)(_data_f[index + 1]));" in code
assert f"{dtype.c_name} * RESTRICT _data_g = (({dtype.c_name} * RESTRICT )(_data_g[index]));" in code assert f"{dtype.c_name} * RESTRICT _data_g = (({dtype.c_name} * RESTRICT )(_data_g[index + 1]));" in code
if target == Target.CPU: if target == Target.CPU:
assert code.count("for") == f.spatial_dimensions + 1 assert code.count("for") == f.spatial_dimensions + 1
......
...@@ -8,8 +8,10 @@ import sympy as sp ...@@ -8,8 +8,10 @@ import sympy as sp
import pystencils as ps import pystencils as ps
from pystencils.backends.simd_instruction_sets import (get_cacheline_size, get_supported_instruction_sets, from pystencils.backends.simd_instruction_sets import (get_cacheline_size, get_supported_instruction_sets,
get_vector_instruction_set) get_vector_instruction_set)
from . import test_vectorization
from pystencils.enums import Target from pystencils.enums import Target
from pystencils.typing import CFunction
from . import test_vectorization
supported_instruction_sets = get_supported_instruction_sets() if get_supported_instruction_sets() else [] supported_instruction_sets = get_supported_instruction_sets() if get_supported_instruction_sets() else []
...@@ -39,7 +41,7 @@ def test_vectorisation_varying_arch(instruction_set): ...@@ -39,7 +41,7 @@ def test_vectorisation_varying_arch(instruction_set):
@pytest.mark.parametrize('dtype', ('float32', 'float64')) @pytest.mark.parametrize('dtype', ('float32', 'float64'))
@pytest.mark.parametrize('instruction_set', supported_instruction_sets) @pytest.mark.parametrize('instruction_set', supported_instruction_sets)
def test_vectorized_abs(instruction_set, dtype): def test_vectorized_abs_field(instruction_set, dtype):
"""Some instructions sets have abs, some don't. """Some instructions sets have abs, some don't.
Furthermore, the special treatment of unary minus makes this data type-sensitive too. Furthermore, the special treatment of unary minus makes this data type-sensitive too.
""" """
...@@ -58,6 +60,24 @@ def test_vectorized_abs(instruction_set, dtype): ...@@ -58,6 +60,24 @@ def test_vectorized_abs(instruction_set, dtype):
np.testing.assert_equal(np.sum(dst[1:-1, 1:-1]), 2 ** 2 * 2 ** 3) np.testing.assert_equal(np.sum(dst[1:-1, 1:-1]), 2 ** 2 * 2 ** 3)
@pytest.mark.parametrize('instruction_set', supported_instruction_sets)
def test_vectorized_abs_scalar(instruction_set):
    """Vectorised kernel that assigns ``Abs`` of a plain symbol to a field.

    The kernel is compiled for the given instruction set and called with
    ``a=-1``; the sum over ``arr[1:-1, 1:-1]`` must then equal
    ``2 ** 2 * 2 ** 3``, which is the number of cells in that slice.
    """
    target_array = np.zeros((2 ** 2 + 2, 2 ** 3 + 2), dtype="float64")
    field = ps.fields(f=target_array)

    lhs = field.center()
    rhs = sp.Abs(sp.Symbol("a"))
    assignments = [ps.Assignment(lhs, rhs)]

    vectorize_info = {'instruction_set': instruction_set}
    kernel_config = pystencils.config.CreateKernelConfig(cpu_vectorize_info=vectorize_info)
    kernel_ast = ps.create_kernel(assignments, config=kernel_config)
    kernel = kernel_ast.compile()
    kernel(f=target_array, a=-1)

    inner_sum = np.sum(target_array[1:-1, 1:-1])
    np.testing.assert_equal(inner_sum, 2 ** 2 * 2 ** 3)
@pytest.mark.parametrize('dtype', ('float32', 'float64')) @pytest.mark.parametrize('dtype', ('float32', 'float64'))
@pytest.mark.parametrize('instruction_set', supported_instruction_sets) @pytest.mark.parametrize('instruction_set', supported_instruction_sets)
@pytest.mark.parametrize('nontemporal', [False, True]) @pytest.mark.parametrize('nontemporal', [False, True])
...@@ -274,6 +294,22 @@ def test_div_and_unevaluated_expr(dtype, instruction_set): ...@@ -274,6 +294,22 @@ def test_div_and_unevaluated_expr(dtype, instruction_set):
assert 'pow' not in code assert 'pow' not in code
@pytest.mark.parametrize('dtype', ('float32', 'float64'))
@pytest.mark.parametrize('instruction_set', ('sve', 'sve2', 'sme', 'rvv'))
def test_check_ast_parameters_sizeless(dtype, instruction_set):
    """The ``width``/``intwidth`` instruction-set entries must not be kernel parameters.

    A simple 3D kernel is created for each listed instruction set, and the
    symbols returned by ``ast.get_parameters()`` are checked to contain
    neither of the two entries.
    """
    src, dst = ps.fields(f"f, g: {dtype}[3D]", layout='fzyx')
    assignments = [ps.Assignment(dst.center(), 2 * src.center())]

    vectorize_info = {'instruction_set': instruction_set}
    kernel_config = pystencils.config.CreateKernelConfig(data_type=dtype,
                                                         cpu_vectorize_info=vectorize_info)
    ast = ps.create_kernel(assignments, config=kernel_config)

    parameter_symbols = [parameter.symbol for parameter in ast.get_parameters()]
    for entry in ('width', 'intwidth'):
        assert ast.instruction_set[entry] not in parameter_symbols
# TODO this test case needs a complete rework of the vectoriser. The reason is that the vectoriser does not # TODO this test case needs a complete rework of the vectoriser. The reason is that the vectoriser does not
# TODO vectorise symbols at the moment because they could be strides or field sizes, thus involved in pointer arithmetic # TODO vectorise symbols at the moment because they could be strides or field sizes, thus involved in pointer arithmetic
# TODO This means that the vectoriser only works if fields are involved on the rhs. # TODO This means that the vectoriser only works if fields are involved on the rhs.
......