Skip to content
Snippets Groups Projects

Compare revisions

Changes are shown as if the source revision was being merged into the target revision. Learn more about comparing revisions.

Source

Select target project
No results found

Target

Select target project
No results found
Show changes
Commits on Source (2)
...@@ -428,7 +428,7 @@ class LoopOverCoordinate(Node): ...@@ -428,7 +428,7 @@ class LoopOverCoordinate(Node):
LOOP_COUNTER_NAME_PREFIX = "ctr" LOOP_COUNTER_NAME_PREFIX = "ctr"
BLOCK_LOOP_COUNTER_NAME_PREFIX = "_blockctr" BLOCK_LOOP_COUNTER_NAME_PREFIX = "_blockctr"
def __init__(self, body, coordinate_to_loop_over, start, stop, step=1, is_block_loop=False): def __init__(self, body, coordinate_to_loop_over, start, stop, step=1, is_block_loop=False, custom_loop_ctr=None):
super(LoopOverCoordinate, self).__init__(parent=None) super(LoopOverCoordinate, self).__init__(parent=None)
self.body = body self.body = body
body.parent = self body.parent = self
...@@ -439,10 +439,11 @@ class LoopOverCoordinate(Node): ...@@ -439,10 +439,11 @@ class LoopOverCoordinate(Node):
self.body.parent = self self.body.parent = self
self.prefix_lines = [] self.prefix_lines = []
self.is_block_loop = is_block_loop self.is_block_loop = is_block_loop
self.custom_loop_ctr = custom_loop_ctr
def new_loop_with_different_body(self, new_body): def new_loop_with_different_body(self, new_body):
result = LoopOverCoordinate(new_body, self.coordinate_to_loop_over, self.start, self.stop, result = LoopOverCoordinate(new_body, self.coordinate_to_loop_over, self.start, self.stop,
self.step, self.is_block_loop) self.step, self.is_block_loop, self.custom_loop_ctr)
result.prefix_lines = [prefix_line for prefix_line in self.prefix_lines] result.prefix_lines = [prefix_line for prefix_line in self.prefix_lines]
return result return result
...@@ -505,10 +506,13 @@ class LoopOverCoordinate(Node): ...@@ -505,10 +506,13 @@ class LoopOverCoordinate(Node):
@property @property
def loop_counter_name(self): def loop_counter_name(self):
if self.is_block_loop: if self.custom_loop_ctr:
return LoopOverCoordinate.get_block_loop_counter_name(self.coordinate_to_loop_over) return self.custom_loop_ctr.name
else: else:
return LoopOverCoordinate.get_loop_counter_name(self.coordinate_to_loop_over) if self.is_block_loop:
return LoopOverCoordinate.get_block_loop_counter_name(self.coordinate_to_loop_over)
else:
return LoopOverCoordinate.get_loop_counter_name(self.coordinate_to_loop_over)
@staticmethod @staticmethod
def is_loop_counter_symbol(symbol): def is_loop_counter_symbol(symbol):
...@@ -532,10 +536,13 @@ class LoopOverCoordinate(Node): ...@@ -532,10 +536,13 @@ class LoopOverCoordinate(Node):
@property @property
def loop_counter_symbol(self): def loop_counter_symbol(self):
if self.is_block_loop: if self.custom_loop_ctr:
return self.get_block_loop_counter_symbol(self.coordinate_to_loop_over) return self.custom_loop_ctr
else: else:
return self.get_loop_counter_symbol(self.coordinate_to_loop_over) if self.is_block_loop:
return self.get_block_loop_counter_symbol(self.coordinate_to_loop_over)
else:
return self.get_loop_counter_symbol(self.coordinate_to_loop_over)
@property @property
def is_outermost_loop(self): def is_outermost_loop(self):
......
...@@ -248,12 +248,13 @@ class CBackend: ...@@ -248,12 +248,13 @@ class CBackend:
return f"{node.pragma_line}\n{self._print_Block(node)}" return f"{node.pragma_line}\n{self._print_Block(node)}"
def _print_LoopOverCoordinate(self, node): def _print_LoopOverCoordinate(self, node):
counter_symbol = node.loop_counter_name counter_name = node.loop_counter_name
start = f"int64_t {counter_symbol} = {self.sympy_printer.doprint(node.start)}" counter_dtype = node.loop_counter_symbol.dtype.c_name
condition = f"{counter_symbol} < {self.sympy_printer.doprint(node.stop)}" start = f"{counter_dtype} {counter_name} = {self.sympy_printer.doprint(node.start)}"
update = f"{counter_symbol} += {self.sympy_printer.doprint(node.step)}" condition = f"{counter_name} < {self.sympy_printer.doprint(node.stop)}"
update = f"{counter_name} += {self.sympy_printer.doprint(node.step)}"
loop_str = f"for ({start}; {condition}; {update})" loop_str = f"for ({start}; {condition}; {update})"
self._kwargs['loop_counter'] = counter_symbol self._kwargs['loop_counter'] = counter_name
self._kwargs['loop_stop'] = node.stop self._kwargs['loop_stop'] = node.stop
prefix = "\n".join(node.prefix_lines) prefix = "\n".join(node.prefix_lines)
...@@ -497,7 +498,11 @@ class CustomSympyPrinter(CCodePrinter): ...@@ -497,7 +498,11 @@ class CustomSympyPrinter(CCodePrinter):
return expr.to_c(self._print) return expr.to_c(self._print)
if isinstance(expr, ReinterpretCastFunc): if isinstance(expr, ReinterpretCastFunc):
arg, data_type = expr.args arg, data_type = expr.args
return f"*(({self._print(PointerType(data_type, restrict=False))})(& {self._print(arg)}))" if isinstance(data_type, PointerType):
const_str = "const" if data_type.const else ""
return f"(({const_str} {self._print(data_type.base_type)} *)(& {self._print(arg)}))"
else:
return f"*(({self._print(PointerType(data_type, restrict=False))})(& {self._print(arg)}))"
elif isinstance(expr, AddressOf): elif isinstance(expr, AddressOf):
assert len(expr.args) == 1, "address_of must only have one argument" assert len(expr.args) == 1, "address_of must only have one argument"
return f"&({self._print(expr.args[0])})" return f"&({self._print(expr.args[0])})"
......
...@@ -11,7 +11,8 @@ from pystencils.field import Field, FieldType ...@@ -11,7 +11,8 @@ from pystencils.field import Field, FieldType
from pystencils.node_collection import NodeCollection from pystencils.node_collection import NodeCollection
from pystencils.transformations import ( from pystencils.transformations import (
filtered_tree_iteration, iterate_loops_by_depth, get_base_buffer_index, get_optimal_loop_ordering, filtered_tree_iteration, iterate_loops_by_depth, get_base_buffer_index, get_optimal_loop_ordering,
make_loop_over_domain, move_constants_before_loop, parse_base_pointer_info, resolve_buffer_accesses, make_loop_over_domain, add_outer_loop_over_indexed_elements,
move_constants_before_loop, parse_base_pointer_info, resolve_buffer_accesses,
resolve_field_accesses, split_inner_loop) resolve_field_accesses, split_inner_loop)
...@@ -53,6 +54,8 @@ def create_kernel(assignments: NodeCollection, ...@@ -53,6 +54,8 @@ def create_kernel(assignments: NodeCollection,
loop_order = get_optimal_loop_ordering(fields_without_buffers) loop_order = get_optimal_loop_ordering(fields_without_buffers)
loop_node, ghost_layer_info = make_loop_over_domain(body, iteration_slice=iteration_slice, loop_node, ghost_layer_info = make_loop_over_domain(body, iteration_slice=iteration_slice,
ghost_layers=ghost_layers, loop_order=loop_order) ghost_layers=ghost_layers, loop_order=loop_order)
loop_node = add_outer_loop_over_indexed_elements(loop_node)
ast_node = KernelFunction(loop_node, Target.CPU, Backend.C, compile_function=make_python_function, ast_node = KernelFunction(loop_node, Target.CPU, Backend.C, compile_function=make_python_function,
ghost_layers=ghost_layer_info, function_name=function_name, assignments=assignments) ghost_layers=ghost_layer_info, function_name=function_name, assignments=assignments)
...@@ -219,7 +222,7 @@ def add_pragmas(ast_node, pragma_lines, nesting_depth=-1): ...@@ -219,7 +222,7 @@ def add_pragmas(ast_node, pragma_lines, nesting_depth=-1):
"""Prepends given pragma lines to all loops of specified nesting depth. """Prepends given pragma lines to all loops of specified nesting depth.
Args: Args:
ast: pystencils abstract syntax tree ast_node: pystencils abstract syntax tree
pragma_lines: Iterable of strings containing the pragma lines pragma_lines: Iterable of strings containing the pragma lines
nesting_depth: Nesting depth of the loops the pragmas should be applied to. nesting_depth: Nesting depth of the loops the pragmas should be applied to.
Outermost loop has depth 0. Outermost loop has depth 0.
......
...@@ -256,9 +256,7 @@ class Field: ...@@ -256,9 +256,7 @@ class Field:
self.shape = shape self.shape = shape
self.strides = strides self.strides = strides
self.latex_name: Optional[str] = None self.latex_name: Optional[str] = None
self.coordinate_origin: tuple[float, sp.Symbol] = sp.Matrix(tuple( self.coordinate_origin = sp.Matrix([0] * self.spatial_dimensions)
0 for _ in range(self.spatial_dimensions)
))
self.coordinate_transform = sp.eye(self.spatial_dimensions) self.coordinate_transform = sp.eye(self.spatial_dimensions)
if field_type == FieldType.STAGGERED: if field_type == FieldType.STAGGERED:
assert self.staggered_stencil assert self.staggered_stencil
...@@ -267,8 +265,7 @@ class Field: ...@@ -267,8 +265,7 @@ class Field:
if self.has_fixed_shape: if self.has_fixed_shape:
return Field(new_name, self.field_type, self._dtype, self._layout, self.shape, self.strides) return Field(new_name, self.field_type, self._dtype, self._layout, self.shape, self.strides)
else: else:
return Field.create_generic(new_name, self.spatial_dimensions, self.dtype.numpy_dtype, return Field(new_name, self.field_type, self.dtype, self.layout, self.shape, self.strides)
self.index_dimensions, self._layout, self.index_shape, self.field_type)
@property @property
def spatial_dimensions(self) -> int: def spatial_dimensions(self) -> int:
......
...@@ -217,7 +217,12 @@ class BlockIndexing(AbstractIndexing): ...@@ -217,7 +217,12 @@ class BlockIndexing(AbstractIndexing):
def guard(self, kernel_content, arr_shape): def guard(self, kernel_content, arr_shape):
arr_shape = arr_shape[:self._dim] arr_shape = arr_shape[:self._dim]
numeric_iteration_slice = _get_numeric_iteration_slice(self._iteration_space, arr_shape) if len(self._iteration_space) - 1 == len(arr_shape):
numeric_iteration_slice = _get_numeric_iteration_slice(self._iteration_space[1:], arr_shape)
numeric_iteration_slice = [self.iteration_space[0]] + numeric_iteration_slice
else:
assert len(self._iteration_space) == len(arr_shape), "Iteration space must be equal to the array shape"
numeric_iteration_slice = _get_numeric_iteration_slice(self._iteration_space, arr_shape)
end = [s.stop if s.stop != 0 else 1 for s in numeric_iteration_slice] end = [s.stop if s.stop != 0 else 1 for s in numeric_iteration_slice]
if self._dim < 4: if self._dim < 4:
......
import sympy as sp
from pystencils.astnodes import Block, KernelFunction, LoopOverCoordinate, SympyAssignment from pystencils.astnodes import Block, KernelFunction, LoopOverCoordinate, SympyAssignment
from pystencils.config import CreateKernelConfig from pystencils.config import CreateKernelConfig
from pystencils.typing import StructType, TypedSymbol from pystencils.typing import StructType, TypedSymbol
...@@ -9,7 +11,7 @@ from pystencils.node_collection import NodeCollection ...@@ -9,7 +11,7 @@ from pystencils.node_collection import NodeCollection
from pystencils.gpu.indexing import indexing_creator_from_params from pystencils.gpu.indexing import indexing_creator_from_params
from pystencils.slicing import normalize_slice from pystencils.slicing import normalize_slice
from pystencils.transformations import ( from pystencils.transformations import (
get_base_buffer_index, get_common_field, parse_base_pointer_info, get_base_buffer_index, get_common_field, get_common_indexed_element, parse_base_pointer_info,
resolve_buffer_accesses, resolve_field_accesses, unify_shape_symbols) resolve_buffer_accesses, resolve_field_accesses, unify_shape_symbols)
...@@ -34,7 +36,9 @@ def create_cuda_kernel(assignments: NodeCollection, config: CreateKernelConfig): ...@@ -34,7 +36,9 @@ def create_cuda_kernel(assignments: NodeCollection, config: CreateKernelConfig):
field_accesses = set() field_accesses = set()
num_buffer_accesses = 0 num_buffer_accesses = 0
indexed_elements = set()
for eq in assignments: for eq in assignments:
indexed_elements.update(eq.atoms(sp.Indexed))
field_accesses.update(eq.atoms(Field.Access)) field_accesses.update(eq.atoms(Field.Access))
field_accesses = {e for e in field_accesses if not e.is_absolute_access} field_accesses = {e for e in field_accesses if not e.is_absolute_access}
num_buffer_accesses += sum(1 for access in eq.atoms(Field.Access) if FieldType.is_buffer(access.field)) num_buffer_accesses += sum(1 for access in eq.atoms(Field.Access) if FieldType.is_buffer(access.field))
...@@ -62,12 +66,19 @@ def create_cuda_kernel(assignments: NodeCollection, config: CreateKernelConfig): ...@@ -62,12 +66,19 @@ def create_cuda_kernel(assignments: NodeCollection, config: CreateKernelConfig):
iteration_space = normalize_slice(iteration_slice, common_shape) iteration_space = normalize_slice(iteration_slice, common_shape)
else: else:
iteration_space = normalize_slice(iteration_slice, common_shape) iteration_space = normalize_slice(iteration_slice, common_shape)
iteration_space = tuple([s if isinstance(s, slice) else slice(s, s, 1) for s in iteration_space]) iteration_space = tuple([s if isinstance(s, slice) else slice(s, s, 1) for s in iteration_space])
loop_counter_symbols = [LoopOverCoordinate.get_loop_counter_symbol(i) for i in range(len(iteration_space))] loop_counter_symbols = [LoopOverCoordinate.get_loop_counter_symbol(i) for i in range(len(iteration_space))]
indexing = indexing_creator(iteration_space=iteration_space, data_layout=common_field.layout) if len(indexed_elements) > 0:
loop_counter_assignments = indexing.get_loop_ctr_assignments(loop_counter_symbols) common_indexed_element = get_common_indexed_element(indexed_elements)
indexing = indexing_creator(iteration_space=(slice(0, common_indexed_element.shape[0], 1), *iteration_space),
data_layout=common_field.layout)
extended_ctrs = [common_indexed_element.indices[0], *loop_counter_symbols]
loop_counter_assignments = indexing.get_loop_ctr_assignments(extended_ctrs)
else:
indexing = indexing_creator(iteration_space=iteration_space, data_layout=common_field.layout)
loop_counter_assignments = indexing.get_loop_ctr_assignments(loop_counter_symbols)
assignments = loop_counter_assignments + assignments assignments = loop_counter_assignments + assignments
block = indexing.guard(Block(assignments), common_shape) block = indexing.guard(Block(assignments), common_shape)
......
...@@ -9,6 +9,7 @@ import pystencils.astnodes as ast ...@@ -9,6 +9,7 @@ import pystencils.astnodes as ast
from pystencils.backends.cbackend import CustomCodeNode from pystencils.backends.cbackend import CustomCodeNode
from pystencils.functions import DivFunc from pystencils.functions import DivFunc
from pystencils.simp import AssignmentCollection from pystencils.simp import AssignmentCollection
from pystencils.typing import FieldPointerSymbol
class NodeCollection: class NodeCollection:
...@@ -20,6 +21,8 @@ class NodeCollection: ...@@ -20,6 +21,8 @@ class NodeCollection:
if isinstance(obj, (list, tuple)): if isinstance(obj, (list, tuple)):
return [visit(e) for e in obj] return [visit(e) for e in obj]
if isinstance(obj, Assignment): if isinstance(obj, Assignment):
if isinstance(obj.lhs, FieldPointerSymbol):
return ast.SympyAssignment(obj.lhs, obj.rhs, is_const=obj.lhs.dtype.const)
return ast.SympyAssignment(obj.lhs, obj.rhs) return ast.SympyAssignment(obj.lhs, obj.rhs)
elif isinstance(obj, AddAugmentedAssignment): elif isinstance(obj, AddAugmentedAssignment):
return ast.SympyAssignment(obj.lhs, obj.lhs + obj.rhs) return ast.SympyAssignment(obj.lhs, obj.lhs + obj.rhs)
......
...@@ -185,7 +185,7 @@ def get_common_field(field_set): ...@@ -185,7 +185,7 @@ def get_common_field(field_set):
raise ValueError("Differently sized field accesses in loop body: " + str(shape_set)) raise ValueError("Differently sized field accesses in loop body: " + str(shape_set))
# Sort the fields by their name to ensure that always the same field is returned # Sort the fields by their name to ensure that always the same field is returned
reference_field = list(sorted(field_set, key=lambda e: str(e)))[0] reference_field = sorted(field_set, key=lambda e: str(e))[0]
return reference_field return reference_field
...@@ -261,6 +261,26 @@ def make_loop_over_domain(body, iteration_slice=None, ghost_layers=None, loop_or ...@@ -261,6 +261,26 @@ def make_loop_over_domain(body, iteration_slice=None, ghost_layers=None, loop_or
return current_body, ghost_layers return current_body, ghost_layers
def get_common_indexed_element(indexed_elements: Set[sp.IndexedBase]) -> sp.IndexedBase:
assert len(indexed_elements) > 0, "indexed_elements can not be empty"
shape_set = {s.shape for s in indexed_elements}
if len(shape_set) != 1:
for shape in shape_set:
assert not isinstance(shape, int), "If indexed elements are used, they must all have the same shape"
return sorted(indexed_elements, key=lambda e: str(e))[0]
def add_outer_loop_over_indexed_elements(loop_node: ast.Block) -> ast.Block:
indexed_elements = loop_node.atoms(sp.Indexed)
if len(indexed_elements) == 0:
return loop_node
reference_element = get_common_indexed_element(indexed_elements)
new_loop = ast.LoopOverCoordinate(loop_node, 0, 0,
reference_element.shape[0], 1, custom_loop_ctr=reference_element.indices[0])
return ast.Block([new_loop])
def create_intermediate_base_pointer(field_access, coordinates, previous_ptr): def create_intermediate_base_pointer(field_access, coordinates, previous_ptr):
r""" r"""
Addressing elements in structured arrays is done with :math:`ptr\left[ \sum_i c_i \cdot s_i \right]` Addressing elements in structured arrays is done with :math:`ptr\left[ \sum_i c_i \cdot s_i \right]`
...@@ -411,11 +431,22 @@ def get_base_buffer_index(ast_node, loop_counters=None, loop_iterations=None): ...@@ -411,11 +431,22 @@ def get_base_buffer_index(ast_node, loop_counters=None, loop_iterations=None):
loop_counters = [loop.loop_counter_symbol for loop in loops] loop_counters = [loop.loop_counter_symbol for loop in loops]
loop_iterations = [slice(loop.start, loop.stop, loop.step) for loop in loops] loop_iterations = [slice(loop.start, loop.stop, loop.step) for loop in loops]
actual_sizes = [int_div((s.stop - s.start), s.step) actual_sizes = list()
if s.step != 1 else s.stop - s.start for s in loop_iterations] actual_steps = list()
for ctr, s in zip(loop_counters, loop_iterations):
if s.step != 1:
if (s.stop - s.start) % s.step == 0:
actual_sizes.append((s.stop - s.start) // s.step)
else:
actual_sizes.append(int_div((s.stop - s.start), s.step))
actual_steps = [int_div((ctr - s.start), s.step) if (ctr - s.start) % s.step == 0:
if s.step != 1 else ctr - s.start for ctr, s in zip(loop_counters, loop_iterations)] actual_steps.append((ctr - s.start) // s.step)
else:
actual_steps.append(int_div((ctr - s.start), s.step))
else:
actual_sizes.append(s.stop - s.start)
actual_steps.append(ctr - s.start)
field_accesses = ast_node.atoms(Field.Access) field_accesses = ast_node.atoms(Field.Access)
buffer_accesses = {fa for fa in field_accesses if FieldType.is_buffer(fa.field)} buffer_accesses = {fa for fa in field_accesses if FieldType.is_buffer(fa.field)}
......
...@@ -189,16 +189,17 @@ class VectorType(AbstractType): ...@@ -189,16 +189,17 @@ class VectorType(AbstractType):
class PointerType(AbstractType): class PointerType(AbstractType):
def __init__(self, base_type: BasicType, const: bool = False, restrict: bool = True): def __init__(self, base_type: BasicType, const: bool = False, restrict: bool = True, double_pointer: bool = False):
self._base_type = base_type self._base_type = base_type
self.const = const self.const = const
self.restrict = restrict self.restrict = restrict
self.double_pointer = double_pointer
def __getnewargs__(self): def __getnewargs__(self):
return self.base_type, self.const, self.restrict return self.base_type, self.const, self.restrict, self.double_pointer
def __getnewargs_ex__(self): def __getnewargs_ex__(self):
return (self.base_type, self.const, self.restrict), {} return (self.base_type, self.const, self.restrict, self.double_pointer), {}
@property @property
def alias(self): def alias(self):
...@@ -210,16 +211,25 @@ class PointerType(AbstractType): ...@@ -210,16 +211,25 @@ class PointerType(AbstractType):
@property @property
def item_size(self): def item_size(self):
return self.base_type.item_size if self.double_pointer:
raise NotImplementedError("The item_size for double_pointer is not implemented")
else:
return self.base_type.item_size
def __eq__(self, other): def __eq__(self, other):
if not isinstance(other, PointerType): if not isinstance(other, PointerType):
return False return False
else: else:
return (self.base_type, self.const, self.restrict) == (other.base_type, other.const, other.restrict) own = (self.base_type, self.const, self.restrict, self.double_pointer)
return own == (other.base_type, other.const, other.restrict, other.double_pointer)
def __str__(self): def __str__(self):
return f'{str(self.base_type)} * {"RESTRICT " if self.restrict else "" }{"const" if self.const else ""}' restrict_str = "RESTRICT" if self.restrict else ""
const_str = "const" if self.const else ""
if self.double_pointer:
return f'{str(self.base_type)} ** {restrict_str} {const_str}'
else:
return f'{str(self.base_type)} * {restrict_str} {const_str}'
def __repr__(self): def __repr__(self):
return str(self) return str(self)
...@@ -228,7 +238,7 @@ class PointerType(AbstractType): ...@@ -228,7 +238,7 @@ class PointerType(AbstractType):
return str(self) return str(self)
def __hash__(self): def __hash__(self):
return hash((self._base_type, self.const, self.restrict)) return hash((self._base_type, self.const, self.restrict, self.double_pointer))
class StructType(AbstractType): class StructType(AbstractType):
......
import sympy as sp
import numpy as np import numpy as np
import pytest import pytest
import pystencils as ps import pystencils as ps
from pystencils import Assignment, Field, CreateKernelConfig, create_kernel, Target from pystencils import Assignment, Field, CreateKernelConfig, create_kernel, Target
from pystencils.transformations import filtered_tree_iteration
from pystencils.typing import BasicType, FieldPointerSymbol, PointerType, TypedSymbol
def test_indexed_kernel(): @pytest.mark.parametrize('target', [ps.Target.CPU, ps.Target.GPU])
def test_indexed_kernel(target):
if target == Target.GPU:
pytest.importorskip("cupy")
import cupy as cp
arr = np.zeros((3, 4)) arr = np.zeros((3, 4))
dtype = np.dtype([('x', int), ('y', int), ('value', arr.dtype)]) dtype = np.dtype([('x', int), ('y', int), ('value', arr.dtype)])
index_arr = np.zeros((3,), dtype=dtype) index_arr = np.zeros((3,), dtype=dtype)
...@@ -17,38 +25,55 @@ def test_indexed_kernel(): ...@@ -17,38 +25,55 @@ def test_indexed_kernel():
normal_field = Field.create_from_numpy_array('f', arr) normal_field = Field.create_from_numpy_array('f', arr)
update_rule = Assignment(normal_field[0, 0], indexed_field('value')) update_rule = Assignment(normal_field[0, 0], indexed_field('value'))
config = CreateKernelConfig(index_fields=[indexed_field]) config = CreateKernelConfig(target=target, index_fields=[indexed_field])
ast = create_kernel([update_rule], config=config) ast = create_kernel([update_rule], config=config)
kernel = ast.compile() kernel = ast.compile()
kernel(f=arr, index=index_arr)
code = ps.get_code_str(kernel) if target == Target.CPU:
kernel(f=arr, index=index_arr)
else:
gpu_arr = cp.asarray(arr)
gpu_index_arr = cp.ndarray(index_arr.shape, dtype=index_arr.dtype)
gpu_index_arr.set(index_arr)
kernel(f=gpu_arr, index=gpu_index_arr)
arr = gpu_arr.get()
for i in range(index_arr.shape[0]): for i in range(index_arr.shape[0]):
np.testing.assert_allclose(arr[index_arr[i]['x'], index_arr[i]['y']], index_arr[i]['value'], atol=1e-13) np.testing.assert_allclose(arr[index_arr[i]['x'], index_arr[i]['y']], index_arr[i]['value'], atol=1e-13)
def test_indexed_gpu_kernel(): @pytest.mark.parametrize('index_size', ("fixed", "variable"))
pytest.importorskip("cupy") @pytest.mark.parametrize('array_size', ("3D", "2D", "10, 12", "13, 17, 19"))
import cupy as cp @pytest.mark.parametrize('target', (ps.Target.CPU, ps.Target.GPU))
@pytest.mark.parametrize('dtype', ("float64", "float32"))
def test_indexed_domain_kernel(index_size, array_size, target, dtype):
dtype = BasicType(dtype)
arr = np.zeros((3, 4)) f = ps.fields(f'f(1): {dtype.numpy_dtype.name}[{array_size}]')
dtype = np.dtype([('x', int), ('y', int), ('value', arr.dtype)]) g = ps.fields(f'g(1): {dtype.numpy_dtype.name}[{array_size}]')
index_arr = np.zeros((3,), dtype=dtype)
index_arr[0] = (0, 2, 3.0)
index_arr[1] = (1, 3, 42.0)
index_arr[2] = (2, 1, 5.0)
indexed_field = Field.create_from_numpy_array('index', index_arr) index = TypedSymbol("index", dtype=BasicType(np.int16))
normal_field = Field.create_from_numpy_array('f', arr) if index_size == "variable":
update_rule = Assignment(normal_field[0, 0], indexed_field('value')) index_src = TypedSymbol("_size_src", dtype=BasicType(np.int16))
index_dst = TypedSymbol("_size_dst", dtype=BasicType(np.int16))
else:
index_src = 16
index_dst = 16
pointer_type = PointerType(dtype, const=False, restrict=True, double_pointer=True)
const_pointer_type = PointerType(dtype, const=True, restrict=True, double_pointer=True)
config = CreateKernelConfig(target=Target.GPU, index_fields=[indexed_field]) src = sp.IndexedBase(TypedSymbol(f"_data_{f.name}", dtype=const_pointer_type), shape=index_src)
ast = create_kernel([update_rule], config=config) dst = sp.IndexedBase(TypedSymbol(f"_data_{g.name}", dtype=pointer_type), shape=index_dst)
kernel = ast.compile()
update_rule = [ps.Assignment(FieldPointerSymbol("f", dtype, const=True), src[index]),
ps.Assignment(FieldPointerSymbol("g", dtype, const=False), dst[index]),
ps.Assignment(g.center, f.center)]
ast = ps.create_kernel(update_rule, target=target)
code = ps.get_code_str(ast)
assert f"const {dtype.c_name} * RESTRICT _data_f = (({dtype.c_name} * RESTRICT const)(_data_f[index]));" in code
assert f"{dtype.c_name} * RESTRICT _data_g = (({dtype.c_name} * RESTRICT )(_data_g[index]));" in code
if target == Target.CPU:
assert code.count("for") == f.spatial_dimensions + 1
gpu_arr = cp.asarray(arr)
gpu_index_arr = cp.ndarray(index_arr.shape, dtype=index_arr.dtype)
gpu_index_arr.set(index_arr)
kernel(f=gpu_arr, index=gpu_index_arr)
arr = gpu_arr.get()
for i in range(index_arr.shape[0]):
np.testing.assert_allclose(arr[index_arr[i]['x'], index_arr[i]['y']], index_arr[i]['value'], atol=1e-13)