Commit 0274362a authored by Christoph Alt

Merge branch 'FixBufferGPU' into 'master'

[Fix] GPU Buffer with iteration slices

See merge request pycodegen/pystencils!318
parents 04232aaa c3e33069
@@ -169,7 +169,7 @@ class BlockIndexing(AbstractIndexing):
         conditions = [c < e for c, e in zip(self.coordinates, end)]
         for cuda_index, iter_slice in zip(self.cuda_indices, self._iterationSlice):
-            if iter_slice.step > 1:
+            if isinstance(iter_slice, slice) and iter_slice.step > 1:
                 conditions.append(sp.Eq(sp.Mod(cuda_index, iter_slice.step), 0))

         condition = conditions[0]
@@ -177,6 +177,9 @@ class BlockIndexing(AbstractIndexing):
             condition = sp.And(condition, c)
         return Block([Conditional(condition, kernel_content)])

+    def iteration_space(self, arr_shape):
+        return _iteration_space(self._iterationSlice, arr_shape)
+
     @staticmethod
     def limit_block_size_by_register_restriction(block_size, required_registers_per_thread, device=None):
         """Shrinks the block_size if there are too many registers used per multiprocessor.
@@ -284,6 +287,9 @@ class LineIndexing(AbstractIndexing):
     def symbolic_parameters(self):
         return set()

+    def iteration_space(self, arr_shape):
+        return _iteration_space(self._iterationSlice, arr_shape)
+
 # -------------------------------------- Helper functions --------------------------------------------------------------
@@ -310,6 +316,23 @@ def _get_end_from_slice(iteration_slice, arr_shape):
     return res

+def _get_steps_from_slice(iteration_slice):
+    res = []
+    for slice_component in iteration_slice:
+        if type(slice_component) is slice:
+            res.append(slice_component.step)
+        else:
+            res.append(1)
+    return res
+
+
+def _iteration_space(iteration_slice, arr_shape):
+    starts = _get_start_from_slice(iteration_slice)
+    ends = _get_end_from_slice(iteration_slice, arr_shape)
+    steps = _get_steps_from_slice(iteration_slice)
+    return [slice(start, end, step) for start, end, step in zip(starts, ends, steps)]
+
 def indexing_creator_from_params(gpu_indexing, gpu_indexing_params):
     if isinstance(gpu_indexing, str):
         if gpu_indexing == 'block':
...
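Note on the new helpers above: `_iteration_space` normalizes an iteration slice (which may mix plain integers and slice objects) against the array shape into fully specified slice objects, and the extended guard only emits the modulo condition for genuine slice components. The snippet below is a minimal standalone sketch of that normalization, not the pystencils implementation; the helper name `iteration_space_sketch` and the example shape are made up for illustration.

# Minimal sketch (assumed behaviour, not the pystencils API): turn every component of an
# iteration slice into a fully specified slice(start, stop, step) for a given array shape.
def iteration_space_sketch(iteration_slice, arr_shape):
    result = []
    for component, extent in zip(iteration_slice, arr_shape):
        if isinstance(component, slice):
            start, stop, step = component.indices(extent)   # resolves None and negative bounds
            result.append(slice(start, stop, step))
        else:  # a plain integer selects exactly one coordinate
            result.append(slice(component, component + 1, 1))
    return result


# Every second cell of the first axis, only index 0 of the second axis, on a 16x16 array:
assert iteration_space_sketch((slice(2, -2, 2), 0), (16, 16)) == [slice(2, 14, 2), slice(0, 1, 1)]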
@@ -13,7 +13,7 @@ from pystencils.node_collection import NodeCollection
 from pystencils.gpucuda.indexing import indexing_creator_from_params
 from pystencils.simp.assignment_collection import AssignmentCollection
 from pystencils.transformations import (
-    get_base_buffer_index, get_common_shape, parse_base_pointer_info,
+    get_base_buffer_index, get_common_field, parse_base_pointer_info,
     resolve_buffer_accesses, resolve_field_accesses, unify_shape_symbols)
@@ -44,7 +44,9 @@ def create_cuda_kernel(assignments: Union[AssignmentCollection, NodeCollection],
         field_accesses = {e for e in field_accesses if not e.is_absolute_access}
         num_buffer_accesses += sum(1 for access in eq.atoms(Field.Access) if FieldType.is_buffer(access.field))

-    common_shape = get_common_shape(fields_without_buffers)
+    # common shape and field to form the iteration space
+    common_field = get_common_field(fields_without_buffers)
+    common_shape = common_field.spatial_shape

     if iteration_slice is None:
         # determine iteration slice from ghost layers
@@ -62,7 +64,7 @@ def create_cuda_kernel(assignments: Union[AssignmentCollection, NodeCollection],
                 iteration_slice.append(slice(ghost_layers[i][0],
                                              -ghost_layers[i][1] if ghost_layers[i][1] > 0 else None))

-    indexing = indexing_creator(field=list(fields_without_buffers)[0], iteration_slice=iteration_slice)
+    indexing = indexing_creator(field=common_field, iteration_slice=iteration_slice)
     coord_mapping = indexing.coordinates

     cell_idx_assignments = [SympyAssignment(LoopOverCoordinate.get_loop_counter_symbol(i), value)
@@ -92,7 +94,8 @@ def create_cuda_kernel(assignments: Union[AssignmentCollection, NodeCollection],
     coord_mapping = {f.name: cell_idx_symbols for f in all_fields}

     if any(FieldType.is_buffer(f) for f in all_fields):
-        resolve_buffer_accesses(ast, get_base_buffer_index(ast, indexing.coordinates, common_shape), read_only_fields)
+        iteration_space = indexing.iteration_space(common_shape)
+        resolve_buffer_accesses(ast, get_base_buffer_index(ast, cell_idx_symbols, iteration_space), read_only_fields)

     resolve_field_accesses(ast, read_only_fields, field_to_base_pointer_info=base_pointer_info,
                            field_to_fixed_coordinates=coord_mapping)
@@ -157,7 +160,7 @@ def created_indexed_cuda_kernel(assignments: Union[AssignmentCollection, NodeCollection],
                                 iteration_slice=[slice(None, None, None)] * len(idx_field.spatial_shape))

     function_body = Block(coordinate_symbol_assignments + assignments)
-    function_body = indexing.guard(function_body, get_common_shape(index_fields))
+    function_body = indexing.guard(function_body, get_common_field(index_fields).spatial_shape)
     ast = KernelFunction(function_body, Target.GPU, Backend.CUDA, make_python_function,
                          None, function_name, assignments=assignments)

     ast.global_variables.update(indexing.index_variables)
...
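The change above hands the full iteration space (including the step) to get_base_buffer_index instead of the raw field shape, so a strided pack/unpack kernel writes into consecutive buffer positions. A small NumPy illustration of the intended mapping (the array and slice below are invented for illustration):

import numpy as np

src = np.arange(10)                     # hypothetical 1D field
iter_slice = slice(0, 10, 2)            # pack every second cell
packed = src[iter_slice]                # dense buffer with (10 - 0) // 2 == 5 entries
assert packed.shape == (5,)

# the buffer position of the cell at counter ctr is (ctr - start) // step, e.g. ctr = 4 -> 2
ctr = 4
assert packed[(ctr - iter_slice.start) // iter_slice.step] == src[ctr]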
@@ -122,9 +122,10 @@ def unify_shape_symbols(body, common_shape, fields):
     body.subs(substitutions)


-def get_common_shape(field_set):
-    """Takes a set of pystencils Fields and returns their common spatial shape if it exists. Otherwise
-    ValueError is raised"""
+def get_common_field(field_set):
+    """Takes a set of pystencils Fields, checks if a common spatial shape exists and returns one
+    representative field that can be used for shape information etc. in the kernel creation.
+    If the fields have different shapes, a ValueError is raised."""
     nr_of_fixed_shaped_fields = 0
     for f in field_set:
         if f.has_fixed_shape:
@@ -142,8 +143,9 @@ def get_common_field(field_set):
     if len(shape_set) != 1:
         raise ValueError("Differently sized field accesses in loop body: " + str(shape_set))

-    shape = list(sorted(shape_set, key=lambda e: str(e[0])))[0]
-    return shape
+    # Sort the fields by their name to ensure that the same field is always returned
+    reference_field = list(sorted(field_set, key=lambda e: str(e)))[0]
+    return reference_field


 def make_loop_over_domain(body, iteration_slice=None, ghost_layers=None, loop_order=None):
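A minimal usage sketch of the renamed helper (the field names and the 10x10 shape are invented; kernel creation only relies on the returned field's spatial_shape):

import numpy as np
from pystencils import Field
from pystencils.transformations import get_common_field

f = Field.create_from_numpy_array("f", np.zeros((10, 10)))
g = Field.create_from_numpy_array("g", np.zeros((10, 10)))

common = get_common_field({f, g})   # one representative field, chosen deterministically by name
print(common.spatial_shape)         # shape information used during kernel creation, e.g. (10, 10)
# fields with differing spatial shapes would raise a ValueError instead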
@@ -178,13 +180,15 @@ def make_loop_over_domain(body, iteration_slice=None, ghost_layers=None, loop_order=None):
     if absolut_accesses_only:
         absolut_access_fields = {e.field for e in body.atoms(Field.Access)}
-        shape = get_common_shape(absolut_access_fields)
+        common_field = get_common_field(absolut_access_fields)
+        common_shape = common_field.spatial_shape
     else:
-        shape = get_common_shape(fields)
-    unify_shape_symbols(body, common_shape=shape, fields=fields)
+        common_field = get_common_field(fields)
+        common_shape = common_field.spatial_shape
+    unify_shape_symbols(body, common_shape=common_shape, fields=fields)

     if iteration_slice is not None:
-        iteration_slice = normalize_slice(iteration_slice, shape)
+        iteration_slice = normalize_slice(iteration_slice, common_shape)

     if ghost_layers is None:
         if absolut_accesses_only:
@@ -199,7 +203,7 @@ def make_loop_over_domain(body, iteration_slice=None, ghost_layers=None, loop_order=None):
     for i, loop_coordinate in enumerate(reversed(loop_order)):
         if iteration_slice is None:
             begin = ghost_layers[loop_coordinate][0]
-            end = shape[loop_coordinate] - ghost_layers[loop_coordinate][1]
+            end = common_shape[loop_coordinate] - ghost_layers[loop_coordinate][1]
             new_loop = ast.LoopOverCoordinate(current_body, loop_coordinate, begin, end, 1)
             current_body = ast.Block([new_loop])
         else:
@@ -351,7 +355,7 @@ def get_base_buffer_index(ast_node, loop_counters=None, loop_iterations=None):
         ast_node: ast before any field accesses are resolved
         loop_counters: for CPU kernels: leave to default 'None' (can be determined from loop nodes)
                        for GPU kernels: list of 'loop counters' from inner to outer loop
-        loop_iterations: number of iterations of each loop from inner to outer, for CPU kernels leave to default
+        loop_iterations: iteration slice for each loop from inner to outer, for CPU kernels leave to default
     Returns:
         base buffer index - required by 'resolve_buffer_accesses' function
@@ -363,15 +367,14 @@ def get_base_buffer_index(ast_node, loop_counters=None, loop_iterations=None):
         assert len(loops) == len(parents_of_innermost_loop)
         assert all(l1 is l2 for l1, l2 in zip(loops, parents_of_innermost_loop))

-        actual_sizes = [int_div((loop.stop - loop.start), loop.step)
-                        if loop.step != 1 else loop.stop - loop.start for loop in loops]
-        actual_steps = [int_div((loop.loop_counter_symbol - loop.start), loop.step)
-                        if loop.step != 1 else loop.loop_counter_symbol - loop.start for loop in loops]
-    else:
-        actual_sizes = loop_iterations
-        actual_steps = loop_counters
+        loop_counters = [loop.loop_counter_symbol for loop in loops]
+        loop_iterations = [slice(loop.start, loop.stop, loop.step) for loop in loops]
+
+    actual_sizes = [int_div((s.stop - s.start), s.step)
+                    if s.step != 1 else s.stop - s.start for s in loop_iterations]
+
+    actual_steps = [int_div((ctr - s.start), s.step)
+                    if s.step != 1 else ctr - s.start for ctr, s in zip(loop_counters, loop_iterations)]

     field_accesses = ast_node.atoms(Field.Access)
     buffer_accesses = {fa for fa in field_accesses if FieldType.is_buffer(fa.field)}
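Worked illustration of the size/step expressions above for a single loop dimension (int_div performs integer division; the counter symbol ctr and the slice bounds below are invented for the example):

import sympy as sp

ctr = sp.Symbol("ctr")                        # loop counter / GPU thread coordinate
s = slice(2, 14, 2)                           # iteration slice of one loop, from inner to outer

actual_size = (s.stop - s.start) // s.step    # 6 cells are visited along this dimension
actual_step = (ctr - s.start) / s.step        # dense buffer coordinate of the current cell

assert actual_size == 6
assert actual_step.subs(ctr, 6) == 2          # counter value 6 is the third visited cell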
...
@@ -22,7 +22,7 @@ def _generate_fields(dt=np.uint64, num_directions=1, layout='numpy'):
         field_layout = layout_string_to_tuple(layout, len(size))
         src_arr = create_numpy_array_with_layout(size, field_layout, dtype=dt)

-        array_data = np.reshape(np.arange(1, int(np.prod(size)+1)), size)
+        array_data = np.reshape(np.arange(1, int(np.prod(size) + 1)), size)
         # Use flat iterator to input data into the array
         src_arr.flat = add_ghost_layers(array_data, index_dimensions=1 if num_directions > 1 else 0).astype(dt).flat
         dst_arr = np.zeros(src_arr.shape, dtype=dt)
@@ -41,7 +41,8 @@ def test_full_scalar_field():
                                       field_type=FieldType.BUFFER, dtype=src_arr.dtype)

         pack_eqs = [Assignment(buffer.center(), src_field.center())]
-        pack_code = create_kernel(pack_eqs, data_type={'src_field': src_arr.dtype, 'buffer': buffer.dtype})
+        config = ps.CreateKernelConfig(data_type={'src_field': src_arr.dtype, 'buffer': buffer.dtype})
+        pack_code = create_kernel(pack_eqs, config=config)
         code = ps.get_code_str(pack_code)
         ps.show_code(pack_code)
@@ -49,7 +50,9 @@ def test_full_scalar_field():
         pack_kernel(buffer=buffer_arr, src_field=src_arr)

         unpack_eqs = [Assignment(dst_field.center(), buffer.center())]
-        unpack_code = create_kernel(unpack_eqs, data_type={'dst_field': dst_arr.dtype, 'buffer': buffer.dtype})
+        config = ps.CreateKernelConfig(data_type={'dst_field': dst_arr.dtype, 'buffer': buffer.dtype})
+        unpack_code = create_kernel(unpack_eqs, config=config)
         unpack_kernel = unpack_code.compile()
         unpack_kernel(dst_field=dst_arr, buffer=buffer_arr)
@@ -73,14 +76,18 @@ def test_field_slice():
                                       field_type=FieldType.BUFFER, dtype=src_arr.dtype)

         pack_eqs = [Assignment(buffer.center(), src_field.center())]
-        pack_code = create_kernel(pack_eqs, data_type={'src_field': src_arr.dtype, 'buffer': buffer.dtype})
+        config = ps.CreateKernelConfig(data_type={'src_field': src_arr.dtype, 'buffer': buffer.dtype})
+        pack_code = create_kernel(pack_eqs, config=config)
         pack_kernel = pack_code.compile()
         pack_kernel(buffer=bufferArr, src_field=src_arr[pack_slice])

         # Unpack into ghost layer of dst_field in N direction
         unpack_eqs = [Assignment(dst_field.center(), buffer.center())]
-        unpack_code = create_kernel(unpack_eqs, data_type={'dst_field': dst_arr.dtype, 'buffer': buffer.dtype})
+        config = ps.CreateKernelConfig(data_type={'dst_field': dst_arr.dtype, 'buffer': buffer.dtype})
+        unpack_code = create_kernel(unpack_eqs, config=config)
         unpack_kernel = unpack_code.compile()
         unpack_kernel(buffer=bufferArr, dst_field=dst_arr[unpack_slice])
@@ -105,7 +112,8 @@ def test_all_cell_values():
             eq = Assignment(buffer(idx), src_field(idx))
             pack_eqs.append(eq)

-        pack_code = create_kernel(pack_eqs, data_type={'src_field': src_arr.dtype, 'buffer': buffer.dtype})
+        config = ps.CreateKernelConfig(data_type={'src_field': src_arr.dtype, 'buffer': buffer.dtype})
+        pack_code = create_kernel(pack_eqs, config=config)
         pack_kernel = pack_code.compile()
         pack_kernel(buffer=bufferArr, src_field=src_arr)
@@ -115,7 +123,8 @@ def test_all_cell_values():
             eq = Assignment(dst_field(idx), buffer(idx))
             unpack_eqs.append(eq)

-        unpack_code = create_kernel(unpack_eqs, data_type={'dst_field': dst_arr.dtype, 'buffer': buffer.dtype})
+        config = ps.CreateKernelConfig(data_type={'dst_field': dst_arr.dtype, 'buffer': buffer.dtype})
+        unpack_code = create_kernel(unpack_eqs, config=config)
         unpack_kernel = unpack_code.compile()
         unpack_kernel(buffer=bufferArr, dst_field=dst_arr)
@@ -141,7 +150,8 @@ def test_subset_cell_values():
             eq = Assignment(buffer(buffer_idx), src_field(cell_idx))
             pack_eqs.append(eq)

-        pack_code = create_kernel(pack_eqs, data_type={'src_field': src_arr.dtype, 'buffer': buffer.dtype})
+        config = ps.CreateKernelConfig(data_type={'src_field': src_arr.dtype, 'buffer': buffer.dtype})
+        pack_code = create_kernel(pack_eqs, config=config)
         pack_kernel = pack_code.compile()
         pack_kernel(buffer=bufferArr, src_field=src_arr)
@@ -151,7 +161,8 @@ def test_subset_cell_values():
             eq = Assignment(dst_field(cell_idx), buffer(buffer_idx))
             unpack_eqs.append(eq)

-        unpack_code = create_kernel(unpack_eqs, data_type={'dst_field': dst_arr.dtype, 'buffer': buffer.dtype})
+        config = ps.CreateKernelConfig(data_type={'dst_field': dst_arr.dtype, 'buffer': buffer.dtype})
+        unpack_code = create_kernel(unpack_eqs, config=config)
         unpack_kernel = unpack_code.compile()
         unpack_kernel(buffer=bufferArr, dst_field=dst_arr)
@@ -176,7 +187,8 @@ def test_field_layouts():
             eq = Assignment(buffer(idx), src_field(idx))
             pack_eqs.append(eq)

-        pack_code = create_kernel(pack_eqs, data_type={'src_field': src_arr.dtype, 'buffer': buffer.dtype})
+        config = ps.CreateKernelConfig(data_type={'src_field': src_arr.dtype, 'buffer': buffer.dtype})
+        pack_code = create_kernel(pack_eqs, config=config)
         pack_kernel = pack_code.compile()
         pack_kernel(buffer=bufferArr, src_field=src_arr)
@@ -186,7 +198,8 @@ def test_field_layouts():
             eq = Assignment(dst_field(idx), buffer(idx))
             unpack_eqs.append(eq)

-        unpack_code = create_kernel(unpack_eqs, data_type={'dst_field': dst_arr.dtype, 'buffer': buffer.dtype})
+        config = ps.CreateKernelConfig(data_type={'dst_field': dst_arr.dtype, 'buffer': buffer.dtype})
+        unpack_code = create_kernel(unpack_eqs, config=config)
         unpack_kernel = unpack_code.compile()
         unpack_kernel(buffer=bufferArr, dst_field=dst_arr)
@@ -202,7 +215,7 @@ def test_iteration_slices():
         src_field = Field.create_generic("src_field", spatial_dimensions, index_shape=(num_cell_values,), dtype=dt)
         dst_field = Field.create_generic("dst_field", spatial_dimensions, index_shape=(num_cell_values,), dtype=dt)
         buffer = Field.create_generic("buffer", spatial_dimensions=1, index_dimensions=1,
-                                  field_type=FieldType.BUFFER, dtype=src_arr.dtype)
+                                      field_type=FieldType.BUFFER, dtype=src_arr.dtype)

         pack_eqs = []
         # Since we are packing all cell values for all cells, then
@@ -214,13 +227,16 @@ def test_iteration_slices():
         dim = src_field.spatial_dimensions

         # Pack only the leftmost slice, only every second cell
-        pack_slice = (slice(None, None, 2),) * (dim-1) + (0, )
+        pack_slice = (slice(None, None, 2),) * (dim - 1) + (0,)

         # Fill the entire array with data
         src_arr[(slice(None, None, 1),) * dim] = np.arange(num_cell_values)
         dst_arr.fill(0)

-        pack_code = create_kernel(pack_eqs, iteration_slice=pack_slice, data_type={'src_field': src_arr.dtype, 'buffer': buffer.dtype})
+        config = ps.CreateKernelConfig(iteration_slice=pack_slice,
+                                       data_type={'src_field': src_arr.dtype, 'buffer': buffer.dtype})
+        pack_code = create_kernel(pack_eqs, config=config)
         pack_kernel = pack_code.compile()
         pack_kernel(buffer=bufferArr, src_field=src_arr)
@@ -230,12 +246,14 @@ def test_iteration_slices():
             eq = Assignment(dst_field(idx), buffer(idx))
             unpack_eqs.append(eq)

-        unpack_code = create_kernel(unpack_eqs, iteration_slice=pack_slice, data_type={'dst_field': dst_arr.dtype, 'buffer': buffer.dtype})
+        config = ps.CreateKernelConfig(iteration_slice=pack_slice,
+                                       data_type={'dst_field': dst_arr.dtype, 'buffer': buffer.dtype})
+        unpack_code = create_kernel(unpack_eqs, config=config)
         unpack_kernel = unpack_code.compile()
         unpack_kernel(buffer=bufferArr, dst_field=dst_arr)

         # Check if only every second entry of the leftmost slice has been copied
         np.testing.assert_equal(dst_arr[pack_slice], src_arr[pack_slice])
-        np.testing.assert_equal(dst_arr[(slice(1, None, 2),) * (dim-1) + (0,)], 0)
-        np.testing.assert_equal(dst_arr[(slice(None, None, 1),) * (dim-1) + (slice(1,None),)], 0)
+        np.testing.assert_equal(dst_arr[(slice(1, None, 2),) * (dim - 1) + (0,)], 0)
+        np.testing.assert_equal(dst_arr[(slice(None, None, 1),) * (dim - 1) + (slice(1, None),)], 0)
@@ -274,3 +274,59 @@ def test_buffer_indexing():
             assert s in src_field_size

         assert len(spatial_shape_symbols) <= 3
+
+
+def test_iteration_slices():
+    num_cell_values = 19
+    dt = np.uint64
+    fields = _generate_fields(dt=dt, stencil_directions=num_cell_values)
+    for (src_arr, gpu_src_arr, gpu_dst_arr, gpu_buffer_arr) in fields:
+        src_field = Field.create_from_numpy_array("src_field", gpu_src_arr, index_dimensions=1)
+        dst_field = Field.create_from_numpy_array("dst_field", gpu_src_arr, index_dimensions=1)
+        buffer = Field.create_generic("buffer", spatial_dimensions=1, index_dimensions=1,
+                                      field_type=FieldType.BUFFER, dtype=src_arr.dtype)
+
+        pack_eqs = []
+        # Since we are packing all cell values for all cells, then
+        # the buffer index is equivalent to the field index
+        for idx in range(num_cell_values):
+            eq = Assignment(buffer(idx), src_field(idx))
+            pack_eqs.append(eq)
+
+        dim = src_field.spatial_dimensions
+
+        # Pack only the leftmost slice, only every second cell
+        pack_slice = (slice(None, None, 2),) * (dim - 1) + (0,)
+
+        # Fill the entire array with data
+        src_arr[(slice(None, None, 1),) * dim] = np.arange(num_cell_values)
+        gpu_src_arr[(slice(None, None, 1),) * dim] = src_arr
+        gpu_dst_arr.fill(0)
+
+        config = CreateKernelConfig(target=Target.GPU, iteration_slice=pack_slice,
+                                    data_type={'src_field': gpu_src_arr.dtype, 'buffer': gpu_buffer_arr.dtype})
+        pack_code = create_kernel(pack_eqs, config=config)
+        pack_kernel = pack_code.compile()
+        pack_kernel(buffer=gpu_buffer_arr, src_field=gpu_src_arr)
+
+        unpack_eqs = []
+        for idx in range(num_cell_values):
+            eq = Assignment(dst_field(idx), buffer(idx))
+            unpack_eqs.append(eq)
+
+        config = CreateKernelConfig(target=Target.GPU, iteration_slice=pack_slice,
+                                    data_type={'dst_field': gpu_dst_arr.dtype, 'buffer': gpu_buffer_arr.dtype})
+        unpack_code = create_kernel(unpack_eqs, config=config)
+        unpack_kernel = unpack_code.compile()
+        unpack_kernel(buffer=gpu_buffer_arr, dst_field=gpu_dst_arr)
+
+        dst_arr = gpu_dst_arr.get()
+        src_arr = gpu_src_arr.get()
+
+        # Check if only every second entry of the leftmost slice has been copied
+        np.testing.assert_equal(dst_arr[pack_slice], src_arr[pack_slice])
+        np.testing.assert_equal(dst_arr[(slice(1, None, 2),) * (dim - 1) + (0,)], 0)
+        np.testing.assert_equal(dst_arr[(slice(None, None, 1),) * (dim - 1) + (slice(1, None),)], 0)
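For reference, the final assertions of the new GPU test amount to this NumPy-only restatement (array shape and dimension count are chosen arbitrarily here; the GPU arrays are replaced by plain arrays):

import numpy as np

dim, num_cell_values = 3, 19
src = np.zeros((6, 6, 6, num_cell_values))
src[...] = np.arange(num_cell_values)
dst = np.zeros_like(src)

# what a correct strided pack followed by unpack should achieve
pack_slice = (slice(None, None, 2),) * (dim - 1) + (0,)
dst[pack_slice] = src[pack_slice]

np.testing.assert_equal(dst[pack_slice], src[pack_slice])
np.testing.assert_equal(dst[(slice(1, None, 2),) * (dim - 1) + (0,)], 0)
np.testing.assert_equal(dst[(slice(None, None, 1),) * (dim - 1) + (slice(1, None),)], 0)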