From 495e92b9c2c88ed0cfcde350bbc7b45dae875d11 Mon Sep 17 00:00:00 2001 From: et31efoj <markus.holzer@fau.de> Date: Thu, 7 Feb 2019 18:09:20 +0100 Subject: [PATCH] Implemented an improved version of the communication for periodic BC The communication of the ghost layers used to communicate just all values in between one time step to make sure that everything is correct. Furthermore the communication was only valid for pull stream steps. The improved communication distinguishes automatically between pull and push and communicates only values which are needed. With this improvement it was possible to implement the EsoTwist streaming scheme. --- pystencils/boundaries/boundaryconditions.py | 2 +- .../datahandling/serial_datahandling.py | 35 +++++-- pystencils/gpucuda/periodicity.py | 93 ++++++++++++++++--- pystencils/transformations.py | 13 +-- pystencils_tests/test_cudagpu.py | 2 +- 5 files changed, 117 insertions(+), 28 deletions(-) diff --git a/pystencils/boundaries/boundaryconditions.py b/pystencils/boundaries/boundaryconditions.py index 10870c4cd..6628be820 100644 --- a/pystencils/boundaries/boundaryconditions.py +++ b/pystencils/boundaries/boundaryconditions.py @@ -1,6 +1,6 @@ -from typing import List, Tuple, Any from pystencils import Assignment from pystencils.boundaries.boundaryhandling import BoundaryOffsetInfo +from typing import List, Tuple, Any from pystencils.data_types import create_type diff --git a/pystencils/datahandling/serial_datahandling.py b/pystencils/datahandling/serial_datahandling.py index 7e4d19cd8..dd2b70b89 100644 --- a/pystencils/datahandling/serial_datahandling.py +++ b/pystencils/datahandling/serial_datahandling.py @@ -250,7 +250,14 @@ class SerialDataHandling(DataHandling): def synchronization_function_gpu(self, names, stencil_name=None, **_): return self.synchronization_function(names, stencil_name, 'gpu') - def synchronization_function(self, names, stencil=None, target=None, **_): + def get_communication_before_sweep(self, names, stencil_name=None, update_rule=None, **_): + return self.synchronization_function(names, stencil_name, self.default_target, update_rule, 'pull') + + def get_communication_after_sweep(self, names, stencil_name=None, update_rule=None, **_): + return self.synchronization_function(names, stencil_name, self.default_target, update_rule, 'push') + + def synchronization_function(self, names, stencil=None, target=None, update_rule=None, + reads_writes="pull", **_): if target is None: target = self.default_target assert target in ('cpu', 'gpu') @@ -265,11 +272,11 @@ class SerialDataHandling(DataHandling): elif (stencil is None and self.dim == 3) or (stencil is not None and stencil.startswith('D3')): directions = itertools.product(*[neighbors] * 3) else: - raise ValueError("Invalid stencil") + directions = itertools.product(*[neighbors] * 1) for direction in directions: use_direction = True - if direction == (0, 0) or direction == (0, 0, 0): + if direction == (0,) or direction == (0, 0) or direction == (0, 0, 0): use_direction = False for component, periodicity in zip(direction, self._periodicity): if not periodicity and component != 0: @@ -290,15 +297,31 @@ class SerialDataHandling(DataHandling): if len(filtered_stencil) > 0: if target == 'cpu': - from pystencils.slicing import get_periodic_boundary_functor - result.append(get_periodic_boundary_functor(filtered_stencil, ghost_layers=gls)) + if update_rule is None: + from pystencils.slicing import get_periodic_boundary_functor + result.append(get_periodic_boundary_functor(filtered_stencil, ghost_layers=gls)) + else: + from pystencils.gpucuda.periodicity import get_periodic_boundary_functor as boundary_func + result.append(boundary_func(filtered_stencil, self._domainSize, + index_dimensions=self.fields[name].index_dimensions, + index_dim_shape=values_per_cell, + dtype=self.fields[name].dtype.numpy_dtype, + ghost_layers=gls, + target='cpu', + update_rule=update_rule, + reads_writes=reads_writes)) + else: from pystencils.gpucuda.periodicity import get_periodic_boundary_functor as boundary_func + result.append(boundary_func(filtered_stencil, self._domainSize, index_dimensions=self.fields[name].index_dimensions, index_dim_shape=values_per_cell, dtype=self.fields[name].dtype.numpy_dtype, - ghost_layers=gls)) + ghost_layers=gls, + target='gpu', + update_rule=update_rule, + reads_writes=reads_writes)) if target == 'cpu': def result_functor(): diff --git a/pystencils/gpucuda/periodicity.py b/pystencils/gpucuda/periodicity.py index 5657d4618..ba6232322 100644 --- a/pystencils/gpucuda/periodicity.py +++ b/pystencils/gpucuda/periodicity.py @@ -1,11 +1,15 @@ import numpy as np -from pystencils import Field, Assignment +from pystencils import Field, Assignment, AssignmentCollection from pystencils.slicing import normalize_slice, get_periodic_boundary_src_dst_slices -from pystencils.gpucuda import make_python_function -from pystencils.gpucuda.kernelcreation import create_cuda_kernel +from pystencils.gpucuda import make_python_function as make_python_function_gpu +from pystencils.cpu import make_python_function as make_python_function_cpu +from pystencils import create_kernel +from lbmpy.methods.abstractlbmethod import LbmCollisionRule -def create_copy_kernel(domain_size, from_slice, to_slice, index_dimensions=0, index_dim_shape=1, dtype=np.float64): + +def create_copy_kernel(domain_size, from_slice, to_slice, stencil, index_dimensions=0, index_dim_shape=1, + dtype=np.float64, target=None, update_rule_assignments=None, reads_writes="pull"): """Copies a rectangular part of an array to another non-overlapping part""" if index_dimensions not in (0, 1): raise NotImplementedError("Works only for one or zero index coordinates") @@ -18,25 +22,86 @@ def create_copy_kernel(domain_size, from_slice, to_slice, index_dimensions=0, in assert offset == [s1.stop - s2.stop for s1, s2 in zip(normalized_from_slice, normalized_to_slice)], \ "Slices have to have same size" - update_eqs = [] - for i in range(index_dim_shape): - eq = Assignment(f(i), f[tuple(offset)](i)) - update_eqs.append(eq) + update_eqs = set() + if update_rule_assignments is None: + for i in range(index_dim_shape): + eq = Assignment(f(i), f[tuple(offset)](i)) + update_eqs.add(eq) + else: + for update_rule_assignment in update_rule_assignments: + if reads_writes == "pull": + field_accesses = update_rule_assignment.rhs.atoms(Field.Access) + else: + field_accesses = update_rule_assignment.lhs.atoms(Field.Access) + + for field_access in field_accesses: + if 0 in stencil: + for stencil_part, off in zip(stencil, field_access.offsets): + if (stencil_part < 0 and off < 0) or (stencil_part > 0 and off > 0): + if reads_writes == "pull": + eq = Assignment(f(field_access.index[0]), f[tuple(offset)](field_access.index[0])) + update_eqs.add(eq) + if reads_writes == "push": + eq = Assignment(f[tuple(offset)](field_access.index[0]), f(field_access.index[0])) + update_eqs.add(eq) + else: + if stencil == field_access.offsets: + if reads_writes == "pull": + eq = Assignment(f(field_access.index[0]), f[tuple(offset)](field_access.index[0])) + update_eqs.add(eq) + if reads_writes == "push": + eq = Assignment(f[tuple(offset)](field_access.index[0]), f(field_access.index[0])) + update_eqs.add(eq) + + kernels = [] + if len(update_eqs) > 0: + ast = create_kernel(list(update_eqs), iteration_slice=to_slice, + skip_independence_check=True, target=target) + if target == 'cpu': + kernels.append(make_python_function_cpu(ast)) + else: + kernels.append(make_python_function_gpu(ast)) - ast = create_cuda_kernel(update_eqs, iteration_slice=to_slice, skip_independence_check=True) - return make_python_function(ast) + return kernels def get_periodic_boundary_functor(stencil, domain_size, index_dimensions=0, index_dim_shape=1, ghost_layers=1, - thickness=None, dtype=float): + thickness=None, dtype=float, target=None, update_rule=None, reads_writes="pull"): + + # sort stencil entry's by number of zeros to make sure, that surfaces, edges and corners are + # communicated in that order. Otherwise values (e.g. corner values) will be overwritten + stencil.sort(key=number_of_zeros, reverse=True) + src_dst_slice_tuples = get_periodic_boundary_src_dst_slices(stencil, ghost_layers, thickness) kernels = [] - index_dimensions = index_dimensions - for src_slice, dst_slice in src_dst_slice_tuples: - kernels.append(create_copy_kernel(domain_size, src_slice, dst_slice, index_dimensions, index_dim_shape, dtype)) + + if isinstance(update_rule, Assignment): + update_rule_assignments = [update_rule] + elif isinstance(update_rule, (AssignmentCollection, LbmCollisionRule)): + update_rule_assignments = update_rule.all_assignments + else: + update_rule_assignments = None + + # by default we only get the communication pattern for pull stream + if reads_writes is not "pull" or reads_writes is not "push": + assert "not a valid argument for the definition of read or/and write access" + + for i, (src_slice, dst_slice) in enumerate(src_dst_slice_tuples): + for j in create_copy_kernel(domain_size, src_slice, dst_slice, stencil[i], index_dimensions, + index_dim_shape, dtype, target, update_rule_assignments, reads_writes): + kernels.append(j) def functor(pdfs, **_): for kernel in kernels: kernel(pdfs=pdfs) return functor + + +def number_of_zeros(stencil_entry): + count_zeros = 0 + for tmp in stencil_entry: + if tmp == 0: + count_zeros += 1 + + return count_zeros diff --git a/pystencils/transformations.py b/pystencils/transformations.py index 4490e3e1a..0346b7d93 100644 --- a/pystencils/transformations.py +++ b/pystencils/transformations.py @@ -175,12 +175,13 @@ def make_loop_over_domain(body, function_name, iteration_slice=None, ghost_layer if iteration_slice is not None: iteration_slice = normalize_slice(iteration_slice, shape) - - if ghost_layers is None: - required_ghost_layers = max([fa.required_ghost_layers for fa in field_accesses]) - ghost_layers = [(required_ghost_layers, required_ghost_layers)] * len(loop_order) - if isinstance(ghost_layers, int): - ghost_layers = [(ghost_layers, ghost_layers)] * len(loop_order) + ghost_layers = 0 + else: + if ghost_layers is None: + required_ghost_layers = max([fa.required_ghost_layers for fa in field_accesses]) + ghost_layers = [(required_ghost_layers, required_ghost_layers)] * len(loop_order) + if isinstance(ghost_layers, int): + ghost_layers = [(ghost_layers, ghost_layers)] * len(loop_order) current_body = body for i, loop_coordinate in enumerate(reversed(loop_order)): diff --git a/pystencils_tests/test_cudagpu.py b/pystencils_tests/test_cudagpu.py index 3afddb128..4a682526d 100644 --- a/pystencils_tests/test_cudagpu.py +++ b/pystencils_tests/test_cudagpu.py @@ -135,7 +135,7 @@ def test_periodicity(): arr_gpu = gpuarray.to_gpu(arr_cpu) periodicity_stencil = [(1, 0), (-1, 0), (1, 1)] - periodic_gpu_kernel = periodic_gpu(periodicity_stencil, (5, 5), 1, 2) + periodic_gpu_kernel = periodic_gpu(periodicity_stencil, (5, 5), 1, 2, target='gpu') periodic_cpu_kernel = periodic_cpu(periodicity_stencil) cpu_result = np.copy(arr_cpu) -- GitLab