Skip to content
Snippets Groups Projects
Commit 54241096 authored by Markus Holzer's avatar Markus Holzer Committed by Michael Kuron
Browse files

Fixed incomplete communication stencil for D3Q15

(and any other stencil omitting certain sub-diagonal directions)
parent 3e293a52
Branches
Tags
1 merge request!55Fix lbm periodicity handling
import itertools
from pystencils import Field, Assignment from pystencils import Field, Assignment
from pystencils.slicing import shift_slice, get_slice_before_ghost_layer, normalize_slice from pystencils.slicing import shift_slice, get_slice_before_ghost_layer, normalize_slice
from lbmpy.advanced_streaming.utility import is_inplace, get_accessor, numeric_index, \ from lbmpy.advanced_streaming.utility import is_inplace, get_accessor, numeric_index, \
...@@ -59,14 +60,17 @@ def get_communication_slices( ...@@ -59,14 +60,17 @@ def get_communication_slices(
Return the source and destination slices for periodicity handling or communication between blocks. Return the source and destination slices for periodicity handling or communication between blocks.
:param stencil: The stencil used by the LB method. :param stencil: The stencil used by the LB method.
:param comm_stencil: The stencil defining the communication directions. If None, it will be set to stencil. :param comm_stencil: The stencil defining the communication directions. If None, it will be set to the
full stencil (D2Q9 in 2D, D3Q27 in 3D, etc.).
:param streaming_pattern: The streaming pattern. :param streaming_pattern: The streaming pattern.
:param prev_timestep: Timestep after which communication is run. :param prev_timestep: Timestep after which communication is run.
:param ghost_layers: Number of ghost layers in each direction. :param ghost_layers: Number of ghost layers in each direction.
""" """
dim = len(stencil[0])
if comm_stencil is None: if comm_stencil is None:
comm_stencil = stencil comm_stencil = itertools.product(*([-1, 0, 1] for _ in range(dim)))
pdfs = Field.create_generic('pdfs', spatial_dimensions=len(stencil[0]), index_shape=(len(stencil),)) pdfs = Field.create_generic('pdfs', spatial_dimensions=len(stencil[0]), index_shape=(len(stencil),))
write_accesses = get_accessor(streaming_pattern, prev_timestep).write(pdfs, stencil) write_accesses = get_accessor(streaming_pattern, prev_timestep).write(pdfs, stencil)
...@@ -168,8 +172,9 @@ class LBMPeriodicityHandling: ...@@ -168,8 +172,9 @@ class LBMPeriodicityHandling:
stencil = get_stencil(stencil) stencil = get_stencil(stencil)
self.stencil = stencil self.stencil = stencil
self.dim = len(stencil[0])
self.dh = data_handling self.dh = data_handling
target = data_handling.default_target target = data_handling.default_target
assert target in ['cpu', 'gpu', 'opencl'] assert target in ['cpu', 'gpu', 'opencl']
...@@ -184,13 +189,16 @@ class LBMPeriodicityHandling: ...@@ -184,13 +189,16 @@ class LBMPeriodicityHandling:
self.pycuda_direct_copy = target == 'gpu' and pycuda_direct_copy self.pycuda_direct_copy = target == 'gpu' and pycuda_direct_copy
def is_copy_direction(direction): def is_copy_direction(direction):
s = 0
for d, p in zip(direction, periodicity): for d, p in zip(direction, periodicity):
s += abs(d)
if d != 0 and not p: if d != 0 and not p:
return False return False
return True return s != 0
copy_directions = tuple(filter(is_copy_direction, stencil[1:])) full_stencil = itertools.product(*([-1, 0, 1] for _ in range(self.dim)))
copy_directions = tuple(filter(is_copy_direction, full_stencil))
self.comm_slices = [] self.comm_slices = []
timesteps = get_timesteps(streaming_pattern) timesteps = get_timesteps(streaming_pattern)
for timestep in timesteps: for timestep in timesteps:
...@@ -224,7 +232,7 @@ class LBMPeriodicityHandling: ...@@ -224,7 +232,7 @@ class LBMPeriodicityHandling:
for src, dst in self.comm_slices[timestep.idx]: for src, dst in self.comm_slices[timestep.idx]:
kernels.append( kernels.append(
periodic_pdf_copy_kernel( periodic_pdf_copy_kernel(
pdf_field, src, dst, target=self.target, pdf_field, src, dst, target=self.target,
opencl_queue=self.opencl_queue, opencl_ctx=self.opencl_ctx)) opencl_queue=self.opencl_queue, opencl_ctx=self.opencl_ctx))
return kernels return kernels
......
import pystencils as ps
import numpy as np import numpy as np
from lbmpy.stencils import get_stencil from lbmpy.stencils import get_stencil
from pystencils.slicing import get_slice_before_ghost_layer, get_ghost_region_slice from pystencils.slicing import get_slice_before_ghost_layer, get_ghost_region_slice
from lbmpy.advanced_streaming.communication import get_communication_slices, _fix_length_one_slices from lbmpy.creationfunctions import create_lb_update_rule
from lbmpy.advanced_streaming.communication import get_communication_slices, _fix_length_one_slices, \
LBMPeriodicityHandling
from lbmpy.advanced_streaming.utility import streaming_patterns, Timestep from lbmpy.advanced_streaming.utility import streaming_patterns, Timestep
import pytest import pytest
@pytest.mark.parametrize('stencil', ['D2Q9', 'D3Q19', 'D3Q27'])
@pytest.mark.parametrize('stencil', ['D2Q9', 'D3Q15', 'D3Q19', 'D3Q27'])
@pytest.mark.parametrize('streaming_pattern', streaming_patterns) @pytest.mark.parametrize('streaming_pattern', streaming_patterns)
@pytest.mark.parametrize('timestep', [Timestep.EVEN, Timestep.ODD]) @pytest.mark.parametrize('timestep', [Timestep.EVEN, Timestep.ODD])
def test_slices_not_empty(stencil, streaming_pattern, timestep): def test_slices_not_empty(stencil, streaming_pattern, timestep):
stencil = get_stencil(stencil) stencil = get_stencil(stencil)
dim = len(stencil[0]) dim = len(stencil[0])
q = len(stencil) q = len(stencil)
arr = np.zeros( (4,) * dim + (q,) ) arr = np.zeros((4,) * dim + (q,))
slices = get_communication_slices(stencil, streaming_pattern=streaming_pattern, prev_timestep=timestep, ghost_layers=1) slices = get_communication_slices(stencil, streaming_pattern=streaming_pattern, prev_timestep=timestep,
ghost_layers=1)
for _, slices_list in slices.items(): for _, slices_list in slices.items():
for src, dst in slices_list: for src, dst in slices_list:
assert all(s != 0 for s in arr[src].shape) assert all(s != 0 for s in arr[src].shape)
assert all(s != 0 for s in arr[dst].shape) assert all(s != 0 for s in arr[dst].shape)
@pytest.mark.parametrize('stencil', ['D2Q9', 'D3Q19', 'D3Q27']) @pytest.mark.parametrize('stencil', ['D2Q9', 'D3Q15', 'D3Q19', 'D3Q27'])
@pytest.mark.parametrize('streaming_pattern', streaming_patterns) @pytest.mark.parametrize('streaming_pattern', streaming_patterns)
@pytest.mark.parametrize('timestep', [Timestep.EVEN, Timestep.ODD]) @pytest.mark.parametrize('timestep', [Timestep.EVEN, Timestep.ODD])
def test_src_dst_same_shape(stencil, streaming_pattern, timestep): def test_src_dst_same_shape(stencil, streaming_pattern, timestep):
stencil = get_stencil(stencil) stencil = get_stencil(stencil)
dim = len(stencil[0]) dim = len(stencil[0])
q = len(stencil) q = len(stencil)
arr = np.zeros( (4,) * dim + (q,) ) arr = np.zeros((4,) * dim + (q,))
slices = get_communication_slices(stencil, streaming_pattern=streaming_pattern, prev_timestep=timestep, ghost_layers=1) slices = get_communication_slices(stencil, streaming_pattern=streaming_pattern, prev_timestep=timestep,
ghost_layers=1)
for _, slices_list in slices.items(): for _, slices_list in slices.items():
for src, dst in slices_list: for src, dst in slices_list:
src_shape = arr[src].shape src_shape = arr[src].shape
...@@ -37,7 +45,7 @@ def test_src_dst_same_shape(stencil, streaming_pattern, timestep): ...@@ -37,7 +45,7 @@ def test_src_dst_same_shape(stencil, streaming_pattern, timestep):
assert src_shape == dst_shape assert src_shape == dst_shape
@pytest.mark.parametrize('stencil', ['D2Q9', 'D3Q19', 'D3Q27']) @pytest.mark.parametrize('stencil', ['D2Q9', 'D3Q15', 'D3Q19', 'D3Q27'])
def test_pull_communication_slices(stencil): def test_pull_communication_slices(stencil):
stencil = get_stencil(stencil) stencil = get_stencil(stencil)
...@@ -52,9 +60,71 @@ def test_pull_communication_slices(stencil): ...@@ -52,9 +60,71 @@ def test_pull_communication_slices(stencil):
src = s[0][:-1] src = s[0][:-1]
dst = s[1][:-1] dst = s[1][:-1]
break break
inner_slice = _fix_length_one_slices(get_slice_before_ghost_layer(d, ghost_layers=1)) inner_slice = _fix_length_one_slices(get_slice_before_ghost_layer(d, ghost_layers=1))
inv_dir = (-e for e in d) inv_dir = (-e for e in d)
gl_slice = _fix_length_one_slices(get_ghost_region_slice(inv_dir, ghost_layers=1)) gl_slice = _fix_length_one_slices(get_ghost_region_slice(inv_dir, ghost_layers=1))
assert src == inner_slice assert src == inner_slice
assert dst == gl_slice assert dst == gl_slice
\ No newline at end of file
@pytest.mark.parametrize('stencil_name', ['D2Q9', 'D3Q15', 'D3Q19', 'D3Q27'])
def test_optimised_and_full_communication_equivalence(stencil_name):
target = 'cpu'
stencil = get_stencil(stencil_name)
dim = len(stencil[0])
domain_size = (4, ) * dim
dh = ps.create_data_handling(domain_size, periodicity=(True, ) * dim,
parallel=False, default_target=target)
pdf = dh.add_array("pdf", values_per_cell=len(stencil), dtype=np.int64)
dh.fill("pdf", 0, ghost_layers=True)
pdf_tmp = dh.add_array("pdf_tmp", values_per_cell=len(stencil), dtype=np.int64)
dh.fill("pdf_tmp", 0, ghost_layers=True)
gl = dh.ghost_layers_of_field("pdf")
num = 0
for idx, x in np.ndenumerate(dh.cpu_arrays['pdf']):
dh.cpu_arrays['pdf'][idx] = num
dh.cpu_arrays['pdf_tmp'][idx] = num
num += 1
ac = create_lb_update_rule(stencil=stencil,
optimization={"symbolic_field": pdf,
"symbolic_temporary_field": pdf_tmp},
kernel_type='stream_pull_only')
ast = ps.create_kernel(ac, target=dh.default_target, cpu_openmp=True)
stream = ast.compile()
full_communication = dh.synchronization_function(pdf.name, target=dh.default_target, optimization={"openmp": True})
full_communication()
dh.run_kernel(stream)
dh.swap("pdf", "pdf_tmp")
pdf_full_communication = np.copy(dh.cpu_arrays['pdf'])
num = 0
for idx, x in np.ndenumerate(dh.cpu_arrays['pdf']):
dh.cpu_arrays['pdf'][idx] = num
dh.cpu_arrays['pdf_tmp'][idx] = num
num += 1
optimised_communication = LBMPeriodicityHandling(stencil=stencil, data_handling=dh, pdf_field_name=pdf.name,
streaming_pattern='pull')
optimised_communication()
dh.run_kernel(stream)
dh.swap("pdf", "pdf_tmp")
if dim == 3:
for i in range(gl, domain_size[0]):
for j in range(gl, domain_size[1]):
for k in range(gl, domain_size[2]):
for f in range(len(stencil)):
assert dh.cpu_arrays['pdf'][i, j, k, f] == pdf_full_communication[i, j, k, f], print(f)
else:
for i in range(gl, domain_size[0]):
for j in range(gl, domain_size[1]):
for f in range(len(stencil)):
assert dh.cpu_arrays['pdf'][i, j, f] == pdf_full_communication[i, j, f]
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment