Skip to content
Snippets Groups Projects

Compare revisions

Changes are shown as if the source revision was being merged into the target revision. Learn more about comparing revisions.

Source

Select target project
No results found

Target

Select target project
No results found
Show changes
Commits on Source (11)
......@@ -29,7 +29,7 @@ stages:
tests-and-coverage:
stage: pretest
extends: .every-commit
image: i10git.cs.fau.de:5005/pycodegen/pycodegen/full
image: i10git.cs.fau.de:5005/pycodegen/pycodegen/full:cupy12.3
script:
# - pip install sympy --upgrade
- export NUM_CORES=$(nproc --all)
......
------------------------ Important ---------------------------------
lbmpy is under the following GNU AGPLv3 license.
This license holds for the sources of lbmpy itself as well
as for all kernels generated with lbmpy i.e.
the output of lbmpy is also protected by the GNU AGPLv3 license.
----------------------------------------------------------------------
GNU AFFERO GENERAL PUBLIC LICENSE
Version 3, 19 November 2007
......
import itertools
from pystencils import CreateKernelConfig, Field, Assignment, AssignmentCollection
from pystencils.slicing import shift_slice, get_slice_before_ghost_layer, normalize_slice
from lbmpy.advanced_streaming.utility import is_inplace, get_accessor, numeric_index, \
Timestep, get_timesteps, numeric_offsets
from pystencils.slicing import (
shift_slice,
get_slice_before_ghost_layer,
normalize_slice,
)
from lbmpy.advanced_streaming.utility import (
is_inplace,
get_accessor,
numeric_index,
Timestep,
get_timesteps,
numeric_offsets,
)
from pystencils.datahandling import SerialDataHandling
from pystencils.enums import Target
from itertools import chain
......@@ -10,22 +20,28 @@ from itertools import chain
class LBMPeriodicityHandling:
def __init__(self, stencil, data_handling, pdf_field_name,
streaming_pattern='pull', ghost_layers=1,
cupy_direct_copy=True):
def __init__(
self,
stencil,
data_handling,
pdf_field_name,
streaming_pattern="pull",
ghost_layers=1,
cupy_direct_copy=True,
):
"""
Periodicity Handling for Lattice Boltzmann Streaming.
**On the usage with cuda:**
- cupy allows the copying of sliced arrays within device memory using the numpy syntax,
e.g. `dst[:,0] = src[:,-1]`. In this implementation, this is the default for periodicity
handling. Alternatively, if you set `cupy_direct_copy=False`, GPU kernels are generated and
compiled. The compiled kernels are almost twice as fast in execution as cupy array copying,
but especially for large stencils like D3Q27, their compilation can take up to 20 seconds.
Choose your weapon depending on your use case.
Periodicity Handling for Lattice Boltzmann Streaming.
**On the usage with cuda:**
- cupy allows the copying of sliced arrays within device memory using the numpy syntax,
e.g. `dst[:,0] = src[:,-1]`. In this implementation, this is the default for periodicity
handling. Alternatively, if you set `cupy_direct_copy=False`, GPU kernels are generated and
compiled. The compiled kernels are almost twice as fast in execution as cupy array copying,
but especially for large stencils like D3Q27, their compilation can take up to 20 seconds.
Choose your weapon depending on your use case.
"""
if not isinstance(data_handling, SerialDataHandling):
raise ValueError('Only serial data handling is supported!')
raise ValueError("Only serial data handling is supported!")
self.stencil = stencil
self.dim = stencil.D
......@@ -56,12 +72,16 @@ class LBMPeriodicityHandling:
self.comm_slices = []
timesteps = get_timesteps(streaming_pattern)
for timestep in timesteps:
slices_per_comm_dir = get_communication_slices(stencil=stencil,
comm_stencil=copy_directions,
streaming_pattern=streaming_pattern,
prev_timestep=timestep,
ghost_layers=ghost_layers)
self.comm_slices.append(list(chain.from_iterable(v for k, v in slices_per_comm_dir.items())))
slices_per_comm_dir = get_communication_slices(
stencil=stencil,
comm_stencil=copy_directions,
streaming_pattern=streaming_pattern,
prev_timestep=timestep,
ghost_layers=ghost_layers,
)
self.comm_slices.append(
list(chain.from_iterable(v for k, v in slices_per_comm_dir.items()))
)
if self.target == Target.GPU and not cupy_direct_copy:
self.device_copy_kernels = list()
......@@ -81,11 +101,11 @@ class LBMPeriodicityHandling:
arr[dst] = arr[src]
def _compile_copy_kernels(self, timestep):
assert self.target == Target.GPU
pdf_field = self.dh.fields[self.pdf_field_name]
kernels = []
for src, dst in self.comm_slices[timestep.idx]:
kernels.append(
periodic_pdf_copy_kernel(pdf_field, src, dst, target=self.target))
kernels.append(periodic_pdf_gpu_copy_kernel(pdf_field, src, dst))
return kernels
def _periodicity_handling_gpu(self, prev_timestep):
......@@ -100,7 +120,12 @@ class LBMPeriodicityHandling:
def get_communication_slices(
stencil, comm_stencil=None, streaming_pattern='pull', prev_timestep=Timestep.BOTH, ghost_layers=1):
stencil,
comm_stencil=None,
streaming_pattern="pull",
prev_timestep=Timestep.BOTH,
ghost_layers=1,
):
"""
Return the source and destination slices for periodicity handling or communication between blocks.
......@@ -116,7 +141,9 @@ def get_communication_slices(
if comm_stencil is None:
comm_stencil = itertools.product(*([-1, 0, 1] for _ in range(stencil.D)))
pdfs = Field.create_generic('pdfs', spatial_dimensions=len(stencil[0]), index_shape=(stencil.Q,))
pdfs = Field.create_generic(
"pdfs", spatial_dimensions=len(stencil[0]), index_shape=(stencil.Q,)
)
write_accesses = get_accessor(streaming_pattern, prev_timestep).write(pdfs, stencil)
slices_per_comm_direction = dict()
......@@ -130,7 +157,9 @@ def get_communication_slices(
d = stencil.index(streaming_dir)
write_index = numeric_index(write_accesses[d])[0]
origin_slice = get_slice_before_ghost_layer(comm_dir, ghost_layers=ghost_layers, thickness=1)
origin_slice = get_slice_before_ghost_layer(
comm_dir, ghost_layers=ghost_layers, thickness=1
)
src_slice = _fix_length_one_slices(origin_slice)
write_offsets = numeric_offsets(write_accesses[d])
......@@ -138,13 +167,15 @@ def get_communication_slices(
# TODO: this is just a hotfix. _trim_slice_in_direction breaks FreeSlip BC with adjacent periodic side
if streaming_pattern != "pull":
src_slice = shift_slice(_trim_slice_in_direction(src_slice, tangential_dir), write_offsets)
src_slice = shift_slice(
_trim_slice_in_direction(src_slice, tangential_dir), write_offsets
)
neighbour_transform = _get_neighbour_transform(comm_dir, ghost_layers)
dst_slice = shift_slice(src_slice, neighbour_transform)
src_slice = src_slice + (write_index, )
dst_slice = dst_slice + (write_index, )
src_slice = src_slice + (write_index,)
dst_slice = dst_slice + (write_index,)
slices_for_dir.append((src_slice, dst_slice))
......@@ -152,10 +183,10 @@ def get_communication_slices(
return slices_per_comm_direction
def periodic_pdf_copy_kernel(pdf_field, src_slice, dst_slice,
domain_size=None, target=Target.GPU):
"""Copies a rectangular array slice onto another non-overlapping array slice"""
from pystencils.gpucuda.kernelcreation import create_cuda_kernel
def periodic_pdf_gpu_copy_kernel(pdf_field, src_slice, dst_slice, domain_size=None):
"""Generate a GPU kernel which copies all values from one slice of a field
to another non-overlapping slice."""
from pystencils import create_kernel
pdf_idx = src_slice[-1]
assert isinstance(pdf_idx, int), "PDF index needs to be an integer constant"
......@@ -176,18 +207,28 @@ def periodic_pdf_copy_kernel(pdf_field, src_slice, dst_slice,
def _stop(s):
return s.stop if isinstance(s, slice) else s
offset = [_start(s1) - _start(s2) for s1, s2 in zip(normalized_from_slice, normalized_to_slice)]
assert offset == [_stop(s1) - _stop(s2) for s1, s2 in zip(normalized_from_slice, normalized_to_slice)], \
"Slices have to have same size"
copy_eq = AssignmentCollection(main_assignments=[Assignment(pdf_field(pdf_idx), pdf_field[tuple(offset)](pdf_idx))])
config = CreateKernelConfig(iteration_slice=dst_slice, skip_independence_check=True)
ast = create_cuda_kernel(copy_eq, config=config)
if target == Target.GPU:
from pystencils.gpucuda import make_python_function
return make_python_function(ast)
else:
raise ValueError('Invalid target:', target)
offset = [
_start(s1) - _start(s2)
for s1, s2 in zip(normalized_from_slice, normalized_to_slice)
]
assert offset == [
_stop(s1) - _stop(s2)
for s1, s2 in zip(normalized_from_slice, normalized_to_slice)
], "Slices have to have same size"
copy_eq = AssignmentCollection(
main_assignments=[
Assignment(pdf_field(pdf_idx), pdf_field[tuple(offset)](pdf_idx))
]
)
config = CreateKernelConfig(
iteration_slice=dst_slice,
skip_independence_check=True,
target=Target.GPU,
)
ast = create_kernel(copy_eq, config=config)
return ast.compile()
def _extend_dir(direction):
......@@ -196,10 +237,10 @@ def _extend_dir(direction):
elif direction[0] == 0:
for d in [-1, 0, 1]:
for rest in _extend_dir(direction[1:]):
yield (d, ) + rest
yield (d,) + rest
else:
for rest in _extend_dir(direction[1:]):
yield (direction[0], ) + rest
yield (direction[0],) + rest
def _get_neighbour_transform(direction, ghost_layers):
......
......@@ -157,7 +157,7 @@ class MuskerLaw(ImplicitWallFunctionModel):
def law(u_p, y_p):
arctan = sp.Float(5.424) * sp.atan(sp.Float(0.119760479041916168) * y_p - sp.Float(0.488023952095808383))
logarithm = (sp.Float(0.434) * sp.log((y_p + sp.Float(10.6)) ** sp.Float(9.6)
/ (y_p ** 2 - sp.Float(8.15) * y_p + sp.Float(86)) ** 2, 10))
/ (y_p ** 2 - sp.Float(8.15) * y_p + sp.Float(86)) ** 2))
return (arctan + logarithm - sp.Float(3.50727901936264842)) - u_p
u_plus = velocity_symbol / self.u_tau[0]
......
......@@ -376,8 +376,8 @@ class LBMConfig:
if not self.compressible and self.method in (Method.MONOMIAL_CUMULANT, Method.CUMULANT):
raise ValueError("Incompressible cumulant-based methods are not supported (yet).")
if self.zero_centered and (self.entropic or self.fluctuating):
raise ValueError("Entropic and fluctuating methods can only be created with `zero_centered=False`.")
if self.zero_centered and self.entropic:
raise ValueError("Entropic methods can only be created with `zero_centered=False`.")
# Check or infer delta-equilibrium
if self.delta_equilibrium is not None:
......
......@@ -19,9 +19,7 @@ def add_fluctuations_to_collision_rule(collision_rule, temperature=None, amplitu
""""""
if not (temperature and not amplitudes) or (temperature and amplitudes):
raise ValueError("Fluctuating LBM: Pass either 'temperature' or 'amplitudes'.")
if collision_rule.method.conserved_quantity_computation.zero_centered_pdfs:
raise ValueError("The fluctuating LBM is not implemented for zero-centered PDF storage.")
method = collision_rule.method
if not amplitudes:
amplitudes = fluctuation_amplitude_from_temperature(method, temperature, c_s_sq)
......@@ -44,9 +42,7 @@ def fluctuation_amplitude_from_temperature(method, temperature, c_s_sq=sp.Symbol
"""Produces amplitude equations according to (2.60) and (3.54) in Schiller08"""
normalization_factors = sp.matrix_multiply_elementwise(method.moment_matrix, method.moment_matrix) * \
sp.Matrix(method.weights)
density = method.zeroth_order_equilibrium_moment_symbol
if method.conserved_quantity_computation.zero_centered_pdfs:
density += 1
density = method._cqc.density_symbol
mu = temperature * density / c_s_sq
return [sp.sqrt(mu * norm * (1 - (1 - rr) ** 2))
for norm, rr in zip(normalization_factors, method.relaxation_rates)]
......
......@@ -172,7 +172,7 @@ class PdfsToMomentsByMatrixTransform(AbstractRawMomentTransform):
# ----------------------------- Private Members -----------------------------
@ property
@property
def _default_simplification(self):
forward_simp = SimplificationStrategy()
# forward_simp.add(substitute_moments_in_conserved_quantity_equations)
......@@ -218,7 +218,7 @@ class PdfsToMomentsByChimeraTransform(AbstractRawMomentTransform):
self.moment_polynomials)
self.poly_to_mono_matrix = self.mono_to_poly_matrix.inv()
@ property
@property
def absorbs_conserved_quantity_equations(self):
return True
......@@ -414,7 +414,7 @@ class PdfsToMomentsByChimeraTransform(AbstractRawMomentTransform):
# ----------------------------- Private Members -----------------------------
@ property
@property
def _default_simplification(self):
from lbmpy.methods.momentbased.momentbasedsimplifications import (
substitute_moments_in_conserved_quantity_equations,
......
......@@ -120,6 +120,26 @@ def create_stream_pull_with_output_kernel(lb_method, src_field, dst_field=None,
simplification_hints=output_eq_collection.simplification_hints)
def create_copy_kernel(stencil, src_field, dst_field, accessor=StreamPullTwoFieldsAccessor()):
"""Creates a copy kernel, which can be used to transfer information from src to dst field.
Args:
stencil: lattice Boltzmann stencil which is used in the form of a tuple of tuples
src_field: field used for reading pdf values
dst_field: field used for writing pdf values
accessor: instance of PdfFieldAccessor, defining where to read and write values
to create e.g. a fused stream-collide kernel See 'fieldaccess.PdfFieldAccessor'
Returns:
AssignmentCollection of a copy update rule
"""
temporary_symbols = sp.symbols(f'copied:{stencil.Q}')
subexpressions = [Assignment(tmp, acc) for tmp, acc in zip(temporary_symbols, accessor.write(src_field, stencil))]
main_assignments = [Assignment(acc, tmp) for acc, tmp in zip(accessor.write(dst_field, stencil), temporary_symbols)]
return AssignmentCollection(main_assignments, subexpressions=subexpressions)
# ---------------------------------- Pdf array creation for various layouts --------------------------------------------
......
......@@ -5,36 +5,52 @@ import numpy as np
from lbmpy.stencils import LBStencil
from pystencils.slicing import get_slice_before_ghost_layer, get_ghost_region_slice
from lbmpy.creationfunctions import create_lb_update_rule, LBMConfig, LBMOptimisation
from lbmpy.advanced_streaming.communication import get_communication_slices, _fix_length_one_slices, \
LBMPeriodicityHandling
from lbmpy.advanced_streaming.communication import (
get_communication_slices,
_fix_length_one_slices,
LBMPeriodicityHandling,
periodic_pdf_gpu_copy_kernel,
)
from lbmpy.advanced_streaming.utility import streaming_patterns, Timestep
from lbmpy.enums import Stencil
import pytest
@pytest.mark.parametrize('stencil', [Stencil.D2Q9, Stencil.D3Q15, Stencil.D3Q19, Stencil.D3Q27])
@pytest.mark.parametrize('streaming_pattern', streaming_patterns)
@pytest.mark.parametrize('timestep', [Timestep.EVEN, Timestep.ODD])
@pytest.mark.parametrize(
"stencil", [Stencil.D2Q9, Stencil.D3Q15, Stencil.D3Q19, Stencil.D3Q27]
)
@pytest.mark.parametrize("streaming_pattern", streaming_patterns)
@pytest.mark.parametrize("timestep", [Timestep.EVEN, Timestep.ODD])
def test_slices_not_empty(stencil, streaming_pattern, timestep):
stencil = LBStencil(stencil)
arr = np.zeros((4,) * stencil.D + (stencil.Q,))
slices = get_communication_slices(stencil, streaming_pattern=streaming_pattern, prev_timestep=timestep,
ghost_layers=1)
slices = get_communication_slices(
stencil,
streaming_pattern=streaming_pattern,
prev_timestep=timestep,
ghost_layers=1,
)
for _, slices_list in slices.items():
for src, dst in slices_list:
assert all(s != 0 for s in arr[src].shape)
assert all(s != 0 for s in arr[dst].shape)
@pytest.mark.parametrize('stencil', [Stencil.D2Q9, Stencil.D3Q15, Stencil.D3Q19, Stencil.D3Q27])
@pytest.mark.parametrize('streaming_pattern', streaming_patterns)
@pytest.mark.parametrize('timestep', [Timestep.EVEN, Timestep.ODD])
@pytest.mark.parametrize(
"stencil", [Stencil.D2Q9, Stencil.D3Q15, Stencil.D3Q19, Stencil.D3Q27]
)
@pytest.mark.parametrize("streaming_pattern", streaming_patterns)
@pytest.mark.parametrize("timestep", [Timestep.EVEN, Timestep.ODD])
def test_src_dst_same_shape(stencil, streaming_pattern, timestep):
stencil = LBStencil(stencil)
arr = np.zeros((4,) * stencil.D + (stencil.Q,))
slices = get_communication_slices(stencil, streaming_pattern=streaming_pattern, prev_timestep=timestep,
ghost_layers=1)
slices = get_communication_slices(
stencil,
streaming_pattern=streaming_pattern,
prev_timestep=timestep,
ghost_layers=1,
)
for _, slices_list in slices.items():
for src, dst in slices_list:
src_shape = arr[src].shape
......@@ -42,12 +58,15 @@ def test_src_dst_same_shape(stencil, streaming_pattern, timestep):
assert src_shape == dst_shape
@pytest.mark.parametrize('stencil', [Stencil.D2Q9, Stencil.D3Q15, Stencil.D3Q19, Stencil.D3Q27])
@pytest.mark.parametrize(
"stencil", [Stencil.D2Q9, Stencil.D3Q15, Stencil.D3Q19, Stencil.D3Q27]
)
def test_pull_communication_slices(stencil):
stencil = LBStencil(stencil)
slices = get_communication_slices(
stencil, streaming_pattern='pull', prev_timestep=Timestep.BOTH, ghost_layers=1)
stencil, streaming_pattern="pull", prev_timestep=Timestep.BOTH, ghost_layers=1
)
for i, d in enumerate(stencil):
if i == 0:
continue
......@@ -58,21 +77,115 @@ def test_pull_communication_slices(stencil):
dst = s[1][:-1]
break
inner_slice = _fix_length_one_slices(get_slice_before_ghost_layer(d, ghost_layers=1))
inner_slice = _fix_length_one_slices(
get_slice_before_ghost_layer(d, ghost_layers=1)
)
inv_dir = (-e for e in d)
gl_slice = _fix_length_one_slices(get_ghost_region_slice(inv_dir, ghost_layers=1))
gl_slice = _fix_length_one_slices(
get_ghost_region_slice(inv_dir, ghost_layers=1)
)
assert src == inner_slice
assert dst == gl_slice
@pytest.mark.parametrize('stencil_name', [Stencil.D2Q9, Stencil.D3Q15, Stencil.D3Q19, Stencil.D3Q27])
@pytest.mark.parametrize("direction", LBStencil(Stencil.D3Q27).stencil_entries)
@pytest.mark.parametrize("pull", [False, True])
def test_gpu_comm_kernels(direction: tuple, pull: bool):
pytest.importorskip("cupy")
stencil = LBStencil(Stencil.D3Q27)
inv_dir = stencil[stencil.inverse_index(direction)]
target = ps.Target.GPU
domain_size = (4,) * stencil.D
dh: ps.datahandling.SerialDataHandling = ps.create_data_handling(
domain_size,
periodicity=(True,) * stencil.D,
parallel=False,
default_target=target,
)
field = dh.add_array("field", values_per_cell=2)
if pull:
dst_slice = get_ghost_region_slice(inv_dir)
src_slice = get_slice_before_ghost_layer(direction)
else:
dst_slice = get_slice_before_ghost_layer(direction)
src_slice = get_ghost_region_slice(inv_dir)
src_slice += (1,)
dst_slice += (1,)
kernel = periodic_pdf_gpu_copy_kernel(field, src_slice, dst_slice)
dh.cpu_arrays[field.name][src_slice] = 42.0
dh.all_to_gpu()
dh.run_kernel(kernel)
dh.all_to_cpu()
np.testing.assert_equal(dh.cpu_arrays[field.name][dst_slice], 42.0)
@pytest.mark.parametrize("stencil", [Stencil.D2Q9, Stencil.D3Q19])
@pytest.mark.parametrize("streaming_pattern", streaming_patterns)
def test_direct_copy_and_kernels_equivalence(stencil: Stencil, streaming_pattern: str):
pytest.importorskip("cupy")
target = ps.Target.GPU
stencil = LBStencil(stencil)
domain_size = (4,) * stencil.D
dh: ps.datahandling.SerialDataHandling = ps.create_data_handling(
domain_size,
periodicity=(True,) * stencil.D,
parallel=False,
default_target=target,
)
pdfs_a = dh.add_array("pdfs_a", values_per_cell=stencil.Q)
pdfs_b = dh.add_array("pdfs_b", values_per_cell=stencil.Q)
dh.fill(pdfs_a.name, 0.0, ghost_layers=True)
dh.fill(pdfs_b.name, 0.0, ghost_layers=True)
for q in range(stencil.Q):
sl = ps.make_slice[:4, :4, q] if stencil.D == 2 else ps.make_slice[:4, :4, :4, q]
dh.cpu_arrays[pdfs_a.name][sl] = q
dh.cpu_arrays[pdfs_b.name][sl] = q
dh.all_to_gpu()
direct_copy = LBMPeriodicityHandling(stencil, dh, pdfs_a.name, streaming_pattern, cupy_direct_copy=True)
kernels_copy = LBMPeriodicityHandling(stencil, dh, pdfs_b.name, streaming_pattern, cupy_direct_copy=False)
direct_copy(Timestep.EVEN)
kernels_copy(Timestep.EVEN)
dh.all_to_cpu()
np.testing.assert_equal(
dh.cpu_arrays[pdfs_a.name],
dh.cpu_arrays[pdfs_b.name]
)
@pytest.mark.parametrize(
"stencil_name", [Stencil.D2Q9, Stencil.D3Q15, Stencil.D3Q19, Stencil.D3Q27]
)
def test_optimised_and_full_communication_equivalence(stencil_name):
target = ps.Target.CPU
stencil = LBStencil(stencil_name)
domain_size = (4, ) * stencil.D
domain_size = (4,) * stencil.D
dh = ps.create_data_handling(domain_size, periodicity=(True, ) * stencil.D,
parallel=False, default_target=target)
dh = ps.create_data_handling(
domain_size,
periodicity=(True,) * stencil.D,
parallel=False,
default_target=target,
)
pdf = dh.add_array("pdf", values_per_cell=len(stencil), dtype=np.int64)
dh.fill("pdf", 0, ghost_layers=True)
......@@ -82,9 +195,9 @@ def test_optimised_and_full_communication_equivalence(stencil_name):
gl = dh.ghost_layers_of_field("pdf")
num = 0
for idx, x in np.ndenumerate(dh.cpu_arrays['pdf']):
dh.cpu_arrays['pdf'][idx] = num
dh.cpu_arrays['pdf_tmp'][idx] = num
for idx, x in np.ndenumerate(dh.cpu_arrays["pdf"]):
dh.cpu_arrays["pdf"][idx] = num
dh.cpu_arrays["pdf_tmp"][idx] = num
num += 1
lbm_config = LBMConfig(stencil=stencil, kernel_type="stream_pull_only")
......@@ -95,21 +208,27 @@ def test_optimised_and_full_communication_equivalence(stencil_name):
ast = ps.create_kernel(ac, config=config)
stream = ast.compile()
full_communication = dh.synchronization_function(pdf.name, target=dh.default_target, optimization={"openmp": True})
full_communication = dh.synchronization_function(
pdf.name, target=dh.default_target, optimization={"openmp": True}
)
full_communication()
dh.run_kernel(stream)
dh.swap("pdf", "pdf_tmp")
pdf_full_communication = np.copy(dh.cpu_arrays['pdf'])
pdf_full_communication = np.copy(dh.cpu_arrays["pdf"])
num = 0
for idx, x in np.ndenumerate(dh.cpu_arrays['pdf']):
dh.cpu_arrays['pdf'][idx] = num
dh.cpu_arrays['pdf_tmp'][idx] = num
for idx, x in np.ndenumerate(dh.cpu_arrays["pdf"]):
dh.cpu_arrays["pdf"][idx] = num
dh.cpu_arrays["pdf_tmp"][idx] = num
num += 1
optimised_communication = LBMPeriodicityHandling(stencil=stencil, data_handling=dh, pdf_field_name=pdf.name,
streaming_pattern='pull')
optimised_communication = LBMPeriodicityHandling(
stencil=stencil,
data_handling=dh,
pdf_field_name=pdf.name,
streaming_pattern="pull",
)
optimised_communication()
dh.run_kernel(stream)
dh.swap("pdf", "pdf_tmp")
......@@ -119,9 +238,14 @@ def test_optimised_and_full_communication_equivalence(stencil_name):
for j in range(gl, domain_size[1]):
for k in range(gl, domain_size[2]):
for f in range(len(stencil)):
assert dh.cpu_arrays['pdf'][i, j, k, f] == pdf_full_communication[i, j, k, f], print(f)
assert (
dh.cpu_arrays["pdf"][i, j, k, f]
== pdf_full_communication[i, j, k, f]
), print(f)
else:
for i in range(gl, domain_size[0]):
for j in range(gl, domain_size[1]):
for f in range(len(stencil)):
assert dh.cpu_arrays['pdf'][i, j, f] == pdf_full_communication[i, j, f]
assert (
dh.cpu_arrays["pdf"][i, j, f] == pdf_full_communication[i, j, f]
)
This diff is collapsed.