Skip to content
Snippets Groups Projects

Compare revisions

Changes are shown as if the source revision was being merged into the target revision. Learn more about comparing revisions.

Source

Select target project
No results found

Target

Select target project
No results found
Show changes
Commits on Source (14)
Showing with 665 additions and 258 deletions
......@@ -29,7 +29,7 @@ stages:
tests-and-coverage:
stage: pretest
extends: .every-commit
image: i10git.cs.fau.de:5005/pycodegen/pycodegen/full
image: i10git.cs.fau.de:5005/pycodegen/pycodegen/full:cupy12.3
script:
# - pip install sympy --upgrade
- export NUM_CORES=$(nproc --all)
......
------------------------ Important ---------------------------------
lbmpy is under the following GNU AGPLv3 license.
This license holds for the sources of lbmpy itself as well
as for all kernels generated with lbmpy i.e.
the output of lbmpy is also protected by the GNU AGPLv3 license.
----------------------------------------------------------------------
GNU AFFERO GENERAL PUBLIC LICENSE
Version 3, 19 November 2007
......
......@@ -12,6 +12,7 @@ from .lbstep import LatticeBoltzmannStep
from .macroscopic_value_kernels import (
pdf_initialization_assignments,
macroscopic_values_getter,
strain_rate_tensor_getter,
compile_macroscopic_values_getter,
compile_macroscopic_values_setter,
create_advanced_velocity_setter_collision_rule,
......@@ -42,6 +43,7 @@ __all__ = [
"LatticeBoltzmannStep",
"pdf_initialization_assignments",
"macroscopic_values_getter",
"strain_rate_tensor_getter",
"compile_macroscopic_values_getter",
"compile_macroscopic_values_setter",
"create_advanced_velocity_setter_collision_rule",
......
import itertools
from pystencils import CreateKernelConfig, Field, Assignment, AssignmentCollection
from pystencils.slicing import shift_slice, get_slice_before_ghost_layer, normalize_slice
from lbmpy.advanced_streaming.utility import is_inplace, get_accessor, numeric_index, \
Timestep, get_timesteps, numeric_offsets
from pystencils.slicing import (
shift_slice,
get_slice_before_ghost_layer,
normalize_slice,
)
from lbmpy.advanced_streaming.utility import (
is_inplace,
get_accessor,
numeric_index,
Timestep,
get_timesteps,
numeric_offsets,
)
from pystencils.datahandling import SerialDataHandling
from pystencils.enums import Target
from itertools import chain
......@@ -10,22 +20,28 @@ from itertools import chain
class LBMPeriodicityHandling:
def __init__(self, stencil, data_handling, pdf_field_name,
streaming_pattern='pull', ghost_layers=1,
cupy_direct_copy=True):
def __init__(
self,
stencil,
data_handling,
pdf_field_name,
streaming_pattern="pull",
ghost_layers=1,
cupy_direct_copy=True,
):
"""
Periodicity Handling for Lattice Boltzmann Streaming.
**On the usage with cuda:**
- cupy allows the copying of sliced arrays within device memory using the numpy syntax,
e.g. `dst[:,0] = src[:,-1]`. In this implementation, this is the default for periodicity
handling. Alternatively, if you set `cupy_direct_copy=False`, GPU kernels are generated and
compiled. The compiled kernels are almost twice as fast in execution as cupy array copying,
but especially for large stencils like D3Q27, their compilation can take up to 20 seconds.
Choose your weapon depending on your use case.
Periodicity Handling for Lattice Boltzmann Streaming.
**On the usage with cuda:**
- cupy allows the copying of sliced arrays within device memory using the numpy syntax,
e.g. `dst[:,0] = src[:,-1]`. In this implementation, this is the default for periodicity
handling. Alternatively, if you set `cupy_direct_copy=False`, GPU kernels are generated and
compiled. The compiled kernels are almost twice as fast in execution as cupy array copying,
but especially for large stencils like D3Q27, their compilation can take up to 20 seconds.
Choose your weapon depending on your use case.
"""
if not isinstance(data_handling, SerialDataHandling):
raise ValueError('Only serial data handling is supported!')
raise ValueError("Only serial data handling is supported!")
self.stencil = stencil
self.dim = stencil.D
......@@ -56,12 +72,16 @@ class LBMPeriodicityHandling:
self.comm_slices = []
timesteps = get_timesteps(streaming_pattern)
for timestep in timesteps:
slices_per_comm_dir = get_communication_slices(stencil=stencil,
comm_stencil=copy_directions,
streaming_pattern=streaming_pattern,
prev_timestep=timestep,
ghost_layers=ghost_layers)
self.comm_slices.append(list(chain.from_iterable(v for k, v in slices_per_comm_dir.items())))
slices_per_comm_dir = get_communication_slices(
stencil=stencil,
comm_stencil=copy_directions,
streaming_pattern=streaming_pattern,
prev_timestep=timestep,
ghost_layers=ghost_layers,
)
self.comm_slices.append(
list(chain.from_iterable(v for k, v in slices_per_comm_dir.items()))
)
if self.target == Target.GPU and not cupy_direct_copy:
self.device_copy_kernels = list()
......@@ -81,11 +101,11 @@ class LBMPeriodicityHandling:
arr[dst] = arr[src]
def _compile_copy_kernels(self, timestep):
assert self.target == Target.GPU
pdf_field = self.dh.fields[self.pdf_field_name]
kernels = []
for src, dst in self.comm_slices[timestep.idx]:
kernels.append(
periodic_pdf_copy_kernel(pdf_field, src, dst, target=self.target))
kernels.append(periodic_pdf_gpu_copy_kernel(pdf_field, src, dst))
return kernels
def _periodicity_handling_gpu(self, prev_timestep):
......@@ -100,7 +120,12 @@ class LBMPeriodicityHandling:
def get_communication_slices(
stencil, comm_stencil=None, streaming_pattern='pull', prev_timestep=Timestep.BOTH, ghost_layers=1):
stencil,
comm_stencil=None,
streaming_pattern="pull",
prev_timestep=Timestep.BOTH,
ghost_layers=1,
):
"""
Return the source and destination slices for periodicity handling or communication between blocks.
......@@ -116,7 +141,9 @@ def get_communication_slices(
if comm_stencil is None:
comm_stencil = itertools.product(*([-1, 0, 1] for _ in range(stencil.D)))
pdfs = Field.create_generic('pdfs', spatial_dimensions=len(stencil[0]), index_shape=(stencil.Q,))
pdfs = Field.create_generic(
"pdfs", spatial_dimensions=len(stencil[0]), index_shape=(stencil.Q,)
)
write_accesses = get_accessor(streaming_pattern, prev_timestep).write(pdfs, stencil)
slices_per_comm_direction = dict()
......@@ -130,7 +157,9 @@ def get_communication_slices(
d = stencil.index(streaming_dir)
write_index = numeric_index(write_accesses[d])[0]
origin_slice = get_slice_before_ghost_layer(comm_dir, ghost_layers=ghost_layers, thickness=1)
origin_slice = get_slice_before_ghost_layer(
comm_dir, ghost_layers=ghost_layers, thickness=1
)
src_slice = _fix_length_one_slices(origin_slice)
write_offsets = numeric_offsets(write_accesses[d])
......@@ -138,13 +167,15 @@ def get_communication_slices(
# TODO: this is just a hotfix. _trim_slice_in_direction breaks FreeSlip BC with adjacent periodic side
if streaming_pattern != "pull":
src_slice = shift_slice(_trim_slice_in_direction(src_slice, tangential_dir), write_offsets)
src_slice = shift_slice(
_trim_slice_in_direction(src_slice, tangential_dir), write_offsets
)
neighbour_transform = _get_neighbour_transform(comm_dir, ghost_layers)
dst_slice = shift_slice(src_slice, neighbour_transform)
src_slice = src_slice + (write_index, )
dst_slice = dst_slice + (write_index, )
src_slice = src_slice + (write_index,)
dst_slice = dst_slice + (write_index,)
slices_for_dir.append((src_slice, dst_slice))
......@@ -152,10 +183,10 @@ def get_communication_slices(
return slices_per_comm_direction
def periodic_pdf_copy_kernel(pdf_field, src_slice, dst_slice,
domain_size=None, target=Target.GPU):
"""Copies a rectangular array slice onto another non-overlapping array slice"""
from pystencils.gpucuda.kernelcreation import create_cuda_kernel
def periodic_pdf_gpu_copy_kernel(pdf_field, src_slice, dst_slice, domain_size=None):
"""Generate a GPU kernel which copies all values from one slice of a field
to another non-overlapping slice."""
from pystencils import create_kernel
pdf_idx = src_slice[-1]
assert isinstance(pdf_idx, int), "PDF index needs to be an integer constant"
......@@ -176,18 +207,28 @@ def periodic_pdf_copy_kernel(pdf_field, src_slice, dst_slice,
def _stop(s):
return s.stop if isinstance(s, slice) else s
offset = [_start(s1) - _start(s2) for s1, s2 in zip(normalized_from_slice, normalized_to_slice)]
assert offset == [_stop(s1) - _stop(s2) for s1, s2 in zip(normalized_from_slice, normalized_to_slice)], \
"Slices have to have same size"
copy_eq = AssignmentCollection(main_assignments=[Assignment(pdf_field(pdf_idx), pdf_field[tuple(offset)](pdf_idx))])
config = CreateKernelConfig(iteration_slice=dst_slice, skip_independence_check=True)
ast = create_cuda_kernel(copy_eq, config=config)
if target == Target.GPU:
from pystencils.gpucuda import make_python_function
return make_python_function(ast)
else:
raise ValueError('Invalid target:', target)
offset = [
_start(s1) - _start(s2)
for s1, s2 in zip(normalized_from_slice, normalized_to_slice)
]
assert offset == [
_stop(s1) - _stop(s2)
for s1, s2 in zip(normalized_from_slice, normalized_to_slice)
], "Slices have to have same size"
copy_eq = AssignmentCollection(
main_assignments=[
Assignment(pdf_field(pdf_idx), pdf_field[tuple(offset)](pdf_idx))
]
)
config = CreateKernelConfig(
iteration_slice=dst_slice,
skip_independence_check=True,
target=Target.GPU,
)
ast = create_kernel(copy_eq, config=config)
return ast.compile()
def _extend_dir(direction):
......@@ -196,10 +237,10 @@ def _extend_dir(direction):
elif direction[0] == 0:
for d in [-1, 0, 1]:
for rest in _extend_dir(direction[1:]):
yield (d, ) + rest
yield (d,) + rest
else:
for rest in _extend_dir(direction[1:]):
yield (direction[0], ) + rest
yield (direction[0],) + rest
def _get_neighbour_transform(direction, ghost_layers):
......
......@@ -157,7 +157,7 @@ class MuskerLaw(ImplicitWallFunctionModel):
def law(u_p, y_p):
arctan = sp.Float(5.424) * sp.atan(sp.Float(0.119760479041916168) * y_p - sp.Float(0.488023952095808383))
logarithm = (sp.Float(0.434) * sp.log((y_p + sp.Float(10.6)) ** sp.Float(9.6)
/ (y_p ** 2 - sp.Float(8.15) * y_p + sp.Float(86)) ** 2, 10))
/ (y_p ** 2 - sp.Float(8.15) * y_p + sp.Float(86)) ** 2))
return (arctan + logarithm - sp.Float(3.50727901936264842)) - u_p
u_plus = velocity_symbol / self.u_tau[0]
......
......@@ -376,8 +376,8 @@ class LBMConfig:
if not self.compressible and self.method in (Method.MONOMIAL_CUMULANT, Method.CUMULANT):
raise ValueError("Incompressible cumulant-based methods are not supported (yet).")
if self.zero_centered and (self.entropic or self.fluctuating):
raise ValueError("Entropic and fluctuating methods can only be created with `zero_centered=False`.")
if self.zero_centered and self.entropic:
raise ValueError("Entropic methods can only be created with `zero_centered=False`.")
# Check or infer delta-equilibrium
if self.delta_equilibrium is not None:
......
......@@ -19,9 +19,7 @@ def add_fluctuations_to_collision_rule(collision_rule, temperature=None, amplitu
""""""
if not (temperature and not amplitudes) or (temperature and amplitudes):
raise ValueError("Fluctuating LBM: Pass either 'temperature' or 'amplitudes'.")
if collision_rule.method.conserved_quantity_computation.zero_centered_pdfs:
raise ValueError("The fluctuating LBM is not implemented for zero-centered PDF storage.")
method = collision_rule.method
if not amplitudes:
amplitudes = fluctuation_amplitude_from_temperature(method, temperature, c_s_sq)
......@@ -44,9 +42,7 @@ def fluctuation_amplitude_from_temperature(method, temperature, c_s_sq=sp.Symbol
"""Produces amplitude equations according to (2.60) and (3.54) in Schiller08"""
normalization_factors = sp.matrix_multiply_elementwise(method.moment_matrix, method.moment_matrix) * \
sp.Matrix(method.weights)
density = method.zeroth_order_equilibrium_moment_symbol
if method.conserved_quantity_computation.zero_centered_pdfs:
density += 1
density = method._cqc.density_symbol
mu = temperature * density / c_s_sq
return [sp.sqrt(mu * norm * (1 - (1 - rr) ** 2))
for norm, rr in zip(normalization_factors, method.relaxation_rates)]
......
import functools
import sympy as sp
from copy import deepcopy
from lbmpy.simplificationfactory import create_simplification_strategy
from pystencils import create_kernel, CreateKernelConfig
from pystencils import create_kernel, CreateKernelConfig, Assignment
from pystencils.field import Field, get_layout_of_array
from pystencils.enums import Target
from lbmpy.advanced_streaming.utility import get_accessor, Timestep
from lbmpy.relaxationrates import get_shear_relaxation_rate
from lbmpy.utils import second_order_moment_tensor
def pdf_initialization_assignments(lb_method, density, velocity, pdfs,
streaming_pattern='pull', previous_timestep=Timestep.BOTH,
set_pre_collision_pdfs=False):
"""Assignments to initialize the pdf field with equilibrium"""
def get_field_accesses(lb_method, pdfs, streaming_pattern, previous_timestep, pre_collision_pdfs):
if isinstance(pdfs, Field):
accessor = get_accessor(streaming_pattern, previous_timestep)
if set_pre_collision_pdfs:
if pre_collision_pdfs:
field_accesses = accessor.read(pdfs, lb_method.stencil)
else:
field_accesses = accessor.write(pdfs, lb_method.stencil)
elif streaming_pattern == 'pull' and not set_pre_collision_pdfs:
elif streaming_pattern == 'pull' and not pre_collision_pdfs:
field_accesses = pdfs
else:
raise ValueError("Invalid value of pdfs: A PDF field reference is required to derive "
+ f"initialization assignments for streaming pattern {streaming_pattern}.")
return field_accesses
def pdf_initialization_assignments(lb_method, density, velocity, pdfs,
streaming_pattern='pull', previous_timestep=Timestep.BOTH,
set_pre_collision_pdfs=False):
"""Assignments to initialize the pdf field with equilibrium"""
field_accesses = get_field_accesses(lb_method, pdfs, streaming_pattern, previous_timestep, set_pre_collision_pdfs)
if isinstance(density, Field):
density = density.center
if isinstance(velocity, Field):
velocity = velocity.center_vector
......@@ -41,17 +48,9 @@ def pdf_initialization_assignments(lb_method, density, velocity, pdfs,
def macroscopic_values_getter(lb_method, density, velocity, pdfs,
streaming_pattern='pull', previous_timestep=Timestep.BOTH,
use_pre_collision_pdfs=False):
if isinstance(pdfs, Field):
accessor = get_accessor(streaming_pattern, previous_timestep)
if use_pre_collision_pdfs:
field_accesses = accessor.read(pdfs, lb_method.stencil)
else:
field_accesses = accessor.write(pdfs, lb_method.stencil)
elif streaming_pattern == 'pull' and not use_pre_collision_pdfs:
field_accesses = pdfs
else:
raise ValueError("Invalid value of pdfs: A PDF field reference is required to derive "
+ f"getter assignments for streaming pattern {streaming_pattern}.")
field_accesses = get_field_accesses(lb_method, pdfs, streaming_pattern, previous_timestep, use_pre_collision_pdfs)
cqc = lb_method.conserved_quantity_computation
assert not (velocity is None and density is None)
output_spec = {}
......@@ -65,6 +64,27 @@ def macroscopic_values_getter(lb_method, density, velocity, pdfs,
macroscopic_values_setter = pdf_initialization_assignments
def strain_rate_tensor_getter(lb_method, strain_rate_tensor, pdfs, streaming_pattern='pull',
previous_timestep=Timestep.BOTH, use_pre_collision_pdfs=False):
field_accesses = get_field_accesses(lb_method, pdfs, streaming_pattern, previous_timestep, use_pre_collision_pdfs)
if isinstance(strain_rate_tensor, Field):
strain_rate_tensor = strain_rate_tensor.center_vector
omega_s = get_shear_relaxation_rate(lb_method)
equilibrium = lb_method.equilibrium_distribution
rho = equilibrium.density if equilibrium.compressible else equilibrium.background_density
f_neq = sp.Matrix([field_accesses[i] for i in range(lb_method.stencil.Q)]) - lb_method.get_equilibrium_terms()
pi = second_order_moment_tensor(f_neq, lb_method.stencil)
strain_rate_tensor_equ = - 1.5 * (omega_s / rho) * pi
assignments = [Assignment(strain_rate_tensor[i * lb_method.stencil.D + j], strain_rate_tensor_equ[i, j])
for i in range(lb_method.stencil.D) for j in range(lb_method.stencil.D)]
return assignments
def compile_macroscopic_values_getter(lb_method, output_quantities, pdf_arr=None,
ghost_layers=1, iteration_slice=None,
field_layout='numpy', target=Target.CPU,
......
......@@ -124,7 +124,8 @@ class AbstractLbMethod(abc.ABC):
substitutions = [Assignment(e[1], e[0]) for e in subexpressions.items()]
if relaxation_rates_modifier is not None:
symbolic_relaxation_rates = [r * relaxation_rates_modifier for r in symbolic_relaxation_rates]
for srr in symbolic_relaxation_rates:
assert isinstance(srr, sp.Symbol)
else:
for srr in symbolic_relaxation_rates:
assert isinstance(srr, sp.Symbol)
return substitutions, sp.diag(*symbolic_relaxation_rates)
......@@ -172,7 +172,7 @@ class PdfsToMomentsByMatrixTransform(AbstractRawMomentTransform):
# ----------------------------- Private Members -----------------------------
@ property
@property
def _default_simplification(self):
forward_simp = SimplificationStrategy()
# forward_simp.add(substitute_moments_in_conserved_quantity_equations)
......@@ -218,7 +218,7 @@ class PdfsToMomentsByChimeraTransform(AbstractRawMomentTransform):
self.moment_polynomials)
self.poly_to_mono_matrix = self.mono_to_poly_matrix.inv()
@ property
@property
def absorbs_conserved_quantity_equations(self):
return True
......@@ -414,7 +414,7 @@ class PdfsToMomentsByChimeraTransform(AbstractRawMomentTransform):
# ----------------------------- Private Members -----------------------------
@ property
@property
def _default_simplification(self):
from lbmpy.methods.momentbased.momentbasedsimplifications import (
substitute_moments_in_conserved_quantity_equations,
......
......@@ -120,6 +120,26 @@ def create_stream_pull_with_output_kernel(lb_method, src_field, dst_field=None,
simplification_hints=output_eq_collection.simplification_hints)
def create_copy_kernel(stencil, src_field, dst_field, accessor=StreamPullTwoFieldsAccessor()):
"""Creates a copy kernel, which can be used to transfer information from src to dst field.
Args:
stencil: lattice Boltzmann stencil which is used in the form of a tuple of tuples
src_field: field used for reading pdf values
dst_field: field used for writing pdf values
accessor: instance of PdfFieldAccessor, defining where to read and write values
to create e.g. a fused stream-collide kernel See 'fieldaccess.PdfFieldAccessor'
Returns:
AssignmentCollection of a copy update rule
"""
temporary_symbols = sp.symbols(f'copied:{stencil.Q}')
subexpressions = [Assignment(tmp, acc) for tmp, acc in zip(temporary_symbols, accessor.write(src_field, stencil))]
main_assignments = [Assignment(acc, tmp) for acc, tmp in zip(accessor.write(dst_field, stencil), temporary_symbols)]
return AssignmentCollection(main_assignments, subexpressions=subexpressions)
# ---------------------------------- Pdf array creation for various layouts --------------------------------------------
......
......@@ -5,36 +5,52 @@ import numpy as np
from lbmpy.stencils import LBStencil
from pystencils.slicing import get_slice_before_ghost_layer, get_ghost_region_slice
from lbmpy.creationfunctions import create_lb_update_rule, LBMConfig, LBMOptimisation
from lbmpy.advanced_streaming.communication import get_communication_slices, _fix_length_one_slices, \
LBMPeriodicityHandling
from lbmpy.advanced_streaming.communication import (
get_communication_slices,
_fix_length_one_slices,
LBMPeriodicityHandling,
periodic_pdf_gpu_copy_kernel,
)
from lbmpy.advanced_streaming.utility import streaming_patterns, Timestep
from lbmpy.enums import Stencil
import pytest
@pytest.mark.parametrize('stencil', [Stencil.D2Q9, Stencil.D3Q15, Stencil.D3Q19, Stencil.D3Q27])
@pytest.mark.parametrize('streaming_pattern', streaming_patterns)
@pytest.mark.parametrize('timestep', [Timestep.EVEN, Timestep.ODD])
@pytest.mark.parametrize(
"stencil", [Stencil.D2Q9, Stencil.D3Q15, Stencil.D3Q19, Stencil.D3Q27]
)
@pytest.mark.parametrize("streaming_pattern", streaming_patterns)
@pytest.mark.parametrize("timestep", [Timestep.EVEN, Timestep.ODD])
def test_slices_not_empty(stencil, streaming_pattern, timestep):
stencil = LBStencil(stencil)
arr = np.zeros((4,) * stencil.D + (stencil.Q,))
slices = get_communication_slices(stencil, streaming_pattern=streaming_pattern, prev_timestep=timestep,
ghost_layers=1)
slices = get_communication_slices(
stencil,
streaming_pattern=streaming_pattern,
prev_timestep=timestep,
ghost_layers=1,
)
for _, slices_list in slices.items():
for src, dst in slices_list:
assert all(s != 0 for s in arr[src].shape)
assert all(s != 0 for s in arr[dst].shape)
@pytest.mark.parametrize('stencil', [Stencil.D2Q9, Stencil.D3Q15, Stencil.D3Q19, Stencil.D3Q27])
@pytest.mark.parametrize('streaming_pattern', streaming_patterns)
@pytest.mark.parametrize('timestep', [Timestep.EVEN, Timestep.ODD])
@pytest.mark.parametrize(
"stencil", [Stencil.D2Q9, Stencil.D3Q15, Stencil.D3Q19, Stencil.D3Q27]
)
@pytest.mark.parametrize("streaming_pattern", streaming_patterns)
@pytest.mark.parametrize("timestep", [Timestep.EVEN, Timestep.ODD])
def test_src_dst_same_shape(stencil, streaming_pattern, timestep):
stencil = LBStencil(stencil)
arr = np.zeros((4,) * stencil.D + (stencil.Q,))
slices = get_communication_slices(stencil, streaming_pattern=streaming_pattern, prev_timestep=timestep,
ghost_layers=1)
slices = get_communication_slices(
stencil,
streaming_pattern=streaming_pattern,
prev_timestep=timestep,
ghost_layers=1,
)
for _, slices_list in slices.items():
for src, dst in slices_list:
src_shape = arr[src].shape
......@@ -42,12 +58,15 @@ def test_src_dst_same_shape(stencil, streaming_pattern, timestep):
assert src_shape == dst_shape
@pytest.mark.parametrize('stencil', [Stencil.D2Q9, Stencil.D3Q15, Stencil.D3Q19, Stencil.D3Q27])
@pytest.mark.parametrize(
"stencil", [Stencil.D2Q9, Stencil.D3Q15, Stencil.D3Q19, Stencil.D3Q27]
)
def test_pull_communication_slices(stencil):
stencil = LBStencil(stencil)
slices = get_communication_slices(
stencil, streaming_pattern='pull', prev_timestep=Timestep.BOTH, ghost_layers=1)
stencil, streaming_pattern="pull", prev_timestep=Timestep.BOTH, ghost_layers=1
)
for i, d in enumerate(stencil):
if i == 0:
continue
......@@ -58,21 +77,115 @@ def test_pull_communication_slices(stencil):
dst = s[1][:-1]
break
inner_slice = _fix_length_one_slices(get_slice_before_ghost_layer(d, ghost_layers=1))
inner_slice = _fix_length_one_slices(
get_slice_before_ghost_layer(d, ghost_layers=1)
)
inv_dir = (-e for e in d)
gl_slice = _fix_length_one_slices(get_ghost_region_slice(inv_dir, ghost_layers=1))
gl_slice = _fix_length_one_slices(
get_ghost_region_slice(inv_dir, ghost_layers=1)
)
assert src == inner_slice
assert dst == gl_slice
@pytest.mark.parametrize('stencil_name', [Stencil.D2Q9, Stencil.D3Q15, Stencil.D3Q19, Stencil.D3Q27])
@pytest.mark.parametrize("direction", LBStencil(Stencil.D3Q27).stencil_entries)
@pytest.mark.parametrize("pull", [False, True])
def test_gpu_comm_kernels(direction: tuple, pull: bool):
pytest.importorskip("cupy")
stencil = LBStencil(Stencil.D3Q27)
inv_dir = stencil[stencil.inverse_index(direction)]
target = ps.Target.GPU
domain_size = (4,) * stencil.D
dh: ps.datahandling.SerialDataHandling = ps.create_data_handling(
domain_size,
periodicity=(True,) * stencil.D,
parallel=False,
default_target=target,
)
field = dh.add_array("field", values_per_cell=2)
if pull:
dst_slice = get_ghost_region_slice(inv_dir)
src_slice = get_slice_before_ghost_layer(direction)
else:
dst_slice = get_slice_before_ghost_layer(direction)
src_slice = get_ghost_region_slice(inv_dir)
src_slice += (1,)
dst_slice += (1,)
kernel = periodic_pdf_gpu_copy_kernel(field, src_slice, dst_slice)
dh.cpu_arrays[field.name][src_slice] = 42.0
dh.all_to_gpu()
dh.run_kernel(kernel)
dh.all_to_cpu()
np.testing.assert_equal(dh.cpu_arrays[field.name][dst_slice], 42.0)
@pytest.mark.parametrize("stencil", [Stencil.D2Q9, Stencil.D3Q19])
@pytest.mark.parametrize("streaming_pattern", streaming_patterns)
def test_direct_copy_and_kernels_equivalence(stencil: Stencil, streaming_pattern: str):
pytest.importorskip("cupy")
target = ps.Target.GPU
stencil = LBStencil(stencil)
domain_size = (4,) * stencil.D
dh: ps.datahandling.SerialDataHandling = ps.create_data_handling(
domain_size,
periodicity=(True,) * stencil.D,
parallel=False,
default_target=target,
)
pdfs_a = dh.add_array("pdfs_a", values_per_cell=stencil.Q)
pdfs_b = dh.add_array("pdfs_b", values_per_cell=stencil.Q)
dh.fill(pdfs_a.name, 0.0, ghost_layers=True)
dh.fill(pdfs_b.name, 0.0, ghost_layers=True)
for q in range(stencil.Q):
sl = ps.make_slice[:4, :4, q] if stencil.D == 2 else ps.make_slice[:4, :4, :4, q]
dh.cpu_arrays[pdfs_a.name][sl] = q
dh.cpu_arrays[pdfs_b.name][sl] = q
dh.all_to_gpu()
direct_copy = LBMPeriodicityHandling(stencil, dh, pdfs_a.name, streaming_pattern, cupy_direct_copy=True)
kernels_copy = LBMPeriodicityHandling(stencil, dh, pdfs_b.name, streaming_pattern, cupy_direct_copy=False)
direct_copy(Timestep.EVEN)
kernels_copy(Timestep.EVEN)
dh.all_to_cpu()
np.testing.assert_equal(
dh.cpu_arrays[pdfs_a.name],
dh.cpu_arrays[pdfs_b.name]
)
@pytest.mark.parametrize(
"stencil_name", [Stencil.D2Q9, Stencil.D3Q15, Stencil.D3Q19, Stencil.D3Q27]
)
def test_optimised_and_full_communication_equivalence(stencil_name):
target = ps.Target.CPU
stencil = LBStencil(stencil_name)
domain_size = (4, ) * stencil.D
domain_size = (4,) * stencil.D
dh = ps.create_data_handling(domain_size, periodicity=(True, ) * stencil.D,
parallel=False, default_target=target)
dh = ps.create_data_handling(
domain_size,
periodicity=(True,) * stencil.D,
parallel=False,
default_target=target,
)
pdf = dh.add_array("pdf", values_per_cell=len(stencil), dtype=np.int64)
dh.fill("pdf", 0, ghost_layers=True)
......@@ -82,9 +195,9 @@ def test_optimised_and_full_communication_equivalence(stencil_name):
gl = dh.ghost_layers_of_field("pdf")
num = 0
for idx, x in np.ndenumerate(dh.cpu_arrays['pdf']):
dh.cpu_arrays['pdf'][idx] = num
dh.cpu_arrays['pdf_tmp'][idx] = num
for idx, x in np.ndenumerate(dh.cpu_arrays["pdf"]):
dh.cpu_arrays["pdf"][idx] = num
dh.cpu_arrays["pdf_tmp"][idx] = num
num += 1
lbm_config = LBMConfig(stencil=stencil, kernel_type="stream_pull_only")
......@@ -95,21 +208,27 @@ def test_optimised_and_full_communication_equivalence(stencil_name):
ast = ps.create_kernel(ac, config=config)
stream = ast.compile()
full_communication = dh.synchronization_function(pdf.name, target=dh.default_target, optimization={"openmp": True})
full_communication = dh.synchronization_function(
pdf.name, target=dh.default_target, optimization={"openmp": True}
)
full_communication()
dh.run_kernel(stream)
dh.swap("pdf", "pdf_tmp")
pdf_full_communication = np.copy(dh.cpu_arrays['pdf'])
pdf_full_communication = np.copy(dh.cpu_arrays["pdf"])
num = 0
for idx, x in np.ndenumerate(dh.cpu_arrays['pdf']):
dh.cpu_arrays['pdf'][idx] = num
dh.cpu_arrays['pdf_tmp'][idx] = num
for idx, x in np.ndenumerate(dh.cpu_arrays["pdf"]):
dh.cpu_arrays["pdf"][idx] = num
dh.cpu_arrays["pdf_tmp"][idx] = num
num += 1
optimised_communication = LBMPeriodicityHandling(stencil=stencil, data_handling=dh, pdf_field_name=pdf.name,
streaming_pattern='pull')
optimised_communication = LBMPeriodicityHandling(
stencil=stencil,
data_handling=dh,
pdf_field_name=pdf.name,
streaming_pattern="pull",
)
optimised_communication()
dh.run_kernel(stream)
dh.swap("pdf", "pdf_tmp")
......@@ -119,9 +238,14 @@ def test_optimised_and_full_communication_equivalence(stencil_name):
for j in range(gl, domain_size[1]):
for k in range(gl, domain_size[2]):
for f in range(len(stencil)):
assert dh.cpu_arrays['pdf'][i, j, k, f] == pdf_full_communication[i, j, k, f], print(f)
assert (
dh.cpu_arrays["pdf"][i, j, k, f]
== pdf_full_communication[i, j, k, f]
), print(f)
else:
for i in range(gl, domain_size[0]):
for j in range(gl, domain_size[1]):
for f in range(len(stencil)):
assert dh.cpu_arrays['pdf'][i, j, f] == pdf_full_communication[i, j, f]
assert (
dh.cpu_arrays["pdf"][i, j, f] == pdf_full_communication[i, j, f]
)
This diff is collapsed.
import pytest
import numpy as np
import sympy as sp
from lbmpy.creationfunctions import create_lb_method, LBMConfig
from lbmpy.enums import Stencil, ForceModel, Method
from lbmpy.macroscopic_value_kernels import (
compile_macroscopic_values_getter, compile_macroscopic_values_setter)
compile_macroscopic_values_getter, compile_macroscopic_values_setter, strain_rate_tensor_getter)
from lbmpy.advanced_streaming.utility import streaming_patterns, Timestep
from lbmpy.stencils import LBStencil
......@@ -83,3 +84,18 @@ def test_set_get_constant_velocity(stencil, force_model, compressible):
computed_velocity = velocity_output_field[0, 0, 0, :]
np.testing.assert_almost_equal(np.array(ref_velocity[:method.dim]), computed_velocity)
@pytest.mark.parametrize('stencil', [Stencil.D2Q9, Stencil.D3Q19])
@pytest.mark.parametrize("method_enum", [Method.SRT, Method.TRT, Method.MRT, Method.CUMULANT])
def test_get_strain_rate_tensor(stencil, method_enum):
stencil = LBStencil(stencil)
lbm_config = LBMConfig(stencil=stencil, method=method_enum, compressible=True)
method = create_lb_method(lbm_config=lbm_config)
pdfs = sp.symbols(f"f_:{stencil.Q}")
strain_rate_tensor = sp.symbols(f"strain_rate_tensor_:{method.dim * method.dim}")
getter_assignments = strain_rate_tensor_getter(method, strain_rate_tensor, pdfs)
assert len(getter_assignments) == method.dim * method.dim
import pytest
import numpy as np
from pystencils import fields, CreateKernelConfig, Target, create_kernel, get_code_str, create_data_handling
from lbmpy import LBMConfig, Stencil, Method, LBStencil, create_lb_method, create_lb_collision_rule, LBMOptimisation, \
create_lb_update_rule
from lbmpy.partially_saturated_cells import PSMConfig
@pytest.mark.parametrize("stencil", [Stencil.D2Q9, Stencil.D3Q19, Stencil.D3Q27])
@pytest.mark.parametrize("method_enum", [Method.SRT, Method.MRT, Method.CUMULANT])
def test_psm_integration(stencil, method_enum):
stencil = LBStencil(stencil)
fraction_field = fields("fraction_field: double[10, 10, 5]", layout=(2, 1, 0))
object_vel = fields("object_vel(3): double[10, 10, 5]", layout=(3, 2, 1, 0))
psm_config = PSMConfig(fraction_field=fraction_field, object_velocity_field=object_vel)
lbm_config = LBMConfig(stencil=stencil, method=method_enum, relaxation_rate=1.5, compressible=True,
psm_config=psm_config)
config = CreateKernelConfig(target=Target.CPU)#
collision_rule = create_lb_collision_rule(lbm_config=lbm_config)
ast = create_kernel(collision_rule, config=config)
code_str = get_code_str(ast)
def get_data_handling_for_psm(use_psm):
domain_size = (10, 10)
stencil = LBStencil(Stencil.D2Q9)
dh = create_data_handling(domain_size=domain_size, periodicity=(True, False))
f = dh.add_array('f', values_per_cell=len(stencil))
dh.fill(f.name, 0.0, ghost_layers=True)
f_tmp = dh.add_array('f_tmp', values_per_cell=len(stencil))
dh.fill(f_tmp.name, 0.0, ghost_layers=True)
psm_config = None
if use_psm:
fraction_field = dh.add_array('fraction_field', values_per_cell=1)
dh.fill(fraction_field.name, 0.0, ghost_layers=True)
object_vel = dh.add_array('object_vel', values_per_cell=dh.dim)
dh.fill(object_vel.name, 0.0, ghost_layers=True)
psm_config = PSMConfig(fraction_field=fraction_field, object_velocity_field=object_vel)
lbm_config = LBMConfig(stencil=stencil, method=Method.SRT, relaxation_rate=1.5, psm_config=psm_config)
lbm_optimisation = LBMOptimisation(symbolic_field=f, symbolic_temporary_field=f_tmp)
update_rule = create_lb_update_rule(lbm_config=lbm_config, lbm_optimisation=lbm_optimisation)
kernel_lb_step_with_psm = create_kernel(update_rule).compile()
return (dh, kernel_lb_step_with_psm)
def test_lbm_vs_psm():
psm_dh, psm_kernel = get_data_handling_for_psm(True)
lbm_dh, lbm_kernel = get_data_handling_for_psm(False)
for i in range(20):
psm_dh.run_kernel(psm_kernel)
psm_dh.swap('f', 'f_tmp')
lbm_dh.run_kernel(lbm_kernel)
lbm_dh.swap('f', 'f_tmp')
max_vel_error = np.max(np.abs(psm_dh.gather_array('f') - lbm_dh.gather_array('f')))
np.testing.assert_allclose(max_vel_error, 0, atol=1e-14)