Compare revisions

Changes are shown as if the source revision was being merged into the target revision.

Source

Select target project
No results found
Select Git revision

Target

Select target project
  • ravi.k.ayyala/lbmpy
  • brendan-waters/lbmpy
  • anirudh.jonnalagadda/lbmpy
  • jbadwaik/lbmpy
  • alexander.reinauer/lbmpy
  • itischler/lbmpy
  • he66coqe/lbmpy
  • ev81oxyl/lbmpy
  • Bindgen/lbmpy
  • da15siwa/lbmpy
  • holzer/lbmpy
  • RudolfWeeber/lbmpy
  • pycodegen/lbmpy
13 results
Select Git revision
Show changes
Commits on Source (16)
Showing with 817 additions and 381 deletions
@@ -29,7 +29,7 @@ stages:
 tests-and-coverage:
   stage: pretest
   extends: .every-commit
-  image: i10git.cs.fau.de:5005/pycodegen/pycodegen/full
+  image: i10git.cs.fau.de:5005/pycodegen/pycodegen/full:cupy12.3
   script:
     # - pip install sympy --upgrade
     - export NUM_CORES=$(nproc --all)
......
@@ -11,3 +11,4 @@ Contributors:
 - Rudolf Weeber <weeber@icp.uni-stuttgart.de>
 - Christian Godenschwager <christian.godenschwager@fau.de>
 - Jan Hönig <jan.hoenig@fau.de>
+- Philipp Suffa <philipp.suffa@fau.de>
+------------------------ Important ---------------------------------
+lbmpy is under the following GNU AGPLv3 license.
+This license holds for the sources of lbmpy itself as well
+as for all kernels generated with lbmpy i.e.
+the output of lbmpy is also protected by the GNU AGPLv3 license.
+----------------------------------------------------------------------
                     GNU AFFERO GENERAL PUBLIC LICENSE
                        Version 3, 19 November 2007
......
@@ -12,6 +12,7 @@ from .lbstep import LatticeBoltzmannStep
 from .macroscopic_value_kernels import (
     pdf_initialization_assignments,
     macroscopic_values_getter,
+    strain_rate_tensor_getter,
     compile_macroscopic_values_getter,
     compile_macroscopic_values_setter,
     create_advanced_velocity_setter_collision_rule,
@@ -42,6 +43,7 @@ __all__ = [
     "LatticeBoltzmannStep",
     "pdf_initialization_assignments",
     "macroscopic_values_getter",
+    "strain_rate_tensor_getter",
     "compile_macroscopic_values_getter",
     "compile_macroscopic_values_setter",
     "create_advanced_velocity_setter_collision_rule",
......
 import itertools
 from pystencils import CreateKernelConfig, Field, Assignment, AssignmentCollection
-from pystencils.slicing import shift_slice, get_slice_before_ghost_layer, normalize_slice
-from lbmpy.advanced_streaming.utility import is_inplace, get_accessor, numeric_index, \
-    Timestep, get_timesteps, numeric_offsets
+from pystencils.slicing import (
+    shift_slice,
+    get_slice_before_ghost_layer,
+    normalize_slice,
+)
+from lbmpy.advanced_streaming.utility import (
+    is_inplace,
+    get_accessor,
+    numeric_index,
+    Timestep,
+    get_timesteps,
+    numeric_offsets,
+)
 from pystencils.datahandling import SerialDataHandling
 from pystencils.enums import Target
 from itertools import chain
@@ -10,22 +20,28 @@ from itertools import chain
 class LBMPeriodicityHandling:

-    def __init__(self, stencil, data_handling, pdf_field_name,
-                 streaming_pattern='pull', ghost_layers=1,
-                 cupy_direct_copy=True):
+    def __init__(
+        self,
+        stencil,
+        data_handling,
+        pdf_field_name,
+        streaming_pattern="pull",
+        ghost_layers=1,
+        cupy_direct_copy=True,
+    ):
         """
         Periodicity Handling for Lattice Boltzmann Streaming.

         **On the usage with cuda:**
         - cupy allows the copying of sliced arrays within device memory using the numpy syntax,
         e.g. `dst[:,0] = src[:,-1]`. In this implementation, this is the default for periodicity
         handling. Alternatively, if you set `cupy_direct_copy=False`, GPU kernels are generated and
         compiled. The compiled kernels are almost twice as fast in execution as cupy array copying,
         but especially for large stencils like D3Q27, their compilation can take up to 20 seconds.
         Choose your weapon depending on your use case.
         """
         if not isinstance(data_handling, SerialDataHandling):
-            raise ValueError('Only serial data handling is supported!')
+            raise ValueError("Only serial data handling is supported!")
         self.stencil = stencil
         self.dim = stencil.D
@@ -56,12 +72,16 @@ class LBMPeriodicityHandling:
         self.comm_slices = []
         timesteps = get_timesteps(streaming_pattern)
         for timestep in timesteps:
-            slices_per_comm_dir = get_communication_slices(stencil=stencil,
-                                                           comm_stencil=copy_directions,
-                                                           streaming_pattern=streaming_pattern,
-                                                           prev_timestep=timestep,
-                                                           ghost_layers=ghost_layers)
-            self.comm_slices.append(list(chain.from_iterable(v for k, v in slices_per_comm_dir.items())))
+            slices_per_comm_dir = get_communication_slices(
+                stencil=stencil,
+                comm_stencil=copy_directions,
+                streaming_pattern=streaming_pattern,
+                prev_timestep=timestep,
+                ghost_layers=ghost_layers,
+            )
+            self.comm_slices.append(
+                list(chain.from_iterable(v for k, v in slices_per_comm_dir.items()))
+            )

         if self.target == Target.GPU and not cupy_direct_copy:
             self.device_copy_kernels = list()
@@ -81,11 +101,11 @@ class LBMPeriodicityHandling:
             arr[dst] = arr[src]

     def _compile_copy_kernels(self, timestep):
+        assert self.target == Target.GPU
         pdf_field = self.dh.fields[self.pdf_field_name]
         kernels = []
         for src, dst in self.comm_slices[timestep.idx]:
-            kernels.append(
-                periodic_pdf_copy_kernel(pdf_field, src, dst, target=self.target))
+            kernels.append(periodic_pdf_gpu_copy_kernel(pdf_field, src, dst))
         return kernels

     def _periodicity_handling_gpu(self, prev_timestep):
@@ -100,7 +120,12 @@ class LBMPeriodicityHandling:

 def get_communication_slices(
-        stencil, comm_stencil=None, streaming_pattern='pull', prev_timestep=Timestep.BOTH, ghost_layers=1):
+    stencil,
+    comm_stencil=None,
+    streaming_pattern="pull",
+    prev_timestep=Timestep.BOTH,
+    ghost_layers=1,
+):
     """
     Return the source and destination slices for periodicity handling or communication between blocks.
@@ -116,7 +141,9 @@ def get_communication_slices(
     if comm_stencil is None:
         comm_stencil = itertools.product(*([-1, 0, 1] for _ in range(stencil.D)))

-    pdfs = Field.create_generic('pdfs', spatial_dimensions=len(stencil[0]), index_shape=(stencil.Q,))
+    pdfs = Field.create_generic(
+        "pdfs", spatial_dimensions=len(stencil[0]), index_shape=(stencil.Q,)
+    )
     write_accesses = get_accessor(streaming_pattern, prev_timestep).write(pdfs, stencil)
     slices_per_comm_direction = dict()
@@ -130,7 +157,9 @@ def get_communication_slices(
             d = stencil.index(streaming_dir)
             write_index = numeric_index(write_accesses[d])[0]

-            origin_slice = get_slice_before_ghost_layer(comm_dir, ghost_layers=ghost_layers, thickness=1)
+            origin_slice = get_slice_before_ghost_layer(
+                comm_dir, ghost_layers=ghost_layers, thickness=1
+            )
             src_slice = _fix_length_one_slices(origin_slice)

             write_offsets = numeric_offsets(write_accesses[d])
@@ -138,13 +167,15 @@ def get_communication_slices(
             # TODO: this is just a hotfix. _trim_slice_in_direction breaks FreeSlip BC with adjacent periodic side
             if streaming_pattern != "pull":
-                src_slice = shift_slice(_trim_slice_in_direction(src_slice, tangential_dir), write_offsets)
+                src_slice = shift_slice(
+                    _trim_slice_in_direction(src_slice, tangential_dir), write_offsets
+                )

             neighbour_transform = _get_neighbour_transform(comm_dir, ghost_layers)
             dst_slice = shift_slice(src_slice, neighbour_transform)

-            src_slice = src_slice + (write_index, )
-            dst_slice = dst_slice + (write_index, )
+            src_slice = src_slice + (write_index,)
+            dst_slice = dst_slice + (write_index,)

             slices_for_dir.append((src_slice, dst_slice))
@@ -152,10 +183,10 @@ def get_communication_slices(
     return slices_per_comm_direction

-def periodic_pdf_copy_kernel(pdf_field, src_slice, dst_slice,
-                             domain_size=None, target=Target.GPU):
-    """Copies a rectangular array slice onto another non-overlapping array slice"""
-    from pystencils.gpucuda.kernelcreation import create_cuda_kernel
+def periodic_pdf_gpu_copy_kernel(pdf_field, src_slice, dst_slice, domain_size=None):
+    """Generate a GPU kernel which copies all values from one slice of a field
+    to another non-overlapping slice."""
+    from pystencils import create_kernel

     pdf_idx = src_slice[-1]
     assert isinstance(pdf_idx, int), "PDF index needs to be an integer constant"
@@ -176,18 +207,28 @@ def periodic_pdf_copy_kernel(pdf_field, src_slice, dst_slice,
     def _stop(s):
         return s.stop if isinstance(s, slice) else s

-    offset = [_start(s1) - _start(s2) for s1, s2 in zip(normalized_from_slice, normalized_to_slice)]
-    assert offset == [_stop(s1) - _stop(s2) for s1, s2 in zip(normalized_from_slice, normalized_to_slice)], \
-        "Slices have to have same size"
-
-    copy_eq = AssignmentCollection(main_assignments=[Assignment(pdf_field(pdf_idx), pdf_field[tuple(offset)](pdf_idx))])
-    config = CreateKernelConfig(iteration_slice=dst_slice, skip_independence_check=True)
-    ast = create_cuda_kernel(copy_eq, config=config)
-    if target == Target.GPU:
-        from pystencils.gpucuda import make_python_function
-        return make_python_function(ast)
-    else:
-        raise ValueError('Invalid target:', target)
+    offset = [
+        _start(s1) - _start(s2)
+        for s1, s2 in zip(normalized_from_slice, normalized_to_slice)
+    ]
+    assert offset == [
+        _stop(s1) - _stop(s2)
+        for s1, s2 in zip(normalized_from_slice, normalized_to_slice)
+    ], "Slices have to have same size"
+
+    copy_eq = AssignmentCollection(
+        main_assignments=[
+            Assignment(pdf_field(pdf_idx), pdf_field[tuple(offset)](pdf_idx))
+        ]
+    )
+    config = CreateKernelConfig(
+        iteration_slice=dst_slice,
+        skip_independence_check=True,
+        target=Target.GPU,
+    )
+    ast = create_kernel(copy_eq, config=config)
+    return ast.compile()


 def _extend_dir(direction):
@@ -196,10 +237,10 @@ def _extend_dir(direction):
     elif direction[0] == 0:
         for d in [-1, 0, 1]:
             for rest in _extend_dir(direction[1:]):
-                yield (d, ) + rest
+                yield (d,) + rest
     else:
         for rest in _extend_dir(direction[1:]):
-            yield (direction[0], ) + rest
+            yield (direction[0],) + rest


 def _get_neighbour_transform(direction, ghost_layers):
......
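A minimal usage sketch for the periodicity handling reworked above, illustrating the `cupy_direct_copy` trade-off described in the docstring. The domain size and field name are invented for the example, and the constructor arguments should be checked against the lbmpy version in use:

import pystencils as ps
from lbmpy import LBStencil, Stencil
from lbmpy.advanced_streaming.communication import LBMPeriodicityHandling

stencil = LBStencil(Stencil.D3Q27)
dh = ps.create_data_handling(domain_size=(32, 32, 32), periodicity=(True, True, True),
                             default_target=ps.Target.GPU)
pdfs = dh.add_array("pdfs", values_per_cell=stencil.Q)  # hypothetical field name

# cupy_direct_copy=True (default): sliced cupy copies, no compilation overhead.
# cupy_direct_copy=False: generated GPU copy kernels, faster per call, but compilation
# can take a long time for large stencils such as D3Q27.
periodicity = LBMPeriodicityHandling(stencil, dh, pdfs.name,
                                     streaming_pattern="pull",
                                     cupy_direct_copy=False)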
@@ -157,7 +157,7 @@ class MuskerLaw(ImplicitWallFunctionModel):
         def law(u_p, y_p):
             arctan = sp.Float(5.424) * sp.atan(sp.Float(0.119760479041916168) * y_p - sp.Float(0.488023952095808383))
             logarithm = (sp.Float(0.434) * sp.log((y_p + sp.Float(10.6)) ** sp.Float(9.6)
-                                                  / (y_p ** 2 - sp.Float(8.15) * y_p + sp.Float(86)) ** 2, 10))
+                                                  / (y_p ** 2 - sp.Float(8.15) * y_p + sp.Float(86)) ** 2))
             return (arctan + logarithm - sp.Float(3.50727901936264842)) - u_p

         u_plus = velocity_symbol / self.u_tau[0]
......
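The only change above is dropping the explicit base 10 from `sp.log`. A plausible reading (an inference, not stated in the diff) is that the prefactor 0.434 is approximately 1/ln(10) and already performs the base-10 conversion, so the old `sp.log(..., 10)` applied it twice. A small sympy check with an arbitrary y+ value:

import sympy as sp

y_p = sp.Float(100)  # arbitrary test value for y+
x = (y_p + 10.6) ** 9.6 / (y_p ** 2 - 8.15 * y_p + 86) ** 2

old_term = sp.Float(0.434) * sp.log(x, 10)  # 0.434 * log10(x)
new_term = sp.Float(0.434) * sp.log(x)      # 0.434 * ln(x), numerically close to log10(x)

print(sp.N(old_term), sp.N(new_term), sp.N(sp.log(x, 10)))
# new_term agrees with log10(x); old_term is smaller by another factor of about 0.434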
@@ -67,7 +67,8 @@ from lbmpy.enums import Stencil, Method, ForceModel, CollisionSpace, SubgridScal
 import lbmpy.forcemodels as forcemodels
 from lbmpy.fieldaccess import CollideOnlyInplaceAccessor, PdfFieldAccessor, PeriodicTwoFieldsAccessor
 from lbmpy.fluctuatinglb import add_fluctuations_to_collision_rule
-from lbmpy.partially_saturated_cells import add_psm_to_collision_rule, PSMConfig
+from lbmpy.partially_saturated_cells import (replace_by_psm_collision_rule, PSMConfig,
+                                             add_psm_solid_collision_to_collision_rule)
 from lbmpy.non_newtonian_models import add_cassons_model, CassonsParameters
 from lbmpy.methods import (create_mrt_orthogonal, create_mrt_raw, create_central_moment,
                            create_srt, create_trt, create_trt_kbc)
@@ -376,8 +377,8 @@ class LBMConfig:
         if not self.compressible and self.method in (Method.MONOMIAL_CUMULANT, Method.CUMULANT):
             raise ValueError("Incompressible cumulant-based methods are not supported (yet).")

-        if self.zero_centered and (self.entropic or self.fluctuating):
-            raise ValueError("Entropic and fluctuating methods can only be created with `zero_centered=False`.")
+        if self.zero_centered and self.entropic:
+            raise ValueError("Entropic methods can only be created with `zero_centered=False`.")

         # Check or infer delta-equilibrium
         if self.delta_equilibrium is not None:
@@ -468,7 +469,7 @@ class LBMConfig:
             }

         if self.psm_config is not None and self.psm_config.fraction_field is not None:
-            self.force = [(1.0 - self.psm_config.fraction_field.center) * f for f in self.force]
+            self.force = [(1.0 - self.psm_config.fraction_field_symbol) * f for f in self.force]

         if isinstance(self.force_model, str):
             new_force_model = ForceModel[self.force_model.upper()]
@@ -684,11 +685,6 @@ def create_lb_collision_rule(lb_method=None, lbm_config=None, lbm_optimisation=N
     else:
         collision_rule = lb_method.get_collision_rule(pre_simplification=pre_simplification)

-    if lbm_config.psm_config is not None:
-        if lbm_config.psm_config.fraction_field is None or lbm_config.psm_config.object_velocity_field is None:
-            raise ValueError("Specify a fraction and object velocity field in the PSM Config")
-        collision_rule = add_psm_to_collision_rule(collision_rule, lbm_config.psm_config)
-
     if lbm_config.galilean_correction:
         from lbmpy.methods.cumulantbased import add_galilean_correction
         collision_rule = add_galilean_correction(collision_rule)
@@ -706,6 +702,11 @@ def create_lb_collision_rule(lb_method=None, lbm_config=None, lbm_optimisation=N
                                                bulk_relaxation_rate=lbm_config.relaxation_rates[1],
                                                limiter=cumulant_limiter)

+    if lbm_config.psm_config is not None:
+        if lbm_config.psm_config.fraction_field is None or lbm_config.psm_config.object_velocity_field is None:
+            raise ValueError("Specify a fraction and object velocity field in the PSM Config")
+        collision_rule = replace_by_psm_collision_rule(collision_rule, lbm_config.psm_config)
+
     if lbm_config.entropic:
         if lbm_config.subgrid_scale_model or lbm_config.cassons:
             raise ValueError("Choose either entropic, subgrid-scale or cassons")
@@ -783,7 +784,7 @@ def create_lb_method(lbm_config=None, **params):
     if lbm_config.psm_config is None:
         fraction_field = None
     else:
-        fraction_field = lbm_config.psm_config.fraction_field
+        fraction_field = lbm_config.psm_config.fraction_field_symbol

     common_params = {
         'compressible': lbm_config.compressible,
@@ -869,49 +870,36 @@ def create_lb_method(lbm_config=None, **params):

 def create_psm_update_rule(lbm_config, lbm_optimisation):
-    node_collection = []
-
-    # Use regular lb update rule for no overlapping particles
-    config_without_psm = copy.deepcopy(lbm_config)
-    config_without_psm.psm_config = None
-    # TODO: the force is still multiplied by (1.0 - self.psm_config.fraction_field.center)
-    # (should not harm if memory bound since self.psm_config.fraction_field.center should always be 0.0)
-    lb_update_rule = create_lb_update_rule(
-        lbm_config=config_without_psm, lbm_optimisation=lbm_optimisation
-    )
-    node_collection.append(
-        Conditional(
-            lbm_config.psm_config.fraction_field.center(0) <= 0.0,
-            Block(lb_update_rule.all_assignments),
-        )
-    )
-
-    # Only one particle, i.e., no individual_fraction_field is provided
-    if lbm_config.psm_config.individual_fraction_field is None:
-        assert lbm_config.psm_config.MaxParticlesPerCell == 1
-        psm_update_rule = create_lb_update_rule(
-            lbm_config=lbm_config, lbm_optimisation=lbm_optimisation
-        )
-        node_collection.append(
-            Conditional(
-                lbm_config.psm_config.fraction_field.center(0) > 0.0,
-                Block(psm_update_rule.all_assignments),
-            )
-        )
-    else:
-        for p in range(lbm_config.psm_config.MaxParticlesPerCell):
-            # Add psm update rule for p overlapping particles
-            config_with_p_particles = copy.deepcopy(lbm_config)
-            config_with_p_particles.psm_config.MaxParticlesPerCell = p + 1
-            psm_update_rule = create_lb_update_rule(
-                lbm_config=config_with_p_particles, lbm_optimisation=lbm_optimisation
-            )
-            node_collection.append(
-                Conditional(
-                    lbm_config.psm_config.individual_fraction_field.center(p) > 0.0,
-                    Block(psm_update_rule.all_assignments),
-                )
-            )
-
+    if lbm_config.psm_config is None:
+        raise ValueError("Specify a PSM Config in the LBM Config, when creating a psm update rule")
+
+    config_without_particles = copy.deepcopy(lbm_config)
+    config_without_particles.psm_config.max_particles_per_cell = 0
+    lb_update_rule = create_lb_update_rule(
+        lbm_config=config_without_particles, lbm_optimisation=lbm_optimisation)
+
+    node_collection = lb_update_rule.all_assignments
+
+    if lbm_config.psm_config.individual_fraction_field is None:
+        assert lbm_config.psm_config.max_particles_per_cell == 1
+        fraction_field = lbm_config.psm_config.fraction_field
+    else:
+        fraction_field = lbm_config.psm_config.individual_fraction_field
+
+    for p in range(lbm_config.psm_config.max_particles_per_cell):
+        psm_solid_collision = add_psm_solid_collision_to_collision_rule(lb_update_rule, lbm_config, p)
+        psm_update_rule = create_lb_update_rule(
+            collision_rule=psm_solid_collision, lbm_config=lbm_config, lbm_optimisation=lbm_optimisation)
+        node_collection.append(
+            Conditional(
+                fraction_field.center(p) > 0.0,
+                Block(psm_update_rule.all_assignments),
+            )
+        )
+
     return NodeCollection(node_collection)
......
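A hedged sketch of how the reworked PSM entry points above might be wired together. Field names, domain size and the relaxation rate are invented for the example; the keyword names are taken from the diff but should be verified against the installed lbmpy version:

import pystencils as ps
from lbmpy import LBMConfig, LBStencil, Stencil, Method
from lbmpy.creationfunctions import create_lb_update_rule
from lbmpy.partially_saturated_cells import PSMConfig

stencil = LBStencil(Stencil.D2Q9)
dh = ps.create_data_handling(domain_size=(64, 64))

# Illustrative fields: solid volume fraction B and the object's velocity per cell
fraction = dh.add_array("fraction", values_per_cell=1)
obj_vel = dh.add_array("obj_vel", values_per_cell=stencil.D)

psm_config = PSMConfig(fraction_field=fraction,
                       object_velocity_field=obj_vel,
                       solid_collision=1,
                       max_particles_per_cell=1)

lbm_config = LBMConfig(stencil=stencil, method=Method.SRT,
                       relaxation_rate=1.8, psm_config=psm_config)

# The PSM solid collision is spliced into the collision rule by
# replace_by_psm_collision_rule() inside create_lb_collision_rule().
update_rule = create_lb_update_rule(lbm_config=lbm_config)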
@@ -68,7 +68,7 @@ class LbmWeightInfo(CustomCodeNode):
         weights = [f"(({self.weights_symbol.dtype.c_name})({str(w.evalf(17))}))" for w in lb_method.weights]
         weights = ", ".join(weights)
         w_sym = self.weights_symbol
-        code = f"const {self.weights_symbol.dtype.c_name} {w_sym.name} [] = {{{ weights }}};\n"
+        code = f"const {self.weights_symbol.dtype.c_name} {w_sym.name} [] = {{{weights}}};\n"
         super(LbmWeightInfo, self).__init__(code, symbols_read=set(), symbols_defined={w_sym})

     def weight_of_direction(self, dir_idx, lb_method=None):
......
@@ -19,9 +19,7 @@ def add_fluctuations_to_collision_rule(collision_rule, temperature=None, amplitu
     """"""
     if not (temperature and not amplitudes) or (temperature and amplitudes):
         raise ValueError("Fluctuating LBM: Pass either 'temperature' or 'amplitudes'.")
-    if collision_rule.method.conserved_quantity_computation.zero_centered_pdfs:
-        raise ValueError("The fluctuating LBM is not implemented for zero-centered PDF storage.")
     method = collision_rule.method
     if not amplitudes:
         amplitudes = fluctuation_amplitude_from_temperature(method, temperature, c_s_sq)
@@ -44,9 +42,7 @@ def fluctuation_amplitude_from_temperature(method, temperature, c_s_sq=sp.Symbol
     """Produces amplitude equations according to (2.60) and (3.54) in Schiller08"""
     normalization_factors = sp.matrix_multiply_elementwise(method.moment_matrix, method.moment_matrix) * \
         sp.Matrix(method.weights)
-    density = method.zeroth_order_equilibrium_moment_symbol
-    if method.conserved_quantity_computation.zero_centered_pdfs:
-        density += 1
+    density = method._cqc.density_symbol
     mu = temperature * density / c_s_sq
     return [sp.sqrt(mu * norm * (1 - (1 - rr) ** 2))
             for norm, rr in zip(normalization_factors, method.relaxation_rates)]
......
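The removed guards mean fluctuations may now also be added to a method using zero-centered PDF storage. A sketch under that assumption (symbolic relaxation rate and temperature; whether the helper returns a new rule or modifies the given one in place should be checked against the source above):

import sympy as sp
from lbmpy import LBMConfig, LBStencil, Stencil, Method
from lbmpy.creationfunctions import create_lb_collision_rule
from lbmpy.fluctuatinglb import add_fluctuations_to_collision_rule

# Zero-centered PDF storage no longer has to be switched off for the fluctuating LBM
lbm_config = LBMConfig(stencil=LBStencil(Stencil.D3Q19), method=Method.MRT,
                       compressible=True, zero_centered=True,
                       relaxation_rates=[sp.Symbol("omega")])
collision_rule = create_lb_collision_rule(lbm_config=lbm_config)

# Add thermal fluctuation terms; the temperature is left symbolic here
add_fluctuations_to_collision_rule(collision_rule, temperature=sp.Symbol("k_B_T"))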
 import functools
+import sympy as sp
 from copy import deepcopy
 from lbmpy.simplificationfactory import create_simplification_strategy
-from pystencils import create_kernel, CreateKernelConfig
+from pystencils import create_kernel, CreateKernelConfig, Assignment
 from pystencils.field import Field, get_layout_of_array
 from pystencils.enums import Target
 from lbmpy.advanced_streaming.utility import get_accessor, Timestep
+from lbmpy.relaxationrates import get_shear_relaxation_rate
+from lbmpy.utils import second_order_moment_tensor


-def pdf_initialization_assignments(lb_method, density, velocity, pdfs,
-                                   streaming_pattern='pull', previous_timestep=Timestep.BOTH,
-                                   set_pre_collision_pdfs=False):
-    """Assignments to initialize the pdf field with equilibrium"""
+def get_field_accesses(lb_method, pdfs, streaming_pattern, previous_timestep, pre_collision_pdfs):
     if isinstance(pdfs, Field):
         accessor = get_accessor(streaming_pattern, previous_timestep)
-        if set_pre_collision_pdfs:
+        if pre_collision_pdfs:
             field_accesses = accessor.read(pdfs, lb_method.stencil)
         else:
             field_accesses = accessor.write(pdfs, lb_method.stencil)
-    elif streaming_pattern == 'pull' and not set_pre_collision_pdfs:
+    elif streaming_pattern == 'pull' and not pre_collision_pdfs:
         field_accesses = pdfs
     else:
         raise ValueError("Invalid value of pdfs: A PDF field reference is required to derive "
                          + f"initialization assignments for streaming pattern {streaming_pattern}.")
+    return field_accesses
+
+
+def get_individual_or_common_fraction_field(psm_config):
+    if psm_config.individual_fraction_field is not None:
+        return psm_config.individual_fraction_field
+    else:
+        return psm_config.fraction_field
+
+
+def pdf_initialization_assignments(lb_method, density, velocity, pdfs, psm_config=None,
+                                   streaming_pattern='pull', previous_timestep=Timestep.BOTH,
+                                   set_pre_collision_pdfs=False):
+    """Assignments to initialize the pdf field with equilibrium"""
+    field_accesses = get_field_accesses(lb_method, pdfs, streaming_pattern, previous_timestep, set_pre_collision_pdfs)
+
     if isinstance(density, Field):
         density = density.center
     if isinstance(velocity, Field):
         velocity = velocity.center_vector
@@ -35,23 +49,40 @@ def pdf_initialization_assignments(lb_method, density, velocity, pdfs,
     setter_eqs = lb_method.get_equilibrium(conserved_quantity_equations=inp_eqs)
     setter_eqs = setter_eqs.new_with_substitutions({sym: field_accesses[i]
                                                     for i, sym in enumerate(lb_method.post_collision_pdf_symbols)})
+
+    if lb_method.fraction_field is not None:
+        if psm_config is None:
+            raise ValueError("If setting up LBM with PSM, please specify a PSM config in the macroscopic setter")
+        else:
+            for equ in setter_eqs:
+                if equ.lhs in lb_method.first_order_equilibrium_moment_symbols:
+                    pos = lb_method.first_order_equilibrium_moment_symbols.index(equ.lhs)
+                    new_rhs = 0
+                    if isinstance(equ.rhs, sp.core.Add):
+                        for summand in equ.rhs.args:
+                            if summand in velocity:
+                                new_rhs += (1.0 - psm_config.fraction_field.center) * summand
+                            else:
+                                new_rhs += summand.subs(lb_method.fraction_field, psm_config.fraction_field.center)
+                    else:
+                        new_rhs += (1.0 - psm_config.fraction_field.center) * equ.rhs
+                    fraction_field = get_individual_or_common_fraction_field(psm_config)
+                    for p in range(psm_config.max_particles_per_cell):
+                        new_rhs += psm_config.object_velocity_field(p * lb_method.dim + pos) * fraction_field.center(p)
+                    setter_eqs.subexpressions.remove(equ)
+                    setter_eqs.subexpressions.append(Assignment(equ.lhs, new_rhs))
+
     return setter_eqs


-def macroscopic_values_getter(lb_method, density, velocity, pdfs,
+def macroscopic_values_getter(lb_method, density, velocity, pdfs, psm_config=None,
                               streaming_pattern='pull', previous_timestep=Timestep.BOTH,
                               use_pre_collision_pdfs=False):
-    if isinstance(pdfs, Field):
-        accessor = get_accessor(streaming_pattern, previous_timestep)
-        if use_pre_collision_pdfs:
-            field_accesses = accessor.read(pdfs, lb_method.stencil)
-        else:
-            field_accesses = accessor.write(pdfs, lb_method.stencil)
-    elif streaming_pattern == 'pull' and not use_pre_collision_pdfs:
-        field_accesses = pdfs
-    else:
-        raise ValueError("Invalid value of pdfs: A PDF field reference is required to derive "
-                         + f"getter assignments for streaming pattern {streaming_pattern}.")
+
+    field_accesses = get_field_accesses(lb_method, pdfs, streaming_pattern, previous_timestep, use_pre_collision_pdfs)
+
     cqc = lb_method.conserved_quantity_computation
     assert not (velocity is None and density is None)
     output_spec = {}
@@ -59,12 +90,54 @@ def macroscopic_values_getter(lb_method, density, velocity, pdfs,
         output_spec['velocity'] = velocity
     if density is not None:
         output_spec['density'] = density
-    return cqc.output_equations_from_pdfs(field_accesses, output_spec)
+    getter_equ = cqc.output_equations_from_pdfs(field_accesses, output_spec)
+
+    if lb_method.fraction_field is not None:
+        if psm_config.fraction_field is None:
+            raise ValueError("If setting up LBM with PSM, please specify a PSM config in the macroscopic getter")
+        else:
+            if lb_method.force_model is not None:
+                for equ in getter_equ:
+                    if equ.lhs in lb_method.force_model.symbolic_force_vector:
+                        new_rhs = equ.rhs.subs(lb_method.fraction_field, psm_config.fraction_field.center)
+                        getter_equ.subexpressions.remove(equ)
+                        getter_equ.subexpressions.append(Assignment(equ.lhs, new_rhs))
+
+            for i, equ in enumerate(getter_equ.main_assignments[-lb_method.dim:]):
+                new_rhs = (1.0 - psm_config.fraction_field.center) * equ.rhs
+                fraction_field = get_individual_or_common_fraction_field(psm_config)
+                for p in range(psm_config.max_particles_per_cell):
+                    new_rhs += psm_config.object_velocity_field(p * lb_method.dim + i) * fraction_field.center(p)
+                getter_equ.main_assignments.remove(equ)
+                getter_equ.main_assignments.append(Assignment(equ.lhs, new_rhs))
+
+            getter_equ.topological_sort()
+
+    return getter_equ


 macroscopic_values_setter = pdf_initialization_assignments


+def strain_rate_tensor_getter(lb_method, strain_rate_tensor, pdfs, streaming_pattern='pull',
+                              previous_timestep=Timestep.BOTH, use_pre_collision_pdfs=False):
+    field_accesses = get_field_accesses(lb_method, pdfs, streaming_pattern, previous_timestep, use_pre_collision_pdfs)
+
+    if isinstance(strain_rate_tensor, Field):
+        strain_rate_tensor = strain_rate_tensor.center_vector
+
+    omega_s = get_shear_relaxation_rate(lb_method)
+    equilibrium = lb_method.equilibrium_distribution
+    rho = equilibrium.density if equilibrium.compressible else equilibrium.background_density
+
+    f_neq = sp.Matrix([field_accesses[i] for i in range(lb_method.stencil.Q)]) - lb_method.get_equilibrium_terms()
+
+    pi = second_order_moment_tensor(f_neq, lb_method.stencil)
+    strain_rate_tensor_equ = - 1.5 * (omega_s / rho) * pi
+
+    assignments = [Assignment(strain_rate_tensor[i * lb_method.stencil.D + j], strain_rate_tensor_equ[i, j])
+                   for i in range(lb_method.stencil.D) for j in range(lb_method.stencil.D)]
+
+    return assignments
+
+
 def compile_macroscopic_values_getter(lb_method, output_quantities, pdf_arr=None,
                                       ghost_layers=1, iteration_slice=None,
                                       field_layout='numpy', target=Target.CPU,
......
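A hedged sketch of calling the new strain_rate_tensor_getter. Field names and sizes are invented; the tensor field holds D*D components per cell, matching the indexing in the assignments above. The resulting assignments still contain the method's symbolic density, velocity and relaxation-rate parameters, so only the symbolic side is shown:

import pystencils as ps
from lbmpy import LBMConfig, LBStencil, Stencil, Method, create_lb_method
from lbmpy.macroscopic_value_kernels import strain_rate_tensor_getter

stencil = LBStencil(Stencil.D2Q9)
dh = ps.create_data_handling(domain_size=(32, 32))
pdfs = dh.add_array("pdfs", values_per_cell=stencil.Q)
strain_rate = dh.add_array("strain_rate", values_per_cell=stencil.D ** 2)  # D*D components

lb_method = create_lb_method(LBMConfig(stencil=stencil, method=Method.SRT, relaxation_rate=1.9))
assignments = strain_rate_tensor_getter(lb_method, strain_rate, pdfs)
for a in assignments:
    print(a)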
@@ -124,7 +124,8 @@ class AbstractLbMethod(abc.ABC):
         substitutions = [Assignment(e[1], e[0]) for e in subexpressions.items()]
         if relaxation_rates_modifier is not None:
             symbolic_relaxation_rates = [r * relaxation_rates_modifier for r in symbolic_relaxation_rates]
-        for srr in symbolic_relaxation_rates:
-            assert isinstance(srr, sp.Symbol)
+        else:
+            for srr in symbolic_relaxation_rates:
+                assert isinstance(srr, sp.Symbol)

         return substitutions, sp.diag(*symbolic_relaxation_rates)
@@ -201,19 +201,23 @@ def create_from_equilibrium(stencil, equilibrium, conserved_quantity_computation
     if cspace.collision_space == CollisionSpace.POPULATIONS:
         return MomentBasedLbMethod(stencil, equilibrium, mom_to_rr_dict, conserved_quantity_computation=cqc,
-                                   force_model=force_model, zero_centered=zero_centered, fraction_field=fraction_field,
+                                   force_model=force_model, zero_centered=zero_centered,
+                                   fraction_field=fraction_field,
                                    moment_transform_class=None)
     elif cspace.collision_space == CollisionSpace.RAW_MOMENTS:
         return MomentBasedLbMethod(stencil, equilibrium, mom_to_rr_dict, conserved_quantity_computation=cqc,
-                                   force_model=force_model, zero_centered=zero_centered, fraction_field=fraction_field,
+                                   force_model=force_model, zero_centered=zero_centered,
+                                   fraction_field=fraction_field,
                                    moment_transform_class=cspace.raw_moment_transform_class)
     elif cspace.collision_space == CollisionSpace.CENTRAL_MOMENTS:
         return CentralMomentBasedLbMethod(stencil, equilibrium, mom_to_rr_dict, conserved_quantity_computation=cqc,
                                           force_model=force_model, zero_centered=zero_centered,
+                                          fraction_field=fraction_field,
                                           central_moment_transform_class=cspace.central_moment_transform_class)
     elif cspace.collision_space == CollisionSpace.CUMULANTS:
         return CumulantBasedLbMethod(stencil, equilibrium, mom_to_rr_dict, conserved_quantity_computation=cqc,
                                      force_model=force_model, zero_centered=zero_centered,
+                                     fraction_field=fraction_field,
                                      central_moment_transform_class=cspace.central_moment_transform_class,
                                      cumulant_transform_class=cspace.cumulant_transform_class)
@@ -334,7 +338,7 @@ def create_mrt_raw(stencil, relaxation_rates, continuous_equilibrium=True, conse
 def create_central_moment(stencil, relaxation_rates, nested_moments=None,
-                          continuous_equilibrium=True, conserved_moments=True, fraction_field=None, **kwargs):
+                          continuous_equilibrium=True, conserved_moments=True, **kwargs):
     r"""
     Creates moment based LB method where the collision takes place in the central moment space.
@@ -348,7 +352,6 @@ def create_central_moment(stencil, relaxation_rates, nested_moments=None,
         continuous_equilibrium: determines if the discrete or continuous maxwellian equilibrium is
                                 used to compute the equilibrium moments.
         conserved_moments: If lower order moments are conserved or not.
-        fraction_field: fraction field for the PSM method

     Returns:
         :class:`lbmpy.methods.momentbased.CentralMomentBasedLbMethod` instance
     """
@@ -371,8 +374,8 @@ def create_central_moment(stencil, relaxation_rates, nested_moments=None,
         nested_moments = cascaded_moment_sets_literature(stencil)

     rr_dict = _get_relaxation_info_dict(relaxation_rates, nested_moments, stencil.D, conserved_moments)
-    if fraction_field is not None:
-        relaxation_rates_modifier = (1.0 - fraction_field.center)
+    if 'fraction_field' in kwargs and kwargs['fraction_field'] is not None:
+        relaxation_rates_modifier = (1.0 - kwargs['fraction_field'])
         rr_dict = _get_relaxation_info_dict(relaxation_rates, nested_moments, stencil.D,
                                             relaxation_rates_modifier=relaxation_rates_modifier)
@@ -527,7 +530,7 @@ def create_mrt_orthogonal(stencil, relaxation_rates, continuous_equilibrium=True
 # ----------------------------------------- Cumulant method creators ---------------------------------------------------

-def create_cumulant(stencil, relaxation_rates, cumulant_groups, conserved_moments=True, fraction_field=None, **kwargs):
+def create_cumulant(stencil, relaxation_rates, cumulant_groups, conserved_moments=True, **kwargs):
     r"""Creates a cumulant-based lattice Boltzmann method.

     Args:
@@ -547,8 +550,8 @@ def create_cumulant(stencil, relaxation_rates, cumulant_groups, conserved_moment
     """
     cumulant_to_rr_dict = _get_relaxation_info_dict(relaxation_rates, cumulant_groups, stencil.D, conserved_moments)
-    if fraction_field is not None:
-        relaxation_rates_modifier = (1.0 - fraction_field.center)
+    if 'fraction_field' in kwargs and kwargs['fraction_field'] is not None:
+        relaxation_rates_modifier = (1.0 - kwargs['fraction_field'])
         cumulant_to_rr_dict = _get_relaxation_info_dict(relaxation_rates, cumulant_groups, stencil.D,
                                                         relaxation_rates_modifier=relaxation_rates_modifier)
......
@@ -47,7 +47,7 @@ class CumulantBasedLbMethod(AbstractLbMethod):
     def __init__(self, stencil, equilibrium, relaxation_dict,
                  conserved_quantity_computation=None,
-                 force_model=None, zero_centered=False,
+                 force_model=None, zero_centered=False, fraction_field=None,
                  central_moment_transform_class=BinomialChimeraTransform,
                  cumulant_transform_class=CentralMomentsToCumulantsByGeneratingFunc):
         assert isinstance(conserved_quantity_computation,
@@ -63,6 +63,7 @@ class CumulantBasedLbMethod(AbstractLbMethod):
         self._cqc = conserved_quantity_computation
         self._force_model = force_model
         self._zero_centered = zero_centered
+        self._fraction_field = fraction_field
         self._weights = None
         self._cumulant_transform_class = cumulant_transform_class
         self._central_moment_transform_class = central_moment_transform_class
@@ -72,6 +73,10 @@ class CumulantBasedLbMethod(AbstractLbMethod):
         """Force model employed by this method."""
         return self._force_model

+    @property
+    def fraction_field(self):
+        return self._fraction_field
+
     @property
     def relaxation_info_dict(self):
         """Dictionary mapping this method's cumulants to their relaxation rates and equilibrium values.
......
@@ -55,7 +55,7 @@ class CentralMomentBasedLbMethod(AbstractLbMethod):
     def __init__(self, stencil, equilibrium, relaxation_dict,
                  conserved_quantity_computation=None,
-                 force_model=None, zero_centered=False,
+                 force_model=None, zero_centered=False, fraction_field=None,
                  central_moment_transform_class=BinomialChimeraTransform):
         assert isinstance(conserved_quantity_computation, AbstractConservedQuantityComputation)
         super(CentralMomentBasedLbMethod, self).__init__(stencil)
@@ -65,6 +65,7 @@ class CentralMomentBasedLbMethod(AbstractLbMethod):
         self._cqc = conserved_quantity_computation
         self._force_model = force_model
         self._zero_centered = zero_centered
+        self._fraction_field = fraction_field
         self._weights = None
         self._central_moment_transform_class = central_moment_transform_class
@@ -73,6 +74,10 @@ class CentralMomentBasedLbMethod(AbstractLbMethod):
         """Force model employed by this method."""
         return self._force_model

+    @property
+    def fraction_field(self):
+        return self._fraction_field
+
     @property
     def relaxation_info_dict(self):
         """Dictionary mapping this method's moments to their relaxation rates and equilibrium values.
......
@@ -48,7 +48,7 @@ class MomentBasedLbMethod(AbstractLbMethod):
         self._cqc = conserved_quantity_computation
         self._force_model = force_model
         self._zero_centered = zero_centered
-        self.fraction_field = fraction_field
+        self._fraction_field = fraction_field
         self._weights = None
         self._moment_transform_class = moment_transform_class
@@ -57,6 +57,10 @@ class MomentBasedLbMethod(AbstractLbMethod):
         """Force model employed by this method."""
         return self._force_model

+    @property
+    def fraction_field(self):
+        return self._fraction_field
+
     @property
     def relaxation_info_dict(self):
         """Dictionary mapping this method's moments to their relaxation rates and equilibrium values.
@@ -176,8 +180,8 @@ class MomentBasedLbMethod(AbstractLbMethod):
     def get_collision_rule(self, conserved_quantity_equations: AssignmentCollection = None,
                            pre_simplification: bool = True) -> LbmCollisionRule:
-        if self.fraction_field is not None:
-            relaxation_rates_modifier = (1.0 - self.fraction_field.center)
+        if self._fraction_field is not None:
+            relaxation_rates_modifier = (1.0 - self._fraction_field)
             rr_sub_expressions, d = self._generate_symbolic_relaxation_matrix(
                 relaxation_rates_modifier=relaxation_rates_modifier)
         else:
......
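A toy illustration of what the (1 - B) relaxation-rate modifier used in get_collision_rule above does; this is plain sympy, not lbmpy API:

import sympy as sp

B = sp.Symbol("B")                               # solid volume fraction, 0 = pure fluid, 1 = solid
omega_shear, omega_bulk = sp.symbols("omega_shear omega_bulk")

relaxation_matrix = sp.diag(omega_shear, omega_bulk)
modified = (1 - B) * relaxation_matrix           # fluid collision fades out as B approaches 1

print(modified)   # diag((1 - B)*omega_shear, (1 - B)*omega_bulk)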
@@ -172,7 +172,7 @@ class PdfsToMomentsByMatrixTransform(AbstractRawMomentTransform):

     # ----------------------------- Private Members -----------------------------

-    @ property
+    @property
     def _default_simplification(self):
         forward_simp = SimplificationStrategy()
         # forward_simp.add(substitute_moments_in_conserved_quantity_equations)
@@ -218,7 +218,7 @@ class PdfsToMomentsByChimeraTransform(AbstractRawMomentTransform):
             self.moment_polynomials)
         self.poly_to_mono_matrix = self.mono_to_poly_matrix.inv()

-    @ property
+    @property
     def absorbs_conserved_quantity_equations(self):
         return True
@@ -414,7 +414,7 @@ class PdfsToMomentsByChimeraTransform(AbstractRawMomentTransform):

     # ----------------------------- Private Members -----------------------------

-    @ property
+    @property
     def _default_simplification(self):
         from lbmpy.methods.momentbased.momentbasedsimplifications import (
             substitute_moments_in_conserved_quantity_equations,
......
import sympy as sp import sympy as sp
from dataclasses import dataclass from dataclasses import dataclass
from lbmpy.enums import Method
from lbmpy.methods.abstractlbmethod import LbmCollisionRule from lbmpy.methods.abstractlbmethod import LbmCollisionRule
from pystencils import Assignment, AssignmentCollection from pystencils import Assignment, AssignmentCollection
from pystencils.field import Field from pystencils.field import Field
...@@ -13,103 +14,156 @@ class PSMConfig: ...@@ -13,103 +14,156 @@ class PSMConfig:
Fraction field for PSM Fraction field for PSM
""" """
fraction_field_symbol = sp.Symbol('B')
"""
Fraction field symbol used for simplification
"""
object_velocity_field: Field = None object_velocity_field: Field = None
""" """
Object velocity field for PSM Object velocity field for PSM
""" """
SC: int = 1 solid_collision: int = 1
""" """
Solid collision option for PSM Solid collision option for PSM
""" """
MaxParticlesPerCell: int = 1 max_particles_per_cell: int = 1
""" """
Maximum number of particles overlapping with a cell Maximum number of particles overlapping with a cell
""" """
individual_fraction_field: Field = None individual_fraction_field: Field = None
""" """
Fraction field for each overlapping particle in PSM Fraction field for each overlapping object / particle in PSM
""" """
particle_force_field: Field = None object_force_field: Field = None
""" """
Force field for each overlapping particle in PSM Force field for each overlapping object / particle in PSM
""" """
def add_psm_to_collision_rule(collision_rule, psm_config): def get_psm_solid_collision_term(collision_rule, psm_config, particle_per_cell_counter):
if psm_config.individual_fraction_field is None: if psm_config.individual_fraction_field is None:
psm_config.individual_fraction_field = psm_config.fraction_field fraction_field = psm_config.fraction_field
else:
fraction_field = psm_config.individual_fraction_field
method = collision_rule.method method = collision_rule.method
pre_collision_pdf_symbols = method.pre_collision_pdf_symbols pre_collision_pdf_symbols = method.pre_collision_pdf_symbols
stencil = method.stencil stencil = method.stencil
# Get equilibrium from object velocity for solid collision
forces_rhs = [0] * psm_config.MaxParticlesPerCell * stencil.D
solid_collisions = [0] * stencil.Q solid_collisions = [0] * stencil.Q
for p in range(psm_config.MaxParticlesPerCell): equilibrium_fluid = method.get_equilibrium_terms()
equilibrium_fluid = method.get_equilibrium_terms() equilibrium_solid = []
equilibrium_solid = []
for eq in equilibrium_fluid: # get equilibrium form object velocity
eq_sol = eq for eq in equilibrium_fluid:
for i in range(stencil.D): eq_sol = eq
eq_sol = eq_sol.subs(sp.Symbol("u_" + str(i)), for i in range(stencil.D):
psm_config.object_velocity_field.center(p * stencil.D + i), ) eq_sol = eq_sol.subs(sp.Symbol("u_" + str(i)),
equilibrium_solid.append(eq_sol) psm_config.object_velocity_field.center(particle_per_cell_counter * stencil.D + i), )
equilibrium_solid.append(eq_sol)
# Build solid collision
for i, (eqFluid, eqSolid, f, offset) in enumerate( # Build solid collision
zip(equilibrium_fluid, equilibrium_solid, pre_collision_pdf_symbols, stencil)): for i, (eqFluid, eqSolid, f, offset) in enumerate(
inverse_direction_index = stencil.stencil_entries.index(stencil.inverse_stencil_entries[i]) zip(equilibrium_fluid, equilibrium_solid, pre_collision_pdf_symbols, stencil)):
if psm_config.SC == 1: inverse_direction_index = stencil.stencil_entries.index(stencil.inverse_stencil_entries[i])
solid_collision = psm_config.individual_fraction_field.center(p) * ( if psm_config.solid_collision == 1:
( solid_collision = (fraction_field.center(particle_per_cell_counter)
pre_collision_pdf_symbols[inverse_direction_index] * ((pre_collision_pdf_symbols[inverse_direction_index]
- equilibrium_fluid[inverse_direction_index] - equilibrium_fluid[inverse_direction_index]) - (f - eqSolid)))
) elif psm_config.solid_collision == 2:
- (f - eqSolid) # TODO get relaxation rate vector from method and use the right relaxation rate [i]
) solid_collision = (fraction_field.center(particle_per_cell_counter)
elif psm_config.SC == 2: * ((eqSolid - f) + (1.0 - method.relaxation_rates[0]) * (f - eqFluid)))
# TODO get relaxation rate vector from method and use the right relaxation rate [i] elif psm_config.solid_collision == 3:
solid_collision = psm_config.individual_fraction_field.center(p) * ( solid_collision = (fraction_field.center(particle_per_cell_counter)
(eqSolid - f) + (1.0 - method.relaxation_rates[0]) * (f - eqFluid) * ((pre_collision_pdf_symbols[inverse_direction_index]
) - equilibrium_solid[inverse_direction_index]) - (f - eqSolid)))
elif psm_config.SC == 3: else:
solid_collision = psm_config.individual_fraction_field.center(p) * ( raise ValueError("Only sc=1, sc=2 and sc=3 are supported.")
(
pre_collision_pdf_symbols[inverse_direction_index] solid_collisions[i] += solid_collision
- equilibrium_solid[inverse_direction_index]
) return solid_collisions
- (f - eqSolid)
)
else: def get_psm_force_from_solid_collision(solid_collisions, stencil, object_force_field, particle_per_cell_counter):
raise ValueError("Only SC=1, SC=2 and SC=3 are supported.") force_assignments = []
solid_collisions[i] += solid_collision for d in range(stencil.D):
for j in range(stencil.D): forces_rhs = 0
forces_rhs[p * stencil.D + j] -= solid_collision * int(offset[j]) for sc, offset in zip(solid_collisions, stencil):
forces_rhs -= sc * int(offset[d])
# Add solid collision to main assignments of collision rule
force_assignments.append(Assignment(
object_force_field.center(particle_per_cell_counter * stencil.D + d), forces_rhs
))
return AssignmentCollection(force_assignments)
def replace_fraction_symbol_with_field(assignments, fraction_field_symbol, fraction_field_access):
new_assignments = []
for ass in assignments:
rhs = ass.rhs.subs(fraction_field_symbol, fraction_field_access.center(0))
new_assignments.append(Assignment(ass.lhs, rhs))
return new_assignments
def add_psm_solid_collision_to_collision_rule(collision_rule, lbm_config, particle_per_cell_counter):
method = collision_rule.method
solid_collisions = get_psm_solid_collision_term(collision_rule, lbm_config.psm_config, particle_per_cell_counter)
post_collision_pdf_symbols = method.post_collision_pdf_symbols
assignments = []
for sc, post in zip(solid_collisions, post_collision_pdf_symbols):
assignments.append(Assignment(post, post + sc))
if lbm_config.psm_config.object_force_field is not None:
assignments += get_psm_force_from_solid_collision(solid_collisions, method.stencil,
lbm_config.psm_config.object_force_field,
particle_per_cell_counter)
# exchanging rho with zeroth order moment symbol
if lbm_config.method in (Method.CENTRAL_MOMENT, Method.MONOMIAL_CUMULANT, Method.CUMULANT):
new_assignments = []
zeroth_moment_symbol = 'm_00' if lbm_config.stencil.D == 2 else 'm_000'
for ass in assignments:
new_assignments.append(ass.subs(sp.Symbol('rho'), sp.Symbol(zeroth_moment_symbol)))
assignments = new_assignments
collision_assignments = AssignmentCollection(assignments)
ac = LbmCollisionRule(method, collision_assignments, [],
collision_rule.simplification_hints)
return ac
def replace_by_psm_collision_rule(collision_rule, psm_config):
method = collision_rule.method
collision_assignments = [] collision_assignments = []
for main, sc in zip(collision_rule.main_assignments, solid_collisions): solid_collisions = [0] * psm_config.max_particles_per_cell
collision_assignments.append(Assignment(main.lhs, main.rhs + sc)) for p in range(psm_config.max_particles_per_cell):
solid_collisions[p] = get_psm_solid_collision_term(collision_rule, psm_config, p)
# Add hydrodynamic force calculations to collision assignments if two-way coupling is used
# (i.e., force field is not None) if psm_config.object_force_field is not None:
if psm_config.particle_force_field is not None: collision_assignments += get_psm_force_from_solid_collision(solid_collisions[p], method.stencil,
for p in range(psm_config.MaxParticlesPerCell): psm_config.object_force_field, p)
for i in range(stencil.D):
collision_assignments.append( for i, main in enumerate(collision_rule.main_assignments):
Assignment( rhs = main.rhs
psm_config.particle_force_field.center(p * stencil.D + i), for p in range(psm_config.max_particles_per_cell):
forces_rhs[p * stencil.D + i], rhs += solid_collisions[p][i]
) collision_assignments.append(Assignment(main.lhs, rhs))
)
collision_assignments = AssignmentCollection(collision_assignments) collision_assignments = AssignmentCollection(collision_assignments)
ac = LbmCollisionRule(method, collision_assignments, collision_rule.subexpressions, ac = LbmCollisionRule(method, replace_fraction_symbol_with_field(collision_assignments,
psm_config.fraction_field_symbol, psm_config.fraction_field),
replace_fraction_symbol_with_field(collision_rule.subexpressions,
psm_config.fraction_field_symbol, psm_config.fraction_field),
collision_rule.simplification_hints) collision_rule.simplification_hints)
ac.topological_sort() ac.topological_sort()
return ac return ac
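For readers comparing the three variants selected by psm_config.solid_collision at the top of this hunk, the branches compute the following terms (a summary of the code above, with B the per-cell solid volume fraction stored in fraction_field, \bar{q} the inverse of direction q, f^{eq,F} and f^{eq,S} the equilibria evaluated at the fluid and solid velocity, and \omega the relaxation rate):

    \Omega^{S,1}_q = B \left[ (f_{\bar{q}} - f^{eq,F}_{\bar{q}}) - (f_q - f^{eq,S}_q) \right]
    \Omega^{S,2}_q = B \left[ (f^{eq,S}_q - f_q) + (1 - \omega)(f_q - f^{eq,F}_q) \right]
    \Omega^{S,3}_q = B \left[ (f_{\bar{q}} - f^{eq,S}_{\bar{q}}) - (f_q - f^{eq,S}_q) \right]

These correspond to the commonly used partially-saturated-cells (Noble and Torczynski style) solid collision operators; as the TODO above notes, variant 2 currently uses only the first entry of the method's relaxation-rate vector.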
...@@ -120,6 +120,26 @@ def create_stream_pull_with_output_kernel(lb_method, src_field, dst_field=None, ...@@ -120,6 +120,26 @@ def create_stream_pull_with_output_kernel(lb_method, src_field, dst_field=None,
simplification_hints=output_eq_collection.simplification_hints) simplification_hints=output_eq_collection.simplification_hints)
def create_copy_kernel(stencil, src_field, dst_field, accessor=StreamPullTwoFieldsAccessor()):
"""Creates a copy kernel, which can be used to transfer information from src to dst field.
Args:
stencil: lattice Boltzmann stencil which is used in the form of a tuple of tuples
src_field: field used for reading pdf values
dst_field: field used for writing pdf values
accessor: instance of PdfFieldAccessor, defining where to read and write values
to create e.g. a fused stream-collide kernel. See 'fieldaccess.PdfFieldAccessor'
Returns:
AssignmentCollection of a copy update rule
"""
temporary_symbols = sp.symbols(f'copied:{stencil.Q}')
subexpressions = [Assignment(tmp, acc) for tmp, acc in zip(temporary_symbols, accessor.write(src_field, stencil))]
main_assignments = [Assignment(acc, tmp) for acc, tmp in zip(accessor.write(dst_field, stencil), temporary_symbols)]
return AssignmentCollection(main_assignments, subexpressions=subexpressions)
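A minimal usage sketch for the new helper (field names are illustrative, and the import path of create_copy_kernel is assumed to be the module this hunk belongs to; otherwise it follows the usual pystencils workflow of compiling an AssignmentCollection):

import pystencils as ps
from lbmpy.enums import Stencil
from lbmpy.stencils import LBStencil
from lbmpy.updatekernels import create_copy_kernel   # assumed location of the new helper

stencil = LBStencil(Stencil.D2Q9)
# two PDF fields with Q values per cell (names are illustrative)
src, dst = ps.fields(f"src({stencil.Q}), dst({stencil.Q}): double[2D]", layout="fzyx")

copy_rule = create_copy_kernel(stencil, src, dst)     # returns an AssignmentCollection
copy_kernel = ps.create_kernel(copy_rule).compile()   # compile like any other update rule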
# ---------------------------------- Pdf array creation for various layouts -------------------------------------------- # ---------------------------------- Pdf array creation for various layouts --------------------------------------------
......
...@@ -5,36 +5,52 @@ import numpy as np ...@@ -5,36 +5,52 @@ import numpy as np
from lbmpy.stencils import LBStencil from lbmpy.stencils import LBStencil
from pystencils.slicing import get_slice_before_ghost_layer, get_ghost_region_slice from pystencils.slicing import get_slice_before_ghost_layer, get_ghost_region_slice
from lbmpy.creationfunctions import create_lb_update_rule, LBMConfig, LBMOptimisation from lbmpy.creationfunctions import create_lb_update_rule, LBMConfig, LBMOptimisation
from lbmpy.advanced_streaming.communication import get_communication_slices, _fix_length_one_slices, \ from lbmpy.advanced_streaming.communication import (
LBMPeriodicityHandling get_communication_slices,
_fix_length_one_slices,
LBMPeriodicityHandling,
periodic_pdf_gpu_copy_kernel,
)
from lbmpy.advanced_streaming.utility import streaming_patterns, Timestep from lbmpy.advanced_streaming.utility import streaming_patterns, Timestep
from lbmpy.enums import Stencil from lbmpy.enums import Stencil
import pytest import pytest
@pytest.mark.parametrize('stencil', [Stencil.D2Q9, Stencil.D3Q15, Stencil.D3Q19, Stencil.D3Q27]) @pytest.mark.parametrize(
@pytest.mark.parametrize('streaming_pattern', streaming_patterns) "stencil", [Stencil.D2Q9, Stencil.D3Q15, Stencil.D3Q19, Stencil.D3Q27]
@pytest.mark.parametrize('timestep', [Timestep.EVEN, Timestep.ODD]) )
@pytest.mark.parametrize("streaming_pattern", streaming_patterns)
@pytest.mark.parametrize("timestep", [Timestep.EVEN, Timestep.ODD])
def test_slices_not_empty(stencil, streaming_pattern, timestep): def test_slices_not_empty(stencil, streaming_pattern, timestep):
stencil = LBStencil(stencil) stencil = LBStencil(stencil)
arr = np.zeros((4,) * stencil.D + (stencil.Q,)) arr = np.zeros((4,) * stencil.D + (stencil.Q,))
slices = get_communication_slices(stencil, streaming_pattern=streaming_pattern, prev_timestep=timestep, slices = get_communication_slices(
ghost_layers=1) stencil,
streaming_pattern=streaming_pattern,
prev_timestep=timestep,
ghost_layers=1,
)
for _, slices_list in slices.items(): for _, slices_list in slices.items():
for src, dst in slices_list: for src, dst in slices_list:
assert all(s != 0 for s in arr[src].shape) assert all(s != 0 for s in arr[src].shape)
assert all(s != 0 for s in arr[dst].shape) assert all(s != 0 for s in arr[dst].shape)
@pytest.mark.parametrize('stencil', [Stencil.D2Q9, Stencil.D3Q15, Stencil.D3Q19, Stencil.D3Q27]) @pytest.mark.parametrize(
@pytest.mark.parametrize('streaming_pattern', streaming_patterns) "stencil", [Stencil.D2Q9, Stencil.D3Q15, Stencil.D3Q19, Stencil.D3Q27]
@pytest.mark.parametrize('timestep', [Timestep.EVEN, Timestep.ODD]) )
@pytest.mark.parametrize("streaming_pattern", streaming_patterns)
@pytest.mark.parametrize("timestep", [Timestep.EVEN, Timestep.ODD])
def test_src_dst_same_shape(stencil, streaming_pattern, timestep): def test_src_dst_same_shape(stencil, streaming_pattern, timestep):
stencil = LBStencil(stencil) stencil = LBStencil(stencil)
arr = np.zeros((4,) * stencil.D + (stencil.Q,)) arr = np.zeros((4,) * stencil.D + (stencil.Q,))
slices = get_communication_slices(stencil, streaming_pattern=streaming_pattern, prev_timestep=timestep, slices = get_communication_slices(
ghost_layers=1) stencil,
streaming_pattern=streaming_pattern,
prev_timestep=timestep,
ghost_layers=1,
)
for _, slices_list in slices.items(): for _, slices_list in slices.items():
for src, dst in slices_list: for src, dst in slices_list:
src_shape = arr[src].shape src_shape = arr[src].shape
...@@ -42,12 +58,15 @@ def test_src_dst_same_shape(stencil, streaming_pattern, timestep): ...@@ -42,12 +58,15 @@ def test_src_dst_same_shape(stencil, streaming_pattern, timestep):
assert src_shape == dst_shape assert src_shape == dst_shape
@pytest.mark.parametrize('stencil', [Stencil.D2Q9, Stencil.D3Q15, Stencil.D3Q19, Stencil.D3Q27]) @pytest.mark.parametrize(
"stencil", [Stencil.D2Q9, Stencil.D3Q15, Stencil.D3Q19, Stencil.D3Q27]
)
def test_pull_communication_slices(stencil): def test_pull_communication_slices(stencil):
stencil = LBStencil(stencil) stencil = LBStencil(stencil)
slices = get_communication_slices( slices = get_communication_slices(
stencil, streaming_pattern='pull', prev_timestep=Timestep.BOTH, ghost_layers=1) stencil, streaming_pattern="pull", prev_timestep=Timestep.BOTH, ghost_layers=1
)
for i, d in enumerate(stencil): for i, d in enumerate(stencil):
if i == 0: if i == 0:
continue continue
...@@ -58,21 +77,115 @@ def test_pull_communication_slices(stencil): ...@@ -58,21 +77,115 @@ def test_pull_communication_slices(stencil):
dst = s[1][:-1] dst = s[1][:-1]
break break
inner_slice = _fix_length_one_slices(get_slice_before_ghost_layer(d, ghost_layers=1)) inner_slice = _fix_length_one_slices(
get_slice_before_ghost_layer(d, ghost_layers=1)
)
inv_dir = (-e for e in d) inv_dir = (-e for e in d)
gl_slice = _fix_length_one_slices(get_ghost_region_slice(inv_dir, ghost_layers=1)) gl_slice = _fix_length_one_slices(
get_ghost_region_slice(inv_dir, ghost_layers=1)
)
assert src == inner_slice assert src == inner_slice
assert dst == gl_slice assert dst == gl_slice
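To make the relationship tested above concrete, the following sketch (not part of the test file, but using only functions already imported in it) prints the source/destination index ranges for a single lattice direction:

from lbmpy.advanced_streaming.communication import get_communication_slices
from lbmpy.advanced_streaming.utility import Timestep
from lbmpy.enums import Stencil
from lbmpy.stencils import LBStencil

stencil = LBStencil(Stencil.D2Q9)
slices = get_communication_slices(
    stencil, streaming_pattern="pull", prev_timestep=Timestep.BOTH, ghost_layers=1
)
direction = (1, 0)  # PDFs streaming in +x
for src, dst in slices[direction]:
    # src is the layer just inside the +x boundary, dst the ghost region at the -x boundary
    print(src, "->", dst)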
@pytest.mark.parametrize('stencil_name', [Stencil.D2Q9, Stencil.D3Q15, Stencil.D3Q19, Stencil.D3Q27]) @pytest.mark.parametrize("direction", LBStencil(Stencil.D3Q27).stencil_entries)
@pytest.mark.parametrize("pull", [False, True])
def test_gpu_comm_kernels(direction: tuple, pull: bool):
pytest.importorskip("cupy")
stencil = LBStencil(Stencil.D3Q27)
inv_dir = stencil[stencil.inverse_index(direction)]
target = ps.Target.GPU
domain_size = (4,) * stencil.D
dh: ps.datahandling.SerialDataHandling = ps.create_data_handling(
domain_size,
periodicity=(True,) * stencil.D,
parallel=False,
default_target=target,
)
field = dh.add_array("field", values_per_cell=2)
if pull:
dst_slice = get_ghost_region_slice(inv_dir)
src_slice = get_slice_before_ghost_layer(direction)
else:
dst_slice = get_slice_before_ghost_layer(direction)
src_slice = get_ghost_region_slice(inv_dir)
src_slice += (1,)
dst_slice += (1,)
kernel = periodic_pdf_gpu_copy_kernel(field, src_slice, dst_slice)
dh.cpu_arrays[field.name][src_slice] = 42.0
dh.all_to_gpu()
dh.run_kernel(kernel)
dh.all_to_cpu()
np.testing.assert_equal(dh.cpu_arrays[field.name][dst_slice], 42.0)
@pytest.mark.parametrize("stencil", [Stencil.D2Q9, Stencil.D3Q19])
@pytest.mark.parametrize("streaming_pattern", streaming_patterns)
def test_direct_copy_and_kernels_equivalence(stencil: Stencil, streaming_pattern: str):
pytest.importorskip("cupy")
target = ps.Target.GPU
stencil = LBStencil(stencil)
domain_size = (4,) * stencil.D
dh: ps.datahandling.SerialDataHandling = ps.create_data_handling(
domain_size,
periodicity=(True,) * stencil.D,
parallel=False,
default_target=target,
)
pdfs_a = dh.add_array("pdfs_a", values_per_cell=stencil.Q)
pdfs_b = dh.add_array("pdfs_b", values_per_cell=stencil.Q)
dh.fill(pdfs_a.name, 0.0, ghost_layers=True)
dh.fill(pdfs_b.name, 0.0, ghost_layers=True)
for q in range(stencil.Q):
sl = ps.make_slice[:4, :4, q] if stencil.D == 2 else ps.make_slice[:4, :4, :4, q]
dh.cpu_arrays[pdfs_a.name][sl] = q
dh.cpu_arrays[pdfs_b.name][sl] = q
dh.all_to_gpu()
direct_copy = LBMPeriodicityHandling(stencil, dh, pdfs_a.name, streaming_pattern, cupy_direct_copy=True)
kernels_copy = LBMPeriodicityHandling(stencil, dh, pdfs_b.name, streaming_pattern, cupy_direct_copy=False)
direct_copy(Timestep.EVEN)
kernels_copy(Timestep.EVEN)
dh.all_to_cpu()
np.testing.assert_equal(
dh.cpu_arrays[pdfs_a.name],
dh.cpu_arrays[pdfs_b.name]
)
@pytest.mark.parametrize(
"stencil_name", [Stencil.D2Q9, Stencil.D3Q15, Stencil.D3Q19, Stencil.D3Q27]
)
def test_optimised_and_full_communication_equivalence(stencil_name): def test_optimised_and_full_communication_equivalence(stencil_name):
target = ps.Target.CPU target = ps.Target.CPU
stencil = LBStencil(stencil_name) stencil = LBStencil(stencil_name)
domain_size = (4, ) * stencil.D domain_size = (4,) * stencil.D
dh = ps.create_data_handling(domain_size, periodicity=(True, ) * stencil.D, dh = ps.create_data_handling(
parallel=False, default_target=target) domain_size,
periodicity=(True,) * stencil.D,
parallel=False,
default_target=target,
)
pdf = dh.add_array("pdf", values_per_cell=len(stencil), dtype=np.int64) pdf = dh.add_array("pdf", values_per_cell=len(stencil), dtype=np.int64)
dh.fill("pdf", 0, ghost_layers=True) dh.fill("pdf", 0, ghost_layers=True)
...@@ -82,9 +195,9 @@ def test_optimised_and_full_communication_equivalence(stencil_name): ...@@ -82,9 +195,9 @@ def test_optimised_and_full_communication_equivalence(stencil_name):
gl = dh.ghost_layers_of_field("pdf") gl = dh.ghost_layers_of_field("pdf")
num = 0 num = 0
for idx, x in np.ndenumerate(dh.cpu_arrays['pdf']): for idx, x in np.ndenumerate(dh.cpu_arrays["pdf"]):
dh.cpu_arrays['pdf'][idx] = num dh.cpu_arrays["pdf"][idx] = num
dh.cpu_arrays['pdf_tmp'][idx] = num dh.cpu_arrays["pdf_tmp"][idx] = num
num += 1 num += 1
lbm_config = LBMConfig(stencil=stencil, kernel_type="stream_pull_only") lbm_config = LBMConfig(stencil=stencil, kernel_type="stream_pull_only")
...@@ -95,21 +208,27 @@ def test_optimised_and_full_communication_equivalence(stencil_name): ...@@ -95,21 +208,27 @@ def test_optimised_and_full_communication_equivalence(stencil_name):
ast = ps.create_kernel(ac, config=config) ast = ps.create_kernel(ac, config=config)
stream = ast.compile() stream = ast.compile()
full_communication = dh.synchronization_function(pdf.name, target=dh.default_target, optimization={"openmp": True}) full_communication = dh.synchronization_function(
pdf.name, target=dh.default_target, optimization={"openmp": True}
)
full_communication() full_communication()
dh.run_kernel(stream) dh.run_kernel(stream)
dh.swap("pdf", "pdf_tmp") dh.swap("pdf", "pdf_tmp")
pdf_full_communication = np.copy(dh.cpu_arrays['pdf']) pdf_full_communication = np.copy(dh.cpu_arrays["pdf"])
num = 0 num = 0
for idx, x in np.ndenumerate(dh.cpu_arrays['pdf']): for idx, x in np.ndenumerate(dh.cpu_arrays["pdf"]):
dh.cpu_arrays['pdf'][idx] = num dh.cpu_arrays["pdf"][idx] = num
dh.cpu_arrays['pdf_tmp'][idx] = num dh.cpu_arrays["pdf_tmp"][idx] = num
num += 1 num += 1
optimised_communication = LBMPeriodicityHandling(stencil=stencil, data_handling=dh, pdf_field_name=pdf.name, optimised_communication = LBMPeriodicityHandling(
streaming_pattern='pull') stencil=stencil,
data_handling=dh,
pdf_field_name=pdf.name,
streaming_pattern="pull",
)
optimised_communication() optimised_communication()
dh.run_kernel(stream) dh.run_kernel(stream)
dh.swap("pdf", "pdf_tmp") dh.swap("pdf", "pdf_tmp")
...@@ -119,9 +238,14 @@ def test_optimised_and_full_communication_equivalence(stencil_name): ...@@ -119,9 +238,14 @@ def test_optimised_and_full_communication_equivalence(stencil_name):
for j in range(gl, domain_size[1]): for j in range(gl, domain_size[1]):
for k in range(gl, domain_size[2]): for k in range(gl, domain_size[2]):
for f in range(len(stencil)): for f in range(len(stencil)):
assert dh.cpu_arrays['pdf'][i, j, k, f] == pdf_full_communication[i, j, k, f], print(f) assert (
dh.cpu_arrays["pdf"][i, j, k, f]
== pdf_full_communication[i, j, k, f]
), print(f)
else: else:
for i in range(gl, domain_size[0]): for i in range(gl, domain_size[0]):
for j in range(gl, domain_size[1]): for j in range(gl, domain_size[1]):
for f in range(len(stencil)): for f in range(len(stencil)):
assert dh.cpu_arrays['pdf'][i, j, f] == pdf_full_communication[i, j, f] assert (
dh.cpu_arrays["pdf"][i, j, f] == pdf_full_communication[i, j, f]
)
...@@ -5,13 +5,16 @@ import pytest ...@@ -5,13 +5,16 @@ import pytest
import pystencils as ps import pystencils as ps
from pystencils import get_code_str from pystencils import get_code_str
from pystencils.backends.simd_instruction_sets import get_supported_instruction_sets, get_vector_instruction_set from pystencils.backends.simd_instruction_sets import (
get_supported_instruction_sets,
get_vector_instruction_set,
)
from pystencils.cpu.cpujit import get_compiler_config from pystencils.cpu.cpujit import get_compiler_config
from pystencils.enums import Target from pystencils.enums import Target
from pystencils.rng import PhiloxTwoDoubles from pystencils.rng import PhiloxTwoDoubles
from lbmpy.creationfunctions import * from lbmpy.creationfunctions import *
from lbmpy.forcemodels import Guo from lbmpy.forcemodels import Guo
from lbmpy.macroscopic_value_kernels import macroscopic_values_setter from lbmpy.macroscopic_value_kernels import macroscopic_values_setter
import numpy as np import numpy as np
from lbmpy.enums import Stencil from lbmpy.enums import Stencil
...@@ -20,13 +23,18 @@ from lbmpy.stencils import LBStencil ...@@ -20,13 +23,18 @@ from lbmpy.stencils import LBStencil
def _skip_instruction_sets_windows(instruction_sets): def _skip_instruction_sets_windows(instruction_sets):
if get_compiler_config()['os'] == 'windows': if get_compiler_config()["os"] == "windows":
# skip instruction sets supported by the CPU but not by the compiler # skip instruction sets supported by the CPU but not by the compiler
if 'avx' in instruction_sets and ('/arch:avx2' not in get_compiler_config()['flags'].lower() if "avx" in instruction_sets and (
and '/arch:avx512' not in get_compiler_config()['flags'].lower()): "/arch:avx2" not in get_compiler_config()["flags"].lower()
instruction_sets.remove('avx') and "/arch:avx512" not in get_compiler_config()["flags"].lower()
if 'avx512' in instruction_sets and '/arch:avx512' not in get_compiler_config()['flags'].lower(): ):
instruction_sets.remove('avx512') instruction_sets.remove("avx")
if (
"avx512" in instruction_sets
and "/arch:avx512" not in get_compiler_config()["flags"].lower()
):
instruction_sets.remove("avx512")
return instruction_sets return instruction_sets
...@@ -35,11 +43,13 @@ def single_component_maxwell(x1, x2, kT, mass): ...@@ -35,11 +43,13 @@ def single_component_maxwell(x1, x2, kT, mass):
x = np.linspace(x1, x2, 1000) x = np.linspace(x1, x2, 1000)
try: try:
trapezoid = np.trapezoid # since numpy 2.0 trapezoid = np.trapezoid # since numpy 2.0
except AttributeError: except AttributeError:
trapezoid = np.trapz trapezoid = np.trapz
return trapezoid(np.exp(-mass * x ** 2 / (2. * kT)), x) / np.sqrt(2. * np.pi * kT / mass) return trapezoid(np.exp(-mass * x**2 / (2.0 * kT)), x) / np.sqrt(
2.0 * np.pi * kT / mass
)
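A quick sanity check of the helper above (a sketch, not part of the test suite, assuming single_component_maxwell is in scope as defined here): integrated over an interval much wider than the thermal velocity, the normalised single-component Maxwell distribution should give approximately 1.

import numpy as np

kT, mass = 4e-4, 0.86               # same values the tests use for kT and rho_0
thermal_width = np.sqrt(kT / mass)  # roughly 0.02 lattice units
total = single_component_maxwell(-30 * thermal_width, 30 * thermal_width, kT, mass)
assert np.isclose(total, 1.0, rtol=1e-3)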
def rr_getter(moment_group): def rr_getter(moment_group):
...@@ -71,53 +81,86 @@ def second_order_moment_tensor_assignments(function_values, stencil, output_fiel ...@@ -71,53 +81,86 @@ def second_order_moment_tensor_assignments(function_values, stencil, output_fiel
"""Assignments for calculating the pressure tensor""" """Assignments for calculating the pressure tensor"""
assert len(function_values) == len(stencil) assert len(function_values) == len(stencil)
dim = len(stencil[0]) dim = len(stencil[0])
return [ps.Assignment(output_field(i, j), return [
sum(c[i] * c[j] * f for f, c in zip(function_values, stencil))) ps.Assignment(
for i in range(dim) for j in range(dim)] output_field(i, j),
sum(c[i] * c[j] * f for f, c in zip(function_values, stencil)),
)
for i in range(dim)
for j in range(dim)
]
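Written out, each entry produced by the helper above is the second-order moment (momentum flux) of the populations,

    \Pi_{\alpha\beta} = \sum_{q} c_{q\alpha} \, c_{q\beta} \, f_q ,

evaluated here on the pre-collision PDFs passed in as function_values.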
def add_pressure_output_to_collision_rule(collision_rule, pressure_field): def add_pressure_output_to_collision_rule(collision_rule, pressure_field):
pressure_output = second_order_moment_tensor_assignments(collision_rule.method.pre_collision_pdf_symbols, pressure_output = second_order_moment_tensor_assignments(
collision_rule.method.stencil, pressure_field) collision_rule.method.pre_collision_pdf_symbols,
collision_rule.method.stencil,
pressure_field,
)
collision_rule.main_assignments = collision_rule.main_assignments + pressure_output collision_rule.main_assignments = collision_rule.main_assignments + pressure_output
def get_fluctuating_lb(size=None, kT=None, def get_fluctuating_lb(
omega_shear=None, omega_bulk=None, omega_odd=None, omega_even=None, size=None,
rho_0=None, target=None): kT=None,
omega_shear=None,
omega_bulk=None,
omega_odd=None,
omega_even=None,
rho_0=None,
target=None,
zero_centered: bool = False,
):
# Parameters # Parameters
stencil = LBStencil(Stencil.D3Q19) stencil = LBStencil(Stencil.D3Q19)
# Setup data handling # Setup data handling
dh = ps.create_data_handling((size,) * stencil.D, periodicity=True, default_target=target) dh = ps.create_data_handling(
src = dh.add_array('src', values_per_cell=stencil.Q, layout='f') (size,) * stencil.D, periodicity=True, default_target=target
dst = dh.add_array_like('dst', 'src') )
rho = dh.add_array('rho', layout='f', latex_name='\\rho', values_per_cell=1) src = dh.add_array("src", values_per_cell=stencil.Q, layout="f")
u = dh.add_array('u', values_per_cell=dh.dim, layout='f') dst = dh.add_array_like("dst", "src")
pressure_field = dh.add_array('pressure', values_per_cell=( rho = dh.add_array("rho", layout="f", latex_name="\\rho", values_per_cell=1)
3, 3), layout='f', gpu=target == Target.GPU) u = dh.add_array("u", values_per_cell=dh.dim, layout="f")
pressure_field = dh.add_array(
"pressure", values_per_cell=(3, 3), layout="f", gpu=target == Target.GPU
)
force_field = dh.add_array( force_field = dh.add_array(
'force', values_per_cell=stencil.D, layout='f', gpu=target == Target.GPU) "force", values_per_cell=stencil.D, layout="f", gpu=target == Target.GPU
)
# Method setup # Method setup
lbm_config = LBMConfig(stencil=stencil, method=Method.MRT, compressible=True, lbm_config = LBMConfig(
weighted=True, zero_centered=False, relaxation_rates=rr_getter, stencil=stencil,
force_model=Guo(force=force_field.center_vector), method=Method.MRT,
fluctuating={'temperature': kT}, compressible=True,
kernel_type='collide_only') weighted=True,
zero_centered=zero_centered,
relaxation_rates=rr_getter,
force_model=Guo(force=force_field.center_vector),
fluctuating={"temperature": kT},
kernel_type="collide_only",
)
lb_method = create_lb_method(lbm_config=lbm_config) lb_method = create_lb_method(lbm_config=lbm_config)
lbm_config.lb_method = lb_method lbm_config.lb_method = lb_method
lbm_opt = LBMOptimisation(symbolic_field=src, cse_global=True)
collision_rule = create_lb_collision_rule(lbm_config=lbm_config, lbm_optimisation=lbm_opt)
add_pressure_output_to_collision_rule(collision_rule, pressure_field)
collision = create_lb_update_rule(collision_rule=collision_rule, lbm_opt = LBMOptimisation(symbolic_field=src, cse_global=True)
lbm_config=lbm_config, lbm_optimisation=lbm_opt) collision_rule = create_lb_collision_rule(
stream = create_stream_pull_with_output_kernel(collision.method, src, dst, lbm_config=lbm_config, lbm_optimisation=lbm_opt
{'density': rho, 'velocity': u}) )
# add_pressure_output_to_collision_rule(collision_rule, pressure_field)
collision = create_lb_update_rule(
collision_rule=collision_rule, lbm_config=lbm_config, lbm_optimisation=lbm_opt
)
stream = create_stream_pull_with_output_kernel(
collision.method,
src,
dst,
{"density": rho, "velocity": u, "moment2": pressure_field},
)
config = ps.CreateKernelConfig(cpu_openmp=False, target=dh.default_target) config = ps.CreateKernelConfig(cpu_openmp=False, target=dh.default_target)
...@@ -128,15 +171,18 @@ def get_fluctuating_lb(size=None, kT=None, ...@@ -128,15 +171,18 @@ def get_fluctuating_lb(size=None, kT=None,
sync_pdfs = dh.synchronization_function([src.name]) sync_pdfs = dh.synchronization_function([src.name])
# Initialization # Initialization
init = macroscopic_values_setter(collision.method, velocity=(0,) * dh.dim, init = macroscopic_values_setter(
pdfs=src.center_vector, density=rho.center) collision.method,
velocity=(0,) * dh.dim,
pdfs=src.center_vector,
density=rho_0
)
init_kernel = ps.create_kernel(init, ghost_layers=0).compile() init_kernel = ps.create_kernel(init, ghost_layers=0).compile()
dh.fill(rho.name, rho_0) dh.fill(rho.name, rho_0)
dh.fill(u.name, np.nan, ghost_layers=True, inner_ghost_layers=True) dh.fill(u.name, np.nan, ghost_layers=True, inner_ghost_layers=True)
dh.fill(u.name, 0) dh.fill(u.name, 0)
dh.fill(force_field.name, np.nan, dh.fill(force_field.name, np.nan, ghost_layers=True, inner_ghost_layers=True)
ghost_layers=True, inner_ghost_layers=True)
dh.fill(force_field.name, 0) dh.fill(force_field.name, 0)
dh.run_kernel(init_kernel) dh.run_kernel(init_kernel)
...@@ -144,8 +190,15 @@ def get_fluctuating_lb(size=None, kT=None, ...@@ -144,8 +190,15 @@ def get_fluctuating_lb(size=None, kT=None,
def time_loop(start, steps): def time_loop(start, steps):
dh.all_to_gpu() dh.all_to_gpu()
for i in range(start, start + steps): for i in range(start, start + steps):
dh.run_kernel(collision_kernel, omega_shear=omega_shear, omega_bulk=omega_bulk, dh.run_kernel(
omega_odd=omega_odd, omega_even=omega_even, seed=42, time_step=i) collision_kernel,
omega_shear=omega_shear,
omega_bulk=omega_bulk,
omega_odd=omega_odd,
omega_even=omega_even,
seed=42,
time_step=i,
)
sync_pdfs() sync_pdfs()
dh.run_kernel(stream_kernel) dh.run_kernel(stream_kernel)
...@@ -156,13 +209,27 @@ def get_fluctuating_lb(size=None, kT=None, ...@@ -156,13 +209,27 @@ def get_fluctuating_lb(size=None, kT=None,
return dh, time_loop return dh, time_loop
def test_resting_fluid(target=Target.CPU): @pytest.mark.parametrize(
rho_0 = 0.86 "zero_centered", [False, True], ids=["regular-storage", "zero-centered"]
kT = 4E-4 )
L = [60] * 3 @pytest.mark.parametrize(
dh, time_loop = get_fluctuating_lb(size=L[0], target=target, "domain_size", [8, 60]
rho_0=rho_0, kT=kT, )
omega_shear=0.8, omega_bulk=0.5, omega_even=.04, omega_odd=0.3) def test_resting_fluid(zero_centered: bool, domain_size: int, target=Target.CPU):
rho_0 = 0.86
kT = 4e-4
L = [domain_size] * 3
dh, time_loop = get_fluctuating_lb(
size=L[0],
target=target,
rho_0=rho_0,
kT=kT,
omega_shear=0.8,
omega_bulk=0.5,
omega_even=0.04,
omega_odd=0.3,
zero_centered=zero_centered,
)
# Test # Test
t = 0 t = 0
...@@ -176,38 +243,43 @@ def test_resting_fluid(target=Target.CPU): ...@@ -176,38 +243,43 @@ def test_resting_fluid(target=Target.CPU):
res_u = dh.gather_array("u").reshape((-1, 3)) res_u = dh.gather_array("u").reshape((-1, 3))
res_rho = dh.gather_array("rho").reshape((-1,)) res_rho = dh.gather_array("rho").reshape((-1,))
# mass conservation # mass conservation
# density per cell fluctuates, but total mass is conserved
np.testing.assert_allclose(np.mean(res_rho), rho_0, atol=3E-12) np.testing.assert_allclose(np.mean(res_rho), rho_0, atol=3E-12)
# momentum conservation # momentum conservation
momentum = np.dot(res_rho, res_u) momentum = np.dot(res_rho, res_u)
np.testing.assert_allclose(momentum, [0, 0, 0], atol=1E-10) np.testing.assert_allclose(momentum, [0, 0, 0], atol=1e-10)
# temperature # temperature (fluctuates around pre-set kT)
kinetic_energy = 1 / 2 * np.dot(res_rho, res_u * res_u) / np.prod(L) kinetic_energy = 1 / 2 * np.dot(res_rho, res_u * res_u) / np.prod(L)
np.testing.assert_allclose( kT_tol = 0.075 * (16 / domain_size) ** (3 / 2)
kinetic_energy, [kT / 2] * 3, atol=kT * 0.01) np.testing.assert_allclose(kinetic_energy, [kT / 2] * 3, rtol=kT_tol)
# velocity distribution # velocity distribution
v_hist, v_bins = np.histogram( v_hist, v_bins = np.histogram(
res_u, bins=11, range=(-.075, .075), density=True) res_u, bins=11, range=(-0.075, 0.075), density=True
)
# Calculate expected values from the single-component Maxwell distribution # Calculate expected values from the single-component Maxwell distribution
v_expected = [] v_expected = []
for j in range(len(v_hist)): for j in range(len(v_hist)):
# Maxwell distribution # Maxwell distribution
res = 1. / (v_bins[j + 1] - v_bins[j]) * \ res = (
single_component_maxwell( 1.0
v_bins[j], v_bins[j + 1], kT, rho_0) / (v_bins[j + 1] - v_bins[j])
* single_component_maxwell(v_bins[j], v_bins[j + 1], kT, rho_0)
)
v_expected.append(res) v_expected.append(res)
v_expected = np.array(v_expected) v_expected = np.array(v_expected)
# 10% accuracy on the entire histogram hist_tol_all = 0.75 * (16 / domain_size) ** (3 / 2)
np.testing.assert_allclose(v_hist, v_expected, rtol=0.1) np.testing.assert_allclose(v_hist, v_expected, rtol=hist_tol_all)
# 1% accuracy on the middle part hist_tol_center = hist_tol_all / 10
remove = 3 remove = 3
np.testing.assert_allclose( np.testing.assert_allclose(
v_hist[remove:-remove], v_expected[remove:-remove], rtol=0.01) v_hist[remove:-remove], v_expected[remove:-remove], rtol=hist_tol_center
)
# pressure tensor against expressions from # pressure tensor against expressions from
# Duenweg, Schiller, Ladd, https://arxiv.org/abs/0707.1581 # Duenweg, Schiller, Ladd, https://arxiv.org/abs/0707.1581
...@@ -220,19 +292,35 @@ def test_resting_fluid(target=Target.CPU): ...@@ -220,19 +292,35 @@ def test_resting_fluid(target=Target.CPU):
# Diagonal elements are rho c_s^2 + <u,u>. When the fluid is # Diagonal elements are rho c_s^2 + <u,u>. When the fluid is
# thermalized, the expectation value of <u,u> = kT due to the # thermalized, the expectation value of <u,u> = kT due to the
# equipartition theorem. # equipartition theorem.
p_av_expected = np.diag([rho_0 * c_s ** 2 + kT] * 3) p_av_expected = np.diag([rho_0 * c_s**2 + kT] * 3)
pressure_atol = c_s**2 / 200 * (16 / domain_size) ** (3 / 2)
np.testing.assert_allclose( np.testing.assert_allclose(
np.mean(res_pressure, axis=0), p_av_expected, atol=c_s ** 2 / 2000) np.mean(res_pressure, axis=0), p_av_expected, atol=pressure_atol)
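As a worked number (assuming the standard lattice speed of sound c_s^2 = 1/3; the value of c_s actually used in the test is defined in the elided part of this hunk), the expected diagonal pressure entries and the domain-size-dependent tolerance behave roughly as follows:

rho_0, kT = 0.86, 4e-4
c_s_sq = 1.0 / 3.0                                  # assumed standard lattice speed of sound squared
p_diag_expected = rho_0 * c_s_sq + kT               # ~0.28707
atol_small_box = c_s_sq / 200 * (16 / 8) ** 1.5     # ~4.7e-3 for the 8^3 domain
atol_large_box = c_s_sq / 200 * (16 / 60) ** 1.5    # ~2.3e-4 for the 60^3 domain
print(p_diag_expected, atol_small_box, atol_large_box)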
def test_point_force(target=Target.CPU):
@pytest.mark.parametrize(
"zero_centered", [False, True], ids=["regular-storage", "zero-centered"]
)
@pytest.mark.parametrize(
"domain_size", [8, 60]
)
def test_point_force(zero_centered: bool, domain_size: int, target=Target.CPU):
"""Test momentum balance for thermalized fluid with applied poitn forces""" """Test momentum balance for thermalized fluid with applied poitn forces"""
rho_0 = 0.86 rho_0 = 0.86
kT = 4E-4 kT = 4e-4
L = [8] * 3 L = [domain_size] * 3
dh, time_loop = get_fluctuating_lb(size=L[0], target=target, dh, time_loop = get_fluctuating_lb(
rho_0=rho_0, kT=kT, size=L[0],
omega_shear=0.8, omega_bulk=0.5, omega_even=.04, omega_odd=0.3) target=target,
rho_0=rho_0,
kT=kT,
omega_shear=0.8,
omega_bulk=0.5,
omega_even=0.8,
omega_odd=0.8,
zero_centered=zero_centered
)
# Test # Test
t = 0 t = 0
...@@ -241,17 +329,17 @@ def test_point_force(target=Target.CPU): ...@@ -241,17 +329,17 @@ def test_point_force(target=Target.CPU):
introduced_momentum = np.zeros(3) introduced_momentum = np.zeros(3)
for i in range(100): for i in range(100):
point_force = 1E-5 * (np.random.random(3) - .5) point_force = 1e-2 / domain_size ** (3 / 2) * (np.random.random(3) - 0.5)
introduced_momentum += point_force introduced_momentum += point_force
# Note that ghost layers are included in the indexing # Note that ghost layers are included in the indexing
force_pos = np.random.randint(1, L[0] - 2, size=3) force_pos = np.random.randint(1, L[0] - 2, size=3)
dh.cpu_arrays["force"][force_pos[0], dh.cpu_arrays["force"][force_pos[0], force_pos[1], force_pos[2]] = point_force
force_pos[1], force_pos[2]] = point_force
t = time_loop(t, 1) t = time_loop(t, 1)
res_u = dh.gather_array("u").reshape((-1, 3)) res_u = dh.gather_array("u").reshape((-1, 3))
res_rho = dh.gather_array("rho").reshape((-1,)) res_rho = dh.gather_array("rho").reshape((-1,))
# mass conservation # mass conservation
np.testing.assert_allclose(np.mean(res_rho), rho_0, atol=3E-12) np.testing.assert_allclose(np.mean(res_rho), rho_0, atol=3E-12)
...@@ -259,52 +347,72 @@ def test_point_force(target=Target.CPU): ...@@ -259,52 +347,72 @@ def test_point_force(target=Target.CPU):
# momentum conservation # momentum conservation
momentum = np.dot(res_rho, res_u) momentum = np.dot(res_rho, res_u)
np.testing.assert_allclose( np.testing.assert_allclose(
momentum, introduced_momentum + 0.5 * point_force, atol=1E-10) momentum, introduced_momentum + 0.5 * point_force, atol=1e-10
dh.cpu_arrays["force"][force_pos[0], )
force_pos[1], force_pos[2]] = np.zeros(3) dh.cpu_arrays["force"][force_pos[0], force_pos[1], force_pos[2]] = np.zeros(3)
@pytest.mark.skipif(not get_supported_instruction_sets(), reason="No vector instruction sets supported") @pytest.mark.skipif(
@pytest.mark.parametrize('data_type', ("float32", "float64")) not get_supported_instruction_sets(), reason="No vector instruction sets supported"
@pytest.mark.parametrize('assume_aligned', (True, False)) )
@pytest.mark.parametrize('assume_inner_stride_one', (True, False)) @pytest.mark.parametrize("data_type", ("float32", "float64"))
@pytest.mark.parametrize('assume_sufficient_line_padding', (True, False)) @pytest.mark.parametrize("assume_aligned", (True, False))
def test_vectorization(data_type, assume_aligned, assume_inner_stride_one, assume_sufficient_line_padding): @pytest.mark.parametrize("assume_inner_stride_one", (True, False))
@pytest.mark.parametrize("assume_sufficient_line_padding", (True, False))
def test_vectorization(
data_type, assume_aligned, assume_inner_stride_one, assume_sufficient_line_padding
):
stencil = LBStencil(Stencil.D3Q19) stencil = LBStencil(Stencil.D3Q19)
pdfs, pdfs_tmp = ps.fields(f"pdfs({stencil.Q}), pdfs_tmp({stencil.Q}): {data_type}[3D]", layout='fzyx') pdfs, pdfs_tmp = ps.fields(
f"pdfs({stencil.Q}), pdfs_tmp({stencil.Q}): {data_type}[3D]", layout="fzyx"
)
method = create_mrt_orthogonal( method = create_mrt_orthogonal(
stencil=stencil, stencil=stencil, compressible=True, weighted=True, relaxation_rates=rr_getter
)
rng_node = (
ps.rng.PhiloxTwoDoubles if data_type == "float64" else ps.rng.PhiloxFourFloats
)
lbm_config = LBMConfig(
lb_method=method,
fluctuating={
"temperature": sp.Symbol("kT"),
"rng_node": rng_node,
"block_offsets": tuple([0] * stencil.D),
},
compressible=True, compressible=True,
weighted=True, zero_centered=False,
relaxation_rates=rr_getter) stencil=method.stencil,
kernel_type="collide_only",
rng_node = ps.rng.PhiloxTwoDoubles if data_type == "float64" else ps.rng.PhiloxFourFloats )
lbm_config = LBMConfig(lb_method=method, fluctuating={'temperature': sp.Symbol("kT"), lbm_opt = LBMOptimisation(
'rng_node': rng_node, cse_global=True, symbolic_field=pdfs, symbolic_temporary_field=pdfs_tmp
'block_offsets': tuple([0] * stencil.D)}, )
compressible=True, zero_centered=False,
stencil=method.stencil, kernel_type='collide_only')
lbm_opt = LBMOptimisation(cse_global=True, symbolic_field=pdfs, symbolic_temporary_field=pdfs_tmp)
collision = create_lb_update_rule(lbm_config=lbm_config, lbm_optimisation=lbm_opt) collision = create_lb_update_rule(lbm_config=lbm_config, lbm_optimisation=lbm_opt)
instruction_sets = _skip_instruction_sets_windows(get_supported_instruction_sets()) instruction_sets = _skip_instruction_sets_windows(get_supported_instruction_sets())
instruction_set = instruction_sets[-1] instruction_set = instruction_sets[-1]
config = ps.CreateKernelConfig(target=Target.CPU, config = ps.CreateKernelConfig(
data_type=data_type, default_number_float=data_type, target=Target.CPU,
cpu_vectorize_info={'instruction_set': instruction_set, data_type=data_type,
'assume_aligned': assume_aligned, default_number_float=data_type,
'assume_inner_stride_one': assume_inner_stride_one, cpu_vectorize_info={
'assume_sufficient_line_padding': assume_sufficient_line_padding, "instruction_set": instruction_set,
} "assume_aligned": assume_aligned,
) "assume_inner_stride_one": assume_inner_stride_one,
"assume_sufficient_line_padding": assume_sufficient_line_padding,
if not assume_inner_stride_one and 'storeS' not in get_vector_instruction_set(data_type, instruction_set): },
)
if not assume_inner_stride_one and "storeS" not in get_vector_instruction_set(
data_type, instruction_set
):
with pytest.warns(UserWarning) as pytest_warnings: with pytest.warns(UserWarning) as pytest_warnings:
ast = ps.create_kernel(collision, config=config) ast = ps.create_kernel(collision, config=config)
assert 'Could not vectorize loop' in pytest_warnings[0].message.args[0] assert "Could not vectorize loop" in pytest_warnings[0].message.args[0]
else: else:
ast = ps.create_kernel(collision, config=config) ast = ps.create_kernel(collision, config=config)
ast.compile() ast.compile()
...@@ -312,31 +420,54 @@ def test_vectorization(data_type, assume_aligned, assume_inner_stride_one, assum ...@@ -312,31 +420,54 @@ def test_vectorization(data_type, assume_aligned, assume_inner_stride_one, assum
print(code) print(code)
@pytest.mark.parametrize('data_type', ("float32", "float64")) @pytest.mark.parametrize("data_type", ("float32", "float64"))
@pytest.mark.parametrize('assume_aligned', (True, False)) @pytest.mark.parametrize("assume_aligned", (True, False))
@pytest.mark.parametrize('assume_inner_stride_one', (True, False)) @pytest.mark.parametrize("assume_inner_stride_one", (True, False))
@pytest.mark.parametrize('assume_sufficient_line_padding', (True, False)) @pytest.mark.parametrize("assume_sufficient_line_padding", (True, False))
def test_fluctuating_lb_issue_188_wlb(data_type, assume_aligned, def test_fluctuating_lb_issue_188_wlb(
assume_inner_stride_one, assume_sufficient_line_padding): data_type, assume_aligned, assume_inner_stride_one, assume_sufficient_line_padding
):
stencil = LBStencil(Stencil.D3Q19) stencil = LBStencil(Stencil.D3Q19)
temperature = sp.symbols("temperature") temperature = sp.symbols("temperature")
pdfs, pdfs_tmp = ps.fields(f"pdfs({stencil.Q}), pdfs_tmp({stencil.Q}): {data_type}[3D]", layout='fzyx') pdfs, pdfs_tmp = ps.fields(
f"pdfs({stencil.Q}), pdfs_tmp({stencil.Q}): {data_type}[3D]", layout="fzyx"
rng_node = ps.rng.PhiloxTwoDoubles if data_type == "float64" else ps.rng.PhiloxFourFloats )
fluctuating = {'temperature': temperature,
'block_offsets': 'walberla', rng_node = (
'rng_node': rng_node} ps.rng.PhiloxTwoDoubles if data_type == "float64" else ps.rng.PhiloxFourFloats
)
lbm_config = LBMConfig(stencil=stencil, method=Method.MRT, compressible=True, fluctuating = {
weighted=True, zero_centered=False, relaxation_rate=1.4, "temperature": temperature,
fluctuating=fluctuating) "block_offsets": "walberla",
lbm_opt = LBMOptimisation(symbolic_field=pdfs, symbolic_temporary_field=pdfs_tmp, cse_global=True) "rng_node": rng_node,
}
lbm_config = LBMConfig(
stencil=stencil,
method=Method.MRT,
compressible=True,
weighted=True,
zero_centered=False,
relaxation_rate=1.4,
fluctuating=fluctuating,
)
lbm_opt = LBMOptimisation(
symbolic_field=pdfs, symbolic_temporary_field=pdfs_tmp, cse_global=True
)
up = create_lb_update_rule(lbm_config=lbm_config, lbm_optimisation=lbm_opt) up = create_lb_update_rule(lbm_config=lbm_config, lbm_optimisation=lbm_opt)
cpu_vectorize_info = {'instruction_set': 'avx', 'assume_inner_stride_one': True, 'assume_aligned': True} cpu_vectorize_info = {
config = ps.CreateKernelConfig(target=ps.Target.CPU, data_type=data_type, default_number_float=data_type, "instruction_set": "avx",
cpu_vectorize_info=cpu_vectorize_info) "assume_inner_stride_one": True,
"assume_aligned": True,
}
config = ps.CreateKernelConfig(
target=ps.Target.CPU,
data_type=data_type,
default_number_float=data_type,
cpu_vectorize_info=cpu_vectorize_info,
)
ast = create_kernel(up, config=config) ast = create_kernel(up, config=config)
code = ps.get_code_str(ast) code = ps.get_code_str(ast)
......