Compare revisions

d3f62364 · d3f62364 · d3f62364 · d3f62364 · d3f62364 · d3f62364
--- a/lbmpy_tests/test_oldroydb.py
+++ b/lbmpy_tests/test_oldroydb.py
@@ -6,6 +6,7 @@ from pystencils.boundaries.boundaryconditions import Neumann, Dirichlet
 from lbmpy.boundaries.boundaryhandling import LatticeBoltzmannBoundaryHandling
 from lbmpy.boundaries import NoSlip

+from lbmpy._compat import IS_PYSTENCILS_2
 from lbmpy.oldroydb import *
 import pytest

@@ -16,6 +17,7 @@ import pytest
 pytest.importorskip('scipy.optimize')


+@pytest.mark.xfail(IS_PYSTENCILS_2, reason="Staggered kernels are unavailable")
 def test_oldroydb():
    import scipy.optimize


--- a/lbmpy_tests/test_plot.py
+++ b/lbmpy_tests/test_plot.py
--- a/lbmpy_tests/test_poisuille_channel.py
+++ b/lbmpy_tests/test_poisuille_channel.py
@@ -7,7 +7,7 @@ import pytest
 from lbmpy.enums import Stencil, CollisionSpace

 import pystencils as ps
-from poiseuille import poiseuille_channel
+from . import poiseuille


 @pytest.mark.parametrize('target', (ps.Target.CPU, ps.Target.GPU))
@@ -18,7 +18,10 @@ def test_poiseuille_channel(target, stencil_name, zero_centered, moment_space_co
    # Cuda
    if target == ps.Target.GPU:
        import pytest
-        pytest.importorskip("pycuda")
+        pytest.importorskip("cupy")

    cspace_info = CollisionSpace.RAW_MOMENTS if moment_space_collision else CollisionSpace.POPULATIONS
-    poiseuille_channel(target=target, stencil_name=stencil_name, zero_centered=zero_centered, collision_space_info=cspace_info)
+    poiseuille.poiseuille_channel(target=target,
+                                  stencil_name=stencil_name,
+                                  zero_centered=zero_centered,
+                                  collision_space_info=cspace_info)
--- a/lbmpy_tests/test_postprocessing.py
+++ b/lbmpy_tests/test_postprocessing.py
--- a/tests/test_psm.py
+++ b/tests/test_psm.py
+import pytest
+import numpy as np
+
+from pystencils import fields, CreateKernelConfig, Target, create_kernel, get_code_str, create_data_handling
+from lbmpy import LBMConfig, Stencil, Method, LBStencil, create_lb_method, create_lb_collision_rule, LBMOptimisation, \
+    create_lb_update_rule
+from lbmpy.partially_saturated_cells import PSMConfig
+
+
+@pytest.mark.parametrize("stencil", [Stencil.D2Q9, Stencil.D3Q19, Stencil.D3Q27])
+@pytest.mark.parametrize("method_enum", [Method.SRT, Method.MRT, Method.CUMULANT])
+def test_psm_integration(stencil, method_enum):
+    stencil = LBStencil(stencil)
+
+    fraction_field = fields("fraction_field: double[10, 10, 5]", layout=(2, 1, 0))
+    object_vel = fields("object_vel(3): double[10, 10, 5]", layout=(3, 2, 1, 0))
+    psm_config = PSMConfig(fraction_field=fraction_field, object_velocity_field=object_vel)
+
+    lbm_config = LBMConfig(stencil=stencil, method=method_enum, relaxation_rate=1.5, compressible=True,
+                           psm_config=psm_config)
+    config = CreateKernelConfig(target=Target.CPU)#
+
+    collision_rule = create_lb_collision_rule(lbm_config=lbm_config)
+
+    ast = create_kernel(collision_rule, config=config)
+    code_str = get_code_str(ast)
+
+
+def get_data_handling_for_psm(use_psm):
+    domain_size = (10, 10)
+    stencil = LBStencil(Stencil.D2Q9)
+    dh = create_data_handling(domain_size=domain_size, periodicity=(True, False))
+
+    f = dh.add_array('f', values_per_cell=len(stencil))
+    dh.fill(f.name, 0.0, ghost_layers=True)
+    f_tmp = dh.add_array('f_tmp', values_per_cell=len(stencil))
+    dh.fill(f_tmp.name, 0.0, ghost_layers=True)
+
+    psm_config = None
+    if use_psm:
+        fraction_field = dh.add_array('fraction_field', values_per_cell=1)
+        dh.fill(fraction_field.name, 0.0, ghost_layers=True)
+        object_vel = dh.add_array('object_vel', values_per_cell=dh.dim)
+        dh.fill(object_vel.name, 0.0, ghost_layers=True)
+        psm_config = PSMConfig(fraction_field=fraction_field, object_velocity_field=object_vel)
+
+    lbm_config = LBMConfig(stencil=stencil, method=Method.SRT, relaxation_rate=1.5, psm_config=psm_config)
+
+    lbm_optimisation = LBMOptimisation(symbolic_field=f, symbolic_temporary_field=f_tmp)
+    update_rule = create_lb_update_rule(lbm_config=lbm_config, lbm_optimisation=lbm_optimisation)
+    kernel_lb_step_with_psm = create_kernel(update_rule).compile()
+    return (dh, kernel_lb_step_with_psm)
+
+
+def test_lbm_vs_psm():
+
+    psm_dh, psm_kernel = get_data_handling_for_psm(True)
+    lbm_dh, lbm_kernel = get_data_handling_for_psm(False)
+
+    for i in range(20):
+        psm_dh.run_kernel(psm_kernel)
+        psm_dh.swap('f', 'f_tmp')
+
+        lbm_dh.run_kernel(lbm_kernel)
+        lbm_dh.swap('f', 'f_tmp')
+
+    max_vel_error = np.max(np.abs(psm_dh.gather_array('f') - lbm_dh.gather_array('f')))
+    np.testing.assert_allclose(max_vel_error, 0, atol=1e-14)
--- a/lbmpy_tests/test_quicktests.py
+++ b/lbmpy_tests/test_quicktests.py
@@ -5,11 +5,11 @@ import pystencils as ps
 from lbmpy.enums import ForceModel, Method, Stencil
 from lbmpy.scenarios import create_lid_driven_cavity

-from lbmpy_tests.poiseuille import poiseuille_channel
+from . import poiseuille


 def test_poiseuille_channel_quicktest():
-    poiseuille_channel(target=ps.Target.CPU, stencil_name=Stencil.D2Q9)
+    poiseuille.poiseuille_channel(target=ps.Target.CPU, stencil_name=Stencil.D2Q9)


 def test_entropic_methods():

--- a/lbmpy_tests/test_relaxation_rate.py
+++ b/lbmpy_tests/test_relaxation_rate.py
 import pytest
 import sympy as sp
 from lbmpy.creationfunctions import create_lb_method, LBMConfig
+from lbmpy.moments import is_shear_moment, get_order
 from lbmpy.enums import Method, Stencil
 from lbmpy.relaxationrates import get_shear_relaxation_rate
 from lbmpy.stencils import LBStencil
@@ -19,3 +20,17 @@ def test_relaxation_rate():
                           relaxation_rates=omegas)
    method = create_lb_method(lbm_config=lbm_config)
    assert get_shear_relaxation_rate(method) == omegas[0]
+
+
+@pytest.mark.parametrize("method", [Method.MRT, Method.CENTRAL_MOMENT, Method.CUMULANT])
+def test_default_mrt_behaviour(method):
+    lbm_config = LBMConfig(stencil=LBStencil(Stencil.D3Q19), method=method, compressible=True)
+    method = create_lb_method(lbm_config=lbm_config)
+
+    for moment, relax_info in method.relaxation_info_dict.items():
+        if get_order(moment) <= 1:
+            assert relax_info.relaxation_rate == 0
+        elif is_shear_moment(moment, method.dim):
+            assert relax_info.relaxation_rate == sp.Symbol('omega')
+        else:
+            assert relax_info.relaxation_rate == 1
--- a/lbmpy_tests/test_scaling_widget.py
+++ b/lbmpy_tests/test_scaling_widget.py
--- a/lbmpy_tests/test_shear_flow.py
+++ b/lbmpy_tests/test_shear_flow.py
 """
-Test shear flow velocity and pressureagainst analytical solutions
+Test shear flow velocity and pressure against analytical solutions
 """


@@ -67,7 +67,7 @@ def test_shear_flow(target, stencil_name, zero_centered):

    # Cuda
    if target == ps.Target.GPU:
-        pytest.importorskip("pycuda")
+        pytest.importorskip("cupy")

    # LB parameters
    stencil = LBStencil(stencil_name)
@@ -205,7 +205,7 @@ def test_shear_flow(target, stencil_name, zero_centered):
    p_expected = rho_0 * np.identity(dh.dim) / 3.0 + dynamic_viscosity * shear_rate / correction_factor * (
        np.outer(shear_plane_normal, shear_direction) + np.transpose(np.outer(shear_plane_normal, shear_direction)))

-    # Sustract the tensorproduct of the velosity to get the pressure
+    # Subtract the tensor product of the velocity to get the pressure
    pressure_profile[:, 0, 0] -= rho_0 * profile[:, 0]**2
    
    np.testing.assert_allclose(profile[:, 0], expected[1:-1], atol=1E-9)

--- a/lbmpy_tests/test_shifted_transforms.py
+++ b/lbmpy_tests/test_shifted_transforms.py
--- a/lbmpy_tests/test_simple_equilibrium_conservation.py
+++ b/lbmpy_tests/test_simple_equilibrium_conservation.py
 import numpy as np

-from pystencils import Backend, Target, CreateKernelConfig
+from pystencils import Target, CreateKernelConfig
 from lbmpy.creationfunctions import create_lb_function, LBMConfig
 from lbmpy.enums import Method, Stencil
 from lbmpy.stencils import LBStencil
 import pytest


-@pytest.mark.parametrize('setup', [(Target.CPU, Backend.C), (Target.GPU, Backend.CUDA)])
+@pytest.mark.parametrize('target', [Target.CPU, Target.GPU])
 @pytest.mark.parametrize('method', [Method.SRT, Method.MRT, Method.CENTRAL_MOMENT, Method.CUMULANT])
 @pytest.mark.parametrize('compressible', [False, True])
 @pytest.mark.parametrize('delta_equilibrium', [False, True])
-def test_simple_equilibrium_conservation(setup, method, compressible, delta_equilibrium):
-    if setup[0] == Target.GPU:
-        pytest.importorskip("pycuda")
+def test_simple_equilibrium_conservation(target, method, compressible, delta_equilibrium):
+    if target == Target.GPU:
+        pytest.importorskip("cupy")

    if method == Method.SRT and not delta_equilibrium:
        pytest.skip()
@@ -23,18 +23,18 @@ def test_simple_equilibrium_conservation(setup, method, compressible, delta_equi

    src = np.zeros((3, 3, 9))
    dst = np.zeros_like(src)
-    config = CreateKernelConfig(target=setup[0], backend=setup[1])
+    config = CreateKernelConfig(target=target)
    lbm_config = LBMConfig(stencil=LBStencil(Stencil.D2Q9), method=method,
                           relaxation_rate=1.8, compressible=compressible,
                           zero_centered=True, delta_equilibrium=delta_equilibrium)
    func = create_lb_function(lbm_config=lbm_config, config=config)

-    if setup[0] == Target.GPU:
-        import pycuda.gpuarray as gpuarray
-        gpu_src, gpu_dst = gpuarray.to_gpu(src), gpuarray.to_gpu(dst)
+    if target == Target.GPU:
+        import cupy
+        gpu_src, gpu_dst = cupy.asarray(src), cupy.asarray(dst)
        func(src=gpu_src, dst=gpu_dst)
-        gpu_src.get(src)
-        gpu_dst.get(dst)
+        src[:] = gpu_src.get()
+        dst[:] = gpu_dst.get()
    else:
        func(src=src, dst=dst)


--- a/lbmpy_tests/test_smagorinsky.py
+++ b/lbmpy_tests/test_smagorinsky.py
 import sympy as sp
-from pystencils.simp.subexpression_insertion import insert_constants
+from pystencils.simp import insert_constants

-from lbmpy import create_lb_collision_rule, LBMConfig, LBStencil, Stencil, Method
+from lbmpy import create_lb_collision_rule, LBMConfig, LBStencil, Stencil, Method, SubgridScaleModel


 def test_smagorinsky_with_constant_omega():
    stencil = LBStencil(Stencil.D2Q9)

-    config = LBMConfig(stencil=stencil, method=Method.SRT, smagorinsky=True, relaxation_rate=sp.Symbol("omega"))
+    config = LBMConfig(stencil=stencil, method=Method.SRT, subgrid_scale_model=SubgridScaleModel.SMAGORINSKY,
+                       relaxation_rate=sp.Symbol("omega"))
    collision_rule = create_lb_collision_rule(lbm_config=config)

-    config = LBMConfig(stencil=stencil, method=Method.SRT, smagorinsky=True, relaxation_rate=1.5)
+    config = LBMConfig(stencil=stencil, method=Method.SRT, subgrid_scale_model=SubgridScaleModel.SMAGORINSKY,
+                       relaxation_rate=1.5)
    collision_rule2 = create_lb_collision_rule(lbm_config=config)

    collision_rule = collision_rule.subs({sp.Symbol("omega"): 1.5})
    collision_rule = insert_constants(collision_rule)

-    assert collision_rule == collision_rule2
\ No newline at end of file
+    assert collision_rule == collision_rule2
--- a/tests/test_sparse_lbm.ipynb
+++ b/tests/test_sparse_lbm.ipynb
+%% Cell type:code id: tags:
+
+``` python
+from lbmpy.session import *
+from lbmpy.sparse import *
+from pystencils.field import FieldType
+
+import pystencils as ps
+```
+
+%% Cell type:code id: tags:
+
+``` python
+try:
+    import cupy
+except ImportError:
+    cupy = None
+    target = ps.Target.CPU
+    print('No cupy installed')
+
+if cupy:
+    target = ps.Target.GPU
+```
+
+%% Output
+
+    No cupy installed
+
+%% Cell type:markdown id: tags:
+
+# Sparse (list based) LBM
+
+%% Cell type:code id: tags:
+
+``` python
+domain_size = (20, 20)
+omega = 1.8
+target = ps.Target.CPU
+
+ghost_layers = 1
+arr_size = tuple(ds + 2 * ghost_layers for ds in domain_size)
+lid_velocity = 0.01
+force = 1e-6
+
+channel = True
+if channel:
+    lbm_config = LBMConfig(stencil=LBStencil(Stencil.D2Q9), relaxation_rate=omega,
+                           compressible=False, force=(force, 0))
+else:
+    lbm_config = LBMConfig(stencil=LBStencil(Stencil.D2Q9), relaxation_rate=omega, compressible=False)
+
+
+method = create_lb_method(lbm_config=lbm_config)
+```
+
+%% Cell type:code id: tags:
+
+``` python
+ubb = UBB( (lid_velocity, 0) )
+noslip = NoSlip()
+flags = {
+    'fluid': 1,
+    noslip: 2,
+    ubb: 4,
+}
+flag_arr = np.zeros(arr_size, dtype=np.uint16)
+flag_arr.fill(flags['fluid'])
+
+if channel:
+    flag_arr[0, :] = 0
+    flag_arr[-1, :] = 0
+    flag_arr[:, 0] = flags[noslip]
+    flag_arr[:, -1] = flags[noslip]
+else:
+    flag_arr[:, -1] = flags[ubb]
+    flag_arr[:, 0] = flags[noslip]
+    flag_arr[0, :] = flags[noslip]
+    flag_arr[-1, :] = flags[noslip]
+
+plt.scalar_field(flag_arr)
+plt.colorbar();
+```
+
+%% Output
+
+
+
+%% Cell type:markdown id: tags:
+
+### Mappings
+
+%% Cell type:code id: tags:
+
+``` python
+mapping = SparseLbMapper(method.stencil, flag_arr, flags['fluid'], flags[noslip], flags[ubb])
+index_arr = mapping.create_index_array(ghost_layers)
+
+# Arrays
+#index_arr = index_arr_linear.reshape([len(method.stencil), mapping.num_fluid_cells])
+#index_arr = index_arr.swapaxes(0, 1)
+
+pdf_arr = np.empty((len(mapping), len(method.stencil)), order='f')
+pdf_arr_tmp = np.empty_like(pdf_arr)
+
+vel_arr = np.ones([mapping.num_fluid_cells, method.dim], order='f')
+```
+
+%% Cell type:code id: tags:
+
+``` python
+pdf_field, pdf_field_tmp, vel_field = ps.fields("f(9), d(9), u(2): [1D]",
+                                                #f=pdf_arr[:mapping.num_fluid_cells],
+                                                #d=pdf_arr_tmp[:mapping.num_fluid_cells],
+                                                #u=vel_arr
+                                               )
+pdf_field.field_type = FieldType.CUSTOM
+pdf_field.pdf_field_tmp = FieldType.CUSTOM
+```
+
+%% Cell type:markdown id: tags:
+
+### Macroscopic quantities
+
+%% Cell type:code id: tags:
+
+``` python
+cqc = method.conserved_quantity_computation
+inp_eqs = cqc.equilibrium_input_equations_from_init_values(force_substitution=False)
+setter_eqs = method.get_equilibrium(conserved_quantity_equations=inp_eqs)
+setter_eqs = setter_eqs.new_with_substitutions({sym: pdf_field(i)
+                                                for i, sym in enumerate(method.post_collision_pdf_symbols)})
+
+config = ps.CreateKernelConfig(ghost_layers=((0, 0),))
+kernel_initialize = ps.create_kernel(setter_eqs, config=config).compile()
+
+def init():
+    kernel_initialize(f=pdf_arr[:mapping.num_fluid_cells])
+init()
+
+
+getter_eqs = cqc.output_equations_from_pdfs(pdf_field.center_vector,
+                                            {'velocity': vel_field})
+kernel_compute_u = ps.create_kernel(getter_eqs, config=config).compile()
+
+def get_velocity(arr=pdf_arr):
+    fluid_cell_arr = mapping.coordinates
+    kernel_compute_u(f=pdf_arr[:mapping.num_fluid_cells], u=vel_arr)
+    full_velocity_arr = np.zeros(flag_arr.shape + (2,), dtype=np.float64)
+    full_velocity_arr.fill(np.nan)
+    arr = fluid_cell_arr[:mapping.num_fluid_cells]
+    full_velocity_arr[arr['x'], arr['y']] = vel_arr
+    return full_velocity_arr[1:-1, 1:-1]
+```
+
+%% Cell type:markdown id: tags:
+
+### Stream Collide Kernel
+
+%% Cell type:code id: tags:
+
+``` python
+#index_field = ps.Field.create_from_numpy_array("idx", index_arr, index_dimensions=1)
+index_field = ps.Field.create_generic("idx", spatial_dimensions=1, index_dimensions=1, dtype=index_arr.dtype)
+
+collision_rule = method.get_collision_rule()
+
+Q = len(method.stencil)
+symbol_subs = {sym: pdf_field.absolute_access((index_field(i-1),),())
+               for i, sym in enumerate(method.pre_collision_pdf_symbols)}
+
+symbol_subs.update({sym: pdf_field_tmp(i) for i, sym in enumerate(method.post_collision_pdf_symbols)})
+
+symbol_subs[method.pre_collision_pdf_symbols[0]] = pdf_field(0) # special case for center
+symbol_subs
+```
+
+%% Output
+
+
+    $\displaystyle \left\{ d_{0} : {d}_{0}^{0}, \  d_{1} : {d}_{0}^{1}, \  d_{2} : {d}_{0}^{2}, \  d_{3} : {d}_{0}^{3}, \  d_{4} : {d}_{0}^{4}, \  d_{5} : {d}_{0}^{5}, \  d_{6} : {d}_{0}^{6}, \  d_{7} : {d}_{0}^{7}, \  d_{8} : {d}_{0}^{8}, \  f_{0} : {f}_{0}^{0}, \  f_{1} : {f}_{\mathbf{idx}_{0}^{0}}^{0}, \  f_{2} : {f}_{\mathbf{idx}_{0}^{1}}^{0}, \  f_{3} : {f}_{\mathbf{idx}_{0}^{2}}^{0}, \  f_{4} : {f}_{\mathbf{idx}_{0}^{3}}^{0}, \  f_{5} : {f}_{\mathbf{idx}_{0}^{4}}^{0}, \  f_{6} : {f}_{\mathbf{idx}_{0}^{5}}^{0}, \  f_{7} : {f}_{\mathbf{idx}_{0}^{6}}^{0}, \  f_{8} : {f}_{\mathbf{idx}_{0}^{7}}^{0}\right\}$
+    {d₀: d_C__0, d₁: d_C__1, d₂: d_C__2, d₃: d_C__3, d₄: d_C__4, d₅: d_C__5, d₆: d
+    _C__6, d₇: d_C__7, d₈: d_C__8, f₀: f_C__0, f₁: f_90e8a363d81d, f₂: f_84cf209b1
+    9e8, f₃: f_20435f2dfd8e, f₄: f_1b97e5aad5c8, f₅: f_b6addb4a5f44, f₆: f_bb89351
+    3e9ec, f₇: f_7d89efe28aa0, f₈: f_e8b951c1cd37}
+
+%% Cell type:code id: tags:
+
+``` python
+collision_rule = method.get_collision_rule()
+update_rule = collision_rule.new_with_substitutions(symbol_subs)
+
+from lbmpy._compat import IS_PYSTENCILS_2
+
+if IS_PYSTENCILS_2:
+    config = ps.CreateKernelConfig(ghost_layers=((0, 0),), target=target, index_dtype="uint32")
+else:
+    config = ps.CreateKernelConfig(ghost_layers=((0, 0),), target=target)
+
+
+kernel_stream_collide = ps.create_kernel(update_rule, config=config).compile()
+```
+
+%% Cell type:markdown id: tags:
+
+### Boundary Kernels
+
+%% Cell type:code id: tags:
+
+``` python
+if not channel:
+    if target == ps.Target.GPU:
+        raise NotImplementedError("UBB on GPU not working yet")
+
+    ubb_mapper = SparseLbBoundaryMapper(ubb, method, pdf_field)
+    #TODO the following line is wrong: kernel contains accesses to index_field and pdf_field which have
+    #different size: a correct kernel comes out when by change the shape is taken from index field,
+    # when taken from pdf field, a wrong kernel is generated
+    ubb_kernel = ps.create_kernel(ubb_mapper.assignments(), ghost_layers=0).compile()
+    ubb_idx_arr = ubb_mapper.create_index_arr(mapping, flags[ubb])
+    ps.show_code(ubb_kernel.ast)
+    def handle_ubb():
+        ubb_kernel(indexField=ubb_idx_arr, f=pdf_arr[:mapping.num_fluid_cells])
+else:
+    def handle_ubb():
+        pass
+```
+
+%% Cell type:markdown id: tags:
+
+### Time Loop
+
+%% Cell type:code id: tags:
+
+``` python
+def time_step():
+    global pdf_arr, pdf_arr_tmp, index_arr
+    handle_ubb()
+    if target == ps.Target.GPU:
+        gpu_pdf_arr = cupy.asarray(pdf_arr)
+        gpu_pdf_arr_tmp = cupy.asarray(pdf_arr_tmp)
+        gpu_index_arr = cupy.asarray(index_arr)
+
+        kernel_stream_collide(f=gpu_pdf_arr[:mapping.num_fluid_cells],
+                          d=gpu_pdf_arr_tmp[:mapping.num_fluid_cells],
+                          idx=gpu_index_arr)
+
+        pdf_arr[:] = gpu_pdf_arr.get()
+        pdf_arr_tmp[:] = gpu_pdf_arr_tmp.get()
+        index_arr[:] = gpu_index_arr.get()
+    else:
+        kernel_stream_collide(f=pdf_arr[:mapping.num_fluid_cells],
+                              d=pdf_arr_tmp[:mapping.num_fluid_cells],
+                              idx=index_arr)
+    pdf_arr_tmp, pdf_arr = pdf_arr, pdf_arr_tmp
+
+def run(steps=100):
+    for t in range(steps):
+        time_step()
+    return get_velocity()
+```
+
+%% Cell type:code id: tags:
+
+``` python
+init()
+```
+
+%% Cell type:code id: tags:
+
+``` python
+init()
+result = run(100)
+plt.vector_field(result, step=1);
+```
+
+%% Output
+
+
+
+%% Cell type:markdown id: tags:
+
+### Check against reference
+
+%% Cell type:code id: tags:
+
+``` python
+if channel:
+    reference = create_channel(domain_size, force=force, lb_method=method)
+else:
+    reference = create_lid_driven_cavity(domain_size, relaxation_rate=omega, lid_velocity=lid_velocity,
+                                         compressible=False)
+reference.run(100)
+```
+
+%% Cell type:code id: tags:
+
+``` python
+np.testing.assert_almost_equal(reference.velocity[:, :], result)
+```
+%% Cell type:code id: tags:
+
+``` python
+from lbmpy.session import *
+from lbmpy.sparse import *
+from pystencils.field import FieldType
+
+import pystencils as ps
+```
+
+%% Cell type:code id: tags:
+
+``` python
+try:
+    import cupy
+except ImportError:
+    cupy = None
+    target = ps.Target.CPU
+    print('No cupy installed')
+
+if cupy:
+    target = ps.Target.GPU
+```
+
+%% Output
+
+    No cupy installed
+
+%% Cell type:markdown id: tags:
+
+# Sparse (list based) LBM
+
+%% Cell type:code id: tags:
+
+``` python
+domain_size = (20, 20)
+omega = 1.8
+target = ps.Target.CPU
+
+ghost_layers = 1
+arr_size = tuple(ds + 2 * ghost_layers for ds in domain_size)
+lid_velocity = 0.01
+force = 1e-6
+
+channel = True
+if channel:
+    lbm_config = LBMConfig(stencil=LBStencil(Stencil.D2Q9), relaxation_rate=omega,
+                           compressible=False, force=(force, 0))
+else:
+    lbm_config = LBMConfig(stencil=LBStencil(Stencil.D2Q9), relaxation_rate=omega, compressible=False)
+
+
+method = create_lb_method(lbm_config=lbm_config)
+```
+
+%% Cell type:code id: tags:
+
+``` python
+ubb = UBB( (lid_velocity, 0) )
+noslip = NoSlip()
+flags = {
+    'fluid': 1,
+    noslip: 2,
+    ubb: 4,
+}
+flag_arr = np.zeros(arr_size, dtype=np.uint16)
+flag_arr.fill(flags['fluid'])
+
+if channel:
+    flag_arr[0, :] = 0
+    flag_arr[-1, :] = 0
+    flag_arr[:, 0] = flags[noslip]
+    flag_arr[:, -1] = flags[noslip]
+else:
+    flag_arr[:, -1] = flags[ubb]
+    flag_arr[:, 0] = flags[noslip]
+    flag_arr[0, :] = flags[noslip]
+    flag_arr[-1, :] = flags[noslip]
+
+plt.scalar_field(flag_arr)
+plt.colorbar();
+```
+
+%% Output
+
+
+
+%% Cell type:markdown id: tags:
+
+### Mappings
+
+%% Cell type:code id: tags:
+
+``` python
+mapping = SparseLbMapper(method.stencil, flag_arr, flags['fluid'], flags[noslip], flags[ubb])
+index_arr = mapping.create_index_array(ghost_layers)
+
+# Arrays
+#index_arr = index_arr_linear.reshape([len(method.stencil), mapping.num_fluid_cells])
+#index_arr = index_arr.swapaxes(0, 1)
+
+pdf_arr = np.empty((len(mapping), len(method.stencil)), order='f')
+pdf_arr_tmp = np.empty_like(pdf_arr)
+
+vel_arr = np.ones([mapping.num_fluid_cells, method.dim], order='f')
+```
+
+%% Cell type:code id: tags:
+
+``` python
+pdf_field, pdf_field_tmp, vel_field = ps.fields("f(9), d(9), u(2): [1D]",
+                                                #f=pdf_arr[:mapping.num_fluid_cells],
+                                                #d=pdf_arr_tmp[:mapping.num_fluid_cells],
+                                                #u=vel_arr
+                                               )
+pdf_field.field_type = FieldType.CUSTOM
+pdf_field.pdf_field_tmp = FieldType.CUSTOM
+```
+
+%% Cell type:markdown id: tags:
+
+### Macroscopic quantities
+
+%% Cell type:code id: tags:
+
+``` python
+cqc = method.conserved_quantity_computation
+inp_eqs = cqc.equilibrium_input_equations_from_init_values(force_substitution=False)
+setter_eqs = method.get_equilibrium(conserved_quantity_equations=inp_eqs)
+setter_eqs = setter_eqs.new_with_substitutions({sym: pdf_field(i)
+                                                for i, sym in enumerate(method.post_collision_pdf_symbols)})
+
+config = ps.CreateKernelConfig(ghost_layers=((0, 0),))
+kernel_initialize = ps.create_kernel(setter_eqs, config=config).compile()
+
+def init():
+    kernel_initialize(f=pdf_arr[:mapping.num_fluid_cells])
+init()
+
+
+getter_eqs = cqc.output_equations_from_pdfs(pdf_field.center_vector,
+                                            {'velocity': vel_field})
+kernel_compute_u = ps.create_kernel(getter_eqs, config=config).compile()
+
+def get_velocity(arr=pdf_arr):
+    fluid_cell_arr = mapping.coordinates
+    kernel_compute_u(f=pdf_arr[:mapping.num_fluid_cells], u=vel_arr)
+    full_velocity_arr = np.zeros(flag_arr.shape + (2,), dtype=np.float64)
+    full_velocity_arr.fill(np.nan)
+    arr = fluid_cell_arr[:mapping.num_fluid_cells]
+    full_velocity_arr[arr['x'], arr['y']] = vel_arr
+    return full_velocity_arr[1:-1, 1:-1]
+```
+
+%% Cell type:markdown id: tags:
+
+### Stream Collide Kernel
+
+%% Cell type:code id: tags:
+
+``` python
+#index_field = ps.Field.create_from_numpy_array("idx", index_arr, index_dimensions=1)
+index_field = ps.Field.create_generic("idx", spatial_dimensions=1, index_dimensions=1, dtype=index_arr.dtype)
+
+collision_rule = method.get_collision_rule()
+
+Q = len(method.stencil)
+symbol_subs = {sym: pdf_field.absolute_access((index_field(i-1),),())
+               for i, sym in enumerate(method.pre_collision_pdf_symbols)}
+
+symbol_subs.update({sym: pdf_field_tmp(i) for i, sym in enumerate(method.post_collision_pdf_symbols)})
+
+symbol_subs[method.pre_collision_pdf_symbols[0]] = pdf_field(0) # special case for center
+symbol_subs
+```
+
+%% Output
+
+
+    $\displaystyle \left\{ d_{0} : {d}_{0}^{0}, \  d_{1} : {d}_{0}^{1}, \  d_{2} : {d}_{0}^{2}, \  d_{3} : {d}_{0}^{3}, \  d_{4} : {d}_{0}^{4}, \  d_{5} : {d}_{0}^{5}, \  d_{6} : {d}_{0}^{6}, \  d_{7} : {d}_{0}^{7}, \  d_{8} : {d}_{0}^{8}, \  f_{0} : {f}_{0}^{0}, \  f_{1} : {f}_{\mathbf{idx}_{0}^{0}}^{0}, \  f_{2} : {f}_{\mathbf{idx}_{0}^{1}}^{0}, \  f_{3} : {f}_{\mathbf{idx}_{0}^{2}}^{0}, \  f_{4} : {f}_{\mathbf{idx}_{0}^{3}}^{0}, \  f_{5} : {f}_{\mathbf{idx}_{0}^{4}}^{0}, \  f_{6} : {f}_{\mathbf{idx}_{0}^{5}}^{0}, \  f_{7} : {f}_{\mathbf{idx}_{0}^{6}}^{0}, \  f_{8} : {f}_{\mathbf{idx}_{0}^{7}}^{0}\right\}$
+    {d₀: d_C__0, d₁: d_C__1, d₂: d_C__2, d₃: d_C__3, d₄: d_C__4, d₅: d_C__5, d₆: d
+    _C__6, d₇: d_C__7, d₈: d_C__8, f₀: f_C__0, f₁: f_90e8a363d81d, f₂: f_84cf209b1
+    9e8, f₃: f_20435f2dfd8e, f₄: f_1b97e5aad5c8, f₅: f_b6addb4a5f44, f₆: f_bb89351
+    3e9ec, f₇: f_7d89efe28aa0, f₈: f_e8b951c1cd37}
+
+%% Cell type:code id: tags:
+
+``` python
+collision_rule = method.get_collision_rule()
+update_rule = collision_rule.new_with_substitutions(symbol_subs)
+
+from lbmpy._compat import IS_PYSTENCILS_2
+
+if IS_PYSTENCILS_2:
+    config = ps.CreateKernelConfig(ghost_layers=((0, 0),), target=target, index_dtype="uint32")
+else:
+    config = ps.CreateKernelConfig(ghost_layers=((0, 0),), target=target)
+
+
+kernel_stream_collide = ps.create_kernel(update_rule, config=config).compile()
+```
+
+%% Cell type:markdown id: tags:
+
+### Boundary Kernels
+
+%% Cell type:code id: tags:
+
+``` python
+if not channel:
+    if target == ps.Target.GPU:
+        raise NotImplementedError("UBB on GPU not working yet")
+
+    ubb_mapper = SparseLbBoundaryMapper(ubb, method, pdf_field)
+    #TODO the following line is wrong: kernel contains accesses to index_field and pdf_field which have
+    #different size: a correct kernel comes out when by change the shape is taken from index field,
+    # when taken from pdf field, a wrong kernel is generated
+    ubb_kernel = ps.create_kernel(ubb_mapper.assignments(), ghost_layers=0).compile()
+    ubb_idx_arr = ubb_mapper.create_index_arr(mapping, flags[ubb])
+    ps.show_code(ubb_kernel.ast)
+    def handle_ubb():
+        ubb_kernel(indexField=ubb_idx_arr, f=pdf_arr[:mapping.num_fluid_cells])
+else:
+    def handle_ubb():
+        pass
+```
+
+%% Cell type:markdown id: tags:
+
+### Time Loop
+
+%% Cell type:code id: tags:
+
+``` python
+def time_step():
+    global pdf_arr, pdf_arr_tmp, index_arr
+    handle_ubb()
+    if target == ps.Target.GPU:
+        gpu_pdf_arr = cupy.asarray(pdf_arr)
+        gpu_pdf_arr_tmp = cupy.asarray(pdf_arr_tmp)
+        gpu_index_arr = cupy.asarray(index_arr)
+
+        kernel_stream_collide(f=gpu_pdf_arr[:mapping.num_fluid_cells],
+                          d=gpu_pdf_arr_tmp[:mapping.num_fluid_cells],
+                          idx=gpu_index_arr)
+
+        pdf_arr[:] = gpu_pdf_arr.get()
+        pdf_arr_tmp[:] = gpu_pdf_arr_tmp.get()
+        index_arr[:] = gpu_index_arr.get()
+    else:
+        kernel_stream_collide(f=pdf_arr[:mapping.num_fluid_cells],
+                              d=pdf_arr_tmp[:mapping.num_fluid_cells],
+                              idx=index_arr)
+    pdf_arr_tmp, pdf_arr = pdf_arr, pdf_arr_tmp
+
+def run(steps=100):
+    for t in range(steps):
+        time_step()
+    return get_velocity()
+```
+
+%% Cell type:code id: tags:
+
+``` python
+init()
+```
+
+%% Cell type:code id: tags:
+
+``` python
+init()
+result = run(100)
+plt.vector_field(result, step=1);
+```
+
+%% Output
+
+
+
+%% Cell type:markdown id: tags:
+
+### Check against reference
+
+%% Cell type:code id: tags:
+
+``` python
+if channel:
+    reference = create_channel(domain_size, force=force, lb_method=method)
+else:
+    reference = create_lid_driven_cavity(domain_size, relaxation_rate=omega, lid_velocity=lid_velocity,
+                                         compressible=False)
+reference.run(100)
+```
+
+%% Cell type:code id: tags:
+
+``` python
+np.testing.assert_almost_equal(reference.velocity[:, :], result)
+```
--- a/lbmpy_tests/test_split_optimization.py
+++ b/lbmpy_tests/test_split_optimization.py
@@ -5,6 +5,8 @@ from lbmpy.creationfunctions import create_lb_ast, LBMConfig, LBMOptimisation
 from lbmpy.enums import ForceModel, Method, Stencil
 from lbmpy.scenarios import create_lid_driven_cavity
 from lbmpy.stencils import LBStencil
+from lbmpy._compat import IS_PYSTENCILS_2
+
 from pystencils.sympyextensions import count_operations_in_ast
 from sympy.core.cache import clear_cache

@@ -12,6 +14,7 @@ from sympy.core.cache import clear_cache
 @pytest.mark.parametrize('stencil', [Stencil.D2Q9, Stencil.D3Q19])
 @pytest.mark.parametrize('compressible', [True, False])
 @pytest.mark.parametrize('method', [Method.SRT, Method.TRT])
+@pytest.mark.xfail(IS_PYSTENCILS_2, reason="Loop splitting is not available yet")
 def test_split_number_of_operations(stencil, compressible, method):
    # For the following configurations the number of operations for splitted and un-splitted version are
    # exactly equal. This is not true for D3Q15 and D3Q27 because some sub-expressions are computed in multiple
@@ -36,6 +39,7 @@ def test_split_number_of_operations(stencil, compressible, method):
 @pytest.mark.parametrize('method', [Method.SRT, Method.TRT])
 @pytest.mark.parametrize('force', [(0, 0, 0), (1e-6, 1e-7, 2e-6)])
 @pytest.mark.longrun
+@pytest.mark.xfail(IS_PYSTENCILS_2, reason="Loop splitting is not available yet")
 def test_equivalence(stencil, compressible, method, force):
    relaxation_rates = [1.8, 1.7, 1.0, 1.0, 1.0, 1.0]
    stencil = LBStencil(stencil)
@@ -57,6 +61,7 @@ def test_equivalence(stencil, compressible, method, force):


 @pytest.mark.parametrize('setup', [(Stencil.D2Q9, True, Method.SRT, 1e-7), (Stencil.D3Q19, False, Method.MRT, 0)])
+@pytest.mark.xfail(IS_PYSTENCILS_2, reason="Loop splitting is not available yet")
 def test_equivalence_short(setup):
    relaxation_rates = [1.8, 1.7, 1.0, 1.0, 1.0, 1.0]
    stencil = LBStencil(setup[0])

--- a/lbmpy_tests/test_srt_trt_simplifications.py
+++ b/lbmpy_tests/test_srt_trt_simplifications.py
--- a/lbmpy_tests/test_stencils.py
+++ b/lbmpy_tests/test_stencils.py
@@ -21,12 +21,17 @@ def get_all_stencils():
        LBStencil(Stencil.D3Q27, 'walberla'),

        LBStencil(Stencil.D2Q9, 'counterclockwise'),
-
        LBStencil(Stencil.D2Q9, 'braunschweig'),
-        LBStencil(Stencil.D3Q19, 'braunschweig'),
+        LBStencil(Stencil.D2Q9, 'uk'),
+

-        LBStencil(Stencil.D3Q27, 'premnath'),
+        LBStencil(Stencil.D3Q15, 'premnath'),
+        LBStencil(Stencil.D3Q15, 'fakhari'),
+
+        LBStencil(Stencil.D3Q19, 'braunschweig'),
+        LBStencil(Stencil.D3Q19, 'premnath'),

+        LBStencil(Stencil.D3Q27, "premnath"),
        LBStencil(Stencil.D3Q27, "fakhari"),
    ]


--- a/tests/test_stokes_setup.ipynb
+++ b/tests/test_stokes_setup.ipynb
--- a/lbmpy_tests/test_update_kernel.py
+++ b/lbmpy_tests/test_update_kernel.py
--- a/lbmpy_tests/test_vectorization.py
+++ b/lbmpy_tests/test_vectorization.py
 import numpy as np
 import pytest
+from dataclasses import replace

 import pystencils as ps
-from pystencils.backends.simd_instruction_sets import get_supported_instruction_sets
+from lbmpy._compat import get_supported_instruction_sets, IS_PYSTENCILS_2
 from lbmpy.scenarios import create_lid_driven_cavity
 from lbmpy.creationfunctions import LBMConfig, LBMOptimisation

+vector_isas = get_supported_instruction_sets()
+if vector_isas is None:
+    vector_isas = []

-@pytest.mark.skipif(not get_supported_instruction_sets(), reason='cannot detect CPU instruction set')
+
+@pytest.mark.skipif(not vector_isas, reason="cannot detect CPU instruction set")
 def test_lbm_vectorization_short():
    print("Computing reference solutions")
    size1 = (64, 32)
@@ -17,29 +22,45 @@ def test_lbm_vectorization_short():
    ldc1_ref.run(10)

    lbm_config = LBMConfig(relaxation_rate=relaxation_rate)
-    config = ps.CreateKernelConfig(cpu_vectorize_info={'instruction_set': get_supported_instruction_sets()[-1],
-                                                       'assume_aligned': True,
-                                                       'nontemporal': True,
-                                                       'assume_inner_stride_one': True,
-                                                       'assume_sufficient_line_padding': False,
-                                                       })
-    ldc1 = create_lid_driven_cavity(size1, lbm_config=lbm_config, config=config,
-                                    fixed_loop_sizes=False)
+    config = ps.CreateKernelConfig(
+        cpu_vectorize_info={
+            "instruction_set": get_supported_instruction_sets()[-1],
+            "assume_aligned": True,
+            "nontemporal": True,
+            "assume_inner_stride_one": True,
+            "assume_sufficient_line_padding": False,
+        }
+    )
+    ldc1 = create_lid_driven_cavity(
+        size1, lbm_config=lbm_config, config=config, fixed_loop_sizes=False
+    )
    ldc1.run(10)


-@pytest.mark.parametrize('instruction_set', get_supported_instruction_sets())
-@pytest.mark.parametrize('aligned_and_padding', [[False, False], [True, False], [True, True]])
-@pytest.mark.parametrize('nontemporal', [False, True])
-@pytest.mark.parametrize('double_precision', [False, True])
-@pytest.mark.parametrize('fixed_loop_sizes', [False, True])
+@pytest.mark.skipif(not vector_isas, reason="cannot detect CPU instruction set")
+@pytest.mark.xfail(reason="Loop splitting is not available yet")
+@pytest.mark.parametrize("instruction_set", vector_isas)
+@pytest.mark.parametrize(
+    "aligned_and_padding", [[False, False], [True, False], [True, True]]
+)
+@pytest.mark.parametrize("nontemporal", [False, True])
+@pytest.mark.parametrize("double_precision", [False, True])
+@pytest.mark.parametrize("fixed_loop_sizes", [False, True])
 @pytest.mark.longrun
-def test_lbm_vectorization(instruction_set, aligned_and_padding, nontemporal, double_precision, fixed_loop_sizes):
-    vectorization_options = {'instruction_set': instruction_set,
-                             'assume_aligned': aligned_and_padding[0],
-                             'nontemporal': nontemporal,
-                             'assume_inner_stride_one': True,
-                             'assume_sufficient_line_padding': aligned_and_padding[1]}
+def test_lbm_vectorization(
+    instruction_set,
+    aligned_and_padding,
+    nontemporal,
+    double_precision,
+    fixed_loop_sizes,
+):
+    vectorization_options = {
+        "instruction_set": instruction_set,
+        "assume_aligned": aligned_and_padding[0],
+        "nontemporal": nontemporal,
+        "assume_inner_stride_one": True,
+        "assume_sufficient_line_padding": aligned_and_padding[1],
+    }
    time_steps = 100
    size1 = (64, 32)
    size2 = (666, 34)
@@ -52,20 +73,40 @@ def test_lbm_vectorization(instruction_set, aligned_and_padding, nontemporal, do
    ldc2_ref.run(time_steps)

    lbm_config = LBMConfig(relaxation_rate=relaxation_rate)
-    config = ps.CreateKernelConfig(data_type="float64" if double_precision else "float32",
-                                   default_number_float="float64" if double_precision else "float32",
-                                   cpu_vectorize_info=vectorization_options)
+    config = ps.CreateKernelConfig(
+        data_type="float64" if double_precision else "float32",
+        cpu_vectorize_info=vectorization_options,
+    )
+
+    if not IS_PYSTENCILS_2:
+        config = replace(
+            config,
+            default_number_float="float64" if double_precision else "float32",
+        )
+
    lbm_opt_split = LBMOptimisation(cse_global=True, split=True)
    lbm_opt = LBMOptimisation(cse_global=True, split=False)

-    print(f"Vectorization test, double precision {double_precision}, vectorization {vectorization_options}, "
-          f"fixed loop sizes {fixed_loop_sizes}")
-    ldc1 = create_lid_driven_cavity(size1, fixed_loop_sizes=fixed_loop_sizes,
-                                    lbm_config=lbm_config, lbm_optimisation=lbm_opt, config=config)
+    print(
+        f"Vectorization test, double precision {double_precision}, vectorization {vectorization_options}, "
+        f"fixed loop sizes {fixed_loop_sizes}"
+    )
+    ldc1 = create_lid_driven_cavity(
+        size1,
+        fixed_loop_sizes=fixed_loop_sizes,
+        lbm_config=lbm_config,
+        lbm_optimisation=lbm_opt,
+        config=config,
+    )
    ldc1.run(time_steps)
    np.testing.assert_almost_equal(ldc1_ref.velocity[:, :], ldc1.velocity[:, :])

-    ldc2 = create_lid_driven_cavity(size2, fixed_loop_sizes=fixed_loop_sizes,
-                                    lbm_config=lbm_config, lbm_optimisation=lbm_opt_split, config=config)
+    ldc2 = create_lid_driven_cavity(
+        size2,
+        fixed_loop_sizes=fixed_loop_sizes,
+        lbm_config=lbm_config,
+        lbm_optimisation=lbm_opt_split,
+        config=config,
+    )
    ldc2.run(time_steps)
    np.testing.assert_almost_equal(ldc2_ref.velocity[:, :], ldc2.velocity[:, :])
--- a/lbmpy_tests/test_version_string.py
+++ b/lbmpy_tests/test_version_string.py
No results found