Compare revisions

5600b6b6 · 5600b6b6 · 5600b6b6 · 5600b6b6 · 5600b6b6 · 5600b6b6
--- a/tests/test_field.py
+++ b/tests/test_field.py
@@ -5,21 +5,33 @@ import sympy as sp
 import pystencils as ps
 from pystencils import TypedSymbol
 from pystencils.typing import create_type
-from pystencils.field import Field, FieldType, layout_string_to_tuple
+from pystencils.field import Field, FieldType, layout_string_to_tuple, spatial_layout_string_to_tuple


 def test_field_basic():
-    f = Field.create_generic('f', spatial_dimensions=2)
+    f = Field.create_generic("f", spatial_dimensions=2)
    assert FieldType.is_generic(f)
-    assert f['E'] == f[1, 0]
-    assert f['N'] == f[0, 1]
-    assert '_' in f.center._latex('dummy')
-
-    assert f.index_to_physical(index_coordinates=sp.Matrix([0, 0]), staggered=False)[0] == 0
-    assert f.index_to_physical(index_coordinates=sp.Matrix([0, 0]), staggered=False)[1] == 0
-
-    assert f.physical_to_index(physical_coordinates=sp.Matrix([0, 0]), staggered=False)[0] == 0
-    assert f.physical_to_index(physical_coordinates=sp.Matrix([0, 0]), staggered=False)[1] == 0
+    assert f["E"] == f[1, 0]
+    assert f["N"] == f[0, 1]
+    assert "_" in f.center._latex("dummy")
+
+    assert (
+        f.index_to_physical(index_coordinates=sp.Matrix([0, 0]), staggered=False)[0]
+        == 0
+    )
+    assert (
+        f.index_to_physical(index_coordinates=sp.Matrix([0, 0]), staggered=False)[1]
+        == 0
+    )
+
+    assert (
+        f.physical_to_index(physical_coordinates=sp.Matrix([0, 0]), staggered=False)[0]
+        == 0
+    )
+    assert (
+        f.physical_to_index(physical_coordinates=sp.Matrix([0, 0]), staggered=False)[1]
+        == 0
+    )

    f1 = f.new_field_with_different_name("f1")
    assert f1.ndim == f.ndim
@@ -28,7 +40,7 @@ def test_field_basic():
    fixed = ps.fields("f(5, 5) : double[20, 20]")
    assert fixed.neighbor_vector((1, 1)).shape == (5, 5)

-    f = Field.create_fixed_size('f', (10, 10), strides=(80, 8), dtype=np.float64)
+    f = Field.create_fixed_size("f", (10, 10), strides=(80, 8), dtype=np.float64)
    assert f.spatial_strides == (10, 1)
    assert f.index_strides == ()
    assert f.center_vector == sp.Matrix([f.center])
@@ -37,20 +49,21 @@ def test_field_basic():
    assert f1.ndim == f.ndim
    assert f1.values_per_cell() == f.values_per_cell()

-    f = Field.create_fixed_size('f', (8, 8, 2, 2), index_dimensions=2)
-    assert f.center_vector == sp.Matrix([[f(0, 0), f(0, 1)],
-                                         [f(1, 0), f(1, 1)]])
+    f = Field.create_fixed_size("f", (8, 8, 2, 2), index_dimensions=2)
+    assert f.center_vector == sp.Matrix([[f(0, 0), f(0, 1)], [f(1, 0), f(1, 1)]])
    field_access = f[1, 1]
    assert field_access.nr_of_coordinates == 2
-    assert field_access.offset_name == 'NE'
+    assert field_access.offset_name == "NE"
    neighbor = field_access.neighbor(coord_id=0, offset=-2)
    assert neighbor.offsets == (-1, 1)
-    assert '_' in neighbor._latex('dummy')
+    assert "_" in neighbor._latex("dummy")

-    f = Field.create_fixed_size('f', (8, 8, 2, 2, 2), index_dimensions=3)
-    assert f.center_vector == sp.Array([[[f(i, j, k) for k in range(2)] for j in range(2)] for i in range(2)])
+    f = Field.create_fixed_size("f", (8, 8, 2, 2, 2), index_dimensions=3)
+    assert f.center_vector == sp.Array(
+        [[[f(i, j, k) for k in range(2)] for j in range(2)] for i in range(2)]
+    )

-    f = Field.create_generic('f', spatial_dimensions=5, index_dimensions=2)
+    f = Field.create_generic("f", spatial_dimensions=5, index_dimensions=2)
    field_access = f[1, -1, 2, -3, 0](1, 0)
    assert field_access.offsets == (1, -1, 2, -3, 0)
    assert field_access.index == (1, 0)
@@ -60,61 +73,71 @@ def test_error_handling():
    struct_dtype = np.dtype([('a', np.int32), ('b', np.float64), ('c', np.uint32)])
    Field.create_generic('f', spatial_dimensions=2, index_dimensions=0, dtype=struct_dtype)
    with pytest.raises(ValueError) as e:
-        Field.create_generic('f', spatial_dimensions=2, index_dimensions=1, dtype=struct_dtype)
-    assert 'index dimension' in str(e.value)
+        Field.create_generic(
+            "f", spatial_dimensions=2, index_dimensions=1, dtype=struct_dtype
+        )
+    assert "index dimension" in str(e.value)

-    arr = np.array([[[(1,)*3, (2,)*3, (3,)*3]]*2], dtype=struct_dtype)
-    Field.create_from_numpy_array('f', arr, index_dimensions=0)
+    arr = np.array([[[(1,) * 3, (2,) * 3, (3,) * 3]] * 2], dtype=struct_dtype)
+    Field.create_from_numpy_array("f", arr, index_dimensions=0)
    with pytest.raises(ValueError) as e:
-        Field.create_from_numpy_array('f', arr, index_dimensions=1)
-    assert 'Structured arrays' in str(e.value)
+        Field.create_from_numpy_array("f", arr, index_dimensions=1)
+    assert "Structured arrays" in str(e.value)

    arr = np.zeros([3, 3, 3])
-    Field.create_from_numpy_array('f', arr, index_dimensions=2)
+    Field.create_from_numpy_array("f", arr, index_dimensions=2)
    with pytest.raises(ValueError) as e:
-        Field.create_from_numpy_array('f', arr, index_dimensions=3)
-    assert 'Too many' in str(e.value)
+        Field.create_from_numpy_array("f", arr, index_dimensions=3)
+    assert "Too many" in str(e.value)

-    Field.create_fixed_size('f', (3, 2, 4), index_dimensions=0, dtype=struct_dtype, layout='reverse_numpy')
+    Field.create_fixed_size(
+        "f", (3, 2, 4), index_dimensions=0, dtype=struct_dtype, layout="reverse_numpy"
+    )
    with pytest.raises(ValueError) as e:
-        Field.create_fixed_size('f', (3, 2, 4), index_dimensions=1, dtype=struct_dtype, layout='reverse_numpy')
-    assert 'Structured arrays' in str(e.value)
-
-    f = Field.create_fixed_size('f', (10, 10))
+        Field.create_fixed_size(
+            "f",
+            (3, 2, 4),
+            index_dimensions=1,
+            dtype=struct_dtype,
+            layout="reverse_numpy",
+        )
+    assert "Structured arrays" in str(e.value)
+
+    f = Field.create_fixed_size("f", (10, 10))
    with pytest.raises(ValueError) as e:
        f[1]
-    assert 'Wrong number of spatial indices' in str(e.value)
+    assert "Wrong number of spatial indices" in str(e.value)

-    f = Field.create_generic('f', spatial_dimensions=2, index_shape=(3,))
+    f = Field.create_generic("f", spatial_dimensions=2, index_shape=(3,))
    with pytest.raises(ValueError) as e:
        f(3)
-    assert 'out of bounds' in str(e.value)
+    assert "out of bounds" in str(e.value)

-    f = Field.create_fixed_size('f', (10, 10, 3, 4), index_dimensions=2)
+    f = Field.create_fixed_size("f", (10, 10, 3, 4), index_dimensions=2)
    with pytest.raises(ValueError) as e:
        f(3, 0)
-    assert 'out of bounds' in str(e.value)
+    assert "out of bounds" in str(e.value)

    with pytest.raises(ValueError) as e:
        f(1, 0)(1, 0)
-    assert 'Indexing an already indexed' in str(e.value)
+    assert "Indexing an already indexed" in str(e.value)

    with pytest.raises(ValueError) as e:
        f(1)
-    assert 'Wrong number of indices' in str(e.value)
+    assert "Wrong number of indices" in str(e.value)

    with pytest.raises(ValueError) as e:
-        Field.create_generic('f', spatial_dimensions=2, layout='wrong')
-    assert 'Unknown layout descriptor' in str(e.value)
+        Field.create_generic("f", spatial_dimensions=2, layout="wrong")
+    assert "Unknown layout descriptor" in str(e.value)

-    assert layout_string_to_tuple('fzyx', dim=4) == (3, 2, 1, 0)
+    assert layout_string_to_tuple("fzyx", dim=4) == (3, 2, 1, 0)
    with pytest.raises(ValueError) as e:
-        layout_string_to_tuple('wrong', dim=4)
-    assert 'Unknown layout descriptor' in str(e.value)
+        layout_string_to_tuple("wrong", dim=4)
+    assert "Unknown layout descriptor" in str(e.value)


 def test_decorator_scoping():
-    dst = ps.fields('dst : double[2D]')
+    dst = ps.fields("dst : double[2D]")

    def f1():
        a = sp.Symbol("a")
@@ -134,7 +157,7 @@ def test_decorator_scoping():


 def test_string_creation():
-    x, y, z = ps.fields('  x(4),    y(3,5) z : double[  3,  47]')
+    x, y, z = ps.fields("  x(4),    y(3,5) z : double[  3,  47]")
    assert x.index_shape == (4,)
    assert y.index_shape == (3, 5)
    assert z.spatial_shape == (3, 47)
@@ -142,19 +165,85 @@ def test_string_creation():

 def test_itemsize():

-    x = ps.fields('x: float32[1d]')
-    y = ps.fields('y:  float64[2d]')
-    i = ps.fields('i:  int16[1d]')
+    x = ps.fields("x: float32[1d]")
+    y = ps.fields("y:  float64[2d]")
+    i = ps.fields("i:  int16[1d]")

    assert x.itemsize == 4
    assert y.itemsize == 8
    assert i.itemsize == 2


+def test_spatial_memory_layout_descriptors():
+    assert (
+        spatial_layout_string_to_tuple("AoS", 3)
+        == spatial_layout_string_to_tuple("aos", 3)
+        == spatial_layout_string_to_tuple("ZYXF", 3)
+        == spatial_layout_string_to_tuple("zyxf", 3)
+        == (2, 1, 0)
+    )
+    assert (
+        spatial_layout_string_to_tuple("SoA", 3)
+        == spatial_layout_string_to_tuple("soa", 3)
+        == spatial_layout_string_to_tuple("FZYX", 3)
+        == spatial_layout_string_to_tuple("fzyx", 3)
+        == spatial_layout_string_to_tuple("f", 3)
+        == spatial_layout_string_to_tuple("F", 3)
+        == (2, 1, 0)
+    )
+    assert (
+        spatial_layout_string_to_tuple("c", 3)
+        == spatial_layout_string_to_tuple("C", 3)
+        == (0, 1, 2)
+    )
+
+    assert spatial_layout_string_to_tuple("C", 5) == (0, 1, 2, 3, 4)
+
+    with pytest.raises(ValueError):
+        spatial_layout_string_to_tuple("aos", -1)
+
+    with pytest.raises(ValueError):
+        spatial_layout_string_to_tuple("aos", 4)
+
+
+def test_memory_layout_descriptors():
+    assert (
+        layout_string_to_tuple("AoS", 4)
+        == layout_string_to_tuple("aos", 4)
+        == layout_string_to_tuple("ZYXF", 4)
+        == layout_string_to_tuple("zyxf", 4)
+        == (2, 1, 0, 3)
+    )
+    assert (
+        layout_string_to_tuple("SoA", 4)
+        == layout_string_to_tuple("soa", 4)
+        == layout_string_to_tuple("FZYX", 4)
+        == layout_string_to_tuple("fzyx", 4)
+        == layout_string_to_tuple("f", 4)
+        == layout_string_to_tuple("F", 4)
+        == (3, 2, 1, 0)
+    )
+    assert (
+        layout_string_to_tuple("c", 4)
+        == layout_string_to_tuple("C", 4)
+        == (0, 1, 2, 3)
+    )
+
+    assert layout_string_to_tuple("C", 5) == (0, 1, 2, 3, 4)
+
+    with pytest.raises(ValueError):
+        layout_string_to_tuple("aos", -1)
+
+    with pytest.raises(ValueError):
+        layout_string_to_tuple("aos", 5)
+
+
 def test_staggered():

    # D2Q5
-    j1, j2, j3 = ps.fields('j1(2), j2(2,2), j3(2,2,2) : double[2D]', field_type=FieldType.STAGGERED)
+    j1, j2, j3 = ps.fields(
+        "j1(2), j2(2,2), j3(2,2,2) : double[2D]", field_type=FieldType.STAGGERED
+    )

    assert j1[0, 1](1) == j1.staggered_access((0, sp.Rational(1, 2)))
    assert j1[0, 1](1) == j1.staggered_access(np.array((0, sp.Rational(1, 2))))
@@ -163,7 +252,7 @@ def test_staggered():
    assert j1[0, 1](1) == j1.staggered_access("N")
    assert j1[0, 0](1) == j1.staggered_access("S")
    assert j1.staggered_vector_access("N") == sp.Matrix([j1.staggered_access("N")])
-    assert j1.staggered_stencil_name == 'D2Q5'
+    assert j1.staggered_stencil_name == "D2Q5"

    assert j1.physical_coordinates[0] == TypedSymbol("ctr_0", create_type("int"), nonnegative=True)
    assert j1.physical_coordinates[1] == TypedSymbol("ctr_1", create_type("int"), nonnegative=True)
@@ -176,28 +265,40 @@ def test_staggered():

    assert j2[0, 1](1, 1) == j2.staggered_access((0, sp.Rational(1, 2)), 1)
    assert j2[0, 1](1, 1) == j2.staggered_access("N", 1)
-    assert j2.staggered_vector_access("N") == sp.Matrix([j2.staggered_access("N", 0), j2.staggered_access("N", 1)])
+    assert j2.staggered_vector_access("N") == sp.Matrix(
+        [j2.staggered_access("N", 0), j2.staggered_access("N", 1)]
+    )

    assert j3[0, 1](1, 1, 1) == j3.staggered_access((0, sp.Rational(1, 2)), (1, 1))
    assert j3[0, 1](1, 1, 1) == j3.staggered_access("N", (1, 1))
-    assert j3.staggered_vector_access("N") == sp.Matrix([[j3.staggered_access("N", (i, j))
-                                                        for j in range(2)] for i in range(2)])
+    assert j3.staggered_vector_access("N") == sp.Matrix(
+        [[j3.staggered_access("N", (i, j)) for j in range(2)] for i in range(2)]
+    )

    # D2Q9
-    k1, k2 = ps.fields('k1(4), k2(2) : double[2D]', field_type=FieldType.STAGGERED)
+    k1, k2 = ps.fields("k1(4), k2(2) : double[2D]", field_type=FieldType.STAGGERED)

    assert k1[1, 1](2) == k1.staggered_access("NE")
    assert k1[0, 0](2) == k1.staggered_access("SW")
    assert k1[0, 0](3) == k1.staggered_access("NW")
-    
+
    a = k1.staggered_access("NE")
-    assert a._staggered_offset(a.offsets, a.index[0]) == [sp.Rational(1, 2), sp.Rational(1, 2)]
+    assert a._staggered_offset(a.offsets, a.index[0]) == [
+        sp.Rational(1, 2),
+        sp.Rational(1, 2),
+    ]
    a = k1.staggered_access("SW")
-    assert a._staggered_offset(a.offsets, a.index[0]) == [sp.Rational(-1, 2), sp.Rational(-1, 2)]
+    assert a._staggered_offset(a.offsets, a.index[0]) == [
+        sp.Rational(-1, 2),
+        sp.Rational(-1, 2),
+    ]
    a = k1.staggered_access("NW")
-    assert a._staggered_offset(a.offsets, a.index[0]) == [sp.Rational(-1, 2), sp.Rational(1, 2)]
+    assert a._staggered_offset(a.offsets, a.index[0]) == [
+        sp.Rational(-1, 2),
+        sp.Rational(1, 2),
+    ]

    # sign reversed when using as flux field
-    r = ps.fields('r(2) : double[2D]', field_type=FieldType.STAGGERED_FLUX)
+    r = ps.fields("r(2) : double[2D]", field_type=FieldType.STAGGERED_FLUX)
    assert r[0, 0](0) == r.staggered_access("W")
    assert -r[1, 0](0) == r.staggered_access("E")
--- a/tests/test_gpu.py
+++ b/tests/test_gpu.py
 import pytest

 import numpy as np
-import cupy as cp
 import sympy as sp
+import math
 from scipy.ndimage import convolve

-from pystencils import Assignment, Field, fields, CreateKernelConfig, create_kernel, Target
+from pystencils import Assignment, Field, fields, CreateKernelConfig, create_kernel, Target, get_code_str
 from pystencils.gpu import BlockIndexing
 from pystencils.simp import sympy_cse_on_assignment_list
 from pystencils.slicing import add_ghost_layers, make_slice, remove_ghost_layers, normalize_slice

 try:
-    import cupy
-    device_numbers = range(cupy.cuda.runtime.getDeviceCount())
+    import cupy as cp
+    device_numbers = range(cp.cuda.runtime.getDeviceCount())
 except ImportError:
    device_numbers = []
+    cp = None


 def test_averaging_kernel():
+    pytest.importorskip('cupy')
    size = (40, 55)
    src_arr = np.random.rand(*size)
    src_arr = add_ghost_layers(src_arr)
@@ -44,6 +46,7 @@ def test_averaging_kernel():


 def test_variable_sized_fields():
+    pytest.importorskip('cupy')
    src_field = Field.create_generic('src', spatial_dimensions=2)
    dst_field = Field.create_generic('dst', spatial_dimensions=2)

@@ -71,6 +74,7 @@ def test_variable_sized_fields():


 def test_multiple_index_dimensions():
+    pytest.importorskip('cupy')
    """Sums along the last axis of a numpy array"""
    src_size = (7, 6, 4)
    dst_size = src_size[:2]
@@ -103,6 +107,7 @@ def test_multiple_index_dimensions():


 def test_ghost_layer():
+    pytest.importorskip('cupy')
    size = (6, 5)
    src_arr = np.ones(size)
    dst_arr = np.zeros_like(src_arr)
@@ -127,6 +132,7 @@ def test_ghost_layer():


 def test_setting_value():
+    pytest.importorskip('cupy')
    arr_cpu = np.arange(25, dtype=np.float64).reshape(5, 5)
    arr_gpu = cp.asarray(arr_cpu)

@@ -143,6 +149,7 @@ def test_setting_value():


 def test_periodicity():
+    pytest.importorskip('cupy')
    from pystencils.gpu.periodicity import get_periodic_boundary_functor as periodic_gpu
    from pystencils.slicing import get_periodic_boundary_functor as periodic_cpu

@@ -163,6 +170,7 @@ def test_periodicity():

 @pytest.mark.parametrize("device_number", device_numbers)
 def test_block_indexing(device_number):
+    pytest.importorskip('cupy')
    f = fields("f: [3D]")
    s = normalize_slice(make_slice[:, :, :], f.spatial_shape)
    bi = BlockIndexing(s, f.layout, block_size=(16, 8, 2),
@@ -195,6 +203,7 @@ def test_block_indexing(device_number):
 @pytest.mark.parametrize('layout', ("C", "F"))
 @pytest.mark.parametrize('shape', ((5, 5, 5, 5), (3, 17, 387, 4), (23, 44, 21, 11)))
 def test_four_dimensional_kernel(gpu_indexing, layout, shape):
+    pytest.importorskip('cupy')
    n_elements = np.prod(shape)

    arr_cpu = np.arange(n_elements, dtype=np.float64).reshape(shape, order=layout)
@@ -210,3 +219,39 @@ def test_four_dimensional_kernel(gpu_indexing, layout, shape):

    kernel(f=arr_gpu, value=np.float64(42.0))
    np.testing.assert_equal(arr_gpu.get(), np.ones(shape) * 42.0)
+
+
+@pytest.mark.parametrize('start', (1, 5))
+@pytest.mark.parametrize('end', (-1, -2, -3, -4))
+@pytest.mark.parametrize('step', (1, 2, 3, 4))
+@pytest.mark.parametrize('shape', ([55, 60], [77, 101, 80], [44, 64, 66]))
+def test_guards_with_iteration_slices(start, end, step, shape):
+    iter_slice = tuple([slice(start, end, step)] * len(shape))
+
+    kernel_config_gpu = CreateKernelConfig(target=Target.GPU, iteration_slice=iter_slice)
+    field_1 = fields(f"f(1) : double{list(shape)}")
+    assignment = Assignment(field_1.center, 1)
+    ast = create_kernel(assignment, config=kernel_config_gpu)
+    code_str = get_code_str(ast)
+
+    test_strings = list()
+    iteration_ranges = list()
+    for i, s in enumerate(iter_slice):
+        e = ((shape[i] + end) - s.start) / s.step
+        e = math.ceil(e) + s.start
+        test_strings.append(f"{s.start} < {e}")
+
+        a = s.start
+        counter = 0
+        while a < e:
+            a += 1
+            counter += 1
+        iteration_ranges.append(counter)
+
+    # check if the expected if statement is in the GPU code
+    for s in test_strings:
+        assert s in code_str
+
+    # check that these bounds yield the same lengths as the range function would produce
+    for i in range(len(iter_slice)):
+        assert iteration_ranges[i] == len(range(iter_slice[i].start, shape[i] + end, iter_slice[i].step))
--- a/tests/test_indexed_kernels.py
+++ b/tests/test_indexed_kernels.py
@@ -64,15 +64,15 @@ def test_indexed_domain_kernel(index_size, array_size, target, dtype):
    src = sp.IndexedBase(TypedSymbol(f"_data_{f.name}", dtype=const_pointer_type), shape=index_src)
    dst = sp.IndexedBase(TypedSymbol(f"_data_{g.name}", dtype=pointer_type), shape=index_dst)

-    update_rule = [ps.Assignment(FieldPointerSymbol("f", dtype, const=True), src[index]),
-                   ps.Assignment(FieldPointerSymbol("g", dtype, const=False), dst[index]),
+    update_rule = [ps.Assignment(FieldPointerSymbol("f", dtype, const=True), src[index + 1]),
+                   ps.Assignment(FieldPointerSymbol("g", dtype, const=False), dst[index + 1]),
                   ps.Assignment(g.center, f.center)]

    ast = ps.create_kernel(update_rule, target=target)

    code = ps.get_code_str(ast)
-    assert f"const {dtype.c_name} * RESTRICT _data_f = (({dtype.c_name} * RESTRICT const)(_data_f[index]));" in code
-    assert f"{dtype.c_name} * RESTRICT  _data_g = (({dtype.c_name} * RESTRICT )(_data_g[index]));" in code
+    assert f"const {dtype.c_name} * RESTRICT _data_f = (({dtype.c_name} * RESTRICT const)(_data_f[index + 1]));" in code
+    assert f"{dtype.c_name} * RESTRICT  _data_g = (({dtype.c_name} * RESTRICT )(_data_g[index + 1]));" in code

    if target == Target.CPU:
        assert code.count("for") == f.spatial_dimensions + 1

--- a/tests/test_jupyter_extensions.ipynb
+++ b/tests/test_jupyter_extensions.ipynb
--- a/tests/test_math_functions.py
+++ b/tests/test_math_functions.py
@@ -39,7 +39,7 @@ def test_two_arguments(dtype, func, target):


 @pytest.mark.parametrize('dtype', ["float64", "float32"])
-@pytest.mark.parametrize('func', [sp.sin, sp.cos, sp.sinh, sp.cosh, sp.atan])
+@pytest.mark.parametrize('func', [sp.sin, sp.cos, sp.sinh, sp.cosh, sp.atan, sp.floor, sp.ceiling])
 @pytest.mark.parametrize('target', [ps.Target.CPU, ps.Target.GPU])
 def test_single_arguments(dtype, func, target):
    if target == ps.Target.GPU:
@@ -58,7 +58,8 @@ def test_single_arguments(dtype, func, target):
    ast = ps.create_kernel(up, config=config)
    code = ps.get_code_str(ast)
    if dtype == 'float32':
-        assert func.__name__.lower() in code
+        func_name = func.__name__.lower() if func is not sp.ceiling else "ceil"
+        assert func_name in code
    kernel = ast.compile()

    dh.all_to_gpu()

--- a/tests/test_phasefield_dentritic_3D.ipynb
+++ b/tests/test_phasefield_dentritic_3D.ipynb
--- a/tests/test_random.py
+++ b/tests/test_random.py
@@ -32,7 +32,7 @@ if get_compiler_config()['os'] == 'windows':
 def test_rng(target, rng, precision, dtype, t=124, offsets=(0, 0), keys=(0, 0), offset_values=None):
    if target == Target.GPU:
        pytest.importorskip('cupy')
-    if instruction_sets and {'neon', 'sve', 'vsx', 'rvv'}.intersection(instruction_sets) and rng == 'aesni':
+    if instruction_sets and {'neon', 'sve', 'sve2', 'sme', 'vsx', 'rvv'}.intersection(instruction_sets) and rng == 'aesni':
        pytest.xfail('AES not yet implemented for this architecture')
    if rng == 'aesni' and len(keys) == 2:
        keys *= 2
@@ -122,7 +122,7 @@ def test_rng_offsets(kind, vectorized):
 @pytest.mark.parametrize('rng', ('philox', 'aesni'))
 @pytest.mark.parametrize('precision,dtype', (('float', 'float'), ('double', 'double')))
 def test_rng_vectorized(target, rng, precision, dtype, t=130, offsets=(1, 3), keys=(0, 0), offset_values=None):
-    if (target in ['neon', 'vsx', 'rvv'] or target.startswith('sve')) and rng == 'aesni':
+    if (target in ['neon', 'vsx', 'rvv', 'sme'] or target.startswith('sve')) and rng == 'aesni':
        pytest.xfail('AES not yet implemented for this architecture')
    cpu_vectorize_info = {'assume_inner_stride_one': True, 'assume_aligned': True, 'instruction_set': target}


--- a/tests/test_simplifications.py
+++ b/tests/test_simplifications.py
@@ -146,6 +146,16 @@ def test_add_subexpressions_for_field_reads():
    assert isinstance(ac3.subexpressions[0].lhs, TypedSymbol)
    assert ac3.subexpressions[0].lhs.dtype == BasicType("float32")

+    # check the early-out of add_subexpressions_for_field_reads if no fields appear on the rhs (see #92)
+    main = [Assignment(s[0, 0](0), 3.0),
+            Assignment(s[0, 0](1), 4.0)]
+
+    ac4 = AssignmentCollection(main, subexpressions)
+    assert len(ac4.subexpressions) == 0
+    ac5 = add_subexpressions_for_field_reads(ac4)
+    assert ac5 is not None
+    assert ac4 is ac5
+

 @pytest.mark.parametrize('target', (ps.Target.CPU, ps.Target.GPU))
 @pytest.mark.parametrize('dtype', ('float32', 'float64'))

--- a/tests/test_sliced_iteration.py
+++ b/tests/test_sliced_iteration.py
 import numpy as np
 import sympy as sp
+import pytest

-from pystencils import Assignment, Field, TypedSymbol, create_kernel, make_slice
+from pystencils import (
+    Assignment,
+    Field,
+    TypedSymbol,
+    create_kernel,
+    make_slice,
+    Target,
+    create_data_handling,
+)
 from pystencils.simp import sympy_cse_on_assignment_list


-def test_sliced_iteration():
+@pytest.mark.parametrize("target", [Target.CPU, Target.GPU])
+def test_sliced_iteration(target):
+    if target == Target.GPU:
+        pytest.importorskip("cupy")
+
    size = (4, 4)
-    src_arr = np.ones(size)
-    dst_arr = np.zeros_like(src_arr)
-    src_field = Field.create_from_numpy_array('src', src_arr)
-    dst_field = Field.create_from_numpy_array('dst', dst_arr)
+
+    dh = create_data_handling(size, default_target=target, default_ghost_layers=0)
+
+    src_field = dh.add_array("src", 1)
+    dst_field = dh.add_array("dst", 1)
+
+    dh.fill(src_field.name, 1.0, ghost_layers=True)
+    dh.fill(dst_field.name, 0.0, ghost_layers=True)

    a, b = sp.symbols("a b")
-    update_rule = Assignment(dst_field[0, 0],
-                             (a * src_field[0, 1] + a * src_field[0, -1] +
-                              b * src_field[1, 0] + b * src_field[-1, 0]) / 4)
+    update_rule = Assignment(
+        dst_field[0, 0],
+        (
+            a * src_field[0, 1]
+            + a * src_field[0, -1]
+            + b * src_field[1, 0]
+            + b * src_field[-1, 0]
+        )
+        / 4,
+    )
+
+    s = make_slice[1:3, 1]
+    kernel = create_kernel(
+        sympy_cse_on_assignment_list([update_rule]), iteration_slice=s, target=target
+    ).compile()
+
+    if target == Target.GPU:
+        dh.all_to_gpu()
+
+    dh.run_kernel(kernel, a=1.0, b=1.0)
+
+    if target == Target.GPU:
+        dh.all_to_cpu()
+
+    expected_result = np.zeros(size)
+    expected_result[1:3, 1] = 1
+    np.testing.assert_almost_equal(dh.gather_array(dst_field.name), expected_result)
+
+
+@pytest.mark.parametrize("target", [Target.CPU, Target.GPU])
+def test_symbols_in_slice(target):
+    if target == Target.GPU:
+        pytest.xfail("Iteration slices including arbitrary symbols are currently broken on GPU")
+
+    size = (4, 4)
+
+    dh = create_data_handling(size, default_target=target, default_ghost_layers=0)
+
+    src_field = dh.add_array("src", 1)
+    dst_field = dh.add_array("dst", 1)
+
+    dh.fill(src_field.name, 1.0, ghost_layers=True)
+    dh.fill(dst_field.name, 0.0, ghost_layers=True)
+
+    a, b = sp.symbols("a b")
+    update_rule = Assignment(
+        dst_field[0, 0],
+        (
+            a * src_field[0, 1]
+            + a * src_field[0, -1]
+            + b * src_field[1, 0]
+            + b * src_field[-1, 0]
+        )
+        / 4,
+    )

    x_end = TypedSymbol("x_end", "int")
    s = make_slice[1:x_end, 1]
    x_end_value = size[1] - 1
-    kernel = create_kernel(sympy_cse_on_assignment_list([update_rule]), iteration_slice=s).compile()
+    kernel = create_kernel(
+        sympy_cse_on_assignment_list([update_rule]), iteration_slice=s, target=target
+    ).compile()
+
+    if target == Target.GPU:
+        dh.all_to_gpu()
+
+    dh.run_kernel(kernel, a=1.0, b=1.0, x_end=x_end_value)

-    kernel(src=src_arr, dst=dst_arr, a=1.0, b=1.0, x_end=x_end_value)
+    if target == Target.GPU:
+        dh.all_to_cpu()

    expected_result = np.zeros(size)
    expected_result[1:x_end_value, 1] = 1
-    np.testing.assert_almost_equal(expected_result, dst_arr)
+    np.testing.assert_almost_equal(dh.gather_array(dst_field.name), expected_result)
--- a/tests/test_vectorization.py
+++ b/tests/test_vectorization.py
@@ -143,10 +143,10 @@ def test_aligned_and_nt_stores(openmp, instruction_set=instruction_set):
    # Without the base pointer spec, the inner store is not aligned
    config = pystencils.config.CreateKernelConfig(target=dh.default_target, cpu_vectorize_info=opt, cpu_openmp=openmp)
    ast = ps.create_kernel(update_rule, config=config)
-    if instruction_set in ['sse'] or instruction_set.startswith('avx'):
+    if instruction_set in ['sse'] or instruction_set.startswith('avx') or instruction_set.startswith('sve'):
        assert 'stream' in ast.instruction_set
        assert 'streamFence' in ast.instruction_set
-    if instruction_set in ['neon', 'vsx'] or instruction_set.startswith('sve'):
+    if instruction_set in ['neon', 'vsx', 'rvv']:
        assert 'cachelineZero' in ast.instruction_set
    if instruction_set in ['vsx']:
        assert 'storeAAndFlushCacheline' in ast.instruction_set
@@ -331,7 +331,7 @@ def test_logical_operators(instruction_set=instruction_set):


 def test_hardware_query():
-    assert {'sse', 'neon', 'sve', 'vsx', 'rvv'}.intersection(supported_instruction_sets)
+    assert {'sse', 'neon', 'sve', 'sve2', 'sme', 'vsx', 'rvv'}.intersection(supported_instruction_sets)


 def test_vectorised_pow(instruction_set=instruction_set):

--- a/tests/test_vectorization_specific.py
+++ b/tests/test_vectorization_specific.py
@@ -8,8 +8,10 @@ import sympy as sp
 import pystencils as ps
 from pystencils.backends.simd_instruction_sets import (get_cacheline_size, get_supported_instruction_sets,
                                                       get_vector_instruction_set)
-from . import test_vectorization
 from pystencils.enums import Target
+from pystencils.typing import CFunction
+from . import test_vectorization
+

 supported_instruction_sets = get_supported_instruction_sets() if get_supported_instruction_sets() else []

@@ -39,7 +41,7 @@ def test_vectorisation_varying_arch(instruction_set):

 @pytest.mark.parametrize('dtype', ('float32', 'float64'))
 @pytest.mark.parametrize('instruction_set', supported_instruction_sets)
-def test_vectorized_abs(instruction_set, dtype):
+def test_vectorized_abs_field(instruction_set, dtype):
    """Some instructions sets have abs, some don't.
       Furthermore, the special treatment of unary minus makes this data type-sensitive too.
    """
@@ -58,24 +60,47 @@ def test_vectorized_abs(instruction_set, dtype):
    np.testing.assert_equal(np.sum(dst[1:-1, 1:-1]), 2 ** 2 * 2 ** 3)


+@pytest.mark.parametrize('instruction_set', supported_instruction_sets)
+def test_vectorized_abs_scalar(instruction_set):
+    """Some instruction sets have abs, some don't.
+       Furthermore, the special treatment of unary minus makes this data type-sensitive too.
+    """
+    arr = np.zeros((2 ** 2 + 2, 2 ** 3 + 2), dtype="float64")
+
+    f = ps.fields(f=arr)
+    update_rule = [ps.Assignment(f.center(), sp.Abs(sp.Symbol("a")))]
+
+    config = pystencils.config.CreateKernelConfig(cpu_vectorize_info={'instruction_set': instruction_set})
+    ast = ps.create_kernel(update_rule, config=config)
+
+    func = ast.compile()
+    func(f=arr, a=-1)
+    np.testing.assert_equal(np.sum(arr[1:-1, 1:-1]), 2 ** 2 * 2 ** 3)
+
+
 @pytest.mark.parametrize('dtype', ('float32', 'float64'))
 @pytest.mark.parametrize('instruction_set', supported_instruction_sets)
-def test_strided(instruction_set, dtype):
+@pytest.mark.parametrize('nontemporal', [False, True])
+def test_strided(instruction_set, dtype, nontemporal):
    f, g = ps.fields(f"f, g : {dtype}[2D]")
    update_rule = [ps.Assignment(g[0, 0], f[0, 0] + f[-1, 0] + f[1, 0] + f[0, 1] + f[0, -1] + 42.0)]
+    config = pystencils.config.CreateKernelConfig(cpu_vectorize_info={'instruction_set': instruction_set,
+                                                                      'nontemporal': nontemporal},
+                                                  default_number_float=dtype)
    if 'storeS' not in get_vector_instruction_set(dtype, instruction_set) \
            and instruction_set not in ['avx512', 'avx512vl', 'rvv'] and not instruction_set.startswith('sve'):
        with pytest.warns(UserWarning) as warn:
-            config = pystencils.config.CreateKernelConfig(cpu_vectorize_info={'instruction_set': instruction_set},
-                                                          default_number_float=dtype)
            ast = ps.create_kernel(update_rule, config=config)
            assert 'Could not vectorize loop' in warn[0].message.args[0]
    else:
        with pytest.warns(None) as warn:
-            config = pystencils.config.CreateKernelConfig(cpu_vectorize_info={'instruction_set': instruction_set},
-                                                          default_number_float=dtype)
            ast = ps.create_kernel(update_rule, config=config)
            assert len(warn) == 0
+        instruction = 'streamS' if nontemporal and 'streamS' in ast.instruction_set else 'storeS'
+        assert ast.instruction_set[instruction].split('{')[0] in ps.get_code_str(ast)
+    instruction = 'cachelineZero'
+    if instruction in ast.instruction_set:
+        assert ast.instruction_set[instruction] not in ps.get_code_str(ast)

    # ps.show_code(ast)
    func = ast.compile()
@@ -226,7 +251,7 @@ def test_issue62(dtype, instruction_set, padding):
    dy = sp.Symbol("dy")
    src, dst, rhs = ps.fields(f"src, src_tmp, rhs: {dtype}[2D]", layout='fzyx')

-    up = ps.Assignment(src[0, 0], ((dy ** 2 * (src[1, 0] + src[-1, 0]))
+    up = ps.Assignment(dst[0, 0], ((dy ** 2 * (src[1, 0] + src[-1, 0]))
                                   + (dx ** 2 * (src[0, 1] + src[0, -1]))
                                   - (rhs[0, 0] * dx ** 2 * dy ** 2)) / (2 * (dx ** 2 + dy ** 2)))

@@ -269,6 +294,22 @@ def test_div_and_unevaluated_expr(dtype, instruction_set):
    assert 'pow' not in code


+@pytest.mark.parametrize('dtype', ('float32', 'float64'))
+@pytest.mark.parametrize('instruction_set', ('sve', 'sve2', 'sme', 'rvv'))
+def test_check_ast_parameters_sizeless(dtype, instruction_set):
+    f, g = ps.fields(f"f, g: {dtype}[3D]", layout='fzyx')
+
+    update_rule = [ps.Assignment(g.center(), 2 * f.center())]
+
+    config = pystencils.config.CreateKernelConfig(data_type=dtype,
+                                                  cpu_vectorize_info={'instruction_set': instruction_set})
+    ast = ps.create_kernel(update_rule, config=config)
+    ast_symbols = [p.symbol for p in ast.get_parameters()]
+    assert ast.instruction_set['width'] not in ast_symbols
+    assert ast.instruction_set['intwidth'] not in ast_symbols
+
+
+
 # TODO this test case needs a complete rework of the vectoriser. The reason is that the vectoriser does not
 # TODO vectorise symbols at the moment because they could be strides or field sizes, thus involved in pointer arithmetic
 # TODO This means that the vectoriser only works if fields are involved on the rhs.

--- a/versioneer.py
+++ b/versioneer.py
No results found