Skip to content
Snippets Groups Projects

Compare revisions

Changes are shown as if the source revision was being merged into the target revision. Learn more about comparing revisions.

Source

Select target project
No results found
Select Git revision

Target

Select target project
  • anirudh.jonnalagadda/pystencils
  • hyteg/pystencils
  • jbadwaik/pystencils
  • jngrad/pystencils
  • itischler/pystencils
  • ob28imeq/pystencils
  • hoenig/pystencils
  • Bindgen/pystencils
  • hammer/pystencils
  • da15siwa/pystencils
  • holzer/pystencils
  • alexander.reinauer/pystencils
  • ec93ujoh/pystencils
  • Harke/pystencils
  • seitz/pystencils
  • pycodegen/pystencils
16 results
Select Git revision
Show changes
......@@ -5,21 +5,33 @@ import sympy as sp
import pystencils as ps
from pystencils import TypedSymbol
from pystencils.typing import create_type
from pystencils.field import Field, FieldType, layout_string_to_tuple
from pystencils.field import Field, FieldType, layout_string_to_tuple, spatial_layout_string_to_tuple
def test_field_basic():
f = Field.create_generic('f', spatial_dimensions=2)
f = Field.create_generic("f", spatial_dimensions=2)
assert FieldType.is_generic(f)
assert f['E'] == f[1, 0]
assert f['N'] == f[0, 1]
assert '_' in f.center._latex('dummy')
assert f.index_to_physical(index_coordinates=sp.Matrix([0, 0]), staggered=False)[0] == 0
assert f.index_to_physical(index_coordinates=sp.Matrix([0, 0]), staggered=False)[1] == 0
assert f.physical_to_index(physical_coordinates=sp.Matrix([0, 0]), staggered=False)[0] == 0
assert f.physical_to_index(physical_coordinates=sp.Matrix([0, 0]), staggered=False)[1] == 0
assert f["E"] == f[1, 0]
assert f["N"] == f[0, 1]
assert "_" in f.center._latex("dummy")
assert (
f.index_to_physical(index_coordinates=sp.Matrix([0, 0]), staggered=False)[0]
== 0
)
assert (
f.index_to_physical(index_coordinates=sp.Matrix([0, 0]), staggered=False)[1]
== 0
)
assert (
f.physical_to_index(physical_coordinates=sp.Matrix([0, 0]), staggered=False)[0]
== 0
)
assert (
f.physical_to_index(physical_coordinates=sp.Matrix([0, 0]), staggered=False)[1]
== 0
)
f1 = f.new_field_with_different_name("f1")
assert f1.ndim == f.ndim
......@@ -28,7 +40,7 @@ def test_field_basic():
fixed = ps.fields("f(5, 5) : double[20, 20]")
assert fixed.neighbor_vector((1, 1)).shape == (5, 5)
f = Field.create_fixed_size('f', (10, 10), strides=(80, 8), dtype=np.float64)
f = Field.create_fixed_size("f", (10, 10), strides=(80, 8), dtype=np.float64)
assert f.spatial_strides == (10, 1)
assert f.index_strides == ()
assert f.center_vector == sp.Matrix([f.center])
......@@ -37,20 +49,21 @@ def test_field_basic():
assert f1.ndim == f.ndim
assert f1.values_per_cell() == f.values_per_cell()
f = Field.create_fixed_size('f', (8, 8, 2, 2), index_dimensions=2)
assert f.center_vector == sp.Matrix([[f(0, 0), f(0, 1)],
[f(1, 0), f(1, 1)]])
f = Field.create_fixed_size("f", (8, 8, 2, 2), index_dimensions=2)
assert f.center_vector == sp.Matrix([[f(0, 0), f(0, 1)], [f(1, 0), f(1, 1)]])
field_access = f[1, 1]
assert field_access.nr_of_coordinates == 2
assert field_access.offset_name == 'NE'
assert field_access.offset_name == "NE"
neighbor = field_access.neighbor(coord_id=0, offset=-2)
assert neighbor.offsets == (-1, 1)
assert '_' in neighbor._latex('dummy')
assert "_" in neighbor._latex("dummy")
f = Field.create_fixed_size('f', (8, 8, 2, 2, 2), index_dimensions=3)
assert f.center_vector == sp.Array([[[f(i, j, k) for k in range(2)] for j in range(2)] for i in range(2)])
f = Field.create_fixed_size("f", (8, 8, 2, 2, 2), index_dimensions=3)
assert f.center_vector == sp.Array(
[[[f(i, j, k) for k in range(2)] for j in range(2)] for i in range(2)]
)
f = Field.create_generic('f', spatial_dimensions=5, index_dimensions=2)
f = Field.create_generic("f", spatial_dimensions=5, index_dimensions=2)
field_access = f[1, -1, 2, -3, 0](1, 0)
assert field_access.offsets == (1, -1, 2, -3, 0)
assert field_access.index == (1, 0)
......@@ -60,61 +73,71 @@ def test_error_handling():
struct_dtype = np.dtype([('a', np.int32), ('b', np.float64), ('c', np.uint32)])
Field.create_generic('f', spatial_dimensions=2, index_dimensions=0, dtype=struct_dtype)
with pytest.raises(ValueError) as e:
Field.create_generic('f', spatial_dimensions=2, index_dimensions=1, dtype=struct_dtype)
assert 'index dimension' in str(e.value)
Field.create_generic(
"f", spatial_dimensions=2, index_dimensions=1, dtype=struct_dtype
)
assert "index dimension" in str(e.value)
arr = np.array([[[(1,)*3, (2,)*3, (3,)*3]]*2], dtype=struct_dtype)
Field.create_from_numpy_array('f', arr, index_dimensions=0)
arr = np.array([[[(1,) * 3, (2,) * 3, (3,) * 3]] * 2], dtype=struct_dtype)
Field.create_from_numpy_array("f", arr, index_dimensions=0)
with pytest.raises(ValueError) as e:
Field.create_from_numpy_array('f', arr, index_dimensions=1)
assert 'Structured arrays' in str(e.value)
Field.create_from_numpy_array("f", arr, index_dimensions=1)
assert "Structured arrays" in str(e.value)
arr = np.zeros([3, 3, 3])
Field.create_from_numpy_array('f', arr, index_dimensions=2)
Field.create_from_numpy_array("f", arr, index_dimensions=2)
with pytest.raises(ValueError) as e:
Field.create_from_numpy_array('f', arr, index_dimensions=3)
assert 'Too many' in str(e.value)
Field.create_from_numpy_array("f", arr, index_dimensions=3)
assert "Too many" in str(e.value)
Field.create_fixed_size('f', (3, 2, 4), index_dimensions=0, dtype=struct_dtype, layout='reverse_numpy')
Field.create_fixed_size(
"f", (3, 2, 4), index_dimensions=0, dtype=struct_dtype, layout="reverse_numpy"
)
with pytest.raises(ValueError) as e:
Field.create_fixed_size('f', (3, 2, 4), index_dimensions=1, dtype=struct_dtype, layout='reverse_numpy')
assert 'Structured arrays' in str(e.value)
f = Field.create_fixed_size('f', (10, 10))
Field.create_fixed_size(
"f",
(3, 2, 4),
index_dimensions=1,
dtype=struct_dtype,
layout="reverse_numpy",
)
assert "Structured arrays" in str(e.value)
f = Field.create_fixed_size("f", (10, 10))
with pytest.raises(ValueError) as e:
f[1]
assert 'Wrong number of spatial indices' in str(e.value)
assert "Wrong number of spatial indices" in str(e.value)
f = Field.create_generic('f', spatial_dimensions=2, index_shape=(3,))
f = Field.create_generic("f", spatial_dimensions=2, index_shape=(3,))
with pytest.raises(ValueError) as e:
f(3)
assert 'out of bounds' in str(e.value)
assert "out of bounds" in str(e.value)
f = Field.create_fixed_size('f', (10, 10, 3, 4), index_dimensions=2)
f = Field.create_fixed_size("f", (10, 10, 3, 4), index_dimensions=2)
with pytest.raises(ValueError) as e:
f(3, 0)
assert 'out of bounds' in str(e.value)
assert "out of bounds" in str(e.value)
with pytest.raises(ValueError) as e:
f(1, 0)(1, 0)
assert 'Indexing an already indexed' in str(e.value)
assert "Indexing an already indexed" in str(e.value)
with pytest.raises(ValueError) as e:
f(1)
assert 'Wrong number of indices' in str(e.value)
assert "Wrong number of indices" in str(e.value)
with pytest.raises(ValueError) as e:
Field.create_generic('f', spatial_dimensions=2, layout='wrong')
assert 'Unknown layout descriptor' in str(e.value)
Field.create_generic("f", spatial_dimensions=2, layout="wrong")
assert "Unknown layout descriptor" in str(e.value)
assert layout_string_to_tuple('fzyx', dim=4) == (3, 2, 1, 0)
assert layout_string_to_tuple("fzyx", dim=4) == (3, 2, 1, 0)
with pytest.raises(ValueError) as e:
layout_string_to_tuple('wrong', dim=4)
assert 'Unknown layout descriptor' in str(e.value)
layout_string_to_tuple("wrong", dim=4)
assert "Unknown layout descriptor" in str(e.value)
def test_decorator_scoping():
dst = ps.fields('dst : double[2D]')
dst = ps.fields("dst : double[2D]")
def f1():
a = sp.Symbol("a")
......@@ -134,7 +157,7 @@ def test_decorator_scoping():
def test_string_creation():
x, y, z = ps.fields(' x(4), y(3,5) z : double[ 3, 47]')
x, y, z = ps.fields(" x(4), y(3,5) z : double[ 3, 47]")
assert x.index_shape == (4,)
assert y.index_shape == (3, 5)
assert z.spatial_shape == (3, 47)
......@@ -142,19 +165,85 @@ def test_string_creation():
def test_itemsize():
x = ps.fields('x: float32[1d]')
y = ps.fields('y: float64[2d]')
i = ps.fields('i: int16[1d]')
x = ps.fields("x: float32[1d]")
y = ps.fields("y: float64[2d]")
i = ps.fields("i: int16[1d]")
assert x.itemsize == 4
assert y.itemsize == 8
assert i.itemsize == 2
def test_spatial_memory_layout_descriptors():
assert (
spatial_layout_string_to_tuple("AoS", 3)
== spatial_layout_string_to_tuple("aos", 3)
== spatial_layout_string_to_tuple("ZYXF", 3)
== spatial_layout_string_to_tuple("zyxf", 3)
== (2, 1, 0)
)
assert (
spatial_layout_string_to_tuple("SoA", 3)
== spatial_layout_string_to_tuple("soa", 3)
== spatial_layout_string_to_tuple("FZYX", 3)
== spatial_layout_string_to_tuple("fzyx", 3)
== spatial_layout_string_to_tuple("f", 3)
== spatial_layout_string_to_tuple("F", 3)
== (2, 1, 0)
)
assert (
spatial_layout_string_to_tuple("c", 3)
== spatial_layout_string_to_tuple("C", 3)
== (0, 1, 2)
)
assert spatial_layout_string_to_tuple("C", 5) == (0, 1, 2, 3, 4)
with pytest.raises(ValueError):
spatial_layout_string_to_tuple("aos", -1)
with pytest.raises(ValueError):
spatial_layout_string_to_tuple("aos", 4)
def test_memory_layout_descriptors():
assert (
layout_string_to_tuple("AoS", 4)
== layout_string_to_tuple("aos", 4)
== layout_string_to_tuple("ZYXF", 4)
== layout_string_to_tuple("zyxf", 4)
== (2, 1, 0, 3)
)
assert (
layout_string_to_tuple("SoA", 4)
== layout_string_to_tuple("soa", 4)
== layout_string_to_tuple("FZYX", 4)
== layout_string_to_tuple("fzyx", 4)
== layout_string_to_tuple("f", 4)
== layout_string_to_tuple("F", 4)
== (3, 2, 1, 0)
)
assert (
layout_string_to_tuple("c", 4)
== layout_string_to_tuple("C", 4)
== (0, 1, 2, 3)
)
assert layout_string_to_tuple("C", 5) == (0, 1, 2, 3, 4)
with pytest.raises(ValueError):
layout_string_to_tuple("aos", -1)
with pytest.raises(ValueError):
layout_string_to_tuple("aos", 5)
def test_staggered():
# D2Q5
j1, j2, j3 = ps.fields('j1(2), j2(2,2), j3(2,2,2) : double[2D]', field_type=FieldType.STAGGERED)
j1, j2, j3 = ps.fields(
"j1(2), j2(2,2), j3(2,2,2) : double[2D]", field_type=FieldType.STAGGERED
)
assert j1[0, 1](1) == j1.staggered_access((0, sp.Rational(1, 2)))
assert j1[0, 1](1) == j1.staggered_access(np.array((0, sp.Rational(1, 2))))
......@@ -163,7 +252,7 @@ def test_staggered():
assert j1[0, 1](1) == j1.staggered_access("N")
assert j1[0, 0](1) == j1.staggered_access("S")
assert j1.staggered_vector_access("N") == sp.Matrix([j1.staggered_access("N")])
assert j1.staggered_stencil_name == 'D2Q5'
assert j1.staggered_stencil_name == "D2Q5"
assert j1.physical_coordinates[0] == TypedSymbol("ctr_0", create_type("int"), nonnegative=True)
assert j1.physical_coordinates[1] == TypedSymbol("ctr_1", create_type("int"), nonnegative=True)
......@@ -176,28 +265,40 @@ def test_staggered():
assert j2[0, 1](1, 1) == j2.staggered_access((0, sp.Rational(1, 2)), 1)
assert j2[0, 1](1, 1) == j2.staggered_access("N", 1)
assert j2.staggered_vector_access("N") == sp.Matrix([j2.staggered_access("N", 0), j2.staggered_access("N", 1)])
assert j2.staggered_vector_access("N") == sp.Matrix(
[j2.staggered_access("N", 0), j2.staggered_access("N", 1)]
)
assert j3[0, 1](1, 1, 1) == j3.staggered_access((0, sp.Rational(1, 2)), (1, 1))
assert j3[0, 1](1, 1, 1) == j3.staggered_access("N", (1, 1))
assert j3.staggered_vector_access("N") == sp.Matrix([[j3.staggered_access("N", (i, j))
for j in range(2)] for i in range(2)])
assert j3.staggered_vector_access("N") == sp.Matrix(
[[j3.staggered_access("N", (i, j)) for j in range(2)] for i in range(2)]
)
# D2Q9
k1, k2 = ps.fields('k1(4), k2(2) : double[2D]', field_type=FieldType.STAGGERED)
k1, k2 = ps.fields("k1(4), k2(2) : double[2D]", field_type=FieldType.STAGGERED)
assert k1[1, 1](2) == k1.staggered_access("NE")
assert k1[0, 0](2) == k1.staggered_access("SW")
assert k1[0, 0](3) == k1.staggered_access("NW")
a = k1.staggered_access("NE")
assert a._staggered_offset(a.offsets, a.index[0]) == [sp.Rational(1, 2), sp.Rational(1, 2)]
assert a._staggered_offset(a.offsets, a.index[0]) == [
sp.Rational(1, 2),
sp.Rational(1, 2),
]
a = k1.staggered_access("SW")
assert a._staggered_offset(a.offsets, a.index[0]) == [sp.Rational(-1, 2), sp.Rational(-1, 2)]
assert a._staggered_offset(a.offsets, a.index[0]) == [
sp.Rational(-1, 2),
sp.Rational(-1, 2),
]
a = k1.staggered_access("NW")
assert a._staggered_offset(a.offsets, a.index[0]) == [sp.Rational(-1, 2), sp.Rational(1, 2)]
assert a._staggered_offset(a.offsets, a.index[0]) == [
sp.Rational(-1, 2),
sp.Rational(1, 2),
]
# sign reversed when using as flux field
r = ps.fields('r(2) : double[2D]', field_type=FieldType.STAGGERED_FLUX)
r = ps.fields("r(2) : double[2D]", field_type=FieldType.STAGGERED_FLUX)
assert r[0, 0](0) == r.staggered_access("W")
assert -r[1, 0](0) == r.staggered_access("E")
import pytest
import numpy as np
import cupy as cp
import sympy as sp
import math
from scipy.ndimage import convolve
from pystencils import Assignment, Field, fields, CreateKernelConfig, create_kernel, Target
from pystencils import Assignment, Field, fields, CreateKernelConfig, create_kernel, Target, get_code_str
from pystencils.gpu import BlockIndexing
from pystencils.simp import sympy_cse_on_assignment_list
from pystencils.slicing import add_ghost_layers, make_slice, remove_ghost_layers, normalize_slice
try:
import cupy
device_numbers = range(cupy.cuda.runtime.getDeviceCount())
import cupy as cp
device_numbers = range(cp.cuda.runtime.getDeviceCount())
except ImportError:
device_numbers = []
cp = None
def test_averaging_kernel():
pytest.importorskip('cupy')
size = (40, 55)
src_arr = np.random.rand(*size)
src_arr = add_ghost_layers(src_arr)
......@@ -44,6 +46,7 @@ def test_averaging_kernel():
def test_variable_sized_fields():
pytest.importorskip('cupy')
src_field = Field.create_generic('src', spatial_dimensions=2)
dst_field = Field.create_generic('dst', spatial_dimensions=2)
......@@ -71,6 +74,7 @@ def test_variable_sized_fields():
def test_multiple_index_dimensions():
pytest.importorskip('cupy')
"""Sums along the last axis of a numpy array"""
src_size = (7, 6, 4)
dst_size = src_size[:2]
......@@ -103,6 +107,7 @@ def test_multiple_index_dimensions():
def test_ghost_layer():
pytest.importorskip('cupy')
size = (6, 5)
src_arr = np.ones(size)
dst_arr = np.zeros_like(src_arr)
......@@ -127,6 +132,7 @@ def test_ghost_layer():
def test_setting_value():
pytest.importorskip('cupy')
arr_cpu = np.arange(25, dtype=np.float64).reshape(5, 5)
arr_gpu = cp.asarray(arr_cpu)
......@@ -143,6 +149,7 @@ def test_setting_value():
def test_periodicity():
pytest.importorskip('cupy')
from pystencils.gpu.periodicity import get_periodic_boundary_functor as periodic_gpu
from pystencils.slicing import get_periodic_boundary_functor as periodic_cpu
......@@ -163,6 +170,7 @@ def test_periodicity():
@pytest.mark.parametrize("device_number", device_numbers)
def test_block_indexing(device_number):
pytest.importorskip('cupy')
f = fields("f: [3D]")
s = normalize_slice(make_slice[:, :, :], f.spatial_shape)
bi = BlockIndexing(s, f.layout, block_size=(16, 8, 2),
......@@ -195,6 +203,7 @@ def test_block_indexing(device_number):
@pytest.mark.parametrize('layout', ("C", "F"))
@pytest.mark.parametrize('shape', ((5, 5, 5, 5), (3, 17, 387, 4), (23, 44, 21, 11)))
def test_four_dimensional_kernel(gpu_indexing, layout, shape):
pytest.importorskip('cupy')
n_elements = np.prod(shape)
arr_cpu = np.arange(n_elements, dtype=np.float64).reshape(shape, order=layout)
......@@ -210,3 +219,39 @@ def test_four_dimensional_kernel(gpu_indexing, layout, shape):
kernel(f=arr_gpu, value=np.float64(42.0))
np.testing.assert_equal(arr_gpu.get(), np.ones(shape) * 42.0)
@pytest.mark.parametrize('start', (1, 5))
@pytest.mark.parametrize('end', (-1, -2, -3, -4))
@pytest.mark.parametrize('step', (1, 2, 3, 4))
@pytest.mark.parametrize('shape', ([55, 60], [77, 101, 80], [44, 64, 66]))
def test_guards_with_iteration_slices(start, end, step, shape):
iter_slice = tuple([slice(start, end, step)] * len(shape))
kernel_config_gpu = CreateKernelConfig(target=Target.GPU, iteration_slice=iter_slice)
field_1 = fields(f"f(1) : double{list(shape)}")
assignment = Assignment(field_1.center, 1)
ast = create_kernel(assignment, config=kernel_config_gpu)
code_str = get_code_str(ast)
test_strings = list()
iteration_ranges = list()
for i, s in enumerate(iter_slice):
e = ((shape[i] + end) - s.start) / s.step
e = math.ceil(e) + s.start
test_strings.append(f"{s.start} < {e}")
a = s.start
counter = 0
while a < e:
a += 1
counter += 1
iteration_ranges.append(counter)
# check if the expected if statement is in the GPU code
for s in test_strings:
assert s in code_str
# check if these bounds lead to same lengths as the range function would produce
for i in range(len(iter_slice)):
assert iteration_ranges[i] == len(range(iter_slice[i].start, shape[i] + end, iter_slice[i].step))
......@@ -64,15 +64,15 @@ def test_indexed_domain_kernel(index_size, array_size, target, dtype):
src = sp.IndexedBase(TypedSymbol(f"_data_{f.name}", dtype=const_pointer_type), shape=index_src)
dst = sp.IndexedBase(TypedSymbol(f"_data_{g.name}", dtype=pointer_type), shape=index_dst)
update_rule = [ps.Assignment(FieldPointerSymbol("f", dtype, const=True), src[index]),
ps.Assignment(FieldPointerSymbol("g", dtype, const=False), dst[index]),
update_rule = [ps.Assignment(FieldPointerSymbol("f", dtype, const=True), src[index + 1]),
ps.Assignment(FieldPointerSymbol("g", dtype, const=False), dst[index + 1]),
ps.Assignment(g.center, f.center)]
ast = ps.create_kernel(update_rule, target=target)
code = ps.get_code_str(ast)
assert f"const {dtype.c_name} * RESTRICT _data_f = (({dtype.c_name} * RESTRICT const)(_data_f[index]));" in code
assert f"{dtype.c_name} * RESTRICT _data_g = (({dtype.c_name} * RESTRICT )(_data_g[index]));" in code
assert f"const {dtype.c_name} * RESTRICT _data_f = (({dtype.c_name} * RESTRICT const)(_data_f[index + 1]));" in code
assert f"{dtype.c_name} * RESTRICT _data_g = (({dtype.c_name} * RESTRICT )(_data_g[index + 1]));" in code
if target == Target.CPU:
assert code.count("for") == f.spatial_dimensions + 1
......
This diff is collapsed.
......@@ -39,7 +39,7 @@ def test_two_arguments(dtype, func, target):
@pytest.mark.parametrize('dtype', ["float64", "float32"])
@pytest.mark.parametrize('func', [sp.sin, sp.cos, sp.sinh, sp.cosh, sp.atan])
@pytest.mark.parametrize('func', [sp.sin, sp.cos, sp.sinh, sp.cosh, sp.atan, sp.floor, sp.ceiling])
@pytest.mark.parametrize('target', [ps.Target.CPU, ps.Target.GPU])
def test_single_arguments(dtype, func, target):
if target == ps.Target.GPU:
......@@ -58,7 +58,8 @@ def test_single_arguments(dtype, func, target):
ast = ps.create_kernel(up, config=config)
code = ps.get_code_str(ast)
if dtype == 'float32':
assert func.__name__.lower() in code
func_name = func.__name__.lower() if func is not sp.ceiling else "ceil"
assert func_name in code
kernel = ast.compile()
dh.all_to_gpu()
......
This diff is collapsed.
......@@ -32,7 +32,7 @@ if get_compiler_config()['os'] == 'windows':
def test_rng(target, rng, precision, dtype, t=124, offsets=(0, 0), keys=(0, 0), offset_values=None):
if target == Target.GPU:
pytest.importorskip('cupy')
if instruction_sets and {'neon', 'sve', 'vsx', 'rvv'}.intersection(instruction_sets) and rng == 'aesni':
if instruction_sets and {'neon', 'sve', 'sve2', 'sme', 'vsx', 'rvv'}.intersection(instruction_sets) and rng == 'aesni':
pytest.xfail('AES not yet implemented for this architecture')
if rng == 'aesni' and len(keys) == 2:
keys *= 2
......@@ -122,7 +122,7 @@ def test_rng_offsets(kind, vectorized):
@pytest.mark.parametrize('rng', ('philox', 'aesni'))
@pytest.mark.parametrize('precision,dtype', (('float', 'float'), ('double', 'double')))
def test_rng_vectorized(target, rng, precision, dtype, t=130, offsets=(1, 3), keys=(0, 0), offset_values=None):
if (target in ['neon', 'vsx', 'rvv'] or target.startswith('sve')) and rng == 'aesni':
if (target in ['neon', 'vsx', 'rvv', 'sme'] or target.startswith('sve')) and rng == 'aesni':
pytest.xfail('AES not yet implemented for this architecture')
cpu_vectorize_info = {'assume_inner_stride_one': True, 'assume_aligned': True, 'instruction_set': target}
......
......@@ -146,6 +146,16 @@ def test_add_subexpressions_for_field_reads():
assert isinstance(ac3.subexpressions[0].lhs, TypedSymbol)
assert ac3.subexpressions[0].lhs.dtype == BasicType("float32")
# added check for early out of add_subexpressions_for_field_reads is no fields appear on the rhs (See #92)
main = [Assignment(s[0, 0](0), 3.0),
Assignment(s[0, 0](1), 4.0)]
ac4 = AssignmentCollection(main, subexpressions)
assert len(ac4.subexpressions) == 0
ac5 = add_subexpressions_for_field_reads(ac4)
assert ac5 is not None
assert ac4 is ac5
@pytest.mark.parametrize('target', (ps.Target.CPU, ps.Target.GPU))
@pytest.mark.parametrize('dtype', ('float32', 'float64'))
......
import numpy as np
import sympy as sp
import pytest
from pystencils import Assignment, Field, TypedSymbol, create_kernel, make_slice
from pystencils import (
Assignment,
Field,
TypedSymbol,
create_kernel,
make_slice,
Target,
create_data_handling,
)
from pystencils.simp import sympy_cse_on_assignment_list
def test_sliced_iteration():
@pytest.mark.parametrize("target", [Target.CPU, Target.GPU])
def test_sliced_iteration(target):
if target == Target.GPU:
pytest.importorskip("cupy")
size = (4, 4)
src_arr = np.ones(size)
dst_arr = np.zeros_like(src_arr)
src_field = Field.create_from_numpy_array('src', src_arr)
dst_field = Field.create_from_numpy_array('dst', dst_arr)
dh = create_data_handling(size, default_target=target, default_ghost_layers=0)
src_field = dh.add_array("src", 1)
dst_field = dh.add_array("dst", 1)
dh.fill(src_field.name, 1.0, ghost_layers=True)
dh.fill(dst_field.name, 0.0, ghost_layers=True)
a, b = sp.symbols("a b")
update_rule = Assignment(dst_field[0, 0],
(a * src_field[0, 1] + a * src_field[0, -1] +
b * src_field[1, 0] + b * src_field[-1, 0]) / 4)
update_rule = Assignment(
dst_field[0, 0],
(
a * src_field[0, 1]
+ a * src_field[0, -1]
+ b * src_field[1, 0]
+ b * src_field[-1, 0]
)
/ 4,
)
s = make_slice[1:3, 1]
kernel = create_kernel(
sympy_cse_on_assignment_list([update_rule]), iteration_slice=s, target=target
).compile()
if target == Target.GPU:
dh.all_to_gpu()
dh.run_kernel(kernel, a=1.0, b=1.0)
if target == Target.GPU:
dh.all_to_cpu()
expected_result = np.zeros(size)
expected_result[1:3, 1] = 1
np.testing.assert_almost_equal(dh.gather_array(dst_field.name), expected_result)
@pytest.mark.parametrize("target", [Target.CPU, Target.GPU])
def test_symbols_in_slice(target):
if target == Target.GPU:
pytest.xfail("Iteration slices including arbitrary symbols are currently broken on GPU")
size = (4, 4)
dh = create_data_handling(size, default_target=target, default_ghost_layers=0)
src_field = dh.add_array("src", 1)
dst_field = dh.add_array("dst", 1)
dh.fill(src_field.name, 1.0, ghost_layers=True)
dh.fill(dst_field.name, 0.0, ghost_layers=True)
a, b = sp.symbols("a b")
update_rule = Assignment(
dst_field[0, 0],
(
a * src_field[0, 1]
+ a * src_field[0, -1]
+ b * src_field[1, 0]
+ b * src_field[-1, 0]
)
/ 4,
)
x_end = TypedSymbol("x_end", "int")
s = make_slice[1:x_end, 1]
x_end_value = size[1] - 1
kernel = create_kernel(sympy_cse_on_assignment_list([update_rule]), iteration_slice=s).compile()
kernel = create_kernel(
sympy_cse_on_assignment_list([update_rule]), iteration_slice=s, target=target
).compile()
if target == Target.GPU:
dh.all_to_gpu()
dh.run_kernel(kernel, a=1.0, b=1.0, x_end=x_end_value)
kernel(src=src_arr, dst=dst_arr, a=1.0, b=1.0, x_end=x_end_value)
if target == Target.GPU:
dh.all_to_cpu()
expected_result = np.zeros(size)
expected_result[1:x_end_value, 1] = 1
np.testing.assert_almost_equal(expected_result, dst_arr)
np.testing.assert_almost_equal(dh.gather_array(dst_field.name), expected_result)
......@@ -143,10 +143,10 @@ def test_aligned_and_nt_stores(openmp, instruction_set=instruction_set):
# Without the base pointer spec, the inner store is not aligned
config = pystencils.config.CreateKernelConfig(target=dh.default_target, cpu_vectorize_info=opt, cpu_openmp=openmp)
ast = ps.create_kernel(update_rule, config=config)
if instruction_set in ['sse'] or instruction_set.startswith('avx'):
if instruction_set in ['sse'] or instruction_set.startswith('avx') or instruction_set.startswith('sve'):
assert 'stream' in ast.instruction_set
assert 'streamFence' in ast.instruction_set
if instruction_set in ['neon', 'vsx'] or instruction_set.startswith('sve'):
if instruction_set in ['neon', 'vsx', 'rvv']:
assert 'cachelineZero' in ast.instruction_set
if instruction_set in ['vsx']:
assert 'storeAAndFlushCacheline' in ast.instruction_set
......@@ -331,7 +331,7 @@ def test_logical_operators(instruction_set=instruction_set):
def test_hardware_query():
assert {'sse', 'neon', 'sve', 'vsx', 'rvv'}.intersection(supported_instruction_sets)
assert {'sse', 'neon', 'sve', 'sve2', 'sme', 'vsx', 'rvv'}.intersection(supported_instruction_sets)
def test_vectorised_pow(instruction_set=instruction_set):
......
......@@ -8,8 +8,10 @@ import sympy as sp
import pystencils as ps
from pystencils.backends.simd_instruction_sets import (get_cacheline_size, get_supported_instruction_sets,
get_vector_instruction_set)
from . import test_vectorization
from pystencils.enums import Target
from pystencils.typing import CFunction
from . import test_vectorization
supported_instruction_sets = get_supported_instruction_sets() if get_supported_instruction_sets() else []
......@@ -39,7 +41,7 @@ def test_vectorisation_varying_arch(instruction_set):
@pytest.mark.parametrize('dtype', ('float32', 'float64'))
@pytest.mark.parametrize('instruction_set', supported_instruction_sets)
def test_vectorized_abs(instruction_set, dtype):
def test_vectorized_abs_field(instruction_set, dtype):
"""Some instructions sets have abs, some don't.
Furthermore, the special treatment of unary minus makes this data type-sensitive too.
"""
......@@ -58,24 +60,47 @@ def test_vectorized_abs(instruction_set, dtype):
np.testing.assert_equal(np.sum(dst[1:-1, 1:-1]), 2 ** 2 * 2 ** 3)
@pytest.mark.parametrize('instruction_set', supported_instruction_sets)
def test_vectorized_abs_scalar(instruction_set):
"""Some instructions sets have abs, some don't.
Furthermore, the special treatment of unary minus makes this data type-sensitive too.
"""
arr = np.zeros((2 ** 2 + 2, 2 ** 3 + 2), dtype="float64")
f = ps.fields(f=arr)
update_rule = [ps.Assignment(f.center(), sp.Abs(sp.Symbol("a")))]
config = pystencils.config.CreateKernelConfig(cpu_vectorize_info={'instruction_set': instruction_set})
ast = ps.create_kernel(update_rule, config=config)
func = ast.compile()
func(f=arr, a=-1)
np.testing.assert_equal(np.sum(arr[1:-1, 1:-1]), 2 ** 2 * 2 ** 3)
@pytest.mark.parametrize('dtype', ('float32', 'float64'))
@pytest.mark.parametrize('instruction_set', supported_instruction_sets)
def test_strided(instruction_set, dtype):
@pytest.mark.parametrize('nontemporal', [False, True])
def test_strided(instruction_set, dtype, nontemporal):
f, g = ps.fields(f"f, g : {dtype}[2D]")
update_rule = [ps.Assignment(g[0, 0], f[0, 0] + f[-1, 0] + f[1, 0] + f[0, 1] + f[0, -1] + 42.0)]
config = pystencils.config.CreateKernelConfig(cpu_vectorize_info={'instruction_set': instruction_set,
'nontemporal': nontemporal},
default_number_float=dtype)
if 'storeS' not in get_vector_instruction_set(dtype, instruction_set) \
and instruction_set not in ['avx512', 'avx512vl', 'rvv'] and not instruction_set.startswith('sve'):
with pytest.warns(UserWarning) as warn:
config = pystencils.config.CreateKernelConfig(cpu_vectorize_info={'instruction_set': instruction_set},
default_number_float=dtype)
ast = ps.create_kernel(update_rule, config=config)
assert 'Could not vectorize loop' in warn[0].message.args[0]
else:
with pytest.warns(None) as warn:
config = pystencils.config.CreateKernelConfig(cpu_vectorize_info={'instruction_set': instruction_set},
default_number_float=dtype)
ast = ps.create_kernel(update_rule, config=config)
assert len(warn) == 0
instruction = 'streamS' if nontemporal and 'streamS' in ast.instruction_set else 'storeS'
assert ast.instruction_set[instruction].split('{')[0] in ps.get_code_str(ast)
instruction = 'cachelineZero'
if instruction in ast.instruction_set:
assert ast.instruction_set[instruction] not in ps.get_code_str(ast)
# ps.show_code(ast)
func = ast.compile()
......@@ -226,7 +251,7 @@ def test_issue62(dtype, instruction_set, padding):
dy = sp.Symbol("dy")
src, dst, rhs = ps.fields(f"src, src_tmp, rhs: {dtype}[2D]", layout='fzyx')
up = ps.Assignment(src[0, 0], ((dy ** 2 * (src[1, 0] + src[-1, 0]))
up = ps.Assignment(dst[0, 0], ((dy ** 2 * (src[1, 0] + src[-1, 0]))
+ (dx ** 2 * (src[0, 1] + src[0, -1]))
- (rhs[0, 0] * dx ** 2 * dy ** 2)) / (2 * (dx ** 2 + dy ** 2)))
......@@ -269,6 +294,22 @@ def test_div_and_unevaluated_expr(dtype, instruction_set):
assert 'pow' not in code
@pytest.mark.parametrize('dtype', ('float32', 'float64'))
@pytest.mark.parametrize('instruction_set', ('sve', 'sve2', 'sme', 'rvv'))
def test_check_ast_parameters_sizeless(dtype, instruction_set):
f, g = ps.fields(f"f, g: {dtype}[3D]", layout='fzyx')
update_rule = [ps.Assignment(g.center(), 2 * f.center())]
config = pystencils.config.CreateKernelConfig(data_type=dtype,
cpu_vectorize_info={'instruction_set': instruction_set})
ast = ps.create_kernel(update_rule, config=config)
ast_symbols = [p.symbol for p in ast.get_parameters()]
assert ast.instruction_set['width'] not in ast_symbols
assert ast.instruction_set['intwidth'] not in ast_symbols
# TODO this test case needs a complete rework of the vectoriser. The reason is that the vectoriser does not
# TODO vectorise symbols at the moment because they could be strides or field sizes, thus involved in pointer arithmetic
# TODO This means that the vectoriser only works if fields are involved on the rhs.
......
This diff is collapsed.