Skip to content
Snippets Groups Projects
Commit 3ab12930 authored by Markus Holzer
Browse files

Fix vectorisation for PowerPC

parent 4b2bd4d3
No related branches found
No related tags found
No related merge requests found
...@@ -82,10 +82,6 @@ try: ...@@ -82,10 +82,6 @@ try:
except ImportError: except ImportError:
collect_ignore += [os.path.join(SCRIPT_FOLDER, "pystencils/datahandling/vtk.py")] collect_ignore += [os.path.join(SCRIPT_FOLDER, "pystencils/datahandling/vtk.py")]
# TODO: Remove if Ubuntu 18.04 is no longer supported
if pytest_version < 50403:
collect_ignore += [os.path.join(SCRIPT_FOLDER, "pystencils_tests/test_jupyter_extensions.ipynb")]
collect_ignore += [os.path.join(SCRIPT_FOLDER, 'setup.py')] collect_ignore += [os.path.join(SCRIPT_FOLDER, 'setup.py')]
for root, sub_dirs, files in os.walk('.'): for root, sub_dirs, files in os.walk('.'):
......
...@@ -77,6 +77,8 @@ class CachelineSize(ast.Node): ...@@ -77,6 +77,8 @@ class CachelineSize(ast.Node):
def vectorize(kernel_ast: ast.KernelFunction, instruction_set: str = 'best', def vectorize(kernel_ast: ast.KernelFunction, instruction_set: str = 'best',
assume_aligned: bool = False, nontemporal: Union[bool, Container[Union[str, Field]]] = False, assume_aligned: bool = False, nontemporal: Union[bool, Container[Union[str, Field]]] = False,
assume_inner_stride_one: bool = False, assume_sufficient_line_padding: bool = True): assume_inner_stride_one: bool = False, assume_sufficient_line_padding: bool = True):
# TODO we first introduce the remainder loop and then check if we can even vectorise. Maybe first copy the ast
# and return the copied version on failure
"""Explicit vectorization using SIMD vectorization via intrinsics. """Explicit vectorization using SIMD vectorization via intrinsics.
Args: Args:
......
...@@ -61,33 +61,46 @@ def test_vectorized_abs(instruction_set, dtype): ...@@ -61,33 +61,46 @@ def test_vectorized_abs(instruction_set, dtype):
@pytest.mark.parametrize('dtype', ('float', 'double')) @pytest.mark.parametrize('dtype', ('float', 'double'))
@pytest.mark.parametrize('instruction_set', supported_instruction_sets) @pytest.mark.parametrize('instruction_set', supported_instruction_sets)
def test_strided(instruction_set, dtype): def test_strided(instruction_set, dtype):
npdtype = np.float64 if dtype == 'double' else np.float32 type_string = "float64" if dtype == 'double' else "float32"
f, g = ps.fields(f"f, g : float{64 if dtype=='double' else 32}[2D]") f, g = ps.fields(f"f, g : {type_string}[2D]")
update_rule = [ps.Assignment(g[0, 0], f[0, 0] + f[-1, 0] + f[1, 0] + f[0, 1] + f[0, -1] + 42.0)] update_rule = [ps.Assignment(g[0, 0], f[0, 0] + f[-1, 0] + f[1, 0] + f[0, 1] + f[0, -1] + 42.0)]
if 'storeS' not in get_vector_instruction_set(dtype, instruction_set) and instruction_set not in ['avx512', 'rvv'] and not instruction_set.startswith('sve'): if 'storeS' not in get_vector_instruction_set(dtype, instruction_set) and instruction_set not in ['avx512', 'rvv'] and not instruction_set.startswith('sve'):
with pytest.warns(UserWarning) as warn: with pytest.warns(UserWarning) as warn:
config = pystencils.config.CreateKernelConfig(cpu_vectorize_info={'instruction_set': instruction_set}, config = pystencils.config.CreateKernelConfig(cpu_vectorize_info={'instruction_set': instruction_set},
default_number_float=npdtype) default_number_float=type_string)
ast = ps.create_kernel(update_rule, config=config) ast = ps.create_kernel(update_rule, config=config)
assert 'Could not vectorize loop' in warn[0].message.args[0] assert 'Could not vectorize loop' in warn[0].message.args[0]
else: else:
with pytest.warns(None) as warn: with pytest.warns(None) as warn:
config = pystencils.config.CreateKernelConfig(cpu_vectorize_info={'instruction_set': instruction_set}, config = pystencils.config.CreateKernelConfig(cpu_vectorize_info={'instruction_set': instruction_set},
default_number_float=npdtype) default_number_float=type_string)
ast = ps.create_kernel(update_rule, config=config) ast = ps.create_kernel(update_rule, config=config)
assert len(warn) == 0 assert len(warn) == 0
# ps.show_code(ast)
ps.show_code(ast)
func = ast.compile() func = ast.compile()
ref_func = ps.create_kernel(update_rule).compile() ref_config = pystencils.config.CreateKernelConfig(default_number_float=type_string)
ref_func = ps.create_kernel(update_rule, config=ref_config).compile()
arr = np.random.random((23 + 2, 17 + 2)).astype(npdtype) # For some reason other array creations fail on the emulated ppc pipeline
dst = np.zeros_like(arr, dtype=npdtype) size = (25, 19)
ref = np.zeros_like(arr, dtype=npdtype) arr = np.zeros(size).astype(type_string)
for i in range(size[0]):
for j in range(size[1]):
arr[i, j] = i * j
dst = np.zeros_like(arr, dtype=type_string)
ref = np.zeros_like(arr, dtype=type_string)
func(g=dst, f=arr) func(g=dst, f=arr)
ref_func(g=ref, f=arr) ref_func(g=ref, f=arr)
np.testing.assert_almost_equal(dst, ref, 13 if dtype == 'double' else 5)
print("dst: ", dst)
print("np array: ", arr)
np.testing.assert_almost_equal(dst[1:-1, 1:-1], ref[1:-1, 1:-1], 13 if dtype == 'double' else 5)
@pytest.mark.parametrize('dtype', ('float', 'double')) @pytest.mark.parametrize('dtype', ('float', 'double'))
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment