Skip to content
Snippets Groups Projects
Commit 30341d80 authored by Michael Kuron
Browse files

remove SVE emulation

parent fb5f8713
No related branches found
No related tags found
1 merge request: !232 (SVE vectorization)
Pipeline #31609 passed
......@@ -16,7 +16,6 @@ RELEASE-VERSION
test-report
pystencils/boundaries/createindexlistcython.c
pystencils/boundaries/createindexlistcython.*.so
pystencils/include/farm_sve.h
pystencils_tests/tmp
pystencils_tests/kerncraft_inputs/.2d-5pt.c_kerncraft/
pystencils_tests/kerncraft_inputs/.3d-7pt.c_kerncraft/
\ No newline at end of file
......@@ -22,7 +22,7 @@ def aligned_empty(shape, byte_alignment=True, dtype=np.float64, byte_offset=0, o
get_vector_instruction_set)
type_name = BasicType.numpy_name_to_c(np.dtype(dtype).name)
instruction_sets = get_supported_instruction_sets(including_emulated=True)
instruction_sets = get_supported_instruction_sets()
if instruction_sets is None:
byte_alignment = 64
elif byte_alignment == 'cacheline':
......
......@@ -22,9 +22,8 @@ def get_vector_instruction_set_arm(data_type='double', instruction_set='neon'):
raise NotImplementedError("sizeless SVE is not implemented")
if instruction_set.startswith('sve'):
emulated = instruction_set.endswith('emu')
cmp = 'cmp'
bitwidth = int(instruction_set[3:-3] if emulated else instruction_set[3:])
bitwidth = int(instruction_set[3:])
elif instruction_set == 'neon':
cmp = 'c'
bitwidth = 128
......@@ -89,12 +88,12 @@ def get_vector_instruction_set_arm(data_type='double', instruction_set='neon'):
result['+int'] = f"svadd_s{bits['int']}_x(svptrue_b{bits['int']}(), " + "{0}, {1})"
attr = f' __attribute__((arm_sve_vector_bits({bitwidth})))' if not emulated else ''
attr = f' __attribute__((arm_sve_vector_bits({bitwidth})))'
result[data_type] = f'svfloat{bits[data_type]}_t{attr}'
result['int'] = f'svint{bits["int"]}_t{attr}'
result['bool'] = f'svbool_t{attr}'
result['headers'] = ['<farm_sve.h>' if emulated else '<arm_sve.h>', '"arm_neon_helpers.h"']
result['headers'] = ['<arm_sve.h>', '"arm_neon_helpers.h"']
result['&'] = f'svand_b_z(svptrue_b{bits[data_type]}(),' + ' {0}, {1})'
result['|'] = f'svorr_b_z(svptrue_b{bits[data_type]}(),' + ' {0}, {1})'
......@@ -126,7 +125,8 @@ def get_vector_instruction_set_arm(data_type='double', instruction_set='neon'):
result['any'] = f'vaddlvq_u8(vreinterpretq_u8_u{bits[data_type]}({{0}})) > 0'
result['all'] = f'vaddlvq_u8(vreinterpretq_u8_u{bits[data_type]}({{0}})) == 16*0xff'
if instruction_set == 'neon' or not emulated:
if bitwidth & (bitwidth - 1) == 0:
# only power-of-2 vector sizes will evenly divide a cacheline
result['cachelineSize'] = 'cachelineSize()'
result['cachelineZero'] = 'cachelineZero((void*) {0})'
......
......@@ -19,15 +19,12 @@ _cache = None
_cachelinesize = None
def get_supported_instruction_sets(including_emulated=False):
def get_supported_instruction_sets():
"""List of supported instruction sets on current hardware, or None if query failed."""
global _cache
if _cache is not None:
return _cache.copy()
if platform.system() == 'Darwin' and platform.machine() == 'arm64': # not supported by cpuinfo
if including_emulated and os.path.exists(os.path.join(os.path.dirname(__file__), '..', 'include',
'farm_sve.h')):
return ['sve512emu', 'neon']
return ['neon']
elif platform.machine().startswith('ppc64'): # no flags reported by cpuinfo
import subprocess
......@@ -60,9 +57,6 @@ def get_supported_instruction_sets(including_emulated=False):
if flags.issuperset(required_avx512_flags):
result.append("avx512")
if flags.issuperset(required_neon_flags):
if including_emulated and os.path.exists(os.path.join(os.path.dirname(__file__), '..', 'include',
'farm_sve.h')):
result.append('sve512emu')
result.append("neon")
if flags.issuperset(required_sve_flags):
length_file = '/proc/sys/abi/sve_default_vector_length'
......
......@@ -7,7 +7,7 @@ from pystencils.astnodes import Block, Conditional
from pystencils.backends.simd_instruction_sets import get_supported_instruction_sets, get_vector_instruction_set
from pystencils.cpu.vectorization import vec_all, vec_any
supported_instruction_sets = get_supported_instruction_sets(including_emulated=True) if get_supported_instruction_sets() else []
supported_instruction_sets = get_supported_instruction_sets() if get_supported_instruction_sets() else []
@pytest.mark.parametrize('instruction_set', supported_instruction_sets)
@pytest.mark.parametrize('dtype', ('float', 'double'))
......
......@@ -11,7 +11,7 @@ from pystencils.data_types import TypedSymbol
RNGs = {('philox', 'float'): PhiloxFourFloats, ('philox', 'double'): PhiloxTwoDoubles,
('aesni', 'float'): AESNIFourFloats, ('aesni', 'double'): AESNITwoDoubles}
instruction_sets = get_supported_instruction_sets(including_emulated=True)
instruction_sets = get_supported_instruction_sets()
if get_compiler_config()['os'] == 'windows':
# skip instruction sets supported by the CPU but not by the compiler
if 'avx' in instruction_sets and ('/arch:avx2' not in get_compiler_config()['flags'].lower()
......
......@@ -7,7 +7,7 @@ from pystencils.cpu.vectorization import vectorize
from pystencils.fast_approximation import insert_fast_sqrts, insert_fast_divisions
from pystencils.transformations import replace_inner_stride_with_one
supported_instruction_sets = get_supported_instruction_sets(including_emulated=True)
supported_instruction_sets = get_supported_instruction_sets()
if supported_instruction_sets:
instruction_set = supported_instruction_sets[-1]
else:
......@@ -217,7 +217,7 @@ def test_logical_operators():
def test_hardware_query():
assert set(['sse', 'neon', 'vsx']).intersection(supported_instruction_sets) or \
any([iset.startswith('sve') and not iset.endswith('emu') for iset in supported_instruction_sets])
any([iset.startswith('sve') for iset in supported_instruction_sets])
def test_vectorised_pow():
......
......@@ -8,7 +8,7 @@ from pystencils.backends.simd_instruction_sets import (get_cacheline_size, get_s
get_vector_instruction_set)
from pystencils.data_types import cast_func, VectorType
supported_instruction_sets = get_supported_instruction_sets(including_emulated=True) if get_supported_instruction_sets() else []
supported_instruction_sets = get_supported_instruction_sets() if get_supported_instruction_sets() else []
@pytest.mark.parametrize('instruction_set', supported_instruction_sets)
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment