Skip to content
Snippets Groups Projects
Commit 6562a56c authored by Michael Kuron's avatar Michael Kuron :mortar_board:
Browse files

Support shorter SVE vectors via predicates

parent 30341d80
No related branches found
No related tags found
1 merge request!232SVE vectorization
Pipeline #31622 passed
...@@ -65,12 +65,14 @@ def get_vector_instruction_set_arm(data_type='double', instruction_set='neon'): ...@@ -65,12 +65,14 @@ def get_vector_instruction_set_arm(data_type='double', instruction_set='neon'):
result = dict() result = dict()
result['bytes'] = bitwidth // 8 result['bytes'] = bitwidth // 8
predicate = f'{prefix}whilelt_b{bits[data_type]}(0, {width})'
int_predicate = f'{prefix}whilelt_b{bits["int"]}(0, {intwidth})'
for intrinsic_id, function_shortcut in base_names.items(): for intrinsic_id, function_shortcut in base_names.items():
function_shortcut = function_shortcut.strip() function_shortcut = function_shortcut.strip()
name = function_shortcut[:function_shortcut.index('[')] name = function_shortcut[:function_shortcut.index('[')]
arg_string = get_argument_string(function_shortcut, first=f'{prefix}ptrue_b{bits[data_type]}()' arg_string = get_argument_string(function_shortcut, first=predicate if prefix == 'sv' else '')
if prefix == 'sv' else '')
if prefix == 'sv' and not name.startswith('ld') and not name.startswith('st') and not name.startswith(cmp): if prefix == 'sv' and not name.startswith('ld') and not name.startswith('st') and not name.startswith(cmp):
undef = '_x' undef = '_x'
else: else:
...@@ -86,20 +88,19 @@ def get_vector_instruction_set_arm(data_type='double', instruction_set='neon'): ...@@ -86,20 +88,19 @@ def get_vector_instruction_set_arm(data_type='double', instruction_set='neon'):
result['makeVecConstInt'] = f'svdup_s{bits["int"]}' + '({0})' result['makeVecConstInt'] = f'svdup_s{bits["int"]}' + '({0})'
result['makeVecIndex'] = f'svindex_s{bits["int"]}' + '({0}, {1})' result['makeVecIndex'] = f'svindex_s{bits["int"]}' + '({0}, {1})'
result['+int'] = f"svadd_s{bits['int']}_x(svptrue_b{bits['int']}(), " + "{0}, {1})" result['+int'] = f"svadd_s{bits['int']}_x({int_predicate}, " + "{0}, {1})"
attr = f' __attribute__((arm_sve_vector_bits({bitwidth})))' result[data_type] = f'svfloat{bits[data_type]}_st'
result[data_type] = f'svfloat{bits[data_type]}_t{attr}' result['int'] = f'svint{bits["int"]}_st'
result['int'] = f'svint{bits["int"]}_t{attr}' result['bool'] = 'svbool_st'
result['bool'] = f'svbool_t{attr}'
result['headers'] = ['<arm_sve.h>', '"arm_neon_helpers.h"'] result['headers'] = ['<arm_sve.h>', '"arm_neon_helpers.h"']
result['&'] = f'svand_b_z(svptrue_b{bits[data_type]}(),' + ' {0}, {1})' result['&'] = f'svand_b_z({predicate},' + ' {0}, {1})'
result['|'] = f'svorr_b_z(svptrue_b{bits[data_type]}(),' + ' {0}, {1})' result['|'] = f'svorr_b_z({predicate},' + ' {0}, {1})'
result['blendv'] = f'svsel_f{bits[data_type]}' + '({2}, {1}, {0})' result['blendv'] = f'svsel_f{bits[data_type]}' + '({2}, {1}, {0})'
result['any'] = f'svptest_any(svptrue_b{bits[data_type]}(), {{0}}) > 0' result['any'] = f'svptest_any({predicate}, {{0}})'
result['all'] = f'svcntp_b{bits[data_type]}(svptrue_b{bits[data_type]}(), {{0}}) == {width}' result['all'] = f'svcntp_b{bits[data_type]}({predicate}, {{0}}) == {width}'
result['compile_flags'] = [f'-msve-vector-bits={bitwidth}'] result['compile_flags'] = [f'-msve-vector-bits={bitwidth}']
else: else:
......
import os import math
import platform import platform
from ctypes import CDLL
from pystencils.backends.x86_instruction_sets import get_vector_instruction_set_x86 from pystencils.backends.x86_instruction_sets import get_vector_instruction_set_x86
from pystencils.backends.arm_instruction_sets import get_vector_instruction_set_arm from pystencils.backends.arm_instruction_sets import get_vector_instruction_set_arm
...@@ -59,10 +60,17 @@ def get_supported_instruction_sets(): ...@@ -59,10 +60,17 @@ def get_supported_instruction_sets():
if flags.issuperset(required_neon_flags): if flags.issuperset(required_neon_flags):
result.append("neon") result.append("neon")
if flags.issuperset(required_sve_flags): if flags.issuperset(required_sve_flags):
length_file = '/proc/sys/abi/sve_default_vector_length' if platform.system() == 'Linux':
if os.path.exists(length_file): libc = CDLL('libc.so.6')
length = 8 * int(open(length_file, 'r').read()) native_length = 8 * libc.prctl(51, 0, 0, 0, 0) # PR_SVE_GET_VL
result.append(f"sve{length}") if native_length < 0:
raise OSError("SVE length query failed")
pwr2_length = int(2**math.floor(math.log2(native_length)))
if pwr2_length % 256 == 0:
result.append(f"sve{pwr2_length//2}")
if native_length != pwr2_length:
result.append(f"sve{pwr2_length}")
result.append(f"sve{native_length}")
else: else:
result.append("sve") result.append("sve")
return result return result
......
#ifdef __ARM_NEON
#include <arm_neon.h> #include <arm_neon.h>
#endif
#if defined(__ARM_FEATURE_SVE) && defined(__ARM_FEATURE_SVE_BITS) && __ARM_FEATURE_SVE_BITS > 0
#include <arm_sve.h>
typedef svbool_t svbool_st __attribute__((arm_sve_vector_bits(__ARM_FEATURE_SVE_BITS)));
typedef svfloat32_t svfloat32_st __attribute__((arm_sve_vector_bits(__ARM_FEATURE_SVE_BITS)));
typedef svfloat64_t svfloat64_st __attribute__((arm_sve_vector_bits(__ARM_FEATURE_SVE_BITS)));
typedef svint32_t svint32_st __attribute__((arm_sve_vector_bits(__ARM_FEATURE_SVE_BITS)));
#endif
#ifdef __ARM_NEON
inline float32x4_t makeVec_f32(float a, float b, float c, float d) inline float32x4_t makeVec_f32(float a, float b, float c, float d)
{ {
alignas(16) float data[4] = {a, b, c, d}; alignas(16) float data[4] = {a, b, c, d};
...@@ -17,6 +29,7 @@ inline int32x4_t makeVec_s32(int a, int b, int c, int d) ...@@ -17,6 +29,7 @@ inline int32x4_t makeVec_s32(int a, int b, int c, int d)
alignas(16) int data[4] = {a, b, c, d}; alignas(16) int data[4] = {a, b, c, d};
return vld1q_s32(data); return vld1q_s32(data);
} }
#endif
inline void cachelineZero(void * p) { inline void cachelineZero(void * p) {
__asm__ volatile("dc zva, %0"::"r"(p)); __asm__ volatile("dc zva, %0"::"r"(p));
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment