Skip to content
Snippets Groups Projects

Improve ARM64 support

Merged Michael Kuron requested to merge apple-arm64 into master
Files
13
@@ -33,21 +33,21 @@ def get_vector_instruction_set_arm(data_type='double', instruction_set='neon', q
'<': 'clt[0, 1]',
'>=': 'cge[0, 1]',
'>': 'cgt[0, 1]',
# '&': 'and[0, 1]', -> only available for integer values
# '|': 'orr[0, 1]'
}
bits = {'double': 64,
'float': 32}
'float': 32,
'int': 32}
if q_registers is True:
q_reg = 'q'
width = 128 // bits[data_type]
intwidth = 128 // bits[data_type]
suffix = f'q_f{bits[data_type]}'
else:
q_reg = ''
width = 64 // bits[data_type]
intwidth = 64 // bits[data_type]
suffix = f'_f{bits[data_type]}'
result = dict()
@@ -60,16 +60,26 @@ def get_vector_instruction_set_arm(data_type='double', instruction_set='neon', q
result[intrinsic_id] = 'v' + name + suffix + arg_string
result['makeVecConst'] = 'vdup' + q_reg + '_n_f' + str(bits[data_type]) + '({0})'
result['makeVec'] = 'vdup' + q_reg + '_n_f' + str(bits[data_type]) + '({0})'
result['makeVecConst'] = f'vdup{q_reg}_n_f{bits[data_type]}' + '({0})'
result['makeVec'] = f'vdup{q_reg}_n_f{bits[data_type]}' + '({0})'
result['makeVecConstInt'] = f'vdup{q_reg}_n_s{bits["int"]}' + '({0})'
result['makeVecInt'] = f'vdup{q_reg}_n_s{bits["int"]}' + '({0})'
result['+int'] = f"vaddq_s{bits['int']}" + "({0}, {1})"
result['rsqrt'] = None
result['width'] = width
result['double'] = 'float64x' + str(width) + '_t'
result['float'] = 'float32x' + str(width * 2) + '_t'
result['intwidth'] = intwidth
result[data_type] = f'float{bits[data_type]}x{width}_t'
result['int'] = f'int{bits["int"]}x{bits[data_type]}_t'
result['bool'] = f'uint{bits[data_type]}x{width}_t'
result['headers'] = ['<arm_neon.h>']
result['!='] = 'vmvnq_u%d(%s)' % (bits[data_type], result['=='])
result['!='] = f'vmvn{q_reg}_u{bits[data_type]}({result["=="]})'
result['&'] = f'vand{q_reg}_u{bits[data_type]}' + '({0}, {1})'
result['|'] = f'vorr{q_reg}_u{bits[data_type]}' + '({0}, {1})'
result['blendv'] = f'vbsl{q_reg}_f{bits[data_type]}' + '({2}, {1}, {0})'
return result
Loading