Skip to content
Snippets Groups Projects

WIP: ARM NEON vectorization

Closed Michael Kuron requested to merge armneon into master
Compare and Show latest version
1 file
+3
-3
Preferences
Compare changes
@@ -142,8 +142,8 @@ def get_vector_instruction_set_x86(data_type, instruction_set):
result['any'] = '!_ktestz_mask%d_u8({0}, {0})' % (size, )
result['all'] = '_kortestc_mask%d_u8({0}, {0})' % (size, )
result['blendv'] = '%s_mask_blend_%s({2}, {0}, {1})' % (pre, suf)
-result['rsqrt'] = "_mm512_rsqrt14_%s({0})" % (suf,)
-result['abs'] = "_mm512_abs_%s({0})" % (suf,)
+result['rsqrt'] = "%s_rsqrt14_%s({0})" % (pre, suf)
+result['abs'] = "%s_abs_%s({0})" % (pre, suf)
result['bool'] = "__mmask%d" % (size,)
params = " | ".join(["({{{i}}} ? {power} : 0)".format(i=i, power=2 ** i) for i in range(8)])
@@ -152,7 +152,7 @@ def get_vector_instruction_set_x86(data_type, instruction_set):
result['makeVecConstBool'] = f"__mmask8(({params}) )"
if instruction_set == 'avx' and data_type == 'float':
-result['rsqrt'] = "_mm256_rsqrt_ps({0})"
+result['rsqrt'] = "%s_rsqrt_%s({0})" % (pre, suf)
return result