Skip to content
Snippets Groups Projects

Vectorization improvements

Merged Michael Kuron requested to merge ppc into master
1 file
+10
−17
Compare changes
  • Side-by-side
  • Inline
@@ -13,7 +13,7 @@ def get_argument_string(function_shortcut):
@@ -13,7 +13,7 @@ def get_argument_string(function_shortcut):
return arg_string
return arg_string
def get_vector_instruction_set_arm(data_type='double', instruction_set='neon', q_registers=True):
def get_vector_instruction_set_arm(data_type='double', instruction_set='neon'):
if instruction_set != 'neon':
if instruction_set != 'neon':
raise NotImplementedError(instruction_set)
raise NotImplementedError(instruction_set)
@@ -42,16 +42,9 @@ def get_vector_instruction_set_arm(data_type='double', instruction_set='neon', q
@@ -42,16 +42,9 @@ def get_vector_instruction_set_arm(data_type='double', instruction_set='neon', q
'float': 32,
'float': 32,
'int': 32}
'int': 32}
if q_registers is True:
width = 128 // bits[data_type]
q_reg = 'q'
intwidth = 128 // bits['int']
width = 128 // bits[data_type]
suffix = f'q_f{bits[data_type]}'
intwidth = 128 // bits['int']
suffix = f'q_f{bits[data_type]}'
else:
q_reg = ''
width = 64 // bits[data_type]
intwidth = 64 // bits['int']
suffix = f'_f{bits[data_type]}'
result = dict()
result = dict()
@@ -63,10 +56,10 @@ def get_vector_instruction_set_arm(data_type='double', instruction_set='neon', q
@@ -63,10 +56,10 @@ def get_vector_instruction_set_arm(data_type='double', instruction_set='neon', q
result[intrinsic_id] = 'v' + name + suffix + arg_string
result[intrinsic_id] = 'v' + name + suffix + arg_string
result['makeVecConst'] = f'vdup{q_reg}_n_f{bits[data_type]}' + '({0})'
result['makeVecConst'] = f'vdupq_n_f{bits[data_type]}' + '({0})'
result['makeVec'] = f'makeVec_f{bits[data_type]}' + '(' + ", ".join(['{' + str(i) + '}' for i in range(width)]) + \
result['makeVec'] = f'makeVec_f{bits[data_type]}' + '(' + ", ".join(['{' + str(i) + '}' for i in range(width)]) + \
')'
')'
result['makeVecConstInt'] = f'vdup{q_reg}_n_s{bits["int"]}' + '({0})'
result['makeVecConstInt'] = f'vdupq_n_s{bits["int"]}' + '({0})'
result['makeVecInt'] = f'makeVec_s{bits["int"]}' + '({0}, {1}, {2}, {3})'
result['makeVecInt'] = f'makeVec_s{bits["int"]}' + '({0}, {1}, {2}, {3})'
result['+int'] = f"vaddq_s{bits['int']}" + "({0}, {1})"
result['+int'] = f"vaddq_s{bits['int']}" + "({0}, {1})"
@@ -80,11 +73,11 @@ def get_vector_instruction_set_arm(data_type='double', instruction_set='neon', q
@@ -80,11 +73,11 @@ def get_vector_instruction_set_arm(data_type='double', instruction_set='neon', q
result['bool'] = f'uint{bits[data_type]}x{width}_t'
result['bool'] = f'uint{bits[data_type]}x{width}_t'
result['headers'] = ['<arm_neon.h>', '"arm_neon_helpers.h"']
result['headers'] = ['<arm_neon.h>', '"arm_neon_helpers.h"']
result['!='] = f'vmvn{q_reg}_u{bits[data_type]}({result["=="]})'
result['!='] = f'vmvnq_u{bits[data_type]}({result["=="]})'
result['&'] = f'vand{q_reg}_u{bits[data_type]}' + '({0}, {1})'
result['&'] = f'vandq_u{bits[data_type]}' + '({0}, {1})'
result['|'] = f'vorr{q_reg}_u{bits[data_type]}' + '({0}, {1})'
result['|'] = f'vorrq_u{bits[data_type]}' + '({0}, {1})'
result['blendv'] = f'vbsl{q_reg}_f{bits[data_type]}' + '({2}, {1}, {0})'
result['blendv'] = f'vbslq_f{bits[data_type]}' + '({2}, {1}, {0})'
result['any'] = f'vaddlvq_u8(vreinterpretq_u8_u{bits[data_type]}({{0}})) > 0'
result['any'] = f'vaddlvq_u8(vreinterpretq_u8_u{bits[data_type]}({{0}})) > 0'
result['all'] = f'vaddlvq_u8(vreinterpretq_u8_u{bits[data_type]}({{0}})) == 16*0xff'
result['all'] = f'vaddlvq_u8(vreinterpretq_u8_u{bits[data_type]}({{0}})) == 16*0xff'
Loading