Skip to content
Snippets Groups Projects

Improve non-temporal stores

Merged: Michael Kuron requested to merge `nontemporal` into `master`
Files: 13
@@ -28,7 +28,6 @@ def get_vector_instruction_set_arm(data_type='double', instruction_set='neon'):
@@ -28,7 +28,6 @@ def get_vector_instruction_set_arm(data_type='double', instruction_set='neon'):
'loadA': 'ld1[0]',
'loadA': 'ld1[0]',
'storeU': 'st1[0, 1]',
'storeU': 'st1[0, 1]',
'storeA': 'st1[0, 1]',
'storeA': 'st1[0, 1]',
'stream': 'st1[0, 1]',
'abs': 'abs[0]',
'abs': 'abs[0]',
'==': 'ceq[0, 1]',
'==': 'ceq[0, 1]',
@@ -47,6 +46,7 @@ def get_vector_instruction_set_arm(data_type='double', instruction_set='neon'):
@@ -47,6 +46,7 @@ def get_vector_instruction_set_arm(data_type='double', instruction_set='neon'):
suffix = f'q_f{bits[data_type]}'
suffix = f'q_f{bits[data_type]}'
result = dict()
result = dict()
 
result['bytes'] = 16
for intrinsic_id, function_shortcut in base_names.items():
for intrinsic_id, function_shortcut in base_names.items():
function_shortcut = function_shortcut.strip()
function_shortcut = function_shortcut.strip()
@@ -81,4 +81,7 @@ def get_vector_instruction_set_arm(data_type='double', instruction_set='neon'):
@@ -81,4 +81,7 @@ def get_vector_instruction_set_arm(data_type='double', instruction_set='neon'):
result['any'] = f'vaddlvq_u8(vreinterpretq_u8_u{bits[data_type]}({{0}})) > 0'
result['any'] = f'vaddlvq_u8(vreinterpretq_u8_u{bits[data_type]}({{0}})) > 0'
result['all'] = f'vaddlvq_u8(vreinterpretq_u8_u{bits[data_type]}({{0}})) == 16*0xff'
result['all'] = f'vaddlvq_u8(vreinterpretq_u8_u{bits[data_type]}({{0}})) == 16*0xff'
 
result['cachelineSize'] = 'cachelineSize()'
 
result['cachelineZero'] = 'cachelineZero((void*) {0})'
 
return result
return result
Loading