Vectorisation Bug with sqrt
Compiling the following kernel, vectorised for the AVX instruction set with float32 data:
# Reproducer: build and compile a vectorised kernel whose second assignment
# multiplies the temporary `xi` by the square root of a field access.
# NOTE(review): `ps` and `sp` are not imported in this snippet — presumably
# `import pystencils as ps` and `import sympy as sp`; confirm before running.
dtype = 'float32'
instruction_set = 'avx'
field_layout = 'fzyx'

# Vectorisation options handed to the kernel config unchanged.
vectorize_info = {
    'instruction_set': instruction_set,
    'assume_inner_stride_one': True,
    'assume_aligned': False,
    'nontemporal': False,
}
config = ps.CreateKernelConfig(data_type=dtype, cpu_vectorize_info=vectorize_info)

# Generic field named "pdfs" with one index dimension of shape (9,).
# NOTE(review): the positional `2` is presumably the number of spatial
# dimensions — confirm against the pystencils Field API.
src_field = ps.Field.create_generic(
    'pdfs',
    2,
    dtype,
    index_dimensions=1,
    layout=field_layout,
    index_shape=(9,),
)

xi = sp.Symbol("xi")
xi_2 = sp.Symbol("xi_2")
eq = [
    # xi: Python sum() over the entries of src_field.center_vector.
    ps.Assignment(xi, sum(src_field.center_vector)),
    # xi_2: xi multiplied by sqrt of the field's center access; this is the
    # assignment that shows up on the failing line of the generated C++.
    ps.Assignment(xi_2, xi * sp.sqrt(src_field.center)),
]

# Kernel creation succeeds; the error is raised by the C++ compiler
# during compile().
kernel = ps.create_kernel(eq, config=config)
kernel.compile()
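produces the following compiler error (`xi` is a `const __m256` but is passed to `_mm256_set_ps`, which expects `float` arguments):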
g++ -c -Ofast -DNDEBUG -fPIC -march=native -fopenmp -std=c++11 -I/home/markus/miniconda3/envs/pystencils/include/python3.9 -I/home/markus/pystencils/pystencils/pystencils/include -o /home/markus/.cache/pystencils/objectcache/tmpxd6uc9oo /home/markus/.cache/pystencils/objectcache/mod_904f8a018b25a7d1c9d288f1189161f438a5e0a7a0cff26c9e1568ae28c4bd2f.cpp
/home/markus/.cache/pystencils/objectcache/mod_904f8a018b25a7d1c9d288f1189161f438a5e0a7a0cff26c9e1568ae28c4bd2f.cpp: In function ‘void kernel_kernel(float*, int64_t, int64_t, int64_t, int64_t)’:
/home/markus/.cache/pystencils/objectcache/mod_904f8a018b25a7d1c9d288f1189161f438a5e0a7a0cff26c9e1568ae28c4bd2f.cpp:33:61: error: cannot convert ‘const __m256’ to ‘float’
33 | const __m256 xi_2 = _mm256_mul_ps(_mm256_set_ps(xi,xi,xi,xi,xi,xi,xi,xi),_mm256_sqrt_ps(_mm256_loadu_ps(& _data_pdfs_20_10[ctr_0])));
| ^~
| |
| const __m256
In file included from /usr/lib/gcc/x86_64-pc-linux-gnu/11.1.0/include/immintrin.h:43,
from /home/markus/.cache/pystencils/objectcache/mod_904f8a018b25a7d1c9d288f1189161f438a5e0a7a0cff26c9e1568ae28c4bd2f.cpp:2:
/usr/lib/gcc/x86_64-pc-linux-gnu/11.1.0/include/avxintrin.h:1256:22: note: initializing argument 1 of ‘__m256 _mm256_set_ps(float, float, float, float, float, float, float, float)’
1256 | _mm256_set_ps (float __A, float __B, float __C, float __D,