Skip to content

Vectorisation Bug with sqrt

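The following minimal example creates and compiles a vectorised (AVX, single-precision) kernel that multiplies a sum over the field entries by a square root: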

# Minimal reproducer for the vectorisation failure reported in this issue.
# NOTE(review): the snippet uses `ps` and `sp` without importing them;
# presumably `import pystencils as ps` and `import sympy as sp` - confirm.

# Settings shared by the kernel config and the field definition below.
instruction_set = 'avx'
dtype = 'float32'
field_layout = 'fzyx'

# Kernel configuration that requests CPU vectorisation with the instruction
# set chosen above.
config = ps.CreateKernelConfig(data_type=dtype,
                               cpu_vectorize_info={'instruction_set': instruction_set,
                                                   'assume_inner_stride_one': True,
                                                   'assume_aligned': False, 'nontemporal': False})

# Generic field named 'pdfs' with one index dimension of shape (9,).
# NOTE(review): the positional `2` is presumably the number of spatial
# dimensions - confirm against the Field.create_generic signature.
src_field = ps.Field.create_generic('pdfs', 2, dtype, index_dimensions=1, layout=field_layout, index_shape=(9,))

# Two chained assignments: `xi` is the sum over `center_vector`, and `xi_2`
# reuses `xi` as a factor next to a sqrt of a field access. The reported
# compiler error points at the use of `xi` in the generated code for `xi_2`.
eq = [ps.Assignment(sp.Symbol("xi"), sum(src_field.center_vector)),
      ps.Assignment(sp.Symbol("xi_2"), sp.Symbol("xi") * sp.sqrt(src_field.center))]

# Building and compiling the kernel is the step that fails with the reported
# g++ error.
ps.create_kernel(eq, config=config).compile()

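Compiling the generated kernel fails with the error below: in the generated code `xi` is already a vector (`const __m256`), yet it is passed to `_mm256_set_ps`, which expects scalar `float` arguments: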

g++ -c -Ofast -DNDEBUG -fPIC -march=native -fopenmp -std=c++11 -I/home/markus/miniconda3/envs/pystencils/include/python3.9 -I/home/markus/pystencils/pystencils/pystencils/include -o /home/markus/.cache/pystencils/objectcache/tmpxd6uc9oo /home/markus/.cache/pystencils/objectcache/mod_904f8a018b25a7d1c9d288f1189161f438a5e0a7a0cff26c9e1568ae28c4bd2f.cpp
/home/markus/.cache/pystencils/objectcache/mod_904f8a018b25a7d1c9d288f1189161f438a5e0a7a0cff26c9e1568ae28c4bd2f.cpp: In function ‘void kernel_kernel(float*, int64_t, int64_t, int64_t, int64_t)’:
/home/markus/.cache/pystencils/objectcache/mod_904f8a018b25a7d1c9d288f1189161f438a5e0a7a0cff26c9e1568ae28c4bd2f.cpp:33:61: error: cannot convert ‘const __m256’ to ‘float’
   33 |             const __m256 xi_2 = _mm256_mul_ps(_mm256_set_ps(xi,xi,xi,xi,xi,xi,xi,xi),_mm256_sqrt_ps(_mm256_loadu_ps(& _data_pdfs_20_10[ctr_0])));
      |                                                             ^~
      |                                                             |
      |                                                             const __m256
In file included from /usr/lib/gcc/x86_64-pc-linux-gnu/11.1.0/include/immintrin.h:43,
                 from /home/markus/.cache/pystencils/objectcache/mod_904f8a018b25a7d1c9d288f1189161f438a5e0a7a0cff26c9e1568ae28c4bd2f.cpp:2:
/usr/lib/gcc/x86_64-pc-linux-gnu/11.1.0/include/avxintrin.h:1256:22: note:   initializing argument 1 of ‘__m256 _mm256_set_ps(float, float, float, float, float, float, float, float)’
 1256 | _mm256_set_ps (float __A, float __B, float __C, float __D,