From 437f18b9b8112a8a3c86ee97aa32aa5b04bd5cbc Mon Sep 17 00:00:00 2001
From: Michael Kuron <mkuron@icp.uni-stuttgart.de>
Date: Sun, 14 Feb 2021 08:57:13 +0100
Subject: [PATCH] Fix _my_cvtepu64_pd and _my256_cvtepu64_pd with fast-math clang

---
 pystencils/include/myintrin.h | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/pystencils/include/myintrin.h b/pystencils/include/myintrin.h
index a94c316c4..6c1d9d4d0 100644
--- a/pystencils/include/myintrin.h
+++ b/pystencils/include/myintrin.h
@@ -36,6 +36,8 @@ QUALIFIERS __m128d _my_cvtepu64_pd(const __m128i x)
 {
 #ifdef __AVX512VL__
     return _mm_cvtepu64_pd(x);
+#elif defined(__clang__)
+    return __builtin_convertvector((uint64_t __attribute__((__vector_size__(16)))) x, __m128d);
 #else
     __m128i xH = _mm_srli_epi64(x, 32);
     xH = _mm_or_si128(xH, _mm_castpd_si128(_mm_set1_pd(19342813113834066795298816.)));          //  2^84
@@ -85,6 +87,8 @@ QUALIFIERS __m256d _my256_cvtepu64_pd(const __m256i x)
 {
 #ifdef __AVX512VL__
     return _mm256_cvtepu64_pd(x);
+#elif defined(__clang__)
+    return __builtin_convertvector((uint64_t __attribute__((__vector_size__(32)))) x, __m256d);
 #else
     __m256i xH = _mm256_srli_epi64(x, 32);
     xH = _mm256_or_si256(xH, _mm256_castpd_si256(_mm256_set1_pd(19342813113834066795298816.)));          //  2^84
-- 
GitLab