From 2d65fcb85417eb3f778198883796a9019f0615d1 Mon Sep 17 00:00:00 2001
From: Michael Kuron <mkuron@icp.uni-stuttgart.de>
Date: Tue, 3 Sep 2019 15:47:51 +0200
Subject: [PATCH] AES-NI: cleanup

---
 pystencils/include/aesni_rand.h | 22 +++++++++++-----------
 1 file changed, 11 insertions(+), 11 deletions(-)

diff --git a/pystencils/include/aesni_rand.h b/pystencils/include/aesni_rand.h
index 07f22104c..723ea14df 100644
--- a/pystencils/include/aesni_rand.h
+++ b/pystencils/include/aesni_rand.h
@@ -4,7 +4,7 @@
 
 #include <emmintrin.h> // SSE2
 #include <wmmintrin.h> // AES
-#ifdef __AVX512VL__
+#if defined(__AVX512VL__) || defined(__AVX512F__)
 #include <immintrin.h> // AVX*
 #else
 #include <smmintrin.h>  // SSE4
@@ -38,7 +38,7 @@ QUALIFIERS __m128i aesni1xm128i(const __m128i & in, const __m128i & k) {
 
 QUALIFIERS __m128 _my_cvtepu32_ps(const __m128i v)
 {
-#ifdef __AVX512VL__
+#if defined(__AVX512VL__) || defined(__AVX512F__)
     return _mm_cvtepu32_ps(v);
 #else
     __m128i v2 = _mm_srli_epi32(v, 1);
@@ -49,12 +49,12 @@ QUALIFIERS __m128 _my_cvtepu32_ps(const __m128i v)
 #endif
 }
 
-#if !defined(__AVX512VL__) && defined(__GNUC__) && __GNUC__ >= 5
+#if !defined(__AVX512VL__) && !defined(__AVX512F__) && defined(__GNUC__) && __GNUC__ >= 5
 __attribute__((optimize("no-associative-math")))
 #endif
 QUALIFIERS __m128d _my_cvtepu64_pd(const __m128i x)
 {
-#ifdef __AVX512VL__
+#if defined(__AVX512VL__) || defined(__AVX512F__)
     return _mm_cvtepu64_pd(x);
 #else
     __m128i xH = _mm_srli_epi64(x, 32);
@@ -81,17 +81,17 @@ QUALIFIERS void aesni_double2(uint32 ctr0, uint32 ctr1, uint32 ctr2, uint32 ctr3
     y = _mm_srli_si128(y, 4);
 
     // calculate z = x ^ y << (53 - 32))
-    __m128i z = _mm_sll_epi64(y, _mm_set_epi64x(53 - 32, 53 - 32));
+    __m128i z = _mm_sll_epi64(y, _mm_set1_epi64x(53 - 32));
     z = _mm_xor_si128(x, z);
 
     // convert uint64 to double
     __m128d rs = _my_cvtepu64_pd(z);
     // calculate rs * TWOPOW53_INV_DOUBLE + (TWOPOW53_INV_DOUBLE/2.0)
 #ifdef __FMA__
-    rs = _mm_fmadd_pd(rs, _mm_set_pd1(TWOPOW53_INV_DOUBLE), _mm_set_pd1(TWOPOW53_INV_DOUBLE/2.0));
+    rs = _mm_fmadd_pd(rs, _mm_set1_pd(TWOPOW53_INV_DOUBLE), _mm_set1_pd(TWOPOW53_INV_DOUBLE/2.0));
 #else
-    rs = _mm_mul_pd(rs, _mm_set_pd1(TWOPOW53_INV_DOUBLE));
-    rs = _mm_add_pd(rs, _mm_set_pd1(TWOPOW53_INV_DOUBLE/2.0));
+    rs = _mm_mul_pd(rs, _mm_set1_pd(TWOPOW53_INV_DOUBLE));
+    rs = _mm_add_pd(rs, _mm_set1_pd(TWOPOW53_INV_DOUBLE/2.0));
 #endif
 
     // store result
@@ -115,10 +115,10 @@ QUALIFIERS void aesni_float4(uint32 ctr0, uint32 ctr1, uint32 ctr2, uint32 ctr3,
     __m128 rs = _my_cvtepu32_ps(c128);
     // calculate rs * TWOPOW32_INV_FLOAT + (TWOPOW32_INV_FLOAT/2.0f)
 #ifdef __FMA__
-    rs = _mm_fmadd_ps(rs, _mm_set_ps1(TWOPOW32_INV_FLOAT), _mm_set_ps1(TWOPOW32_INV_FLOAT/2.0f));
+    rs = _mm_fmadd_ps(rs, _mm_set1_ps(TWOPOW32_INV_FLOAT), _mm_set1_ps(TWOPOW32_INV_FLOAT/2.0f));
 #else
-    rs = _mm_mul_ps(rs, _mm_set_ps1(TWOPOW32_INV_FLOAT));
-    rs = _mm_add_ps(rs, _mm_set_ps1(TWOPOW32_INV_FLOAT/2.0f));
+    rs = _mm_mul_ps(rs, _mm_set1_ps(TWOPOW32_INV_FLOAT));
+    rs = _mm_add_ps(rs, _mm_set1_ps(TWOPOW32_INV_FLOAT/2.0f));
 #endif
 
     // store result
-- 
GitLab