From 2d65fcb85417eb3f778198883796a9019f0615d1 Mon Sep 17 00:00:00 2001 From: Michael Kuron <mkuron@icp.uni-stuttgart.de> Date: Tue, 3 Sep 2019 15:47:51 +0200 Subject: [PATCH] AES-NI: cleanup --- pystencils/include/aesni_rand.h | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/pystencils/include/aesni_rand.h b/pystencils/include/aesni_rand.h index 07f22104c..723ea14df 100644 --- a/pystencils/include/aesni_rand.h +++ b/pystencils/include/aesni_rand.h @@ -4,7 +4,7 @@ #include <emmintrin.h> // SSE2 #include <wmmintrin.h> // AES -#ifdef __AVX512VL__ +#if defined(__AVX512VL__) || defined(__AVX512F__) #include <immintrin.h> // AVX* #else #include <smmintrin.h> // SSE4 @@ -38,7 +38,7 @@ QUALIFIERS __m128i aesni1xm128i(const __m128i & in, const __m128i & k) { QUALIFIERS __m128 _my_cvtepu32_ps(const __m128i v) { -#ifdef __AVX512VL__ +#if defined(__AVX512VL__) || defined(__AVX512F__) return _mm_cvtepu32_ps(v); #else __m128i v2 = _mm_srli_epi32(v, 1); @@ -49,12 +49,12 @@ QUALIFIERS __m128 _my_cvtepu32_ps(const __m128i v) #endif } -#if !defined(__AVX512VL__) && defined(__GNUC__) && __GNUC__ >= 5 +#if !defined(__AVX512VL__) && !defined(__AVX512F__) && defined(__GNUC__) && __GNUC__ >= 5 __attribute__((optimize("no-associative-math"))) #endif QUALIFIERS __m128d _my_cvtepu64_pd(const __m128i x) { -#ifdef __AVX512VL__ +#if defined(__AVX512VL__) || defined(__AVX512F__) return _mm_cvtepu64_pd(x); #else __m128i xH = _mm_srli_epi64(x, 32); @@ -81,17 +81,17 @@ QUALIFIERS void aesni_double2(uint32 ctr0, uint32 ctr1, uint32 ctr2, uint32 ctr3 y = _mm_srli_si128(y, 4); // calculate z = x ^ y << (53 - 32)) - __m128i z = _mm_sll_epi64(y, _mm_set_epi64x(53 - 32, 53 - 32)); + __m128i z = _mm_sll_epi64(y, _mm_set1_epi64x(53 - 32)); z = _mm_xor_si128(x, z); // convert uint64 to double __m128d rs = _my_cvtepu64_pd(z); // calculate rs * TWOPOW53_INV_DOUBLE + (TWOPOW53_INV_DOUBLE/2.0) #ifdef __FMA__ - rs = _mm_fmadd_pd(rs, _mm_set_pd1(TWOPOW53_INV_DOUBLE), _mm_set_pd1(TWOPOW53_INV_DOUBLE/2.0)); + rs = _mm_fmadd_pd(rs, _mm_set1_pd(TWOPOW53_INV_DOUBLE), _mm_set1_pd(TWOPOW53_INV_DOUBLE/2.0)); #else - rs = _mm_mul_pd(rs, _mm_set_pd1(TWOPOW53_INV_DOUBLE)); - rs = _mm_add_pd(rs, _mm_set_pd1(TWOPOW53_INV_DOUBLE/2.0)); + rs = _mm_mul_pd(rs, _mm_set1_pd(TWOPOW53_INV_DOUBLE)); + rs = _mm_add_pd(rs, _mm_set1_pd(TWOPOW53_INV_DOUBLE/2.0)); #endif // store result @@ -115,10 +115,10 @@ QUALIFIERS void aesni_float4(uint32 ctr0, uint32 ctr1, uint32 ctr2, uint32 ctr3, __m128 rs = _my_cvtepu32_ps(c128); // calculate rs * TWOPOW32_INV_FLOAT + (TWOPOW32_INV_FLOAT/2.0f) #ifdef __FMA__ - rs = _mm_fmadd_ps(rs, _mm_set_ps1(TWOPOW32_INV_FLOAT), _mm_set_ps1(TWOPOW32_INV_FLOAT/2.0f)); + rs = _mm_fmadd_ps(rs, _mm_set1_ps(TWOPOW32_INV_FLOAT), _mm_set1_ps(TWOPOW32_INV_FLOAT/2.0f)); #else - rs = _mm_mul_ps(rs, _mm_set_ps1(TWOPOW32_INV_FLOAT)); - rs = _mm_add_ps(rs, _mm_set_ps1(TWOPOW32_INV_FLOAT/2.0f)); + rs = _mm_mul_ps(rs, _mm_set1_ps(TWOPOW32_INV_FLOAT)); + rs = _mm_add_ps(rs, _mm_set1_ps(TWOPOW32_INV_FLOAT/2.0f)); #endif // store result -- GitLab