From 274a9630a9c2ae95a68891e03cd00fb5a263c89f Mon Sep 17 00:00:00 2001 From: Michael Kuron <mkuron@icp.uni-stuttgart.de> Date: Wed, 4 Sep 2019 11:24:59 +0200 Subject: [PATCH] Philox AVX512: fix shuffle mask --- pystencils/include/philox_rand.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/pystencils/include/philox_rand.h b/pystencils/include/philox_rand.h index 27a7f0f14..d2567f543 100644 --- a/pystencils/include/philox_rand.h +++ b/pystencils/include/philox_rand.h @@ -436,10 +436,10 @@ QUALIFIERS void _philox4x32round(__m512i* ctr, __m512i* key) __m512i lohi1a = _mm512_mul_epu32(ctr[2], _mm512_set1_epi32(PHILOX_M4x32_1)); __m512i lohi1b = _mm512_mul_epu32(_mm512_srli_epi64(ctr[2], 32), _mm512_set1_epi32(PHILOX_M4x32_1)); - lohi0a = _mm512_shuffle_epi32(lohi0a, 0xD8); - lohi0b = _mm512_shuffle_epi32(lohi0b, 0xD8); - lohi1a = _mm512_shuffle_epi32(lohi1a, 0xD8); - lohi1b = _mm512_shuffle_epi32(lohi1b, 0xD8); + lohi0a = _mm512_shuffle_epi32(lohi0a, _MM_PERM_DBCA); + lohi0b = _mm512_shuffle_epi32(lohi0b, _MM_PERM_DBCA); + lohi1a = _mm512_shuffle_epi32(lohi1a, _MM_PERM_DBCA); + lohi1b = _mm512_shuffle_epi32(lohi1b, _MM_PERM_DBCA); __m512i lo0 = _mm512_unpacklo_epi32(lohi0a, lohi0b); __m512i hi0 = _mm512_unpackhi_epi32(lohi0a, lohi0b); -- GitLab