Skip to content
Snippets Groups Projects

RNG SIMD

Merged Michael Kuron requested to merge philox-simd into master
1 file
+15 −18
Compare changes
  • Side-by-side
  • Inline
@@ -172,8 +172,7 @@ QUALIFIERS __m128d _my_cvtepu64_pd(const __m128i x)
 #endif
 }
-template<bool high>
+QUALIFIERS __m128d _uniform_double_hq(const bool high, __m128i x, __m128i y)
-QUALIFIERS __m128d _uniform_double_hq(__m128i x, __m128i y)
 {
 // convert 32 to 64 bit
 if (high)
@@ -263,10 +262,10 @@ QUALIFIERS void philox_double8(__m128i ctr0, __m128i ctr1, __m128i ctr2, __m128i
 _philox4x32bumpkey(key); _philox4x32round(ctr, key); // 9
 _philox4x32bumpkey(key); _philox4x32round(ctr, key); // 10
-rnd1lo = _uniform_double_hq<false>(ctr[0], ctr[1]);
+rnd1lo = _uniform_double_hq(false, ctr[0], ctr[1]);
-rnd1hi = _uniform_double_hq<true>(ctr[0], ctr[1]);
+rnd1hi = _uniform_double_hq(true, ctr[0], ctr[1]);
-rnd2lo = _uniform_double_hq<false>(ctr[2], ctr[3]);
+rnd2lo = _uniform_double_hq(false, ctr[2], ctr[3]);
-rnd2hi = _uniform_double_hq<true>(ctr[2], ctr[3]);
+rnd2hi = _uniform_double_hq(true, ctr[2], ctr[3]);
 }
 #endif
@@ -339,8 +338,7 @@ QUALIFIERS __m256d _my256_set_pd1(const double v)
 return _mm256_set_pd(v, v, v, v);
 }
-template<bool high>
+QUALIFIERS __m256d _uniform_double_hq(const bool high, __m256i x, __m256i y)
-QUALIFIERS __m256d _uniform_double_hq(__m256i x, __m256i y)
 {
 // convert 32 to 64 bit
 if (high)
@@ -430,10 +428,10 @@ QUALIFIERS void philox_double16(__m256i ctr0, __m256i ctr1, __m256i ctr2, __m256
 _philox4x32bumpkey(key); _philox4x32round(ctr, key); // 9
 _philox4x32bumpkey(key); _philox4x32round(ctr, key); // 10
-rnd1lo = _uniform_double_hq<false>(ctr[0], ctr[1]);
+rnd1lo = _uniform_double_hq(false, ctr[0], ctr[1]);
-rnd1hi = _uniform_double_hq<true>(ctr[0], ctr[1]);
+rnd1hi = _uniform_double_hq(true, ctr[0], ctr[1]);
-rnd2lo = _uniform_double_hq<false>(ctr[2], ctr[3]);
+rnd2lo = _uniform_double_hq(false, ctr[2], ctr[3]);
-rnd2hi = _uniform_double_hq<true>(ctr[2], ctr[3]);
+rnd2hi = _uniform_double_hq(true, ctr[2], ctr[3]);
 }
 #endif
@@ -477,8 +475,7 @@ QUALIFIERS __m512d _my512_set_pd1(const double v)
 return _mm512_set_pd(v, v, v, v, v, v, v, v);
 }
-template<bool high>
+QUALIFIERS __m512d _uniform_double_hq(const bool high, __m512i x, __m512i y)
-QUALIFIERS __m512d _uniform_double_hq(__m512i x, __m512i y)
 {
 // convert 32 to 64 bit
 if (high)
@@ -552,10 +549,10 @@ QUALIFIERS void philox_double32(__m512i ctr0, __m512i ctr1, __m512i ctr2, __m512
 _philox4x32bumpkey(key); _philox4x32round(ctr, key); // 9
 _philox4x32bumpkey(key); _philox4x32round(ctr, key); // 10
-rnd1lo = _uniform_double_hq<false>(ctr[0], ctr[1]);
+rnd1lo = _uniform_double_hq(false, ctr[0], ctr[1]);
-rnd1hi = _uniform_double_hq<true>(ctr[0], ctr[1]);
+rnd1hi = _uniform_double_hq(true, ctr[0], ctr[1]);
-rnd2lo = _uniform_double_hq<false>(ctr[2], ctr[3]);
+rnd2lo = _uniform_double_hq(false, ctr[2], ctr[3]);
-rnd2hi = _uniform_double_hq<true>(ctr[2], ctr[3]);
+rnd2hi = _uniform_double_hq(true, ctr[2], ctr[3]);
 }
 #endif
Loading