diff --git a/pystencils/backends/ppc_instruction_sets.py b/pystencils/backends/ppc_instruction_sets.py index f4df9d3b27d7c0827a2442de92937e8d255aa610..bb9e3e85113023c0f2c82ddce432a464891ca756 100644 --- a/pystencils/backends/ppc_instruction_sets.py +++ b/pystencils/backends/ppc_instruction_sets.py @@ -29,7 +29,7 @@ def get_vector_instruction_set_ppc(data_type='double', instruction_set='vsx'): 'loadA': 'ld[0x0, 0]', 'storeU': 'xst[1, 0x0, 0]', 'storeA': 'st[1, 0x0, 0]', - 'stream': 'st[1, 0x0, 0]', + 'stream': 'stl[1, 0x0, 0]', 'abs': 'abs[0]', '==': 'cmpeq[0, 1]', diff --git a/pystencils/include/philox_rand.h b/pystencils/include/philox_rand.h index 1b8b9d9f6ab5cb694deb7508014a5c0d080fbb6b..4d81d43e420f716ad3d07f4d58d68dcb127e2f5a 100644 --- a/pystencils/include/philox_rand.h +++ b/pystencils/include/philox_rand.h @@ -16,12 +16,15 @@ #include <arm_neon.h> #endif -#if defined(__powerpc__) && defined(__GNUC__) && !defined(__clang__) && !defined(__ibmxl__) +#if defined(__powerpc__) && defined(__GNUC__) && !defined(__clang__) && !defined(__xlC__) #include <ppu_intrinsics.h> #endif #ifdef __ALTIVEC__ #include <altivec.h> #undef bool +#ifndef _ARCH_PWR8 +#include <pveclib/vec_int64_ppc.h> +#endif #endif #ifndef __CUDA_ARCH__ @@ -46,7 +49,7 @@ QUALIFIERS uint32 mulhilo32(uint32 a, uint32 b, uint32* hip) { #ifndef __CUDA_ARCH__ // host code -#if defined(__powerpc__) && (!defined(__clang__) || defined(__ibmxl__)) +#if defined(__powerpc__) && (!defined(__clang__) || defined(__xlC__)) *hip = __mulhwu(a,b); return a*b; #else @@ -298,7 +301,12 @@ QUALIFIERS void philox_double2(uint32 ctr0, __m128i ctr1, uint32 ctr2, uint32 ct #ifdef __ALTIVEC__ QUALIFIERS void _philox4x32round(__vector unsigned int* ctr, __vector unsigned int* key) { -#ifdef __POWER10_VECTOR__ +#ifndef _ARCH_PWR8 + __vector unsigned int lo0 = vec_mul(ctr[0], vec_splats(PHILOX_M4x32_0)); + __vector unsigned int lo1 = vec_mul(ctr[2], vec_splats(PHILOX_M4x32_1)); + __vector unsigned int hi0 = vec_mulhuw(ctr[0], vec_splats(PHILOX_M4x32_0)); + __vector unsigned int hi1 = vec_mulhuw(ctr[2], vec_splats(PHILOX_M4x32_1)); +#elif defined(_ARCH_PWR10) __vector unsigned int lo0 = vec_mul(ctr[0], vec_splats(PHILOX_M4x32_0)); __vector unsigned int lo1 = vec_mul(ctr[2], vec_splats(PHILOX_M4x32_1)); __vector unsigned int hi0 = vec_mulh(ctr[0], vec_splats(PHILOX_M4x32_0)); @@ -364,11 +372,15 @@ QUALIFIERS __vector double _uniform_double_hq(__vector unsigned int x, __vector #endif // calculate z = x ^ y << (53 - 32)) +#ifdef _ARCH_PWR8 __vector unsigned long long z = vec_sl((__vector unsigned long long) y, vec_splats(53ULL - 32ULL)); +#else + __vector unsigned long long z = vec_vsld((__vector unsigned long long) y, vec_splats(53ULL - 32ULL)); +#endif z = vec_xor((__vector unsigned long long) x, z); // convert uint64 to double -#ifdef __ibmxl__ +#ifdef __xlC__ __vector double rs = vec_ctd(z, 0); #else __vector double rs = vec_ctf(z, 0);