diff --git a/pystencils/backends/ppc_instruction_sets.py b/pystencils/backends/ppc_instruction_sets.py
index f4df9d3b27d7c0827a2442de92937e8d255aa610..bb9e3e85113023c0f2c82ddce432a464891ca756 100644
--- a/pystencils/backends/ppc_instruction_sets.py
+++ b/pystencils/backends/ppc_instruction_sets.py
@@ -29,7 +29,7 @@ def get_vector_instruction_set_ppc(data_type='double', instruction_set='vsx'):
         'loadA': 'ld[0x0, 0]',
         'storeU': 'xst[1, 0x0, 0]',
         'storeA': 'st[1, 0x0, 0]',
-        'stream': 'st[1, 0x0, 0]',
+        'stream': 'stl[1, 0x0, 0]',
 
         'abs': 'abs[0]',
         '==': 'cmpeq[0, 1]',
diff --git a/pystencils/include/philox_rand.h b/pystencils/include/philox_rand.h
index 1b8b9d9f6ab5cb694deb7508014a5c0d080fbb6b..4d81d43e420f716ad3d07f4d58d68dcb127e2f5a 100644
--- a/pystencils/include/philox_rand.h
+++ b/pystencils/include/philox_rand.h
@@ -16,12 +16,15 @@
 #include <arm_neon.h>
 #endif
 
-#if defined(__powerpc__) && defined(__GNUC__) && !defined(__clang__) && !defined(__ibmxl__)
+#if defined(__powerpc__) && defined(__GNUC__) && !defined(__clang__) && !defined(__xlC__)
 #include <ppu_intrinsics.h>
 #endif
 #ifdef __ALTIVEC__
 #include <altivec.h>
 #undef bool
+#ifndef _ARCH_PWR8
+#include <pveclib/vec_int64_ppc.h>
+#endif
 #endif
 
 #ifndef __CUDA_ARCH__
@@ -46,7 +49,7 @@ QUALIFIERS uint32 mulhilo32(uint32 a, uint32 b, uint32* hip)
 {
 #ifndef __CUDA_ARCH__
     // host code
-#if defined(__powerpc__) && (!defined(__clang__) || defined(__ibmxl__))
+#if defined(__powerpc__) && (!defined(__clang__) || defined(__xlC__))
     *hip = __mulhwu(a,b);
     return a*b;
 #else
@@ -298,7 +301,12 @@ QUALIFIERS void philox_double2(uint32 ctr0, __m128i ctr1, uint32 ctr2, uint32 ct
 #ifdef __ALTIVEC__
 QUALIFIERS void _philox4x32round(__vector unsigned int* ctr, __vector unsigned int* key)
 {
-#ifdef __POWER10_VECTOR__
+#ifndef _ARCH_PWR8
+    __vector unsigned int lo0 = vec_mul(ctr[0], vec_splats(PHILOX_M4x32_0));
+    __vector unsigned int lo1 = vec_mul(ctr[2], vec_splats(PHILOX_M4x32_1));
+    __vector unsigned int hi0 = vec_mulhuw(ctr[0], vec_splats(PHILOX_M4x32_0));
+    __vector unsigned int hi1 = vec_mulhuw(ctr[2], vec_splats(PHILOX_M4x32_1));
+#elif defined(_ARCH_PWR10)
     __vector unsigned int lo0 = vec_mul(ctr[0], vec_splats(PHILOX_M4x32_0));
     __vector unsigned int lo1 = vec_mul(ctr[2], vec_splats(PHILOX_M4x32_1));
     __vector unsigned int hi0 = vec_mulh(ctr[0], vec_splats(PHILOX_M4x32_0));
@@ -364,11 +372,15 @@ QUALIFIERS __vector double _uniform_double_hq(__vector unsigned int x, __vector
 #endif
 
-    // calculate z = x ^ y << (53 - 32))
+    // calculate z = x ^ (y << (53 - 32))
+#ifdef _ARCH_PWR8
     __vector unsigned long long z = vec_sl((__vector unsigned long long) y, vec_splats(53ULL - 32ULL));
+#else
+    __vector unsigned long long z = vec_vsld((__vector unsigned long long) y, vec_splats(53ULL - 32ULL));
+#endif
     z = vec_xor((__vector unsigned long long) x, z);
 
     // convert uint64 to double
-#ifdef __ibmxl__
+#ifdef __xlC__
     __vector double rs = vec_ctd(z, 0);
 #else
     __vector double rs = vec_ctf(z, 0);