diff --git a/pystencils/backends/ppc_instruction_sets.py b/pystencils/backends/ppc_instruction_sets.py
index 72323f1cbceb72405b3ed88bf97411a231e4c47d..f4df9d3b27d7c0827a2442de92937e8d255aa610 100644
--- a/pystencils/backends/ppc_instruction_sets.py
+++ b/pystencils/backends/ppc_instruction_sets.py
@@ -23,7 +23,7 @@ def get_vector_instruction_set_ppc(data_type='double', instruction_set='vsx'):
         '*': 'mul[0, 1]',
         '/': 'div[0, 1]',
         'sqrt': 'sqrt[0]',
-        'rsqrt': 'rsqrt[0]',
+        'rsqrt': 'rsqrte[0]',  # rsqrt is available too, but not on Clang
 
         'loadU': 'xl[0x0, 0]',
         'loadA': 'ld[0x0, 0]',
@@ -73,6 +73,12 @@ def get_vector_instruction_set_ppc(data_type='double', instruction_set='vsx'):
 
         result[intrinsic_id] = 'vec_' + name + arg_string
 
+    if data_type == 'double':
+        # Clang and XL C++ are missing these for doubles
+        result['loadA'] = '(__vector double)' + result['loadA'].format('(float*) {0}')
+        result['storeA'] = result['storeA'].format('(float*) {0}', '(__vector float) {1}')
+        result['stream'] = result['stream'].format('(float*) {0}', '(__vector float) {1}')
+
     result['+int'] = "vec_add({0}, {1})"
 
     result['width'] = width
@@ -82,10 +88,12 @@ def get_vector_instruction_set_ppc(data_type='double', instruction_set='vsx'):
     result['bool'] = f'__vector __bool {"long long" if data_type == "double" else "int"}'
     result['headers'] = ['<altivec.h>', '"ppc_altivec_helpers.h"']
 
-    result['makeVecConst'] = '((' + result[data_type] + '){{' + ", ".join(['{0}' for _ in range(width)]) + '}})'
-    result['makeVec'] = '((' + result[data_type] + '){{' + ", ".join(['{' + str(i) + '}' for i in range(width)]) + '}})'
-    result['makeVecConstInt'] = '((' + result['int'] + '){{' + ", ".join(['{0}' for _ in range(intwidth)]) + '}})'
-    result['makeVecInt'] = '((' + result['int'] + '){{{0}, {1}, {2}, {3}}})'
+    result['makeVecConst'] = '((' + result[data_type] + '){{' + \
+        ", ".join(['(' + data_type + ') {0}' for _ in range(width)]) + '}})'
+    result['makeVec'] = '((' + result[data_type] + '){{' + \
+        ", ".join(['(' + data_type + ') {' + str(i) + '}' for i in range(width)]) + '}})'  # C cast per lane
+    result['makeVecConstInt'] = '((' + result['int'] + '){{' + ", ".join(['(int) {0}' for _ in range(intwidth)]) + '}})'
+    result['makeVecInt'] = '((' + result['int'] + '){{(int) {0}, (int) {1}, (int) {2}, (int) {3}}})'
 
     result['any'] = 'vec_any_ne({0}, ((' + result['bool'] + ') {{' + ", ".join(['0'] * width) + '}}))'
     result['all'] = 'vec_all_ne({0}, ((' + result['bool'] + ') {{' + ", ".join(['0'] * width) + '}}))'
diff --git a/pystencils/backends/simd_instruction_sets.py b/pystencils/backends/simd_instruction_sets.py
index b3418eb3083f804338d804b1cb0fa709852ddbea..850f8ff6d4a9ae168a78c6588a69fd87f5e5f03e 100644
--- a/pystencils/backends/simd_instruction_sets.py
+++ b/pystencils/backends/simd_instruction_sets.py
@@ -14,19 +14,28 @@ def get_vector_instruction_set(data_type='double', instruction_set='avx'):
         return get_vector_instruction_set_x86(data_type, instruction_set)
 
 
+_cache = None
+
+
 def get_supported_instruction_sets():
     """List of supported instruction sets on current hardware, or None if query failed."""
+    global _cache
+    if _cache is not None:
+        return _cache.copy()
     if platform.system() == 'Darwin' and platform.machine() == 'arm64':  # not supported by cpuinfo
         return ['neon']
     elif platform.machine().startswith('ppc64'):  # no flags reported by cpuinfo
         import subprocess
+        import tempfile
         from pystencils.cpu.cpujit import get_compiler_config
-        command = [get_compiler_config()['command'], '-mcpu=native', '-dM', '-E', '-']
-        macros = subprocess.check_output(command, input='', text=True)
+        with tempfile.NamedTemporaryFile(suffix='.cpp') as f:  # closed and removed when the block exits
+            command = [get_compiler_config()['command'], '-mcpu=native', '-dM', '-E', f.name]
+            macros = subprocess.check_output(command, input='', text=True)
         if '#define __VSX__' in macros and '#define __ALTIVEC__' in macros:
-            return ['vsx']
+            _cache = ['vsx']
         else:
-            return []
+            _cache = []
+        return _cache.copy()
     try:
         from cpuinfo import get_cpu_info
     except ImportError:
diff --git a/pystencils/include/philox_rand.h b/pystencils/include/philox_rand.h
index 2cc953c4a1621db3123ab6881d40ba96075c9ef0..1b8b9d9f6ab5cb694deb7508014a5c0d080fbb6b 100644
--- a/pystencils/include/philox_rand.h
+++ b/pystencils/include/philox_rand.h
@@ -296,29 +296,29 @@ QUALIFIERS void philox_double2(uint32 ctr0, __m128i ctr1, uint32 ctr2, uint32 ct
 
 
 #ifdef __ALTIVEC__
-QUALIFIERS void _philox4x32round(__vector uint32* ctr, __vector uint32* key)
+QUALIFIERS void _philox4x32round(__vector unsigned int* ctr, __vector unsigned int* key)
 {
 #ifdef __POWER10_VECTOR__
-    __vector uint32 lo0 = vec_mul(ctr[0], vec_splats(PHILOX_M4x32_0));
-    __vector uint32 lo1 = vec_mul(ctr[2], vec_splats(PHILOX_M4x32_1));
-    __vector uint32 hi0 = vec_mulh(ctr[0], vec_splats(PHILOX_M4x32_0));
-    __vector uint32 hi1 = vec_mulh(ctr[2], vec_splats(PHILOX_M4x32_1));
+    __vector unsigned int lo0 = vec_mul(ctr[0], vec_splats(PHILOX_M4x32_0));
+    __vector unsigned int lo1 = vec_mul(ctr[2], vec_splats(PHILOX_M4x32_1));
+    __vector unsigned int hi0 = vec_mulh(ctr[0], vec_splats(PHILOX_M4x32_0));
+    __vector unsigned int hi1 = vec_mulh(ctr[2], vec_splats(PHILOX_M4x32_1));
 #else
-    __vector uint32 lohi0a = (__vector uint32) vec_mule(ctr[0], vec_splats(PHILOX_M4x32_0));
-    __vector uint32 lohi0b = (__vector uint32) vec_mulo(ctr[0], vec_splats(PHILOX_M4x32_0));
-    __vector uint32 lohi1a = (__vector uint32) vec_mule(ctr[2], vec_splats(PHILOX_M4x32_1));
-    __vector uint32 lohi1b = (__vector uint32) vec_mulo(ctr[2], vec_splats(PHILOX_M4x32_1));
+    __vector unsigned int lohi0a = (__vector unsigned int) vec_mule(ctr[0], vec_splats(PHILOX_M4x32_0));
+    __vector unsigned int lohi0b = (__vector unsigned int) vec_mulo(ctr[0], vec_splats(PHILOX_M4x32_0));
+    __vector unsigned int lohi1a = (__vector unsigned int) vec_mule(ctr[2], vec_splats(PHILOX_M4x32_1));
+    __vector unsigned int lohi1b = (__vector unsigned int) vec_mulo(ctr[2], vec_splats(PHILOX_M4x32_1));
 
 #ifdef __LITTLE_ENDIAN__
-    __vector uint32 lo0 = vec_mergee(lohi0a, lohi0b);
-    __vector uint32 lo1 = vec_mergee(lohi1a, lohi1b);
-    __vector uint32 hi0 = vec_mergeo(lohi0a, lohi0b);
-    __vector uint32 hi1 = vec_mergeo(lohi1a, lohi1b);
+    __vector unsigned int lo0 = vec_mergee(lohi0a, lohi0b);
+    __vector unsigned int lo1 = vec_mergee(lohi1a, lohi1b);
+    __vector unsigned int hi0 = vec_mergeo(lohi0a, lohi0b);
+    __vector unsigned int hi1 = vec_mergeo(lohi1a, lohi1b);
 #else
-    __vector uint32 lo0 = vec_mergeo(lohi0a, lohi0b);
-    __vector uint32 lo1 = vec_mergeo(lohi1a, lohi1b);
-    __vector uint32 hi0 = vec_mergee(lohi0a, lohi0b);
-    __vector uint32 hi1 = vec_mergee(lohi1a, lohi1b);
+    __vector unsigned int lo0 = vec_mergeo(lohi0a, lohi0b);
+    __vector unsigned int lo1 = vec_mergeo(lohi1a, lohi1b);
+    __vector unsigned int hi0 = vec_mergee(lohi0a, lohi0b);
+    __vector unsigned int hi1 = vec_mergee(lohi1a, lohi1b);
 #endif
 #endif
 
@@ -328,7 +328,7 @@ QUALIFIERS void _philox4x32round(__vector uint32* ctr, __vector uint32* key)
     ctr[3] = lo0;
 }
 
-QUALIFIERS void _philox4x32bumpkey(__vector uint32* key)
+QUALIFIERS void _philox4x32bumpkey(__vector unsigned int* key)
 {
     key[0] = vec_add(key[0], vec_splats(PHILOX_W32_0));
     key[1] = vec_add(key[1], vec_splats(PHILOX_W32_1));
@@ -336,7 +336,7 @@ QUALIFIERS void _philox4x32bumpkey(__vector uint32* key)
 
 #ifdef __VSX__
 template<bool high>
-QUALIFIERS __vector double _uniform_double_hq(__vector uint32 x, __vector uint32 y)
+QUALIFIERS __vector double _uniform_double_hq(__vector unsigned int x, __vector unsigned int y)
 {
     // convert 32 to 64 bit
 #ifdef __LITTLE_ENDIAN__
@@ -364,16 +364,14 @@ QUALIFIERS __vector double _uniform_double_hq(__vector uint32 x, __vector uint32
 #endif
 
     // calculate z = x ^ y << (53 - 32))
-    __vector uint64 z = vec_sl((__vector uint64) y, vec_splats(53ULL - 32ULL));
-    z = vec_xor((__vector uint64) x, z);
+    __vector unsigned long long z = vec_sl((__vector unsigned long long) y, vec_splats(53ULL - 32ULL));
+    z = vec_xor((__vector unsigned long long) x, z);
 
     // convert uint64 to double
-#if defined(__has_builtin) && __has_builtin(__builtin_convertvector)
-    __vector double rs = __builtin_convertvector(z, __vector double);
-#elif defined(__GNUC__) && __GNUC__ >= 8
-    __vector double rs = vec_ctf(z, 0);
-#else
+#ifdef __ibmxl__
     __vector double rs = vec_ctd(z, 0);
+#else
+    __vector double rs = vec_ctf(z, 0);
 #endif
     // calculate rs * TWOPOW53_INV_DOUBLE + (TWOPOW53_INV_DOUBLE/2.0)
     rs = vec_madd(rs, vec_splats(TWOPOW53_INV_DOUBLE), vec_splats(TWOPOW53_INV_DOUBLE/2.0));
@@ -383,12 +381,12 @@ QUALIFIERS __vector double _uniform_double_hq(__vector uint32 x, __vector uint32
 #endif
 
 
-QUALIFIERS void philox_float4(__vector uint32 ctr0, __vector uint32 ctr1, __vector uint32 ctr2, __vector uint32 ctr3,
+QUALIFIERS void philox_float4(__vector unsigned int ctr0, __vector unsigned int ctr1, __vector unsigned int ctr2, __vector unsigned int ctr3,
                               uint32 key0, uint32 key1,
                               __vector float & rnd1, __vector float & rnd2, __vector float & rnd3, __vector float & rnd4)
 {
-    __vector uint32 key[2] = {vec_splats(key0), vec_splats(key1)};
-    __vector uint32 ctr[4] = {ctr0, ctr1, ctr2, ctr3};
+    __vector unsigned int key[2] = {vec_splats(key0), vec_splats(key1)};
+    __vector unsigned int ctr[4] = {ctr0, ctr1, ctr2, ctr3};
     _philox4x32round(ctr, key);                           // 1
     _philox4x32bumpkey(key); _philox4x32round(ctr, key);  // 2
     _philox4x32bumpkey(key); _philox4x32round(ctr, key);  // 3
@@ -414,12 +412,12 @@ QUALIFIERS void philox_float4(__vector uint32 ctr0, __vector uint32 ctr1, __vect
 
 
 #ifdef __VSX__
-QUALIFIERS void philox_double2(__vector uint32 ctr0, __vector uint32 ctr1, __vector uint32 ctr2, __vector uint32 ctr3,
+QUALIFIERS void philox_double2(__vector unsigned int ctr0, __vector unsigned int ctr1, __vector unsigned int ctr2, __vector unsigned int ctr3,
                                uint32 key0, uint32 key1,
                                __vector double & rnd1lo, __vector double & rnd1hi, __vector double & rnd2lo, __vector double & rnd2hi)
 {
-    __vector uint32 key[2] = {vec_splats(key0), vec_splats(key1)};
-    __vector uint32 ctr[4] = {ctr0, ctr1, ctr2, ctr3};
+    __vector unsigned int key[2] = {vec_splats(key0), vec_splats(key1)};
+    __vector unsigned int ctr[4] = {ctr0, ctr1, ctr2, ctr3};
     _philox4x32round(ctr, key);                           // 1
     _philox4x32bumpkey(key); _philox4x32round(ctr, key);  // 2
     _philox4x32bumpkey(key); _philox4x32round(ctr, key);  // 3
@@ -438,13 +436,13 @@ QUALIFIERS void philox_double2(__vector uint32 ctr0, __vector uint32 ctr1, __vec
 }
 #endif
 
-QUALIFIERS void philox_float4(uint32 ctr0, __vector uint32 ctr1, uint32 ctr2, uint32 ctr3,
+QUALIFIERS void philox_float4(uint32 ctr0, __vector unsigned int ctr1, uint32 ctr2, uint32 ctr3,
                               uint32 key0, uint32 key1,
                               __vector float & rnd1, __vector float & rnd2, __vector float & rnd3, __vector float & rnd4)
 {
-    __vector uint32 ctr0v = vec_splats(ctr0);
-    __vector uint32 ctr2v = vec_splats(ctr2);
-    __vector uint32 ctr3v = vec_splats(ctr3);
+    __vector unsigned int ctr0v = vec_splats(ctr0);
+    __vector unsigned int ctr2v = vec_splats(ctr2);
+    __vector unsigned int ctr3v = vec_splats(ctr3);
 
     philox_float4(ctr0v, ctr1, ctr2v, ctr3v, key0, key1, rnd1, rnd2, rnd3, rnd4);
 }
@@ -453,28 +451,28 @@ QUALIFIERS void philox_float4(uint32 ctr0, __vector int ctr1, uint32 ctr2, uint3
                               uint32 key0, uint32 key1,
                               __vector float & rnd1, __vector float & rnd2, __vector float & rnd3, __vector float & rnd4)
 {
-    philox_float4(ctr0, (__vector uint32) ctr1, ctr2, ctr3, key0, key1, rnd1, rnd2, rnd3, rnd4);
+    philox_float4(ctr0, (__vector unsigned int) ctr1, ctr2, ctr3, key0, key1, rnd1, rnd2, rnd3, rnd4);
 }
 
 #ifdef __VSX__
-QUALIFIERS void philox_double2(uint32 ctr0, __vector uint32 ctr1, uint32 ctr2, uint32 ctr3,
+QUALIFIERS void philox_double2(uint32 ctr0, __vector unsigned int ctr1, uint32 ctr2, uint32 ctr3,
                                uint32 key0, uint32 key1,
                                __vector double & rnd1lo, __vector double & rnd1hi, __vector double & rnd2lo, __vector double & rnd2hi)
 {
-    __vector uint32 ctr0v = vec_splats(ctr0);
-    __vector uint32 ctr2v = vec_splats(ctr2);
-    __vector uint32 ctr3v = vec_splats(ctr3);
+    __vector unsigned int ctr0v = vec_splats(ctr0);
+    __vector unsigned int ctr2v = vec_splats(ctr2);
+    __vector unsigned int ctr3v = vec_splats(ctr3);
 
     philox_double2(ctr0v, ctr1, ctr2v, ctr3v, key0, key1, rnd1lo, rnd1hi, rnd2lo, rnd2hi);
 }
 
-QUALIFIERS void philox_double2(uint32 ctr0, __vector uint32 ctr1, uint32 ctr2, uint32 ctr3,
+QUALIFIERS void philox_double2(uint32 ctr0, __vector unsigned int ctr1, uint32 ctr2, uint32 ctr3,
                                uint32 key0, uint32 key1,
                                __vector double & rnd1, __vector double & rnd2)
 {
-    __vector uint32 ctr0v = vec_splats(ctr0);
-    __vector uint32 ctr2v = vec_splats(ctr2);
-    __vector uint32 ctr3v = vec_splats(ctr3);
+    __vector unsigned int ctr0v = vec_splats(ctr0);
+    __vector unsigned int ctr2v = vec_splats(ctr2);
+    __vector unsigned int ctr3v = vec_splats(ctr3);
 
     __vector double ignore;
     philox_double2(ctr0v, ctr1, ctr2v, ctr3v, key0, key1, rnd1, ignore, rnd2, ignore);
@@ -484,7 +482,7 @@ QUALIFIERS void philox_double2(uint32 ctr0, __vector int ctr1, uint32 ctr2, uint
                                uint32 key0, uint32 key1,
                                __vector double & rnd1, __vector double & rnd2)
 {
-    philox_double2(ctr0, (__vector uint32) ctr1, ctr2, ctr3, key0, key1, rnd1, rnd2);
+    philox_double2(ctr0, (__vector unsigned int) ctr1, ctr2, ctr3, key0, key1, rnd1, rnd2);
 }
 #endif
 #endif
diff --git a/pytest.ini b/pytest.ini
index e7b0eeb98f38cb30734a302aae3aad742e4b643e..db4823c05dd55d97262a02e25ad8652bf3f17d01 100644
--- a/pytest.ini
+++ b/pytest.ini
@@ -41,7 +41,7 @@ exclude_lines =
        if __name__ == .__main__.:
 
 skip_covered = True
-fail_under = 89
+fail_under = 88
 
 [html]
 directory = coverage_report