Skip to content
Snippets Groups Projects
Commit 1f800cba authored by Michael Kuron's avatar Michael Kuron :mortar_board: Committed by Michael Kuron
Browse files

ARM has a cache line zero instruction that prevents data that will be...

ARM has a cache line zero instruction that prevents data that will be overwritten anyway from being loaded from RAM. Kind of a non-temporal store light.
parent 30641109
1 merge request!225WIP: ARM cache line zeroing
Pipeline #30584 passed with stage
in 12 minutes and 31 seconds
......@@ -17,3 +17,32 @@ inline int32x4_t makeVec_s32(int a, int b, int c, int d)
alignas(16) int data[4] = {a, b, c, d};
return vld1q_s32(data);
}
// ZVA size is usually 64 bytes, but needs to be checked
int zva_size() {
uint64_t dczid;
asm volatile ("mrs %0, dczid_el0" : "=r"(dczid));
if ((dczid & (1 << 4)) == 0) {
int size = 4 << (dczid & 0xf);
return size;
}
return 0;
}
// write zva_size bytes of float vectors to memory aligned to zva_size bytes
inline void stream_f32(float32x4_t f[4], float* p) {
asm volatile("dc zva, %0"::"r"(p));
vst1q_f32(p, f[0]);
vst1q_f32(p+4, f[1]);
vst1q_f32(p+8, f[2]);
vst1q_f32(p+12, f[3]);
}
// write zva_size bytes of double vectors to memory aligned to zva_size bytes
inline void stream_f64(float64x2_t f[4], double* p) {
asm volatile("dc zva, %0"::"r"(p));
vst1q_f64(p, f[0]);
vst1q_f64(p+2, f[1]);
vst1q_f64(p+4, f[2]);
vst1q_f64(p+6, f[3]);
}
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment