Skip to content
Snippets Groups Projects

WIP: ARM cache line zeroing

Closed Michael Kuron requested to merge cachelinezero into master
Viewing commit 366e3de2
Show latest version
1 file
+ 29
0
Preferences
Compare changes
@@ -17,3 +17,32 @@ inline int32x4_t makeVec_s32(int a, int b, int c, int d)
@@ -17,3 +17,32 @@ inline int32x4_t makeVec_s32(int a, int b, int c, int d)
alignas(16) int data[4] = {a, b, c, d};
alignas(16) int data[4] = {a, b, c, d};
return vld1q_s32(data);
return vld1q_s32(data);
}
}
 
 
// ZVA size is usually 64 bytes, but needs to be checked
 
int zva_size() {
 
uint64_t dczid;
 
asm volatile ("mrs %0, dczid_el0" : "=r"(dczid));
 
if ((dczid & (1 << 4)) == 0) {
 
int size = 4 << (dczid & 0xf);
 
return size;
 
}
 
return 0;
 
}
 
 
// write zva_size bytes of float vectors to memory aligned to zva_size bytes
 
inline void stream_f32(float32x4_t f[4], float* p) {
 
asm volatile("dc zva, %0"::"r"(p));
 
vst1q_f32(p, f[0]);
 
vst1q_f32(p, f[1]);
 
vst1q_f32(p, f[2]);
 
vst1q_f32(p, f[3]);
 
}
 
 
// write zva_size bytes of double vectors to memory aligned to zva_size bytes
 
inline void stream_f64(float64x2_t f[4], double* p) {
 
asm volatile("dc zva, %0"::"r"(p));
 
vst1q_f64(p, f[0]);
 
vst1q_f64(p, f[1]);
 
vst1q_f64(p, f[2]);
 
vst1q_f64(p, f[3]);
 
}