Skip to content
Snippets Groups Projects
Commit bf71596f authored by Jan Hönig's avatar Jan Hönig
Browse files

Presentation of kerncraft. Fixed kerncraft

parent e87b4daf
No related branches found
No related tags found
No related merge requests found
%% Cell type:code id: tags:
``` python
%matplotlib notebook
import matplotlib.pyplot as plt
from pystencils.session import *
import pystencils
```
%% Cell type:code id: tags:
``` python
# Two zero-initialised 1024x1024 float64 buffers; `dst_arr` takes its shape and
# dtype from `src_arr`.
src_arr = np.zeros([1024,1024], dtype=np.float64)
dst_arr = np.zeros_like(src_arr)
# Symbolic field handles named `dst` and `src`, created from the two arrays and
# used in the stencil expressions of the following cells.
dst, src = ps.fields(dst=dst_arr, src=src_arr)
```
%% Cell type:code id: tags:
``` python
# NOTE(review): grad_x / grad_y are not referenced by any other cell shown
# here -- possibly leftovers; confirm before removing.
grad_x, grad_y = sp.symbols("grad_x, grad_y")
# A single assignment: the centre cell of `dst` becomes the mean of the four
# direct neighbours of the same cell in `src`.
symbolic_description = [
ps.Assignment(dst[0, 0], (src[1, 0] + src[-1, 0] + src[0, 1] + src[0, -1]) / 4)
]
kernel = ps.create_kernel(symbolic_description)
# Bare expression as the last statement so the notebook renders the list.
symbolic_description
```
%% Output
$\displaystyle \left[ {{dst}_{(0,0)}} \leftarrow \frac{{{src}_{(-1,0)}}}{4} + \frac{{{src}_{(0,-1)}}}{4} + \frac{{{src}_{(0,1)}}}{4} + \frac{{{src}_{(1,0)}}}{4}\right]$
⎡ src_W src_S src_N src_E⎤
⎢dst_C := ───── + ───── + ───── + ─────⎥
⎣ 4 4 4 4 ⎦
%% Cell type:code id: tags:
``` python
# Same four-neighbour averaging stencil as the list-based description, written
# as a decorated function. Evaluating the decorated name in the notebook shows
# a one-element assignment list identical to the list-based one.
@ps.kernel
def symbolic_description_using_function():
    # Inside the decorated function `@=` is used to state "dst centre is
    # assigned this expression"; presumably the decorator rewrites it into a
    # symbolic assignment rather than performing a matrix-multiply update --
    # confirm against the pystencils kernel decorator documentation.
    dst[0, 0] @= (src[1, 0] + src[-1, 0] + src[0, 1] + src[0, -1]) / 4
symbolic_description_using_function
```
%% Output
$\displaystyle \left[ {{dst}_{(0,0)}} \leftarrow \frac{{{src}_{(-1,0)}}}{4} + \frac{{{src}_{(0,-1)}}}{4} + \frac{{{src}_{(0,1)}}}{4} + \frac{{{src}_{(1,0)}}}{4}\right]$
⎡ src_W src_S src_N src_E⎤
⎢dst_C := ───── + ───── + ───── + ─────⎥
⎣ 4 4 4 4 ⎦
%% Cell type:code id: tags:
``` python
# Build the kernel from the decorator-based description. This rebinds `kernel`,
# which was first assigned from the list-based description.
kernel = ps.create_kernel(symbolic_description_using_function)
# Presumably renders the generated source for inspection (the output is not
# captured in this listing).
ps.show_code(kernel)
```
%% Output
%% Cell type:code id: tags:
``` python
# Fixed boundary values: 1 along the first column and the first row.
# The generated kernel only writes interior cells (both loops run from 1 to
# 1022), and the iteration cell swaps `src_arr` and `dst_arr` after every
# sweep. Both buffers therefore need the same boundary values; if only
# `src_arr` is initialised, every second sweep reads an all-zero border.
src_arr[:, 0] = 1
src_arr[0, :] = 1
dst_arr[:, 0] = 1
dst_arr[0, :] = 1
compiled_kernel = kernel.compile()
# Show the initial state; the trailing semicolon suppresses the notebook's
# textual echo of the colorbar object.
plt.imshow(src_arr)
plt.colorbar();
```
%% Output
%% Cell type:code id: tags:
``` python
# Run 1000 sweeps. Each call reads `src_arr` and writes `dst_arr`; the two
# names are then swapped so the next sweep consumes the values just written.
for _ in range(1000):
    compiled_kernel(src=src_arr, dst=dst_arr)
    src_arr, dst_arr = dst_arr, src_arr
# Because of the swap at the end of every sweep, the most recent result is the
# array now referenced by `src_arr`.
plt.imshow(src_arr)
plt.colorbar();
```
%% Output
%% Cell type:code id: tags:
``` python
from pystencils import kerncraft_coupling as kc
import kerncraft
```
%% Cell type:code id: tags:
``` python
analyzed = kc.PyStencilsKerncraftKernel(kernel)
```
%% Cell type:code id: tags:
``` python
print(analyzed.as_code())
```
%% Output
#include "kerncraft.h"
#include <stdlib.h>
#include <stdint.h>
#include <stdbool.h>
#include <math.h>
#include <stdio.h>
#define RESTRICT __restrict__
#define FUNC_PREFIX
void dummy(void *);
void timing(double* wcTime, double* cpuTime);
extern int var_false;
FUNC_PREFIX void kernel(double * RESTRICT _data_dst, double * RESTRICT const _data_src)
{
for (int ctr_0 = 1; ctr_0 < 1023; ctr_0 += 1)
{
double * RESTRICT _data_dst_00 = _data_dst + 1024*ctr_0;
double * RESTRICT _data_src_01 = _data_src + 1024*ctr_0 + 1024;
double * RESTRICT _data_src_00 = _data_src + 1024*ctr_0;
double * RESTRICT _data_src_0m1 = _data_src + 1024*ctr_0 - 1024;
for (int ctr_1 = 1; ctr_1 < 1023; ctr_1 += 1)
{
_data_dst_00[ctr_1] = 0.25*_data_src_00[ctr_1 + 1] + 0.25*_data_src_00[ctr_1 - 1] + 0.25*_data_src_01[ctr_1] + 0.25*_data_src_0m1[ctr_1];
}
}
}
int main(int argc, char **argv)
{
// Initialization dst
double * dst = (double *) aligned_malloc(sizeof(double) * 1048576, 64);
for (unsigned long long i = 0; i < 1048576; ++i)
dst[i] = 0.23;
if(var_false)
dummy(dst);
// Initialization src
double * src = (double *) aligned_malloc(sizeof(double) * 1048576, 64);
for (unsigned long long i = 0; i < 1048576; ++i)
src[i] = 0.23;
if(var_false)
dummy(src);
for(int warmup = 1; warmup >= 0; --warmup) {
int repeat = 2;
if(warmup == 0) {
repeat = atoi(argv[1]);
}
for (; repeat > 0; --repeat)
{
kernel(dst,src);
// Dummy calls
if(var_false) dummy((void*)dst);
if(var_false) dummy((void*)src);
}
}
}
%% Cell type:code id: tags:
``` python
analyzed.array_sizes()
```
%% Output
{'src': 1048576, 'dst': 1048576}
%% Cell type:code id: tags:
``` python
list(analyzed.get_loop_stack())
```
%% Output
[{'index': 'ctr_0', 'start': 1, 'stop': 1023, 'increment': 1},
{'index': 'ctr_1', 'start': 1, 'stop': 1023, 'increment': 1}]
%% Cell type:code id: tags:
``` python
analyzed.print_kernel_info()
```
%% Output
loop stack: idx | min max step
---------+---------------------------------
ctr_0 | 1 1023 1
ctr_1 | 1 1023 1
data sources: name | offsets ...
---------+------------...
src | [ctr_0, ctr_1 - 1]
| [ctr_0 + 1, ctr_1]
| [ctr_0 - 1, ctr_1]
| [ctr_0, ctr_1 + 1]
data destinations: name | offsets ...
---------+------------...
dst | [ctr_0, ctr_1]
FLOPs: op | count
----+-------
+ | 3
* | 4
=======
7
%% Cell type:code id: tags:
``` python
analyzed.print_constants_info()
```
%% Output
constants: name | value
---------+-----------
%% Cell type:code id: tags:
``` python
analyzed.print_variables_info()
```
%% Output
variables: name | type size
---------+----------------------------------
src | float (1024, 1024)
dst | float (1024, 1024)
%% Cell type:code id: tags:
``` python
# Machine description for kerncraft, loaded from a YAML file.
# NOTE(review): "some_machine.yaml" looks like a placeholder -- point it at a
# real machine description file before running this cell.
machine = kerncraft.machinemodel.MachineModel(path_to_yaml="some_machine.yaml")
# Performance model built from the analysed kernel, the machine description and
# a KerncraftParameters object constructed with no arguments.
model = kerncraft.models.ECMData(analyzed, machine, kc.KerncraftParameters())
```
%% Cell type:code id: tags:
``` python
model.analyze()
```
%% Output
{'cycles': [('L2', 3.0255005100102004),
('L3', 6.0516410328206565),
('MEM', 0.0)],
'misses': [2.023880477609552, 2.023880477609552, 0.0, 0.0],
'hits': [62.97805956119122, 0.0, 2.023880477609552, 0.0],
'evicts': [1.0016200324006481, 1.001940038800776, 0.0, 0.0],
'verbose infos': {'memory hierarchy': [{'index': 0,
'level': 'L1',
'total loads': 320.1241624832497,
'total misses': 68.21192423848477,
'total hits': 251.9122382447649,
'total stores': 64.0,
'total evicts': 64.10368207364148,
'total lines load': 65.00194003880078,
'total lines misses': 2.023880477609552,
'total lines hits': 62.97805956119122,
'total lines stores': 16.0,
'total lines evicts': 1.0016200324006481,
'cycles': None},
{'index': 1,
'level': 'L2',
'total loads': 129.52835056701133,
'total misses': 129.52835056701133,
'total hits': 0.0,
'total stores': 64.10368207364148,
'total evicts': 64.12416248324966,
'total lines load': 2.023880477609552,
'total lines misses': 2.023880477609552,
'total lines hits': 0.0,
'total lines stores': 1.0016200324006481,
'total lines evicts': 1.001940038800776,
'cycles': None},
{'index': 2,
'level': 'L3',
'total loads': 129.52835056701133,
'total misses': 0.0,
'total hits': 129.52835056701133,
'total stores': 64.12416248324966,
'total evicts': 0.0,
'total lines load': 2.023880477609552,
'total lines misses': 0.0,
'total lines hits': 2.023880477609552,
'total lines stores': 1.001940038800776,
'total lines evicts': 0.0,
'cycles': None},
{'index': 3,
'level': 'MEM',
'total loads': 0.0,
'total misses': 0.0,
'total hits': 0.0,
'total stores': 0.0,
'total evicts': 0.0,
'total lines load': 0.0,
'total lines misses': 0.0,
'total lines hits': 0.0,
'total lines stores': 0.0,
'total lines evicts': 0.0,
'cycles': None}],
'cache stats': [{'name': 'L1',
'LOAD_count': 406254,
'LOAD_byte': 2000736,
'STORE_count': 99998,
'STORE_byte': 399992,
'HIT_count': 393605,
'HIT_byte': 1574420,
'MISS_count': 12649,
'MISS_byte': 426316,
'EVICT_count': 6260,
'EVICT_byte': 400640},
{'name': 'L2',
'LOAD_count': 12649,
'LOAD_byte': 809536,
'STORE_count': 6260,
'STORE_byte': 400640,
'HIT_count': 0,
'HIT_byte': 0,
'MISS_count': 12649,
'MISS_byte': 809536,
'EVICT_count': 6262,
'EVICT_byte': 400768},
{'name': 'L3',
'LOAD_count': 12649,
'LOAD_byte': 809536,
'STORE_count': 6262,
'STORE_byte': 400768,
'HIT_count': 12649,
'HIT_byte': 809536,
'MISS_count': 0,
'MISS_byte': 0,
'EVICT_count': 0,
'EVICT_byte': 0},
{'name': 'MEM',
'LOAD_count': 0,
'LOAD_byte': 0,
'HIT_count': 0,
'HIT_byte': 0,
'STORE_count': 0,
'STORE_byte': 0,
'EVICT_count': 0,
'EVICT_byte': 0,
'MISS_count': 0,
'MISS_byte': 0}],
'cachelines in stats': 6249.875},
'iterations per cacheline': 16,
'L2': 3.0255005100102004,
'L3': 6.0516410328206565,
'memory bandwidth kernel': 'load',
'memory bandwidth': PrefixedUnit(61.82, 'G', 'B/s'),
'MEM': 0.0,
'flops per iteration': 7}
%% Cell type:code id: tags:
``` python
model.report()
```
%% Output
L2 = 3.03 cy/CL
L3 = 6.05 cy/CL
MEM = 0.00 cy/CL
%% Cell type:code id: tags:
``` python
```
...@@ -96,7 +96,8 @@ class PyStencilsKerncraftKernel(KernelCode): ...@@ -96,7 +96,8 @@ class PyStencilsKerncraftKernel(KernelCode):
for field in fields_accessed: for field in fields_accessed:
layout = get_layout_tuple(field) layout = get_layout_tuple(field)
permuted_shape = list(field.shape[i] for i in layout) permuted_shape = list(field.shape[i] for i in layout)
self.set_variable(field.name, str(field.dtype), tuple(permuted_shape)) self.set_variable(field.name, ('double',) if str(field.dtype) == 'float64' else
('float',), tuple(permuted_shape))
# Scalars may be safely ignored # Scalars may be safely ignored
# for param in ast.get_parameters(): # for param in ast.get_parameters():
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment