Skip to content
Snippets Groups Projects

Compare revisions

Changes are shown as if the source revision was being merged into the target revision. Learn more about comparing revisions.

Source

Select target project
No results found
Select Git revision
  • armneon
  • compare_fix
  • const_fix
  • gpu_liveness_opts
  • hyteg
  • improved_comm
  • jan_fix
  • jan_test
  • master
  • mr_parallel_datahandling_fix
  • philox-simd
  • target_dh_refactoring
  • test_martin
  • test_martin2
  • vectorization_sqrt_fix
  • release/0.2.1
  • release/0.2.10
  • release/0.2.11
  • release/0.2.12
  • release/0.2.13
  • release/0.2.14
  • release/0.2.15
  • release/0.2.2
  • release/0.2.3
  • release/0.2.4
  • release/0.2.6
  • release/0.2.7
  • release/0.2.8
  • release/0.2.9
29 results

Target

Select target project
  • anirudh.jonnalagadda/pystencils
  • hyteg/pystencils
  • jbadwaik/pystencils
  • jngrad/pystencils
  • itischler/pystencils
  • ob28imeq/pystencils
  • hoenig/pystencils
  • Bindgen/pystencils
  • hammer/pystencils
  • da15siwa/pystencils
  • holzer/pystencils
  • alexander.reinauer/pystencils
  • ec93ujoh/pystencils
  • Harke/pystencils
  • seitz/pystencils
  • pycodegen/pystencils
16 results
Select Git revision
  • 66-absolute-access-is-probably-not-copied-correctly-after-_eval_subs
  • const_fix
  • fhennig/v2.0-deprecations
  • fhennig/x86-gather-scatter
  • fma
  • gpu_bufferfield_fix
  • gpu_liveness_opts
  • holzer-master-patch-46757
  • hyteg
  • improved_comm
  • master
  • target_dh_refactoring
  • v2.0-dev
  • vectorization_sqrt_fix
  • zikeliml/124-rework-tutorials
  • zikeliml/Task-96-dotExporterForAST
  • last/Kerncraft
  • last/LLVM
  • last/OpenCL
  • release/0.2.1
  • release/0.2.10
  • release/0.2.11
  • release/0.2.12
  • release/0.2.13
  • release/0.2.14
  • release/0.2.15
  • release/0.2.2
  • release/0.2.3
  • release/0.2.4
  • release/0.2.6
  • release/0.2.7
  • release/0.2.8
  • release/0.2.9
  • release/0.3.0
  • release/0.3.1
  • release/0.3.2
  • release/0.3.3
  • release/0.3.4
  • release/0.4.0
  • release/0.4.1
  • release/0.4.2
  • release/0.4.3
  • release/0.4.4
  • release/1.0
  • release/1.0.1
  • release/1.1
  • release/1.1.1
  • release/1.2
  • release/1.3
  • release/1.3.1
  • release/1.3.2
  • release/1.3.3
  • release/1.3.4
  • release/1.3.5
  • release/1.3.6
  • release/1.3.7
  • release/2.0.dev0
57 results
Show changes
Commits on Source (1)
%% Cell type:code id: tags:
``` python
%matplotlib notebook
import matplotlib.pyplot as plt
from pystencils.session import *
import pystencils
```
%% Cell type:code id: tags:
``` python
# Two equally sized float64 buffers: a 1024 x 1024 grid of zeros and a
# zero-filled array with the same shape and dtype.
src_arr = np.zeros(shape=(1024, 1024), dtype=np.float64)
dst_arr = np.zeros_like(src_arr)
# Field objects `dst` and `src`, created from the two arrays; they are
# indexed with relative offsets in the stencil description.
dst, src = ps.fields(dst=dst_arr, src=src_arr)
```
%% Cell type:code id: tags:
``` python
grad_x, grad_y = sp.symbols("grad_x, grad_y")
# Sum of the four direct neighbours of the current cell in `src`.
neighbour_sum = src[1, 0] + src[-1, 0] + src[0, 1] + src[0, -1]
# A single update rule: the current cell of `dst` receives a quarter of that sum.
symbolic_description = [ps.Assignment(dst[0, 0], neighbour_sum / 4)]
kernel = ps.create_kernel(symbolic_description)
# Bare expression so the notebook renders the assignment list.
symbolic_description
```
%% Output
$\displaystyle \left[ {{dst}_{(0,0)}} \leftarrow \frac{{{src}_{(-1,0)}}}{4} + \frac{{{src}_{(0,-1)}}}{4} + \frac{{{src}_{(0,1)}}}{4} + \frac{{{src}_{(1,0)}}}{4}\right]$
⎡         src_W   src_S   src_N   src_E⎤
⎢dst_C := ───── + ───── + ───── + ─────⎥
⎣           4       4       4       4  ⎦
%% Cell type:code id: tags:
``` python
# Decorator-based way of writing the averaging rule: inside a function wrapped
# by @ps.kernel, `lhs @= rhs` declares an assignment (the recorded output of
# this cell shows the resulting dst <- ... assignment).
@ps.kernel
def symbolic_description_using_function():
    # The current cell of dst becomes the mean of the four direct src neighbours.
    dst[0, 0] @= (src[1, 0] + src[-1, 0] + src[0, 1] + src[0, -1]) / 4


# Bare expression so the notebook renders what the decorator bound to the name.
symbolic_description_using_function
```
%% Output
$\displaystyle \left[ {{dst}_{(0,0)}} \leftarrow \frac{{{src}_{(-1,0)}}}{4} + \frac{{{src}_{(0,-1)}}}{4} + \frac{{{src}_{(0,1)}}}{4} + \frac{{{src}_{(1,0)}}}{4}\right]$
⎡         src_W   src_S   src_N   src_E⎤
⎢dst_C := ───── + ───── + ───── + ─────⎥
⎣           4       4       4       4  ⎦
%% Cell type:code id: tags:
``` python
# Rebind `kernel` to a kernel created from the decorator-based description,
# then display its generated code.
kernel = ps.create_kernel(symbolic_description_using_function)
ps.show_code(kernel)
```
%% Output
%% Cell type:code id: tags:
``` python
# Set the first column and the first row of the source array to 1.
src_arr[:,0] = 1
src_arr[0,:] = 1
# Compile the kernel; the result is called with src=/dst= arrays further down.
compiled_kernel = kernel.compile()
# Plot the initial state. The trailing semicolon keeps the notebook from
# echoing the return value of the last call.
plt.imshow(src_arr)
plt.colorbar();
```
%% Output
%% Cell type:code id: tags:
``` python
# Apply the compiled stencil 1000 times, alternating between the two buffers.
for _ in range(1000):
    compiled_kernel(src=src_arr, dst=dst_arr)
    # Swap the names so the array just written is the input of the next sweep.
    src_arr, dst_arr = dst_arr, src_arr

# After the final swap `src_arr` refers to the most recently written array.
plt.imshow(src_arr)
plt.colorbar();
```
%% Output
%% Cell type:code id: tags:
``` python
from pystencils import kerncraft_coupling as kc
import kerncraft
```
%% Cell type:code id: tags:
``` python
# Wrap the pystencils kernel in the kerncraft coupling class; the following
# cells query this object.
analyzed = kc.PyStencilsKerncraftKernel(kernel)
```
%% Cell type:code id: tags:
``` python
# Print the C source produced for the wrapped kernel; per the recorded output
# this is the kernel function plus a main() that allocates the arrays and
# calls the kernel repeatedly.
print(analyzed.as_code())
```
%% Output
#include "kerncraft.h"
#include <stdlib.h>
#include <stdint.h>
#include <stdbool.h>
#include <math.h>
#include <stdio.h>
#define RESTRICT __restrict__
#define FUNC_PREFIX
void dummy(void *);
void timing(double* wcTime, double* cpuTime);
extern int var_false;
FUNC_PREFIX void kernel(double * RESTRICT _data_dst, double * RESTRICT const _data_src)
{
for (int ctr_0 = 1; ctr_0 < 1023; ctr_0 += 1)
{
double * RESTRICT _data_dst_00 = _data_dst + 1024*ctr_0;
double * RESTRICT _data_src_01 = _data_src + 1024*ctr_0 + 1024;
double * RESTRICT _data_src_00 = _data_src + 1024*ctr_0;
double * RESTRICT _data_src_0m1 = _data_src + 1024*ctr_0 - 1024;
for (int ctr_1 = 1; ctr_1 < 1023; ctr_1 += 1)
{
_data_dst_00[ctr_1] = 0.25*_data_src_00[ctr_1 + 1] + 0.25*_data_src_00[ctr_1 - 1] + 0.25*_data_src_01[ctr_1] + 0.25*_data_src_0m1[ctr_1];
}
}
}
int main(int argc, char **argv)
{
// Initialization dst
double * dst = (double *) aligned_malloc(sizeof(double) * 1048576, 64);
for (unsigned long long i = 0; i < 1048576; ++i)
dst[i] = 0.23;
if(var_false)
dummy(dst);
// Initialization src
double * src = (double *) aligned_malloc(sizeof(double) * 1048576, 64);
for (unsigned long long i = 0; i < 1048576; ++i)
src[i] = 0.23;
if(var_false)
dummy(src);
for(int warmup = 1; warmup >= 0; --warmup) {
int repeat = 2;
if(warmup == 0) {
repeat = atoi(argv[1]);
}
for (; repeat > 0; --repeat)
{
kernel(dst,src);
// Dummy calls
if(var_false) dummy((void*)dst);
if(var_false) dummy((void*)src);
}
}
}
%% Cell type:code id: tags:
``` python
# Element count per array, keyed by field name (1048576 = 1024 * 1024 here).
analyzed.array_sizes()
```
%% Output
{'src': 1048576, 'dst': 1048576}
%% Cell type:code id: tags:
``` python
# Collect the loop stack into a list: one dict per loop with its index name,
# start, stop and increment.
list(analyzed.get_loop_stack())
```
%% Output
[{'index': 'ctr_0', 'start': 1, 'stop': 1023, 'increment': 1},
{'index': 'ctr_1', 'start': 1, 'stop': 1023, 'increment': 1}]
%% Cell type:code id: tags:
``` python
# Print the loop stack, the read/write access offsets per array and the
# FLOP counts per operator.
analyzed.print_kernel_info()
```
%% Output
loop stack: idx | min max step
---------+---------------------------------
ctr_0 | 1 1023 1
ctr_1 | 1 1023 1
data sources: name | offsets ...
---------+------------...
src | [ctr_0, ctr_1 - 1]
| [ctr_0 + 1, ctr_1]
| [ctr_0 - 1, ctr_1]
| [ctr_0, ctr_1 + 1]
data destinations: name | offsets ...
---------+------------...
dst | [ctr_0, ctr_1]
FLOPs: op | count
----+-------
+ | 3
* | 4
=======
7
%% Cell type:code id: tags:
``` python
# Print the table of constants; the recorded output lists none for this kernel.
analyzed.print_constants_info()
```
%% Output
constants: name | value
---------+-----------
%% Cell type:code id: tags:
``` python
# Print each variable's name, element type and size.
analyzed.print_variables_info()
```
%% Output
variables: name | type size
---------+----------------------------------
src | float (1024, 1024)
dst | float (1024, 1024)
%% Cell type:code id: tags:
``` python
# Load the machine description from a YAML file.
# NOTE(review): "some_machine.yaml" looks like a placeholder path — confirm it
# points at a real kerncraft machine file before running.
machine = kerncraft.machinemodel.MachineModel(path_to_yaml="some_machine.yaml")
# Set up the ECMData model for the wrapped kernel on that machine, using a
# KerncraftParameters object constructed without arguments.
model = kerncraft.models.ECMData(analyzed, machine, kc.KerncraftParameters())
```
%% Cell type:code id: tags:
``` python
# Run the analysis; the recorded output is a dict with per-level cycles,
# misses, hits, evicts and detailed cache statistics.
model.analyze()
```
%% Output
{'cycles': [('L2', 3.0255005100102004),
('L3', 6.0516410328206565),
('MEM', 0.0)],
'misses': [2.023880477609552, 2.023880477609552, 0.0, 0.0],
'hits': [62.97805956119122, 0.0, 2.023880477609552, 0.0],
'evicts': [1.0016200324006481, 1.001940038800776, 0.0, 0.0],
'verbose infos': {'memory hierarchy': [{'index': 0,
'level': 'L1',
'total loads': 320.1241624832497,
'total misses': 68.21192423848477,
'total hits': 251.9122382447649,
'total stores': 64.0,
'total evicts': 64.10368207364148,
'total lines load': 65.00194003880078,
'total lines misses': 2.023880477609552,
'total lines hits': 62.97805956119122,
'total lines stores': 16.0,
'total lines evicts': 1.0016200324006481,
'cycles': None},
{'index': 1,
'level': 'L2',
'total loads': 129.52835056701133,
'total misses': 129.52835056701133,
'total hits': 0.0,
'total stores': 64.10368207364148,
'total evicts': 64.12416248324966,
'total lines load': 2.023880477609552,
'total lines misses': 2.023880477609552,
'total lines hits': 0.0,
'total lines stores': 1.0016200324006481,
'total lines evicts': 1.001940038800776,
'cycles': None},
{'index': 2,
'level': 'L3',
'total loads': 129.52835056701133,
'total misses': 0.0,
'total hits': 129.52835056701133,
'total stores': 64.12416248324966,
'total evicts': 0.0,
'total lines load': 2.023880477609552,
'total lines misses': 0.0,
'total lines hits': 2.023880477609552,
'total lines stores': 1.001940038800776,
'total lines evicts': 0.0,
'cycles': None},
{'index': 3,
'level': 'MEM',
'total loads': 0.0,
'total misses': 0.0,
'total hits': 0.0,
'total stores': 0.0,
'total evicts': 0.0,
'total lines load': 0.0,
'total lines misses': 0.0,
'total lines hits': 0.0,
'total lines stores': 0.0,
'total lines evicts': 0.0,
'cycles': None}],
'cache stats': [{'name': 'L1',
'LOAD_count': 406254,
'LOAD_byte': 2000736,
'STORE_count': 99998,
'STORE_byte': 399992,
'HIT_count': 393605,
'HIT_byte': 1574420,
'MISS_count': 12649,
'MISS_byte': 426316,
'EVICT_count': 6260,
'EVICT_byte': 400640},
{'name': 'L2',
'LOAD_count': 12649,
'LOAD_byte': 809536,
'STORE_count': 6260,
'STORE_byte': 400640,
'HIT_count': 0,
'HIT_byte': 0,
'MISS_count': 12649,
'MISS_byte': 809536,
'EVICT_count': 6262,
'EVICT_byte': 400768},
{'name': 'L3',
'LOAD_count': 12649,
'LOAD_byte': 809536,
'STORE_count': 6262,
'STORE_byte': 400768,
'HIT_count': 12649,
'HIT_byte': 809536,
'MISS_count': 0,
'MISS_byte': 0,
'EVICT_count': 0,
'EVICT_byte': 0},
{'name': 'MEM',
'LOAD_count': 0,
'LOAD_byte': 0,
'HIT_count': 0,
'HIT_byte': 0,
'STORE_count': 0,
'STORE_byte': 0,
'EVICT_count': 0,
'EVICT_byte': 0,
'MISS_count': 0,
'MISS_byte': 0}],
'cachelines in stats': 6249.875},
'iterations per cacheline': 16,
'L2': 3.0255005100102004,
'L3': 6.0516410328206565,
'memory bandwidth kernel': 'load',
'memory bandwidth': PrefixedUnit(61.82, 'G', 'B/s'),
'MEM': 0.0,
'flops per iteration': 7}
%% Cell type:code id: tags:
``` python
# Print the summary: cycles per cache line (cy/CL) for L2, L3 and MEM.
model.report()
```
%% Output
L2 = 3.03 cy/CL
L3 = 6.05 cy/CL
MEM = 0.00 cy/CL
%% Cell type:code id: tags:
``` python
```
...@@ -96,7 +96,8 @@ class PyStencilsKerncraftKernel(KernelCode): ...@@ -96,7 +96,8 @@ class PyStencilsKerncraftKernel(KernelCode):
for field in fields_accessed: for field in fields_accessed:
layout = get_layout_tuple(field) layout = get_layout_tuple(field)
permuted_shape = list(field.shape[i] for i in layout) permuted_shape = list(field.shape[i] for i in layout)
self.set_variable(field.name, str(field.dtype), tuple(permuted_shape)) self.set_variable(field.name, ('double',) if str(field.dtype) == 'float64' else
('float',), tuple(permuted_shape))
# Scalars may be safely ignored # Scalars may be safely ignored
# for param in ast.get_parameters(): # for param in ast.get_parameters():
......