Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
pystencils
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Wiki
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Build
Pipelines
Jobs
Pipeline schedules
Artifacts
Deploy
Releases
Package registry
Model registry
Operate
Environments
Terraform modules
Monitor
Incidents
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Terms and privacy
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
Sebastian Bindgen
pystencils
Commits
5e3a72de
Commit
5e3a72de
authored
5 years ago
by
Jan Laukemann
Browse files
Options
Downloads
Patches
Plain Diff
changes for lbmpy tests
parent
1754ef27
No related branches found
No related tags found
No related merge requests found
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
pystencils/kerncraft_coupling/generate_benchmark.py
+99
-15
99 additions, 15 deletions
pystencils/kerncraft_coupling/generate_benchmark.py
pystencils/kerncraft_coupling/kerncraft_interface.py
+62
-11
62 additions, 11 deletions
pystencils/kerncraft_coupling/kerncraft_interface.py
with
161 additions
and
26 deletions
pystencils/kerncraft_coupling/generate_benchmark.py
+
99
−
15
View file @
5e3a72de
from
jinja2
import
Template
from
jinja2
import
Template
import
numpy
as
np
import
os
import
os
import
subprocess
import
subprocess
from
pystencils.include
import
get_pystencils_include_path
from
pystencils.include
import
get_pystencils_include_path
...
@@ -8,6 +9,25 @@ from pystencils.sympyextensions import prod
...
@@ -8,6 +9,25 @@ from pystencils.sympyextensions import prod
from
pystencils.data_types
import
get_base_type
from
pystencils.data_types
import
get_base_type
from
pystencils.astnodes
import
PragmaBlock
from
pystencils.astnodes
import
PragmaBlock
include_template
=
Template
(
"""
#include
"
kerncraft.h
"
#include <stdlib.h>
#include <stdint.h>
#include <stdbool.h>
#include <math.h>
#include <stdio.h>
{{ includes }}
{%- if likwid %}
#include <likwid.h>
{%- endif %}
#define RESTRICT __restrict__
#define FUNC_PREFIX
"""
)
benchmark_template
=
Template
(
"""
benchmark_template
=
Template
(
"""
#include
"
kerncraft.h
"
#include
"
kerncraft.h
"
#include <stdlib.h>
#include <stdlib.h>
...
@@ -28,9 +48,42 @@ void dummy(void *);
...
@@ -28,9 +48,42 @@ void dummy(void *);
void timing(double* wcTime, double* cpuTime);
void timing(double* wcTime, double* cpuTime);
extern int var_false;
extern int var_false;
{{kernel_code}}
{{kernel_code}}
void *aligned_malloc_with_offset( unsigned long size, unsigned long alignment, unsigned long offset )
{
// With 0 alignment this function makes no sense
// use normal malloc instead
//WALBERLA_ASSERT_GREATER( alignment, 0 );
// Tests if alignment is power of two (assuming alignment>0)
//WALBERLA_ASSERT( !(alignment & (alignment - 1)) );
//WALBERLA_ASSERT_LESS( offset, alignment );
if( offset == 0 )
return aligned_malloc( size, alignment );
void *pa; // pointer to allocated memory
void *ptr; // pointer to usable aligned memory
pa=malloc( (size+2*alignment-1 )+sizeof(void *));
if(!pa)
return 0;
// Find next aligned position, starting at pa+sizeof(void*)-1
ptr=(void*)( ((size_t)pa+sizeof(void *)+alignment-1) & ~(alignment-1));
ptr=(void*) ( (char*)(ptr) + alignment - offset);
// Store pointer to real allocated chunk just before usable chunk
*((void **)ptr-1)=pa;
//WALBERLA_ASSERT_EQUAL( ((size_t)ptr+offset) % alignment, 0 );
return ptr;
}
int main(int argc, char **argv)
int main(int argc, char **argv)
{
{
...
@@ -41,7 +94,12 @@ int main(int argc, char **argv)
...
@@ -41,7 +94,12 @@ int main(int argc, char **argv)
{%- for field_name, dataType, size in fields %}
{%- for field_name, dataType, size in fields %}
// Initialization {{field_name}}
// Initialization {{field_name}}
{%- if aligned %}
double * {{field_name}} = (double *) aligned_malloc_with_offset(sizeof({{dataType}}) * {{size}}, 32, 8);
{%- else %}
double * {{field_name}} = (double *) aligned_malloc(sizeof({{dataType}}) * {{size}}, 64);
double * {{field_name}} = (double *) aligned_malloc(sizeof({{dataType}}) * {{size}}, 64);
{%- endif %}
for (unsigned long long i = 0; i < {{size}}; ++i)
for (unsigned long long i = 0; i < {{size}}; ++i)
{{field_name}}[i] = 0.23;
{{field_name}}[i] = 0.23;
...
@@ -69,6 +127,10 @@ int main(int argc, char **argv)
...
@@ -69,6 +127,10 @@ int main(int argc, char **argv)
#pragma omp barrier
#pragma omp barrier
{%- elif likwid %}
{%- elif likwid %}
likwid_markerRegisterRegion(
"
loop
"
);
likwid_markerRegisterRegion(
"
loop
"
);
{%- elif openmp %}
#pragma omp parallel
{
#pragma omp barrier
{%- endif %}
{%- endif %}
for(int warmup = 1; warmup >= 0; --warmup) {
for(int warmup = 1; warmup >= 0; --warmup) {
...
@@ -107,10 +169,10 @@ int main(int argc, char **argv)
...
@@ -107,10 +169,10 @@ int main(int argc, char **argv)
{%- if likwid %}
{%- if likwid %}
likwid_markerStopRegion(
"
loop
"
);
likwid_markerStopRegion(
"
loop
"
);
{%- endif %}
{%- if openmp %}
{%- if openmp %}
}
}
{%- endif %}
{%- endif %}
{%- endif %}
{%- if likwid %}
{%- if likwid %}
likwid_markerClose();
likwid_markerClose();
...
@@ -119,13 +181,12 @@ int main(int argc, char **argv)
...
@@ -119,13 +181,12 @@ int main(int argc, char **argv)
"""
)
"""
)
def
generate_benchmark
(
ast
,
likwid
=
False
,
openmp
=
False
,
timing
=
False
):
def
generate_benchmark
(
ast
,
openmp
=
False
,
timing
=
False
,
main
=
True
,
kernel
=
True
,
assume_aligned
=
False
):
"""
Return C code of a benchmark program for the given kernel.
"""
Return C code of a benchmark program for the given kernel.
Args:
Args:
ast: the pystencils AST object as returned by create_kernel
ast: the pystencils AST object as returned by create_kernel
likwid: if True likwid markers are added to the code
openmp: to generate correct likwid initialization code
openmp: relevant only if likwid=True, to generate correct likwid initialization code
timing: add timing output to the code, prints time per iteration to stdout
timing: add timing output to the code, prints time per iteration to stdout
Returns:
Returns:
...
@@ -143,7 +204,13 @@ def generate_benchmark(ast, likwid=False, openmp=False, timing=False):
...
@@ -143,7 +204,13 @@ def generate_benchmark(ast, likwid=False, openmp=False, timing=False):
assert
p
.
is_field_pointer
,
"
Benchmark implemented only for kernels with fixed loop size
"
assert
p
.
is_field_pointer
,
"
Benchmark implemented only for kernels with fixed loop size
"
field
=
accessed_fields
[
p
.
field_name
]
field
=
accessed_fields
[
p
.
field_name
]
dtype
=
str
(
get_base_type
(
p
.
symbol
.
dtype
))
dtype
=
str
(
get_base_type
(
p
.
symbol
.
dtype
))
fields
.
append
((
p
.
field_name
,
dtype
,
prod
(
field
.
shape
)))
if
not
assume_aligned
:
fields
.
append
((
p
.
field_name
,
dtype
,
prod
(
field
.
shape
)))
else
:
index
=
np
.
where
(
field
.
strides
==
np
.
amax
(
field
.
strides
))[
0
][
0
]
size
=
field
.
shape
[
index
]
*
field
.
strides
[
index
]
fields
.
append
((
p
.
field_name
,
dtype
,
int
(
size
)))
call_parameters
.
append
(
p
.
field_name
)
call_parameters
.
append
(
p
.
field_name
)
header_list
=
get_headers
(
ast
)
header_list
=
get_headers
(
ast
)
...
@@ -151,14 +218,20 @@ def generate_benchmark(ast, likwid=False, openmp=False, timing=False):
...
@@ -151,14 +218,20 @@ def generate_benchmark(ast, likwid=False, openmp=False, timing=False):
# Strip "#pragma omp parallel" from within kernel, because main function takes care of that
# Strip "#pragma omp parallel" from within kernel, because main function takes care of that
# when likwid and openmp are enabled
# when likwid and openmp are enabled
if
likwid
and
openmp
:
#if openmp:
if
len
(
ast
.
body
.
args
)
>
0
and
isinstance
(
ast
.
body
.
args
[
0
],
PragmaBlock
):
# if len(ast.body.args) > 0 and isinstance(ast.body.args[0], PragmaBlock):
ast
.
body
.
args
[
0
].
pragma_line
=
''
# ast.body.args[0].pragma_line = ''
if
not
main
and
kernel
:
return
include_template
.
render
(
{
'
includes
'
:
includes
,
'
likwid
'
:
False
})
+
generate_c
(
ast
,
dialect
=
'
c
'
)
args
=
{
args
=
{
'
likwid
'
:
likwid
,
'
aligned
'
:
assume_aligned
,
'
likwid
'
:
False
,
'
openmp
'
:
openmp
,
'
openmp
'
:
openmp
,
'
kernel_code
'
:
generate_c
(
ast
,
dialect
=
'
c
'
),
'
kernel_code
'
:
generate_c
(
ast
,
dialect
=
'
c
'
)
if
kernel
else
''
,
'
kernelName
'
:
ast
.
function_name
,
'
kernelName
'
:
ast
.
function_name
,
'
fields
'
:
fields
,
'
fields
'
:
fields
,
'
constants
'
:
constants
,
'
constants
'
:
constants
,
...
@@ -169,7 +242,7 @@ def generate_benchmark(ast, likwid=False, openmp=False, timing=False):
...
@@ -169,7 +242,7 @@ def generate_benchmark(ast, likwid=False, openmp=False, timing=False):
return
benchmark_template
.
render
(
**
args
)
return
benchmark_template
.
render
(
**
args
)
def
run_c_benchmark
(
ast
,
inner_iterations
,
outer_iterations
=
3
):
def
run_c_benchmark
(
ast
,
inner_iterations
,
outer_iterations
=
3
,
assume_aligned
=
False
,
openmp
=
False
):
"""
Runs the given kernel with outer loop in C
"""
Runs the given kernel with outer loop in C
Args:
Args:
...
@@ -182,7 +255,7 @@ def run_c_benchmark(ast, inner_iterations, outer_iterations=3):
...
@@ -182,7 +255,7 @@ def run_c_benchmark(ast, inner_iterations, outer_iterations=3):
"""
"""
import
kerncraft
import
kerncraft
benchmark_code
=
generate_benchmark
(
ast
,
timing
=
True
)
benchmark_code
=
generate_benchmark
(
ast
,
timing
=
True
,
assume_aligned
=
assume_aligned
,
openmp
=
openmp
)
with
open
(
'
bench.c
'
,
'
w
'
)
as
f
:
with
open
(
'
bench.c
'
,
'
w
'
)
as
f
:
f
.
write
(
benchmark_code
)
f
.
write
(
benchmark_code
)
...
@@ -190,17 +263,28 @@ def run_c_benchmark(ast, inner_iterations, outer_iterations=3):
...
@@ -190,17 +263,28 @@ def run_c_benchmark(ast, inner_iterations, outer_iterations=3):
extra_flags
=
[
'
-I
'
+
get_pystencils_include_path
(),
extra_flags
=
[
'
-I
'
+
get_pystencils_include_path
(),
'
-I
'
+
os
.
path
.
join
(
kerncraft_path
,
'
headers
'
)]
'
-I
'
+
os
.
path
.
join
(
kerncraft_path
,
'
headers
'
)]
if
False
:
extra_flags
.
append
(
os
.
environ
[
'
LIKWID_INC
'
])
extra_flags
.
append
(
os
.
environ
[
'
LIKWID_LIB
'
])
compiler_config
=
get_compiler_config
()
compiler_config
=
get_compiler_config
()
compile_cmd
=
[
compiler_config
[
'
command
'
]]
+
compiler_config
[
'
flags
'
].
split
()
compile_cmd
=
[
compiler_config
[
'
command
'
]]
+
compiler_config
[
'
flags
'
].
split
()
compile_asm_cmd
=
[
compiler_config
[
'
command
'
]]
+
compiler_config
[
'
flags
'
].
split
()
compile_cmd
+=
[
*
extra_flags
,
compile_cmd
+=
[
*
extra_flags
,
os
.
path
.
join
(
kerncraft_path
,
'
headers
'
,
'
timing.c
'
),
os
.
path
.
join
(
kerncraft_path
,
'
headers
'
,
'
timing.c
'
),
os
.
path
.
join
(
kerncraft_path
,
'
headers
'
,
'
dummy.c
'
),
os
.
path
.
join
(
kerncraft_path
,
'
headers
'
,
'
dummy.c
'
),
'
bench.c
'
,
'
bench.c
'
,
'
-o
'
,
'
bench
'
,
'
-o
'
,
'
bench
'
,
]
]
run_compile_step
(
compile_cmd
)
print
(
"
"
.
join
(
compile_cmd
))
compile_asm_cmd
+=
[
*
extra_flags
,
os
.
path
.
join
(
kerncraft_path
,
'
headers
'
,
'
timing.c
'
),
os
.
path
.
join
(
kerncraft_path
,
'
headers
'
,
'
dummy.c
'
),
'
bench.c
'
,
'
-S
'
,
]
run_compile_step
(
compile_cmd
)
run_compile_step
(
compile_asm_cmd
)
results
=
[]
results
=
[]
for
_
in
range
(
outer_iterations
):
for
_
in
range
(
outer_iterations
):
benchmark_time
=
float
(
subprocess
.
check_output
([
'
./bench
'
,
str
(
inner_iterations
)]))
benchmark_time
=
float
(
subprocess
.
check_output
([
'
./bench
'
,
str
(
inner_iterations
)]))
...
...
This diff is collapsed.
Click to expand it.
pystencils/kerncraft_coupling/kerncraft_interface.py
+
62
−
11
View file @
5e3a72de
...
@@ -21,10 +21,22 @@ class PyStencilsKerncraftKernel(KernelCode):
...
@@ -21,10 +21,22 @@ class PyStencilsKerncraftKernel(KernelCode):
Implementation of kerncraft
'
s kernel interface for pystencils CPU kernels.
Implementation of kerncraft
'
s kernel interface for pystencils CPU kernels.
Analyses a list of equations assuming they will be executed on a CPU
Analyses a list of equations assuming they will be executed on a CPU
"""
"""
LIKWID_BASE
=
'
/usr/local/likwid
'
LIKWID_BASE
=
'
/mnt/opt/likwid-4.3.4
'
@staticmethod
def get_number_of_kernels(ast: KernelFunction):
    """
    Count the innermost loops found in a pystencils AST.

    Args:
        ast: the pystencils AST that is traversed for LoopOverCoordinate nodes

    Returns:
        number of LoopOverCoordinate nodes whose ``is_innermost_loop`` flag is set
    """
    # NOTE(review): stop_type presumably keeps the traversal from descending
    # into assignments - confirm against filtered_tree_iteration.
    loop_nodes = filtered_tree_iteration(ast, LoopOverCoordinate, stop_type=SympyAssignment)
    innermost_count = 0
    for loop_node in loop_nodes:
        if loop_node.is_innermost_loop:
            innermost_count += 1
    return innermost_count
@staticmethod
def get_steps_for_loops(ast: KernelFunction):
    """
    Collect the scaled step of every innermost loop in a pystencils AST.

    Args:
        ast: the pystencils AST that is traversed for LoopOverCoordinate nodes

    Returns:
        list with one entry per innermost loop, each being ``loop.step * 8``,
        in traversal order
    """
    scaled_steps = []
    for loop_node in filtered_tree_iteration(ast, LoopOverCoordinate, stop_type=SympyAssignment):
        if not loop_node.is_innermost_loop:
            continue
        # NOTE(review): the factor 8 looks like the byte size of a double - confirm
        scaled_steps.append(loop_node.step * 8)
    return scaled_steps
def
__init__
(
self
,
ast
:
KernelFunction
,
machine
:
Optional
[
MachineModel
]
=
None
,
def
__init__
(
self
,
ast
:
KernelFunction
,
machine
:
Optional
[
MachineModel
]
=
None
,
assumed_layout
=
'
SoA
'
,
debug_print
=
False
,
filename
=
None
):
assumed_layout
=
'
SoA
'
,
debug_print
=
False
,
filename
=
None
,
loop_idx
=
False
):
"""
Create a kerncraft kernel using a pystencils AST
"""
Create a kerncraft kernel using a pystencils AST
Args:
Args:
...
@@ -50,11 +62,12 @@ class PyStencilsKerncraftKernel(KernelCode):
...
@@ -50,11 +62,12 @@ class PyStencilsKerncraftKernel(KernelCode):
if
len
(
inner_loops
)
==
0
:
if
len
(
inner_loops
)
==
0
:
raise
ValueError
(
"
No loop found in pystencils AST
"
)
raise
ValueError
(
"
No loop found in pystencils AST
"
)
else
:
else
:
if
len
(
inner_loops
)
>
1
:
if
len
(
inner_loops
)
>
1
and
loop_idx
is
False
:
warnings
.
warn
(
"
pystencils AST contains multiple inner loops.
"
warnings
.
warn
((
"
pystencils AST contains multiple inner loops ({}).
"
"
Only one can be analyzed - choosing first one
"
)
"
Only one can be analyzed - choosing first one
"
).
format
(
inner_loop
=
inner_loops
[
0
]
len
(
inner_loops
)))
loop_idx
=
0
inner_loop
=
inner_loops
[
loop_idx
]
self
.
_loop_stack
=
[]
self
.
_loop_stack
=
[]
cur_node
=
inner_loop
cur_node
=
inner_loop
while
cur_node
is
not
None
:
while
cur_node
is
not
None
:
...
@@ -95,7 +108,7 @@ class PyStencilsKerncraftKernel(KernelCode):
...
@@ -95,7 +108,7 @@ class PyStencilsKerncraftKernel(KernelCode):
for
field
in
fields_accessed
:
for
field
in
fields_accessed
:
layout
=
get_layout_tuple
(
field
)
layout
=
get_layout_tuple
(
field
)
permuted_shape
=
list
(
field
.
shape
[
i
]
for
i
in
layout
)
permuted_shape
=
list
(
field
.
shape
[
i
]
for
i
in
layout
)
self
.
set_variable
(
field
.
name
,
str
(
field
.
dtype
),
tuple
(
permuted_shape
))
self
.
set_variable
(
field
.
name
,
(
str
(
field
.
dtype
),
),
tuple
(
permuted_shape
))
# Scalars may be safely ignored
# Scalars may be safely ignored
# for param in ast.get_parameters():
# for param in ast.get_parameters():
...
@@ -128,6 +141,39 @@ class PyStencilsKerncraftKernel(KernelCode):
...
@@ -128,6 +141,39 @@ class PyStencilsKerncraftKernel(KernelCode):
print
(
"
----------------------------- FLOPS -------------------------------
"
)
print
(
"
----------------------------- FLOPS -------------------------------
"
)
pprint
(
self
.
_flops
)
pprint
(
self
.
_flops
)
def get_main_code(self, as_filename=False, kernel_function_name='kernel'):
    """
    Generate and return compilable source code from AST.

    The benchmark is generated with ``openmp=True`` and ``kernel=False``,
    i.e. without the kernel code itself.

    Args:
        as_filename: if True, the code is written to the intermediate file
                     'main.c' (unless that file is reported as already
                     available) and the file name is returned instead of the code
        kernel_function_name: not used by this implementation
                              NOTE(review): presumably kept to match the
                              kerncraft interface - confirm

    Returns:
        the generated code as string, or the name of the intermediate file
        if ``as_filename`` is True
    """
    code = generate_benchmark(self.kernel_ast, openmp=True, kernel=False)
    if not as_filename:
        return code

    fp, already_available = self._get_intermediate_file('main.c', machine_and_compiler_dependent=False)
    if not already_available:
        fp.write(code)
        # The caller only receives the file name; flush so that whoever opens
        # that path next sees the complete code instead of a partially
        # buffered write.
        fp.flush()
    return fp.name
def get_kernel_code(self, openmp=False, as_filename=False, name='kernel'):
    """
    Generate and return compilable source code.

    The benchmark is generated with ``main=False`` and ``kernel=True``,
    i.e. only the kernel part without a main function.

    Args:
        openmp: if true, openmp code will be generated
        as_filename: if True, the code is written to an intermediate file
                     (unless that file is reported as already available) and
                     the file name is returned instead of the code
        name: not used by this implementation
              NOTE(review): presumably kept to match the kerncraft
              interface - confirm

    Returns:
        the generated code as string, or the name of the intermediate file
        if ``as_filename`` is True
    """
    code = generate_benchmark(self.kernel_ast, openmp=openmp, main=False, kernel=True)
    if not as_filename:
        return code

    file_name = 'kernel_{}.c'.format('-omp' if openmp else '')
    fp, already_available = self._get_intermediate_file(file_name, machine_and_compiler_dependent=False)
    if not already_available:
        fp.write(code)
        # The caller only receives the file name; flush so that whoever opens
        # that path next sees the complete code instead of a partially
        # buffered write.
        fp.flush()
    return fp.name
def
as_code
(
self
,
type_
=
'
iaca
'
,
openmp
=
False
,
as_filename
=
False
):
def
as_code
(
self
,
type_
=
'
iaca
'
,
openmp
=
False
,
as_filename
=
False
):
"""
"""
Generate and return compilable source code.
Generate and return compilable source code.
...
@@ -137,6 +183,11 @@ class PyStencilsKerncraftKernel(KernelCode):
...
@@ -137,6 +183,11 @@ class PyStencilsKerncraftKernel(KernelCode):
openmp: if true, openmp code will be generated
openmp: if true, openmp code will be generated
as_filename:
as_filename:
"""
"""
warnings
.
warn
(
(
'
as_code is deprecated and works only for kerncraft <= 0.8.0. Please upgrade your
'
+
'
kerncraft version and use self.get_kernel_code
'
),
FutureWarning
)
code
=
generate_benchmark
(
self
.
kernel_ast
,
likwid
=
type_
==
'
likwid
'
,
openmp
=
openmp
)
code
=
generate_benchmark
(
self
.
kernel_ast
,
likwid
=
type_
==
'
likwid
'
,
openmp
=
openmp
)
if
as_filename
:
if
as_filename
:
fp
,
already_available
=
self
.
_get_intermediate_file
(
'
kernel_{}.c
'
.
format
(
type_
),
fp
,
already_available
=
self
.
_get_intermediate_file
(
'
kernel_{}.c
'
.
format
(
type_
),
...
@@ -149,14 +200,14 @@ class PyStencilsKerncraftKernel(KernelCode):
...
@@ -149,14 +200,14 @@ class PyStencilsKerncraftKernel(KernelCode):
class
KerncraftParameters
(
DotDict
):
class
KerncraftParameters
(
DotDict
):
def
__init__
(
self
,
**
kwargs
):
def
__init__
(
self
,
ptr_inc
=
'
auto
'
,
**
kwargs
):
super
(
KerncraftParameters
,
self
).
__init__
(
**
kwargs
)
super
(
KerncraftParameters
,
self
).
__init__
(
**
kwargs
)
self
[
'
asm_block
'
]
=
'
auto
'
self
[
'
asm_block
'
]
=
'
auto
'
self
[
'
asm_increment
'
]
=
0
self
[
'
asm_increment
'
]
=
0
self
[
'
cores
'
]
=
1
self
[
'
cores
'
]
=
1
self
[
'
cache_predictor
'
]
=
'
SIM
'
self
[
'
cache_predictor
'
]
=
'
SIM
'
self
[
'
verbose
'
]
=
0
self
[
'
verbose
'
]
=
1
self
[
'
pointer_increment
'
]
=
'
auto
'
self
[
'
pointer_increment
'
]
=
ptr_inc
self
[
'
iterations
'
]
=
10
self
[
'
iterations
'
]
=
10
self
[
'
unit
'
]
=
'
cy/CL
'
self
[
'
unit
'
]
=
'
cy/CL
'
self
[
'
ignore_warnings
'
]
=
True
self
[
'
ignore_warnings
'
]
=
True
...
...
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment