Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
pystencils
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Wiki
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Build
Pipelines
Jobs
Pipeline schedules
Artifacts
Deploy
Releases
Model registry
Operate
Environments
Monitor
Incidents
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Terms and privacy
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
pycodegen
pystencils
Commits
9da89cd1
Commit
9da89cd1
authored
4 years ago
by
Julian Hammer
Browse files
Options
Downloads
Patches
Plain Diff
Updated Kerncraft Coupling
parent
622aaa6c
No related branches found
No related tags found
1 merge request
!183
Updated Kerncraft Coupling
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
pystencils/pystencils/kerncraft_coupling/kerncraft_interface.py
+214
-8
214 additions, 8 deletions
...cils/pystencils/kerncraft_coupling/kerncraft_interface.py
with
214 additions
and
8 deletions
pystencils/pystencils/kerncraft_coupling/kerncraft_interface.py
+
214
−
8
View file @
9da89cd1
from
tempfile
import
TemporaryDirectory
from
tempfile
import
TemporaryDirectory
import
fcntl
import
textwrap
from
copy
import
deepcopy
import
warnings
import
sympy
as
sp
import
sympy
as
sp
from
collections
import
defaultdict
from
collections
import
defaultdict
...
@@ -6,14 +10,18 @@ import kerncraft
...
@@ -6,14 +10,18 @@ import kerncraft
import
kerncraft.kernel
import
kerncraft.kernel
from
typing
import
Optional
from
typing
import
Optional
from
kerncraft.machinemodel
import
MachineModel
from
kerncraft.machinemodel
import
MachineModel
from
jinja2
import
Template
from
pystencils.kerncraft_coupling.generate_benchmark
import
generate_benchmark
from
pystencils.kerncraft_coupling.generate_benchmark
import
generate_benchmark
from
pystencils.astnodes
import
LoopOverCoordinate
,
SympyAssignment
,
ResolvedFieldAccess
,
KernelFunction
from
pystencils.backends.cbackend
import
generate_c
,
get_headers
from
pystencils.astnodes
import
\
LoopOverCoordinate
,
SympyAssignment
,
ResolvedFieldAccess
,
KernelFunction
from
pystencils.field
import
get_layout_from_strides
from
pystencils.field
import
get_layout_from_strides
from
pystencils.sympyextensions
import
count_operations_in_ast
from
pystencils.sympyextensions
import
count_operations_in_ast
from
pystencils.transformations
import
filtered_tree_iteration
from
pystencils.transformations
import
filtered_tree_iteration
from
pystencils.utils
import
DotDict
from
pystencils.utils
import
DotDict
import
warnings
from
pystencils.astnodes
import
PragmaBlock
class
PyStencilsKerncraftKernel
(
kerncraft
.
kernel
.
KernelCode
):
class
PyStencilsKerncraftKernel
(
kerncraft
.
kernel
.
KernelCode
):
...
@@ -39,6 +47,7 @@ class PyStencilsKerncraftKernel(kerncraft.kernel.KernelCode):
...
@@ -39,6 +47,7 @@ class PyStencilsKerncraftKernel(kerncraft.kernel.KernelCode):
# Initialize state
# Initialize state
self
.
asm_block
=
None
self
.
asm_block
=
None
self
.
_filename
=
filename
self
.
_filename
=
filename
self
.
_keep_intermediates
=
False
self
.
kernel_ast
=
ast
self
.
kernel_ast
=
ast
self
.
temporary_dir
=
TemporaryDirectory
()
self
.
temporary_dir
=
TemporaryDirectory
()
...
@@ -94,7 +103,7 @@ class PyStencilsKerncraftKernel(kerncraft.kernel.KernelCode):
...
@@ -94,7 +103,7 @@ class PyStencilsKerncraftKernel(kerncraft.kernel.KernelCode):
for
field
in
fields_accessed
:
for
field
in
fields_accessed
:
layout
=
get_layout_tuple
(
field
)
layout
=
get_layout_tuple
(
field
)
permuted_shape
=
list
(
field
.
shape
[
i
]
for
i
in
layout
)
permuted_shape
=
list
(
field
.
shape
[
i
]
for
i
in
layout
)
self
.
set_variable
(
field
.
name
,
str
(
field
.
dtype
),
tuple
(
permuted_shape
))
self
.
set_variable
(
field
.
name
,
(
str
(
field
.
dtype
),
),
tuple
(
permuted_shape
))
# Scalars may be safely ignored
# Scalars may be safely ignored
# for param in ast.get_parameters():
# for param in ast.get_parameters():
...
@@ -127,14 +136,211 @@ class PyStencilsKerncraftKernel(kerncraft.kernel.KernelCode):
...
@@ -127,14 +136,211 @@ class PyStencilsKerncraftKernel(kerncraft.kernel.KernelCode):
print
(
"
----------------------------- FLOPS -------------------------------
"
)
print
(
"
----------------------------- FLOPS -------------------------------
"
)
pprint
(
self
.
_flops
)
pprint
(
self
.
_flops
)
def
as_code
(
self
,
type_
=
'
iaca
'
,
openmp
=
False
):
def get_kernel_header(self, name='kernel'):
    """
    Generate and store kernel.h

    The header lives at the intermediate location reported by
    ``get_intermediate_location`` and is guarded by ``lock_intermediate``.
    When only a shared lock is granted the existing file is read back;
    otherwise the kernel signature is generated from ``self.kernel_ast``,
    written to the file, and the exclusive lock is handed to
    ``release_exclusive_lock``.

    :param name: accepted for interface compatibility; not used here
    :return: tuple of filename of header and file pointer of lockfile
             (the bare name ``'kernel.h'`` is returned, not the full path)
    """
    header_name = 'kernel.h'
    header_path = self.get_intermediate_location(
        header_name, machine_and_compiler_dependent=False)
    mode, lock_handle = self.lock_intermediate(header_path)

    if mode != fcntl.LOCK_SH:
        # Exclusive lock (fcntl.LOCK_EX): the header has to be (re)written.
        declaration = generate_c(self.kernel_ast, signature_only=True)
        with open(header_path, 'w') as header_file:
            header_file.write(declaration)
        # Degrade to shared lock so that other users may read the file.
        self.release_exclusive_lock(lock_handle)
    else:
        # Shared lock: a cached header exists. It is read (the content is
        # not used further), so an unreadable cache still raises here.
        with open(header_path) as header_file:
            header_file.read()

    return header_name, lock_handle
def get_kernel_code(self, openmp=False, name='kernel'):
    """
    Generate and return compilable source code with kernel function from AST.

    The source is stored in an intermediate file (``kernel.c``) guarded by
    ``lock_intermediate``. When only a shared lock is granted the existing
    file is reused; otherwise the file is regenerated from a copy of
    ``self.kernel_ast`` and the exclusive lock is handed to
    ``release_exclusive_lock``.

    :param openmp: include openmp pragmas (or strip them); only ``False``
                   is accepted at the moment
    :param name: name of kernel function (not used by this implementation)
    :return: tuple of path of the source file and file pointer of lockfile
    """
    assert not openmp, "openmp is currently not support by pystencils"
    filename = 'kernel'
    if openmp:
        filename += '-omp'
    filename += '.c'
    file_path = self.get_intermediate_location(filename, machine_and_compiler_dependent=False)
    lock_mode, lock_fp = self.lock_intermediate(file_path)

    if lock_mode == fcntl.LOCK_SH:
        # use cache
        with open(file_path) as f:
            code = f.read()
    else:  # lock_mode == fcntl.LOCK_EX
        # needs update
        kernel_template = Template(textwrap.dedent("""
            #include <stdlib.h>
            #include <stdint.h>
            #include <stdbool.h>
            #include <math.h>
            #define RESTRICT __restrict__
            #define FUNC_PREFIX
            {{ includes }}
            {{kernel_code}}
            """))
        header_list = get_headers(self.kernel_ast)
        includes = "\n".join("#include %s" % (include_file,) for include_file in header_list)

        # Work on a copy so that the AST stored on the instance keeps its pragma.
        ast = deepcopy(self.kernel_ast)

        # Strip "#pragma omp parallel" from within kernel, because main function takes care of
        # that
        if len(ast.body.args) > 0 and isinstance(ast.body.args[0], PragmaBlock):
            ast.body.args[0].pragma_line = ''

        # Render the stripped copy. Rendering self.kernel_ast here would
        # silently discard the pragma removal done above.
        code = kernel_template.render(kernel_code=generate_c(ast, dialect='c'),
                                      includes=includes)

        # Store to file
        with open(file_path, 'w') as f:
            f.write(code)
        # NOTE(review): echoes the generated source to stdout; looks like a
        # debugging aid - confirm that it is still wanted.
        print(code)
        self.release_exclusive_lock(lock_fp)  # degrade to shared lock

    return file_path, lock_fp
# Source text of the benchmark driver (a C program with a `main` function),
# written with `{{ ... }}` / `{%- for ... %}` template placeholders.
# Placeholders used: `constants` (pairs of constantName, dataType), `fields`
# (triples of field_name, dataType, size), `kernelName` and
# `call_argument_list`.
# The rendered program declares every constant, allocates and fills every
# field with 0.23, then calls the kernel in two passes: a warm-up pass with
# repeat = 2 and a second pass whose repeat count is read from argv[1] and
# which is bracketed by the LIKWID marker region "loop".
# NOTE(review): the array pointer is declared as `double *` while the
# allocation size uses sizeof({{dataType}}) - confirm that non-double fields
# are not expected here.
CODE_TEMPLATE
=
textwrap
.
dedent
(
"""
#include <likwid.h>
#include <stdlib.h>
#include <stdint.h>
#include <stdbool.h>
#include <math.h>
#include
"
kerncraft.h
"
#include
"
kernel.h
"
#define RESTRICT __restrict__
#define FUNC_PREFIX
void dummy(void *);
extern int var_false;
int main(int argc, char **argv) {
{%- for constantName, dataType in constants %}
// Constant {{constantName}}
{{dataType}} {{constantName}};
{{constantName}} = 0.23;
{%- endfor %}
// Declaring arrays
{%- for field_name, dataType, size in fields %}
// Initialization {{field_name}}
double * {{field_name}} = (double *) aligned_malloc(sizeof({{dataType}}) * {{size}}, 64);
// TODO initialize in parallel context in same order as they are touched
for (unsigned long long i = 0; i < {{size}}; ++i)
{{field_name}}[i] = 0.23;
{%- endfor %}
likwid_markerInit();
#pragma omp parallel
{
likwid_markerRegisterRegion(
"
loop
"
);
#pragma omp barrier
// Initializing arrays in same order as touched in kernel loop nest
//INIT_ARRAYS;
// Dummy call
{%- for field_name, dataType, size in fields %}
if(var_false) dummy({{field_name}});
{%- endfor %}
{%- for constantName, dataType in constants %}
if(var_false) dummy(&{{constantName}});
{%- endfor %}
for(int warmup = 1; warmup >= 0; --warmup) {
int repeat = 2;
if(warmup == 0) {
repeat = atoi(argv[1]);
likwid_markerStartRegion(
"
loop
"
);
}
for(; repeat > 0; --repeat) {
{{kernelName}}({{call_argument_list}});
{%- for field_name, dataType, size in fields %}
if(var_false) dummy({{field_name}});
{%- endfor %}
{%- for constantName, dataType in constants %}
if(var_false) dummy(&{{constantName}});
{%- endfor %}
}
}
likwid_markerStopRegion(
"
loop
"
);
}
likwid_markerClose();
return 0;
}
"""
)
def get_main_code(self, kernel_function_name='kernel'):
    """
    Generate and return compilable source code from AST.

    The source is stored in an intermediate file (``main.c``) guarded by
    ``lock_intermediate``. When only a shared lock is granted the existing
    file is reused; otherwise the file is regenerated from
    ``self.kernel_ast`` and the exclusive lock is handed to
    ``release_exclusive_lock``.

    :param kernel_function_name: not used by this implementation; the name
                                 of the called function is taken from the
                                 ``function_name`` of the kernel AST
    :return: tuple of filename and shared lock file pointer
    """
    # TODO produce nicer code, including help text and other "comfort features".
    assert self.kernel_ast is not None, "AST does not exist, this could be due to running " \
                                        "based on a kernel description rather than code."

    file_path = self.get_intermediate_location('main.c', machine_and_compiler_dependent=False)
    lock_mode, lock_fp = self.lock_intermediate(file_path)

    if lock_mode == fcntl.LOCK_SH:
        # use cache
        with open(file_path) as f:
            code = f.read()
    else:  # lock_mode == fcntl.LOCK_EX
        # needs update
        # The AST is an attribute of the instance; there is no local or
        # parameter called `ast` in this method.
        kernel_ast = self.kernel_ast
        accessed_fields = {f.name: f for f in kernel_ast.fields_accessed}
        constants = []
        fields = []
        call_parameters = []
        for p in kernel_ast.get_parameters():
            if not p.is_field_parameter:
                # Scalar kernel argument: declared as a constant in main().
                constants.append((p.symbol.name, str(p.symbol.dtype)))
                call_parameters.append(p.symbol.name)
            else:
                assert p.is_field_pointer, "Benchmark implemented only for kernels with fixed loop size"
                field = accessed_fields[p.field_name]
                # NOTE(review): get_base_type and prod are not imported in
                # the visible part of this file - confirm they are in scope.
                dtype = str(get_base_type(p.symbol.dtype))
                fields.append((p.field_name, dtype, prod(field.shape)))
                call_parameters.append(p.field_name)

        header_list = get_headers(kernel_ast)
        includes = "\n".join("#include %s" % (include_file,) for include_file in header_list)

        # Generate code
        # NOTE(review): benchmark_template is not defined in the visible part
        # of this file - presumably a jinja2 Template; confirm where it comes
        # from. The keyword below must match the template placeholder
        # `call_argument_list`, otherwise the kernel is called without
        # arguments in the rendered code.
        code = benchmark_template.render(kernelName=kernel_ast.function_name,
                                         fields=fields,
                                         constants=constants,
                                         call_argument_list=','.join(call_parameters),
                                         includes=includes)

        # Store to file
        with open(file_path, 'w') as f:
            f.write(code)
        self.release_exclusive_lock(lock_fp)  # degrade to shared lock

    return file_path, lock_fp
class
KerncraftParameters
(
DotDict
):
class
KerncraftParameters
(
DotDict
):
...
...
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment