Commit 4b618138
authored 5 years ago by Michael Kuron
Make create_staggered_kernel work with OpenMP
parent b6c9f64c
Showing 2 changed files with 12 additions and 7 deletions

pystencils/kernelcreation.py: 6 additions, 4 deletions
pystencils_tests/test_staggered_kernel.py: 6 additions, 3 deletions
pystencils/kernelcreation.py (+6, −4)

@@ -24,6 +24,7 @@ def create_kernel(assignments,
                   cpu_openmp=False,
                   cpu_vectorize_info=None,
                   cpu_blocking=None,
+                  omp_single_loop=True,
                   gpu_indexing='block',
                   gpu_indexing_params=MappingProxyType({}),
                   use_textures_for_interpolation=True,
@@ -47,6 +48,7 @@ def create_kernel(assignments,
         skip_independence_check: don't check that loop iterations are independent. This is needed e.g. for
                                  periodicity kernel, that access the field outside the iteration bounds. Use with care!
         cpu_openmp: True or number of threads for OpenMP parallelization, False for no OpenMP
+        omp_single_loop: if OpenMP is active: whether multiple outer loops are permitted
         cpu_vectorize_info: a dictionary with keys, 'vector_instruction_set', 'assume_aligned' and 'nontemporal'
                             for documentation of these parameters see vectorize function. Example:
                             '{'instruction_set': 'avx512', 'assume_aligned': True, 'nontemporal':True}'
@@ -99,7 +101,7 @@ def create_kernel(assignments,
         if cpu_blocking:
             omp_collapse = loop_blocking(ast, cpu_blocking)
         if cpu_openmp:
-            add_openmp(ast, num_threads=cpu_openmp, collapse=omp_collapse)
+            add_openmp(ast, num_threads=cpu_openmp, collapse=omp_collapse, assume_single_outer_loop=omp_single_loop)
         if cpu_vectorize_info:
             if cpu_vectorize_info is True:
                 vectorize(ast)
@@ -237,7 +239,7 @@ def create_staggered_kernel(assignments, target='cpu', gpu_exclusive_conditions=
     Returns:
         AST, see `create_kernel`
     """
-    assert 'iteration_slice' not in kwargs and 'ghost_layers' not in kwargs
+    assert 'iteration_slice' not in kwargs and 'ghost_layers' not in kwargs and 'omp_single_loop' not in kwargs
     if isinstance(assignments, AssignmentCollection):
         subexpressions = assignments.subexpressions + [a for a in assignments.main_assignments
@@ -325,7 +327,7 @@ def create_staggered_kernel(assignments, target='cpu', gpu_exclusive_conditions=
         if target == 'cpu':
             from pystencils.cpu import create_kernel as create_kernel_cpu
-            ast = create_kernel_cpu(final_assignments, ghost_layers=ghost_layers, **kwargs)
+            ast = create_kernel_cpu(final_assignments, ghost_layers=ghost_layers, omp_single_loop=False, **kwargs)
         else:
             ast = create_kernel(final_assignments, ghost_layers=ghost_layers, target=target, **kwargs)
         return ast
@@ -341,6 +343,6 @@ def create_staggered_kernel(assignments, target='cpu', gpu_exclusive_conditions=
     remove_start_conditional = any([gl[0] == 0 for gl in ghost_layers])
     prepend_optimizations = [lambda ast: remove_conditionals_in_staggered_kernel(ast, remove_start_conditional),
                              move_constants_before_loop]
-    ast = create_kernel(final_assignments, ghost_layers=ghost_layers, target=target,
+    ast = create_kernel(final_assignments, ghost_layers=ghost_layers, target=target, omp_single_loop=False,
                         cpu_prepend_optimizations=prepend_optimizations, **kwargs)
     return ast
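A minimal usage sketch of the new keyword, assuming the public pystencils API at this revision; only create_kernel, cpu_openmp and omp_single_loop come from the diff above, while the field names and the Jacobi-style update are illustrative:

import pystencils as ps

# illustrative 2D fields (names are an assumption, not part of the commit)
src, dst = ps.fields("src, dst: double[2D]")
update = ps.Assignment(dst.center,
                       (src[1, 0] + src[-1, 0] + src[0, 1] + src[0, -1]) / 4)

# cpu_openmp=True (or a thread count) enables OpenMP; omp_single_loop=True is the
# new default and is forwarded to add_openmp as assume_single_outer_loop
ast = ps.create_kernel(update, cpu_openmp=True, omp_single_loop=True)
kernel = ast.compile()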
pystencils_tests/test_staggered_kernel.py (+6, −3)

@@ -5,7 +5,7 @@ import pystencils as ps

 class TestStaggeredDiffusion:
-    def _run(self, num_neighbors, target='cpu'):
+    def _run(self, num_neighbors, target='cpu', openmp=False):
         L = (40, 40)
         D = 0.066
         dt = 1
@@ -33,8 +33,8 @@ class TestStaggeredDiffusion:
         flux += [ps.Assignment(j.staggered_access("SW"), xy_staggered),
                  ps.Assignment(j.staggered_access("NW"), xY_staggered)]

-        staggered_kernel = ps.create_staggered_kernel(flux, target=dh.default_target).compile()
-        div_kernel = ps.create_kernel(update, target=dh.default_target).compile()
+        staggered_kernel = ps.create_staggered_kernel(flux, target=dh.default_target, cpu_openmp=openmp).compile()
+        div_kernel = ps.create_kernel(update, target=dh.default_target, cpu_openmp=openmp).compile()

         def time_loop(steps):
             sync = dh.synchronization_function([c.name])
@@ -74,6 +74,9 @@ class TestStaggeredDiffusion:
         import pystencils.opencl.autoinit
         self._run(4, 'opencl')

+    def test_diffusion_openmp(self):
+        self._run(4, openmp=True)
+

 def test_staggered_subexpressions():
     dh = ps.create_data_handling((10, 10), periodicity=True, default_target='cpu')
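The new test_diffusion_openmp simply forwards openmp=True through _run, which passes it to both kernels as cpu_openmp. A hedged standalone sketch of that call path; create_staggered_kernel, staggered_access and cpu_openmp are taken from the diff, while the field construction via Field.create_generic and FieldType.STAGGERED is an assumption modelled on the test:

import pystencils as ps

# illustrative fields: a cell-centred scalar c and a 2-component staggered flux field j
c = ps.fields("c: double[2D]")
j = ps.Field.create_generic('j', spatial_dimensions=2, index_shape=(2,),
                            field_type=ps.FieldType.STAGGERED)

# one flux assignment per face, analogous to the test's staggered accesses
flux = [ps.Assignment(j.staggered_access("W"), c[-1, 0] - c[0, 0]),
        ps.Assignment(j.staggered_access("S"), c[0, -1] - c[0, 0])]

# cpu_openmp=True is the knob the new test exercises; the staggered kernel compiles
# with OpenMP because create_staggered_kernel now passes omp_single_loop=False internally
kernel = ps.create_staggered_kernel(flux, target='cpu', cpu_openmp=True).compile()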