pycodegen / pystencils

Commit a274d662
authored 6 years ago by Martin Bauer
Staggered Kernel: different option for GPU (one block for each code path)
parent 8e4aae93
Showing 2 changed files with 35 additions and 8 deletions:

    gpucuda/cudajit.py      1 addition, 0 deletions
    kernelcreation.py       34 additions, 8 deletions
gpucuda/cudajit.py  +1 −0
@@ -67,6 +67,7 @@ def make_python_function(kernel_function_node, argument_dict=None):
                cache[key] = (args, block_and_thread_numbers)
                cache_values.append(kwargs)  # keep objects alive such that ids remain unique
            func(*args, **block_and_thread_numbers)
            # import pycuda.driver as cuda
            # cuda.Context.synchronize()  # useful for debugging, to get errors right after kernel was called
        wrapper.ast = kernel_function_node
        wrapper.parameters = kernel_function_node.get_parameters()
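The commented-out lines in this hunk point to pycuda's context-wide synchronization as a debugging aid: kernel launches are asynchronous, so without a synchronization point an error inside the kernel only surfaces at some later CUDA call. A minimal sketch of how that aid would be used, assuming pycuda is installed and a context is available; the launch_and_check helper is hypothetical and not part of pystencils:

    # Sketch only: the helper name and the autoinit-based context are assumptions.
    import pycuda.autoinit  # noqa: F401  -- creates a default CUDA context
    import pycuda.driver as cuda

    def launch_and_check(func, args, block_and_thread_numbers):
        """Launch a compiled GPU kernel and block until it has finished.

        Synchronizing right after the launch makes kernel errors show up at
        the call site instead of at a later, unrelated CUDA API call.
        """
        func(*args, **block_and_thread_numbers)
        cuda.Context.synchronize()  # raises if the kernel failed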
kernelcreation.py  +34 −8
from types import MappingProxyType
import sympy as sp
+import itertools
from pystencils.assignment import Assignment
from pystencils.astnodes import LoopOverCoordinate, Conditional, Block, SympyAssignment
from pystencils.cpu.vectorization import vectorize
@@ -158,7 +159,8 @@ def create_indexed_kernel(assignments, index_fields, target='cpu', data_type="do
        raise ValueError("Unknown target %s. Has to be either 'cpu' or 'gpu'" % (target,))


-def create_staggered_kernel(staggered_field, expressions, subexpressions=(), target='cpu', **kwargs):
+def create_staggered_kernel(staggered_field, expressions, subexpressions=(), target='cpu',
+                            gpu_exclusive_conditions=False, **kwargs):
    """Kernel that updates a staggered field.

    .. image:: /img/staggered_grid.svg
@@ -173,6 +175,7 @@ def create_staggered_kernel(staggered_field, expressions, subexpressions=(), tar
                     should be updated.
        subexpressions: optional sequence of Assignments, that define subexpressions used in the main expressions
        target: 'cpu' or 'gpu'
+       gpu_exclusive_conditions: if/else construct to have only one code block for each of 2**dim code paths
        kwargs: passed directly to create_kernel, iteration slice and ghost_layers parameters are not allowed

    Returns:
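The new gpu_exclusive_conditions flag switches the staggered kernel from one conditional per dimension to a single if/else cascade with one block per combination of boundary conditions, i.e. one block for each of the 2**dim code paths. A minimal usage sketch; the field layout and the update expressions below are made up for illustration, only the create_staggered_kernel signature comes from this commit:

    # Sketch only: field spec and expressions are placeholders.
    import pystencils as ps

    f, s = ps.fields("f: double[2D], s(2): double[2D]")  # s: staggered field, one entry per dimension

    # One (hypothetical) update expression per dimension of the staggered field.
    expressions = [f[0, 0] - f[-1, 0],
                   f[0, 0] - f[0, -1]]

    # Default: one Conditional per dimension.
    ast_default = ps.create_staggered_kernel(s, expressions, target='gpu')

    # New option: a single if/else construct with one code block per 2**dim case.
    ast_exclusive = ps.create_staggered_kernel(s, expressions, target='gpu',
                                               gpu_exclusive_conditions=True)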
@@ -191,18 +194,41 @@ def create_staggered_kernel(staggered_field, expressions, subexpressions=(), tar
                                                                   "same length."

    final_assignments = []
-   for d in range(dim):
-       cond = sp.And(*[conditions[i] for i in range(dim) if d != i])
+   last_conditional = None
+
+   def add(condition, dimensions, as_else_block=False):
+       nonlocal last_conditional
        if staggered_field.index_dimensions == 1:
-           assignments = [Assignment(staggered_field(d), expressions[d])]
-           a_coll = AssignmentCollection(assignments, list(subexpressions)).new_filtered([staggered_field(d)])
+           assignments = [Assignment(staggered_field(d), expressions[d]) for d in dimensions]
+           a_coll = AssignmentCollection(assignments, list(subexpressions))
+           a_coll = a_coll.new_filtered([staggered_field(d) for d in dimensions])
        elif staggered_field.index_dimensions == 2:
            assert staggered_field.has_fixed_index_shape
-           assignments = [Assignment(staggered_field(d, i), expr) for i, expr in enumerate(expressions[d])]
+           assignments = [Assignment(staggered_field(d, i), expr)
+                          for d in dimensions for i, expr in enumerate(expressions[d])]
            a_coll = AssignmentCollection(assignments, list(subexpressions))
-           a_coll = a_coll.new_filtered([staggered_field(d, i) for i in range(staggered_field.index_shape[1])])
+           a_coll = a_coll.new_filtered([staggered_field(d, i)
+                                         for i in range(staggered_field.index_shape[1]) for d in dimensions])
        sp_assignments = [SympyAssignment(a.lhs, a.rhs) for a in a_coll.all_assignments]
-       final_assignments.append(Conditional(cond, Block(sp_assignments)))
+       if as_else_block and last_conditional:
+           last_conditional.false_block = Conditional(condition, Block(sp_assignments))
+           last_conditional = last_conditional.false_block
+       else:
+           last_conditional = Conditional(condition, Block(sp_assignments))
+           final_assignments.append(last_conditional)
+
+   if target == 'cpu' or not gpu_exclusive_conditions:
+       for d in range(dim):
+           cond = sp.And(*[conditions[i] for i in range(dim) if d != i])
+           add(cond, [d])
+   elif target == 'gpu':
+       full_conditions = [sp.And(*[conditions[i] for i in range(dim) if d != i]) for d in range(dim)]
+       for include in itertools.product(*[[1, 0]] * dim):
+           case_conditions = sp.And(*[c if value else sp.Not(c) for c, value in zip(full_conditions, include)])
+           dimensions_to_include = [i for i in range(dim) if include[i]]
+           if dimensions_to_include:
+               add(case_conditions, dimensions_to_include, True)

    ghost_layers = [(1, 0)] * dim
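The new GPU branch enumerates every combination of the per-dimension conditions with itertools.product and joins them into one mutually exclusive condition per case with sp.And / sp.Not, so the generated kernel ends up with exactly one code block per 2**dim path, chained as if/else via the false_block of each Conditional. A standalone sketch of that enumeration, using sympy only and made-up placeholder conditions instead of the staggered kernel's boundary conditions:

    # Sketch only: the condition symbols are placeholders.
    import itertools
    import sympy as sp

    dim = 2
    conditions = [sp.Symbol("near_boundary_0"), sp.Symbol("near_boundary_1")]

    # The staggered value of dimension d is written where the conditions of all *other* dimensions hold.
    full_conditions = [sp.And(*[conditions[i] for i in range(dim) if d != i]) for d in range(dim)]

    # One mutually exclusive case per inclusion pattern -> 2**dim code paths.
    for include in itertools.product(*[[1, 0]] * dim):
        case_condition = sp.And(*[c if value else sp.Not(c)
                                  for c, value in zip(full_conditions, include)])
        dimensions_to_include = [i for i in range(dim) if include[i]]
        if dimensions_to_include:
            print(dimensions_to_include, case_condition)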