pycodegen / pystencils · Commits

Commit ba7b20ac, authored 5 years ago by Stephan Seitz:
Add 'cuda' compiler config (with preferred_block_size and always_autotune)
Parent: 0800d84a
No related branches or tags found.
Merge request: !106 (WIP: Cuda autotune)
Showing 3 changed files, with 20 additions and 4 deletions:

    pystencils/cpu/cpujit.py          +11  −1
    pystencils/gpucuda/cudajit.py      +2  −1
    pystencils/gpucuda/indexing.py     +7  −2
pystencils/cpu/cpujit.py (+11 −1)
@@ -175,9 +175,15 @@ def read_config():
         ('object_cache', os.path.join(user_cache_dir('pystencils'), 'objectcache')),
         ('clear_cache_on_start', False),
     ])
+    default_cuda_config = OrderedDict([
+        ('always_autotune', False),
+        ('preferred_block_size', (16, 16, 1)),
+    ])
     default_config = OrderedDict([('compiler', default_compiler_config),
-                                  ('cache', default_cache_config)])
+                                  ('cache', default_cache_config),
+                                  ('cuda', default_cuda_config)
+                                  ])
     config_path, config_exists = get_configuration_file_path()
     config = default_config.copy()

@@ -219,6 +225,10 @@ def get_cache_config():
     return _config['cache']
+
+
+def get_cuda_config():
+    return _config['cuda']
 
 
 def add_or_change_compiler_flags(flags):
     if not isinstance(flags, list) and not isinstance(flags, tuple):
         flags = [flags]
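Taken together, these hunks register a third top-level config section alongside 'compiler' and 'cache'. A minimal usage sketch, assuming this commit is applied; since get_cuda_config() hands back _config['cuda'] directly (no defensive copy is made above), mutating the returned dict changes the in-memory config process-wide:

    from pystencils.cpu.cpujit import get_cuda_config

    cuda_config = get_cuda_config()
    print(cuda_config['always_autotune'])       # -> False (default)
    print(cuda_config['preferred_block_size'])  # -> (16, 16, 1) (default)

    # The returned dict is the live config itself, so overriding a value
    # here affects all subsequently compiled/launched kernels:
    cuda_config['always_autotune'] = True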
pystencils/gpucuda/cudajit.py (+2 −1)
@@ -4,6 +4,7 @@ import numpy as np
 import pystencils
 from pystencils.backends.cbackend import generate_c, get_headers
+from pystencils.cpu.cpujit import get_cuda_config
 from pystencils.data_types import StructType
 from pystencils.field import FieldType
 from pystencils.gpucuda.texture_utils import ndarray_to_tex

@@ -88,7 +89,7 @@ def make_python_function(kernel_function_node, argument_dict=None, custom_backen
                               tex.filter_mode, tex.use_normalized_coordinates, tex.read_as_integer)
         args = _build_numpy_argument_list(parameters, full_arguments)
         indexing = kernel_function_node.indexing
-        if kernel_function_node.do_cudaautotune:
+        if kernel_function_node.do_cudaautotune or get_cuda_config()['always_autotune']:
             block_and_thread_numbers = (
                 indexing.autotune_call_parameters(partial(func, *args),
                                                   shape,
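The behavioral change here is confined to the autotune trigger: tuning now runs if either the kernel node opted in or the global config switch is set. A hedged sketch of the equivalent predicate (the helper name should_autotune is hypothetical, not part of the commit):

    from pystencils.cpu.cpujit import get_cuda_config

    def should_autotune(kernel_function_node):
        # Mirrors the condition in make_python_function after this commit:
        # a per-kernel opt-in OR the global 'always_autotune' config switch.
        return (kernel_function_node.do_cudaautotune
                or get_cuda_config()['always_autotune'])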
pystencils/gpucuda/indexing.py (+7 −2)
@@ -7,6 +7,7 @@ from sympy.core.cache import cacheit
 from pystencils.astnodes import Block, Conditional
 from pystencils.cache import disk_cache
+from pystencils.cpu.cpujit import get_cuda_config
 from pystencils.data_types import TypedSymbol, create_type
 from pystencils.integer_functions import div_ceil, div_floor
 from pystencils.slicing import normalize_slice

@@ -130,7 +131,7 @@ class AbstractIndexing(abc.ABC):
                     current_best = block_and_thread_numbers
             print(f'{current_best} is the best out of {self._autotune_block_sizes or self.AUTOTUNE_BLOCK_SIZES}')
-            self._block_size = current_best
+            self._block_size = current_best['block']
             return current_best
         return _autotune_call_parameters(self,
                                          call_shape,

@@ -157,7 +158,10 @@ class BlockIndexing(AbstractIndexing):
     AUTOTUNE_NUM_CALLS = 10
 
     def __init__(self, field, iteration_slice,
-                 block_size=(16, 16, 1), permute_block_size_dependent_on_layout=True, compile_time_block_size=False,
+                 block_size=tuple(get_cuda_config()['preferred_block_size']),
+                 permute_block_size_dependent_on_layout=True,
+                 compile_time_block_size=False,
                  maximum_block_size=(1024, 1024, 64)):
         if field.spatial_dimensions > 3:
             raise NotImplementedError("This indexing scheme supports at most 3 spatial dimensions")

@@ -304,6 +308,7 @@ class LineIndexing(AbstractIndexing):
         self._coordinates = coordinates
         self._iterationSlice = normalize_slice(iteration_slice, field.spatial_shape)
         self._symbolicShape = [e if isinstance(e, sp.Basic) else None for e in field.spatial_shape]
+        self._autotune_block_sizes = None
 
     @property
     def coordinates(self):
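Two details worth noting in the BlockIndexing change. First, the tuple(...) wrapper normalizes the configured value, so a preferred_block_size that deserializes as a list (an assumption about how a file-backed config would round-trip) behaves like the old (16, 16, 1) literal. Second, as with any Python default argument, it is evaluated once, when indexing.py is imported, so config changes made later in the process do not alter the default. A small sketch of the normalization:

    from pystencils.cpu.cpujit import get_cuda_config

    # A config section read back from disk would typically hold a list,
    # e.g. [16, 16, 1]; tuple() makes it interchangeable with the old literal.
    block_size = tuple(get_cuda_config()['preferred_block_size'])
    assert block_size == (16, 16, 1)  # holds for the defaults added in this commit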