pycodegen / pystencils

Commit f340af58, authored 10 months ago by Frederik Hennig

started implementing cupy JIT

Parent: 356a8343
Merge request: !384 Fundamental GPU Support
Pipeline: #67192, failed (10 months ago)
Pipeline stages: Code Quality, Unit Tests, legacy_test, docs
Showing 3 changed files with 193 additions and 0 deletions:

- mypy.ini: 3 additions, 0 deletions
- src/pystencils/backend/ast/expressions.py: 30 additions, 0 deletions
- src/pystencils/backend/jit/gpu_cupy.py: 160 additions, 0 deletions
mypy.ini  (+3, −0)

@@ -19,3 +19,6 @@ ignore_missing_imports=true

[mypy-islpy.*]
ignore_missing_imports=true

[mypy-cupy.*]
ignore_missing_imports=true
src/pystencils/backend/ast/expressions.py  (+30, −0)

@@ -697,3 +697,33 @@ class PsArrayInitList(PsExpression):

    def __repr__(self) -> str:
        return f"PsArrayInitList({repr(self._items)})"


def evaluate_expression(expr: PsExpression, valuation: dict[str, Any]) -> Any:
    """Evaluate a pystencils backend expression tree with values assigned to symbols
    according to the given valuation.

    Only a subset of expression nodes can be processed by this evaluator.
    """

    def visit(node):
        match node:
            case PsSymbolExpr(symb):
                return valuation[symb.name]

            case PsConstantExpr(c):
                return c.value

            case PsUnOp(op1) if node.python_operator is not None:
                return node.python_operator(visit(op1))

            case PsBinOp(op1, op2) if node.python_operator is not None:
                return node.python_operator(visit(op1), visit(op2))

            case other:
                raise NotImplementedError(
                    f"Unable to evaluate {other}: No implementation available."
                )

    return visit(expr)
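The new evaluate_expression helper walks an expression tree with structural pattern matching: symbols are looked up in the valuation, constants unwrap to their value, and unary/binary nodes defer to their python_operator. As a stand-alone illustration of the same pattern, using hypothetical stand-in dataclasses rather than the actual pystencils AST classes:

    # Stand-in node types for illustration only; they mirror, but are not,
    # the pystencils classes PsSymbolExpr, PsConstantExpr and PsBinOp.
    import operator
    from dataclasses import dataclass

    @dataclass
    class Sym:
        name: str

    @dataclass
    class Const:
        value: int

    @dataclass
    class Add:
        op1: object
        op2: object
        python_operator = operator.add

    def evaluate(node, valuation):
        match node:
            case Sym(name):
                return valuation[name]
            case Const(value):
                return value
            case Add(op1, op2):
                return node.python_operator(
                    evaluate(op1, valuation), evaluate(op2, valuation)
                )
            case other:
                raise NotImplementedError(f"Unable to evaluate {other}")

    # (N + (-1)) + 2 with N = 15 evaluates to 16
    print(evaluate(Add(Add(Sym("N"), Const(-1)), Const(2)), {"N": 15}))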
src/pystencils/backend/jit/gpu_cupy.py  (new file, mode 100644; +160, −0)

from typing import Callable, Any
from dataclasses import dataclass

import cupy as cp

from ...enums import Target
from ...types import PsType

from .jit import JitBase, JitError
from ..kernelfunction import (
    KernelFunction,
    GpuKernelFunction,
    FieldPointerParam,
    FieldShapeParam,
    FieldStrideParam,
    KernelParameter,
)
from ..emission import emit_code

from ...include import get_pystencils_include_path


@dataclass
class LaunchGrid:
    grid: tuple[int, int, int]
    block: tuple[int, int, int]


class CupyKernelWrapper:
    def __init__(
        self,
        kfunc: GpuKernelFunction,
        raw_kernel: Any,
        block_size: tuple[int, int, int],
    ):
        self._kfunc = kfunc
        self._kernel = raw_kernel
        self._block_size = block_size

    def __call__(self, **kwargs: Any) -> Any:
        kernel_args, launch_grid = self._get_args(**kwargs)
        device = self._get_device(kernel_args)
        with cp.cuda.Device(device):
            self._kernel(launch_grid.grid, launch_grid.block, kernel_args)

    def _get_device(self, kernel_args):
        devices = set(a.device.id for a in kernel_args if type(a) is cp.ndarray)
        if len(devices) != 1:
            raise JitError("Could not determine CUDA device to execute on")
        return devices.pop()

    def _get_args(self, **kwargs) -> tuple[tuple, LaunchGrid]:
        args = []
        valuation: dict[str, Any] = dict()

        def add_arg(name: str, arg: Any, dtype: PsType):
            nptype = dtype.numpy_dtype
            assert nptype is not None
            typecast = nptype.type
            arg = typecast(arg)
            args.append(arg)
            valuation[name] = arg

        # Collect parameter values
        arr: cp.ndarray

        for kparam in self._kfunc.parameters:
            match kparam:
                case FieldPointerParam(_, dtype, field):
                    arr = kwargs[field.name]
                    if arr.dtype != field.dtype.numpy_dtype:
                        raise JitError(
                            f"Data type mismatch at array argument {field.name}: "
                            f"Expected {field.dtype}, got {arr.dtype}"
                        )
                    args.append(arr)

                case FieldShapeParam(name, dtype, field, coord):
                    arr = kwargs[field.name]
                    add_arg(name, arr.shape[coord], dtype)

                case FieldStrideParam(name, dtype, field, coord):
                    arr = kwargs[field.name]
                    add_arg(name, arr.strides[coord], dtype)

                case KernelParameter(name, dtype):
                    val: Any = kwargs[name]
                    add_arg(name, val, dtype)

        # Determine launch grid
        from ..ast.expressions import evaluate_expression

        symbolic_threads_range = self._kfunc.threads_range

        threads_range: list[int] = [
            evaluate_expression(expr, valuation)
            for expr in symbolic_threads_range.num_work_items
        ]

        if symbolic_threads_range.dim < 3:
            threads_range += [1] * (3 - symbolic_threads_range.dim)

        def div_ceil(a, b):
            return a // b if a % b == 0 else a // b + 1

        # TODO: Refine this?
        grid_size = tuple(
            div_ceil(threads, tpb)
            for threads, tpb in zip(threads_range, self._block_size)
        )
        assert len(grid_size) == 3

        launch_grid = LaunchGrid(grid_size, self._block_size)

        return tuple(args), launch_grid


class CupyJit(JitBase):

    def __init__(self, default_block_size: tuple[int, int, int] = (128, 2, 1)):
        # TODO: Fp16 headers
        self._runtime_headers = {"<cstdint>", '"gpu_defines.h"'}
        self._default_block_size = default_block_size

    def compile(self, kfunc: KernelFunction) -> Callable[..., None]:
        import cupy as cp

        if not isinstance(kfunc, GpuKernelFunction) or kfunc.target != Target.CUDA:
            raise ValueError(
                "The CupyJit just-in-time compiler only accepts kernels generated for CUDA or HIP"
            )

        options = self._compiler_options()
        prelude = self._prelude(kfunc)
        kernel_code = self._kernel_code(kfunc)
        code = prelude + kernel_code

        raw_kernel = cp.RawKernel(
            code, kfunc.name, options=options, backend="nvrtc", jitify=True
        )
        return CupyKernelWrapper(kfunc, raw_kernel, self._default_block_size)

    def _compiler_options(self) -> tuple[str, ...]:
        options = ["-w", "-std=c++11"]
        options.append("-I" + get_pystencils_include_path())
        return tuple(options)

    def _prelude(self, kfunc: GpuKernelFunction) -> str:
        headers = self._runtime_headers
        headers |= kfunc.required_headers

        code = "\n".join(f"#include {header}" for header in headers)

        code += "\n\n#define RESTRICT __restrict__\n\n"

        return code

    def _kernel_code(self, kfunc: GpuKernelFunction) -> str:
        kernel_code = emit_code(kfunc)
        return f'extern "C" {{\n{kernel_code}\n}}\n'
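For reference, the launch grid assembled in _get_args rounds each entry of the (padded) threads range up to a whole number of blocks via div_ceil. A small worked example with the default block size; the work-item counts below are made-up numbers:

    def div_ceil(a, b):
        return a // b if a % b == 0 else a // b + 1

    block_size = (128, 2, 1)        # CupyJit default block size
    threads_range = [1000, 30, 1]   # hypothetical work-item counts, already padded to 3D

    grid_size = tuple(div_ceil(t, b) for t, b in zip(threads_range, block_size))
    print(grid_size)  # (8, 15, 1): 1000 threads need 8 blocks of 128, 30 need 15 blocks of 2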
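CupyKernelWrapper ultimately invokes the compiled cupy.RawKernel with a (grid, block, args) call, which is cupy's own raw-kernel calling convention. A self-contained sketch of that convention with a hand-written kernel instead of generated code (needs a CUDA-capable device to run):

    import cupy as cp

    # Minimal hand-written CUDA kernel; stands in for the generated kernel code.
    code = r'''
    extern "C" __global__ void scale(double* data, double factor, int n) {
        int i = blockDim.x * blockIdx.x + threadIdx.x;
        if (i < n) { data[i] *= factor; }
    }
    '''
    kernel = cp.RawKernel(code, "scale")

    x = cp.arange(16, dtype=cp.float64)
    grid, block = (1, 1, 1), (32, 1, 1)
    kernel(grid, block, (x, cp.float64(2.0), cp.int32(x.size)))  # doubles every element
    print(x)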