Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
pystencils
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Wiki
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Build
Pipelines
Jobs
Pipeline schedules
Artifacts
Deploy
Releases
Model registry
Operate
Environments
Monitor
Incidents
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Terms and privacy
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
pycodegen
pystencils
Commits
b49316af
Commit
b49316af
authored
4 years ago
by
Michael Kuron
Browse files
Options
Downloads
Patches
Plain Diff
Make the RNG node behave more like a regular node
parent
2c25776f
No related branches found
No related tags found
1 merge request
!194
Make the RNG node behave more like a regular node
Pipeline
#28492
waiting for manual action
Stage: test
Changes
3
Pipelines
1
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
pystencils/astnodes.py
+6
-2
6 additions, 2 deletions
pystencils/astnodes.py
pystencils/rng.py
+20
-30
20 additions, 30 deletions
pystencils/rng.py
pystencils_tests/test_random.py
+39
-39
39 additions, 39 deletions
pystencils_tests/test_random.py
with
65 additions
and
71 deletions
pystencils/astnodes.py
+
6
−
2
View file @
b49316af
...
...
@@ -37,8 +37,12 @@ class Node:
def
subs
(
self
,
subs_dict
)
->
None
:
"""
Inplace! Substitute, similar to sympy
'
s but modifies the AST inplace.
"""
for
a
in
self
.
args
:
a
.
subs
(
subs_dict
)
for
i
,
a
in
enumerate
(
self
.
args
):
result
=
a
.
subs
(
subs_dict
)
if
isinstance
(
a
,
sp
.
Expr
):
# sympy expressions' subs is out-of-place
self
.
args
[
i
]
=
result
else
:
# all other should be in-place
assert
result
is
None
@property
def
func
(
self
):
...
...
This diff is collapsed.
Click to expand it.
pystencils/rng.py
+
20
−
30
View file @
b49316af
...
...
@@ -19,11 +19,9 @@ def _get_rng_template(name, data_type, num_vars):
return
template
def
_get_rng_code
(
template
,
dialect
,
vector_instruction_set
,
time_step
,
coordinates
,
keys
,
dim
,
result_symbols
):
parameters
=
[
time_step
]
+
coordinates
+
[
0
]
*
(
3
-
dim
)
+
list
(
keys
)
def
_get_rng_code
(
template
,
dialect
,
vector_instruction_set
,
args
,
dim
,
result_symbols
):
if
dialect
==
'
cuda
'
or
(
dialect
==
'
c
'
and
vector_instruction_set
is
None
):
return
template
.
format
(
parameters
=
'
,
'
.
join
(
str
(
p
)
for
p
in
p
ar
ameter
s
),
return
template
.
format
(
parameters
=
'
,
'
.
join
(
str
(
a
)
for
a
in
ar
g
s
),
result_symbols
=
result_symbols
)
else
:
raise
NotImplementedError
(
"
Not yet implemented for this backend
"
)
...
...
@@ -31,47 +29,39 @@ def _get_rng_code(template, dialect, vector_instruction_set, time_step, coordina
class
RNGBase
(
CustomCodeNode
):
def
__init__
(
self
,
dim
,
time_step
=
TypedSymbol
(
"
time_step
"
,
np
.
uint32
),
offsets
=
(
0
,
0
,
0
),
keys
=
None
):
id
=
0
def
__init__
(
self
,
dim
,
time_step
=
TypedSymbol
(
"
time_step
"
,
np
.
uint32
),
offsets
=
None
,
keys
=
None
):
if
keys
is
None
:
keys
=
(
0
,)
*
self
.
_num_keys
if
offsets
is
None
:
offsets
=
(
0
,)
*
dim
if
len
(
keys
)
!=
self
.
_num_keys
:
raise
ValueError
(
f
"
Provided
{
len
(
keys
)
}
keys but need
{
self
.
_num_keys
}
"
)
if
len
(
offsets
)
!=
3
:
raise
ValueError
(
f
"
Provided
{
len
(
offsets
)
}
offsets but need
{
3
}
"
)
self
.
result_symbols
=
tuple
(
TypedSymbol
(
sp
.
Dummy
().
name
,
self
.
_data_type
)
for
_
in
range
(
self
.
_num_vars
))
symbols_read
=
[
s
for
s
in
keys
if
isinstance
(
s
,
sp
.
Symbol
)]
if
len
(
offsets
)
!=
dim
:
raise
ValueError
(
f
"
Provided
{
len
(
offsets
)
}
offsets but need
{
dim
}
"
)
coordinates
=
[
LoopOverCoordinate
.
get_loop_counter_symbol
(
i
)
+
offsets
[
i
]
for
i
in
range
(
dim
)]
if
dim
<
3
:
coordinates
.
append
(
0
)
self
.
result_symbols
=
tuple
(
TypedSymbol
(
f
'
random_
{
self
.
id
}
_
{
i
}
'
,
self
.
_data_type
)
for
i
in
range
(
self
.
_num_vars
))
symbols_read
=
set
.
union
(
*
[
sp
.
sympify
(
s
).
atoms
(
sp
.
Symbol
)
for
s
in
list
(
keys
)
+
coordinates
+
[
time_step
]])
super
().
__init__
(
""
,
symbols_read
=
symbols_read
,
symbols_defined
=
self
.
result_symbols
)
self
.
_time_step
=
time_step
self
.
_offsets
=
offsets
self
.
_coordinates
=
[
LoopOverCoordinate
.
get_loop_counter_symbol
(
i
)
+
offsets
[
i
]
for
i
in
range
(
dim
)]
self
.
headers
=
[
f
'"
{
self
.
_name
}
_rand.h
"'
]
self
.
keys
=
tuple
(
keys
)
self
.
_args
=
sp
.
sympify
((
dim
,
time_step
,
keys
))
self
.
_args
=
sp
.
sympify
([
time_step
,
*
coordinates
,
*
keys
])
self
.
_dim
=
dim
RNGBase
.
id
+=
1
@property
def
args
(
self
):
return
self
.
_args
@property
def
undefined_symbols
(
self
):
result
=
{
a
for
a
in
(
self
.
_time_step
,
*
self
.
_offsets
,
*
self
.
keys
)
if
isinstance
(
a
,
sp
.
Symbol
)}
loop_counters
=
[
LoopOverCoordinate
.
get_loop_counter_symbol
(
i
)
for
i
in
range
(
self
.
_dim
)]
result
.
update
(
loop_counters
)
return
result
def
subs
(
self
,
subs_dict
)
->
None
:
for
i
in
range
(
len
(
self
.
_coordinates
)):
self
.
_coordinates
[
i
]
=
self
.
_coordinates
[
i
].
subs
(
subs_dict
)
def
fast_subs
(
self
,
*
_
):
return
self
# nothing to replace inside this node - would destroy intermediate "dummy" by re-creating them
def
get_code
(
self
,
dialect
,
vector_instruction_set
):
template
=
_get_rng_template
(
self
.
_name
,
self
.
_data_type
,
self
.
_num_vars
)
return
_get_rng_code
(
template
,
dialect
,
vector_instruction_set
,
self
.
_time_step
,
self
.
_coordinates
,
self
.
key
s
,
self
.
_dim
,
self
.
result_symbols
)
self
.
arg
s
,
self
.
_dim
,
self
.
result_symbols
)
def
__repr__
(
self
):
return
(
"
,
"
.
join
([
'
{}
'
]
*
self
.
_num_vars
)
+
"
\\
leftarrow {}RNG
"
).
format
(
*
self
.
result_symbols
,
...
...
This diff is collapsed.
Click to expand it.
pystencils_tests/test_random.py
+
39
−
39
View file @
b49316af
...
...
@@ -12,58 +12,58 @@ philox_reference = np.array([[[3576608082, 1252663339, 1987745383, 348040302],
[[
2958765206
,
3725192638
,
2623672781
,
1373196132
],
[
850605163
,
1694561295
,
3285694973
,
2799652583
]]])
def
test_philox_double
():
for
target
in
(
'
cpu
'
,
'
gpu
'
):
if
target
==
'
gpu
'
:
pytest
.
importorskip
(
'
pycuda
'
)
@pytest.mark.parametrize
(
'
target
'
,
(
'
cpu
'
,
'
gpu
'
))
def
test_philox_double
(
target
):
if
target
==
'
gpu
'
:
pytest
.
importorskip
(
'
pycuda
'
)
dh
=
ps
.
create_data_handling
((
2
,
2
),
default_ghost_layers
=
0
,
default_target
=
target
)
f
=
dh
.
add_array
(
"
f
"
,
values_per_cell
=
2
)
dh
=
ps
.
create_data_handling
((
2
,
2
),
default_ghost_layers
=
0
,
default_target
=
target
)
f
=
dh
.
add_array
(
"
f
"
,
values_per_cell
=
2
)
dh
.
fill
(
'
f
'
,
42.0
)
dh
.
fill
(
'
f
'
,
42.0
)
philox_node
=
PhiloxTwoDoubles
(
dh
.
dim
)
assignments
=
[
philox_node
,
ps
.
Assignment
(
f
(
0
),
philox_node
.
result_symbols
[
0
]),
ps
.
Assignment
(
f
(
1
),
philox_node
.
result_symbols
[
1
])]
kernel
=
ps
.
create_kernel
(
assignments
,
target
=
dh
.
default_target
).
compile
()
philox_node
=
PhiloxTwoDoubles
(
dh
.
dim
)
assignments
=
[
philox_node
,
ps
.
Assignment
(
f
(
0
),
philox_node
.
result_symbols
[
0
]),
ps
.
Assignment
(
f
(
1
),
philox_node
.
result_symbols
[
1
])]
kernel
=
ps
.
create_kernel
(
assignments
,
target
=
dh
.
default_target
).
compile
()
dh
.
all_to_gpu
()
dh
.
run_kernel
(
kernel
,
time_step
=
124
)
dh
.
all_to_cpu
()
dh
.
all_to_gpu
()
dh
.
run_kernel
(
kernel
,
time_step
=
124
)
dh
.
all_to_cpu
()
arr
=
dh
.
gather_array
(
'
f
'
)
assert
np
.
logical_and
(
arr
<=
1.0
,
arr
>=
0
).
all
()
arr
=
dh
.
gather_array
(
'
f
'
)
assert
np
.
logical_and
(
arr
<=
1.0
,
arr
>=
0
).
all
()
x
=
philox_reference
[:,:,
0
::
2
]
y
=
philox_reference
[:,:,
1
::
2
]
z
=
x
^
y
<<
(
53
-
32
)
double_reference
=
z
*
2.
**-
53
+
2.
**-
54
assert
(
np
.
allclose
(
arr
,
double_reference
,
rtol
=
0
,
atol
=
np
.
finfo
(
np
.
float64
).
eps
))
x
=
philox_reference
[:,:,
0
::
2
]
y
=
philox_reference
[:,:,
1
::
2
]
z
=
x
^
y
<<
(
53
-
32
)
double_reference
=
z
*
2.
**-
53
+
2.
**-
54
assert
(
np
.
allclose
(
arr
,
double_reference
,
rtol
=
0
,
atol
=
np
.
finfo
(
np
.
float64
).
eps
))
def
test_philox_float
():
for
target
in
(
'
cpu
'
,
'
gpu
'
):
if
target
==
'
gpu
'
:
pytest
.
importorskip
(
'
pycuda
'
)
@pytest.mark.parametrize
(
'
target
'
,
(
'
cpu
'
,
'
gpu
'
))
def
test_philox_float
(
target
):
if
target
==
'
gpu
'
:
pytest
.
importorskip
(
'
pycuda
'
)
dh
=
ps
.
create_data_handling
((
2
,
2
),
default_ghost_layers
=
0
,
default_target
=
target
)
f
=
dh
.
add_array
(
"
f
"
,
values_per_cell
=
4
)
dh
=
ps
.
create_data_handling
((
2
,
2
),
default_ghost_layers
=
0
,
default_target
=
target
)
f
=
dh
.
add_array
(
"
f
"
,
values_per_cell
=
4
)
dh
.
fill
(
'
f
'
,
42.0
)
dh
.
fill
(
'
f
'
,
42.0
)
philox_node
=
PhiloxFourFloats
(
dh
.
dim
)
assignments
=
[
philox_node
]
+
[
ps
.
Assignment
(
f
(
i
),
philox_node
.
result_symbols
[
i
])
for
i
in
range
(
4
)]
kernel
=
ps
.
create_kernel
(
assignments
,
target
=
dh
.
default_target
).
compile
()
philox_node
=
PhiloxFourFloats
(
dh
.
dim
)
assignments
=
[
philox_node
]
+
[
ps
.
Assignment
(
f
(
i
),
philox_node
.
result_symbols
[
i
])
for
i
in
range
(
4
)]
kernel
=
ps
.
create_kernel
(
assignments
,
target
=
dh
.
default_target
).
compile
()
dh
.
all_to_gpu
()
dh
.
run_kernel
(
kernel
,
time_step
=
124
)
dh
.
all_to_cpu
()
arr
=
dh
.
gather_array
(
'
f
'
)
assert
np
.
logical_and
(
arr
<=
1.0
,
arr
>=
0
).
all
()
dh
.
all_to_gpu
()
dh
.
run_kernel
(
kernel
,
time_step
=
124
)
dh
.
all_to_cpu
()
arr
=
dh
.
gather_array
(
'
f
'
)
assert
np
.
logical_and
(
arr
<=
1.0
,
arr
>=
0
).
all
()
float_reference
=
philox_reference
*
2.
**-
32
+
2.
**-
33
assert
(
np
.
allclose
(
arr
,
float_reference
,
rtol
=
0
,
atol
=
np
.
finfo
(
np
.
float32
).
eps
))
float_reference
=
philox_reference
*
2.
**-
32
+
2.
**-
33
assert
(
np
.
allclose
(
arr
,
float_reference
,
rtol
=
0
,
atol
=
np
.
finfo
(
np
.
float32
).
eps
))
def
test_aesni_double
():
dh
=
ps
.
create_data_handling
((
2
,
2
),
default_ghost_layers
=
0
,
default_target
=
"
cpu
"
)
...
...
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment