Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
lbmpy
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Wiki
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Build
Pipelines
Jobs
Pipeline schedules
Artifacts
Deploy
Releases
Model registry
Operate
Environments
Monitor
Incidents
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Terms and privacy
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
pycodegen
lbmpy
Commits
1ab17ebe
Commit
1ab17ebe
authored
2 years ago
by
Markus Holzer
Committed by
Helen Schottenhamml
2 years ago
Browse files
Options
Downloads
Patches
Plain Diff
[BugFix] Fix missing comunicated PDFs for FreeSlip
parent
07be81fe
No related branches found
No related tags found
1 merge request
!139
[BugFix] Fix missing comunicated PDFs for FreeSlip
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
lbmpy/advanced_streaming/communication.py
+111
-129
111 additions, 129 deletions
lbmpy/advanced_streaming/communication.py
with
111 additions
and
129 deletions
lbmpy/advanced_streaming/communication.py
+
111
−
129
View file @
1ab17ebe
import
itertools
import
itertools
from
pystencils
import
Field
,
Assignment
from
pystencils
import
CreateKernelConfig
,
Field
,
Assignment
,
AssignmentCollection
from
pystencils.slicing
import
shift_slice
,
get_slice_before_ghost_layer
,
normalize_slice
from
pystencils.slicing
import
shift_slice
,
get_slice_before_ghost_layer
,
normalize_slice
from
lbmpy.advanced_streaming.utility
import
is_inplace
,
get_accessor
,
numeric_index
,
\
from
lbmpy.advanced_streaming.utility
import
is_inplace
,
get_accessor
,
numeric_index
,
Timestep
,
get_timesteps
numeric_offsets
,
Timestep
,
get_timesteps
from
pystencils.datahandling
import
SerialDataHandling
from
pystencils.datahandling
import
SerialDataHandling
from
pystencils.enums
import
Target
from
pystencils.enums
import
Target
from
itertools
import
chain
from
itertools
import
chain
def
_trim_slice_in_direction
(
slices
,
direction
):
class
LBMPeriodicityHandling
:
assert
len
(
slices
)
==
len
(
direction
)
result
=
[]
def
__init__
(
self
,
stencil
,
data_handling
,
pdf_field_name
,
for
s
,
d
in
zip
(
slices
,
direction
):
streaming_pattern
=
'
pull
'
,
ghost_layers
=
1
,
if
isinstance
(
s
,
int
):
pycuda_direct_copy
=
True
):
result
.
append
(
s
)
"""
continue
Periodicity Handling for Lattice Boltzmann Streaming.
start
=
s
.
start
+
1
if
d
==
-
1
else
s
.
start
stop
=
s
.
stop
-
1
if
d
==
1
else
s
.
stop
result
.
append
(
slice
(
start
,
stop
,
s
.
step
))
return
tuple
(
result
)
**On the usage with cuda:**
- pycuda allows the copying of sliced arrays within device memory using the numpy syntax,
e.g. `dst[:,0] = src[:,-1]`. In this implementation, this is the default for periodicity
handling. Alternatively, if you set `pycuda_direct_copy=False`, GPU kernels are generated and
compiled. The compiled kernels are almost twice as fast in execution as pycuda array copying,
but especially for large stencils like D3Q27, their compilation can take up to 20 seconds.
Choose your weapon depending on your use case.
"""
if
not
isinstance
(
data_handling
,
SerialDataHandling
):
raise
ValueError
(
'
Only serial data handling is supported!
'
)
self
.
stencil
=
stencil
self
.
dim
=
stencil
.
D
self
.
dh
=
data_handling
def
_extend_dir
(
direction
):
assert
data_handling
.
default_target
in
[
Target
.
CPU
,
Target
.
GPU
]
if
len
(
direction
)
==
0
:
self
.
target
=
data_handling
.
default_target
yield
tuple
()
elif
direction
[
0
]
==
0
:
for
d
in
[
-
1
,
0
,
1
]:
for
rest
in
_extend_dir
(
direction
[
1
:]):
yield
(
d
,
)
+
rest
else
:
for
rest
in
_extend_dir
(
direction
[
1
:]):
yield
(
direction
[
0
],
)
+
rest
self
.
pdf_field_name
=
pdf_field_name
self
.
ghost_layers
=
ghost_layers
self
.
periodicity
=
data_handling
.
periodicity
self
.
inplace_pattern
=
is_inplace
(
streaming_pattern
)
def
_get_neighbour_transform
(
direction
,
ghost_layers
):
self
.
cpu
=
self
.
target
==
Target
.
CPU
return
tuple
(
d
*
(
ghost_layers
+
1
)
for
d
in
direct
ion
)
self
.
pycuda_direct_copy
=
self
.
target
==
Target
.
GPU
and
pycuda_
direct
_copy
def
is_copy_direction
(
direction
):
s
=
0
for
d
,
p
in
zip
(
direction
,
self
.
periodicity
):
s
+=
abs
(
d
)
if
d
!=
0
and
not
p
:
return
False
def
_fix_length_one_slices
(
slices
):
return
s
!=
0
"""
Slices of length one are replaced by their start value for correct periodic shifting
"""
if
isinstance
(
slices
,
int
):
full_stencil
=
itertools
.
product
(
*
([
-
1
,
0
,
1
]
for
_
in
range
(
self
.
dim
)))
return
slices
copy_directions
=
tuple
(
filter
(
is_copy_direction
,
full_stencil
))
elif
isinstance
(
slices
,
slice
):
self
.
comm_slices
=
[]
if
slices
.
stop
is
not
None
and
abs
(
slices
.
start
-
slices
.
stop
)
==
1
:
timesteps
=
get_timesteps
(
streaming_pattern
)
return
slices
.
start
for
timestep
in
timesteps
:
elif
slices
.
stop
is
None
and
slices
.
start
==
-
1
:
slices_per_comm_dir
=
get_communication_slices
(
stencil
=
stencil
,
return
-
1
# [-1:] also has length one
comm_stencil
=
copy_directions
,
streaming_pattern
=
streaming_pattern
,
prev_timestep
=
timestep
,
ghost_layers
=
ghost_layers
)
self
.
comm_slices
.
append
(
list
(
chain
.
from_iterable
(
v
for
k
,
v
in
slices_per_comm_dir
.
items
())))
if
self
.
target
==
Target
.
GPU
and
not
pycuda_direct_copy
:
self
.
device_copy_kernels
=
list
()
for
timestep
in
timesteps
:
self
.
device_copy_kernels
.
append
(
self
.
_compile_copy_kernels
(
timestep
))
def
__call__
(
self
,
prev_timestep
=
Timestep
.
BOTH
):
if
self
.
cpu
:
self
.
_periodicity_handling_cpu
(
prev_timestep
)
else
:
else
:
return
slices
self
.
_periodicity_handling_gpu
(
prev_timestep
)
else
:
return
tuple
(
_fix_length_one_slices
(
s
)
for
s
in
slices
)
def
_periodicity_handling_cpu
(
self
,
prev_timestep
):
arr
=
self
.
dh
.
cpu_arrays
[
self
.
pdf_field_name
]
comm_slices
=
self
.
comm_slices
[
prev_timestep
.
idx
]
for
src
,
dst
in
comm_slices
:
arr
[
dst
]
=
arr
[
src
]
def
_compile_copy_kernels
(
self
,
timestep
):
pdf_field
=
self
.
dh
.
fields
[
self
.
pdf_field_name
]
kernels
=
[]
for
src
,
dst
in
self
.
comm_slices
[
timestep
.
idx
]:
kernels
.
append
(
periodic_pdf_copy_kernel
(
pdf_field
,
src
,
dst
,
target
=
self
.
target
))
return
kernels
def
_periodicity_handling_gpu
(
self
,
prev_timestep
):
arr
=
self
.
dh
.
gpu_arrays
[
self
.
pdf_field_name
]
if
self
.
pycuda_direct_copy
:
for
src
,
dst
in
self
.
comm_slices
[
prev_timestep
.
idx
]:
arr
[
dst
]
=
arr
[
src
]
else
:
kernel_args
=
{
self
.
pdf_field_name
:
arr
}
for
kernel
in
self
.
device_copy_kernels
[
prev_timestep
.
idx
]:
kernel
(
**
kernel_args
)
def
get_communication_slices
(
def
get_communication_slices
(
...
@@ -60,7 +104,7 @@ def get_communication_slices(
...
@@ -60,7 +104,7 @@ def get_communication_slices(
Return the source and destination slices for periodicity handling or communication between blocks.
Return the source and destination slices for periodicity handling or communication between blocks.
:param stencil: The stencil used by the LB method.
:param stencil: The stencil used by the LB method.
:param comm_stencil: The stencil defining the communication directions. If None, it will be set to the
:param comm_stencil: The stencil defining the communication directions. If None, it will be set to the
full stencil (D2Q9 in 2D, D3Q27 in 3D, etc.).
full stencil (D2Q9 in 2D, D3Q27 in 3D, etc.).
:param streaming_pattern: The streaming pattern.
:param streaming_pattern: The streaming pattern.
:param prev_timestep: Timestep after which communication is run.
:param prev_timestep: Timestep after which communication is run.
...
@@ -83,13 +127,10 @@ def get_communication_slices(
...
@@ -83,13 +127,10 @@ def get_communication_slices(
for
streaming_dir
in
set
(
_extend_dir
(
comm_dir
))
&
set
(
stencil
):
for
streaming_dir
in
set
(
_extend_dir
(
comm_dir
))
&
set
(
stencil
):
d
=
stencil
.
index
(
streaming_dir
)
d
=
stencil
.
index
(
streaming_dir
)
write_offsets
=
numeric_offsets
(
write_accesses
[
d
])
write_index
=
numeric_index
(
write_accesses
[
d
])[
0
]
write_index
=
numeric_index
(
write_accesses
[
d
])[
0
]
tangential_dir
=
tuple
(
s
-
c
for
s
,
c
in
zip
(
streaming_dir
,
comm_dir
))
origin_slice
=
get_slice_before_ghost_layer
(
comm_dir
,
ghost_layers
=
ghost_layers
,
thickness
=
1
)
origin_slice
=
get_slice_before_ghost_layer
(
comm_dir
,
ghost_layers
=
ghost_layers
,
thickness
=
1
)
origin_slice
=
_fix_length_one_slices
(
origin_slice
)
src_slice
=
_fix_length_one_slices
(
origin_slice
)
src_slice
=
shift_slice
(
_trim_slice_in_direction
(
origin_slice
,
tangential_dir
),
write_offsets
)
neighbour_transform
=
_get_neighbour_transform
(
comm_dir
,
ghost_layers
)
neighbour_transform
=
_get_neighbour_transform
(
comm_dir
,
ghost_layers
)
dst_slice
=
shift_slice
(
src_slice
,
neighbour_transform
)
dst_slice
=
shift_slice
(
src_slice
,
neighbour_transform
)
...
@@ -130,8 +171,9 @@ def periodic_pdf_copy_kernel(pdf_field, src_slice, dst_slice,
...
@@ -130,8 +171,9 @@ def periodic_pdf_copy_kernel(pdf_field, src_slice, dst_slice,
assert
offset
==
[
_stop
(
s1
)
-
_stop
(
s2
)
for
s1
,
s2
in
zip
(
normalized_from_slice
,
normalized_to_slice
)],
\
assert
offset
==
[
_stop
(
s1
)
-
_stop
(
s2
)
for
s1
,
s2
in
zip
(
normalized_from_slice
,
normalized_to_slice
)],
\
"
Slices have to have same size
"
"
Slices have to have same size
"
copy_eq
=
Assignment
(
pdf_field
(
pdf_idx
),
pdf_field
[
tuple
(
offset
)](
pdf_idx
))
copy_eq
=
AssignmentCollection
(
main_assignments
=
[
Assignment
(
pdf_field
(
pdf_idx
),
pdf_field
[
tuple
(
offset
)](
pdf_idx
))])
ast
=
create_cuda_kernel
([
copy_eq
],
iteration_slice
=
dst_slice
,
skip_independence_check
=
True
)
config
=
CreateKernelConfig
(
iteration_slice
=
dst_slice
,
skip_independence_check
=
True
)
ast
=
create_cuda_kernel
(
copy_eq
,
config
=
config
)
if
target
==
Target
.
GPU
:
if
target
==
Target
.
GPU
:
from
pystencils.gpucuda
import
make_python_function
from
pystencils.gpucuda
import
make_python_function
return
make_python_function
(
ast
)
return
make_python_function
(
ast
)
...
@@ -139,92 +181,32 @@ def periodic_pdf_copy_kernel(pdf_field, src_slice, dst_slice,
...
@@ -139,92 +181,32 @@ def periodic_pdf_copy_kernel(pdf_field, src_slice, dst_slice,
raise
ValueError
(
'
Invalid target:
'
,
target
)
raise
ValueError
(
'
Invalid target:
'
,
target
)
class
LBMPeriodicityHandling
:
def
_extend_dir
(
direction
):
if
len
(
direction
)
==
0
:
def
__init__
(
self
,
stencil
,
data_handling
,
pdf_field_name
,
yield
tuple
()
streaming_pattern
=
'
pull
'
,
ghost_layers
=
1
,
elif
direction
[
0
]
==
0
:
pycuda_direct_copy
=
True
):
for
d
in
[
-
1
,
0
,
1
]:
"""
for
rest
in
_extend_dir
(
direction
[
1
:]):
Periodicity Handling for Lattice Boltzmann Streaming.
yield
(
d
,
)
+
rest
else
:
**On the usage with cuda:**
for
rest
in
_extend_dir
(
direction
[
1
:]):
- pycuda allows the copying of sliced arrays within device memory using the numpy syntax,
yield
(
direction
[
0
],
)
+
rest
e.g. `dst[:,0] = src[:,-1]`. In this implementation, this is the default for periodicity
handling. Alternatively, if you set `pycuda_direct_copy=False`, GPU kernels are generated and
compiled. The compiled kernels are almost twice as fast in execution as pycuda array copying,
but especially for large stencils like D3Q27, their compilation can take up to 20 seconds.
Choose your weapon depending on your use case.
"""
if
not
isinstance
(
data_handling
,
SerialDataHandling
):
raise
ValueError
(
'
Only serial data handling is supported!
'
)
self
.
stencil
=
stencil
self
.
dim
=
stencil
.
D
self
.
dh
=
data_handling
target
=
data_handling
.
default_target
assert
target
in
[
Target
.
CPU
,
Target
.
GPU
]
self
.
pdf_field_name
=
pdf_field_name
self
.
ghost_layers
=
ghost_layers
periodicity
=
data_handling
.
periodicity
self
.
inplace_pattern
=
is_inplace
(
streaming_pattern
)
self
.
target
=
target
self
.
cpu
=
target
==
Target
.
CPU
self
.
pycuda_direct_copy
=
target
==
Target
.
GPU
and
pycuda_direct_copy
def
is_copy_direction
(
direction
):
s
=
0
for
d
,
p
in
zip
(
direction
,
periodicity
):
s
+=
abs
(
d
)
if
d
!=
0
and
not
p
:
return
False
return
s
!=
0
full_stencil
=
itertools
.
product
(
*
([
-
1
,
0
,
1
]
for
_
in
range
(
self
.
dim
)))
copy_directions
=
tuple
(
filter
(
is_copy_direction
,
full_stencil
))
self
.
comm_slices
=
[]
timesteps
=
get_timesteps
(
streaming_pattern
)
for
timestep
in
timesteps
:
slices_per_comm_dir
=
get_communication_slices
(
stencil
=
stencil
,
comm_stencil
=
copy_directions
,
streaming_pattern
=
streaming_pattern
,
prev_timestep
=
timestep
,
ghost_layers
=
ghost_layers
)
self
.
comm_slices
.
append
(
list
(
chain
.
from_iterable
(
v
for
k
,
v
in
slices_per_comm_dir
.
items
())))
if
target
==
Target
.
GPU
and
not
pycuda_direct_copy
:
self
.
device_copy_kernels
=
[]
for
timestep
in
timesteps
:
self
.
device_copy_kernels
.
append
(
self
.
_compile_copy_kernels
(
timestep
))
def
__call__
(
self
,
prev_timestep
=
Timestep
.
BOTH
):
if
self
.
cpu
:
self
.
_periodicity_handling_cpu
(
prev_timestep
)
else
:
self
.
_periodicity_handling_gpu
(
prev_timestep
)
def
_periodicity_handling_cpu
(
self
,
prev_timestep
):
def
_get_neighbour_transform
(
direction
,
ghost_layers
):
arr
=
self
.
dh
.
cpu_arrays
[
self
.
pdf_field_name
]
return
tuple
(
d
*
(
ghost_layers
+
1
)
for
d
in
direction
)
comm_slices
=
self
.
comm_slices
[
prev_timestep
.
idx
]
for
src
,
dst
in
comm_slices
:
arr
[
dst
]
=
arr
[
src
]
def
_compile_copy_kernels
(
self
,
timestep
):
pdf_field
=
self
.
dh
.
fields
[
self
.
pdf_field_name
]
kernels
=
[]
for
src
,
dst
in
self
.
comm_slices
[
timestep
.
idx
]:
kernels
.
append
(
periodic_pdf_copy_kernel
(
pdf_field
,
src
,
dst
,
target
=
self
.
target
))
return
kernels
def
_periodicity_handling_gpu
(
self
,
prev_timestep
):
def
_fix_length_one_slices
(
slices
):
arr
=
self
.
dh
.
gpu_arrays
[
self
.
pdf_field_name
]
"""
Slices of length one are replaced by their start value for correct periodic shifting
"""
if
self
.
pycuda_direct_copy
:
if
isinstance
(
slices
,
int
):
for
src
,
dst
in
self
.
comm_slices
[
prev_timestep
.
idx
]:
return
slices
arr
[
dst
]
=
arr
[
src
]
elif
isinstance
(
slices
,
slice
):
if
slices
.
stop
is
not
None
and
abs
(
slices
.
start
-
slices
.
stop
)
==
1
:
return
slices
.
start
elif
slices
.
stop
is
None
and
slices
.
start
==
-
1
:
return
-
1
# [-1:] also has length one
else
:
else
:
kernel_args
=
{
self
.
pdf_field_name
:
arr
}
return
slices
for
kernel
in
self
.
device_copy_kernels
[
prev_timestep
.
idx
]
:
else
:
kernel
(
**
kernel_arg
s
)
return
tuple
(
_fix_length_one_slices
(
s
)
for
s
in
slice
s
)
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment