Stephan Seitz / pystencils_walberla · Commits · 0f21a28f

Commit 0f21a28f, authored 6 years ago by Martin Bauer
Added support for inner/outer kernels to overlap communication
Parent: dc975ff4

4 changed files, 164 additions and 49 deletions:

  jinja_filters.py          +86  −45
  sweep.py                  + 5  − 1
  templates/Sweep.tmpl.cpp  +65  − 1
  templates/Sweep.tmpl.h    + 8  − 2
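The point of the commit: a generated sweep class now exposes inner() and outer() in addition to operator(), so a time loop can update the interior of each block while the ghost-layer exchange is still in flight. A minimal sketch of the intended call pattern, assuming a waLBerla communication scheme with startCommunication()/wait() (e.g. blockforest::communication::UniformBufferedScheme) and a generated sweep instance called sweep; all names here are illustrative, not from this commit:

    // Sketch only: overlap ghost-layer communication with interior work.
    for( uint_t t = 0; t < timesteps; ++t )
    {
        comm.startCommunication();      // ghost-layer exchange runs asynchronously
        for( auto &block : *blocks )
            sweep.inner( &block );      // interior cells need no ghost data
        comm.wait();                    // ghost layers are now up to date
        for( auto &block : *blocks )
            sweep.outer( &block );      // boundary layers consume the fresh ghost data
    }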
jinja_filters.py  (+86 −45)
@@ -4,21 +4,23 @@ import copy
 from pystencils.astnodes import ResolvedFieldAccess
 from pystencils.data_types import get_base_type
 from pystencils.backends.cbackend import generate_c, CustomSympyPrinter
-from pystencils.field import FieldType
+from pystencils.field import FieldType, Field
 from pystencils.sympyextensions import prod
 
+temporary_fieldMemberTemplate = """
+std::set< {type} *, field::SwapableCompare< {type} * > > cache_{original_field_name}_;"""
+
 temporary_fieldTemplate = """
 // Getting temporary field {tmp_field_name}
-static std::set< {type} *, field::SwapableCompare< {type} * > > cache_{original_field_name};
-auto it = cache_{original_field_name}.find( {original_field_name} );
-if( it != cache_{original_field_name}.end() )
+auto it = cache_{original_field_name}_.find( {original_field_name} );
+if( it != cache_{original_field_name}_.end() )
 {{
     {tmp_field_name} = *it;
 }}
 else
 {{
     {tmp_field_name} = {original_field_name}->cloneUninitialized();
-    cache_{original_field_name}.insert({tmp_field_name});
+    cache_{original_field_name}_.insert({tmp_field_name});
 }}
 """
@@ -36,6 +38,7 @@ def get_field_fsize(field, field_accesses=()):
     elif field.index_dimensions == 0:
         return 1
     else:
+        assert len(field_accesses) > 0
         assert field.index_dimensions == 1
         max_idx_value = 0
         for acc in field_accesses:
@@ -97,7 +100,7 @@ def field_extraction_code(field_accesses, field_name, is_temporary, declaration_
     # Determine size of f coordinate which is a template parameter
     f_size = get_field_fsize(field, field_accesses)
     dtype = get_base_type(field.dtype)
-    field_type = "cuda::GPUField<%s>" % (dtype,) if is_gpu else "GhostLayerField<%s, %d>" % (dtype, f_size)
+    field_type = make_field_type(dtype, f_size, is_gpu)
     if not is_temporary:
         dtype = get_base_type(field.dtype)
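The inline string pair this replaces shows exactly what the new make_field_type helper has to choose between. For a double field with f-size 19 the two candidate C++ types would be (sketch only):

    GhostLayerField<double, 19> *field_cpu;   // CPU target: fSize is a template parameter
    cuda::GPUField<double>      *field_gpu;   // GPU target: fSize is a runtime property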
@@ -182,6 +185,7 @@ def generate_call(ctx, kernel_info, ghost_layers_to_include=0, cell_interval=Non
     """
     assert isinstance(ghost_layers_to_include, str) or ghost_layers_to_include >= 0
     ast = kernel_info.ast
+    ast_params = kernel_info.ast.parameters
     ghost_layers_to_include = sp.sympify(ghost_layers_to_include)
     if ast.ghost_layers is None:
@@ -195,10 +199,7 @@ def generate_call(ctx, kernel_info, ghost_layers_to_include=0, cell_interval=Non
     kernel_call_lines = []
     fields = {f.name: f for f in ast.fields_accessed}
-    spatial_shape_symbols = []
 
-    def get_start_coordinates(parameter):
-        field_object = fields[parameter.field_name]
+    def get_start_coordinates(field_object):
         if cell_interval is None:
             return [-ghost_layers_to_include - required_ghost_layers] * field_object.spatial_dimensions
         else:
@@ -206,19 +207,43 @@ def generate_call(ctx, kernel_info, ghost_layers_to_include=0, cell_interval=Non
             return [sp.Symbol("{ci}.{coord}Min()".format(coord=coord, ci=cell_interval)) - required_ghost_layers
                     for coord in ('x', 'y', 'z')]
 
-    def get_end_coordinates(parameter):
-        field_object = fields[parameter.field_name]
+    def get_end_coordinates(field_object):
         if cell_interval is None:
             shape_names = ['xSize()', 'ySize()', 'zSize()'][:field_object.spatial_dimensions]
             offset = 2 * ghost_layers_to_include + 2 * required_ghost_layers
-            return ["%s->%s + %s" % (parameter.field_name, e, offset) for e in shape_names]
+            return ["%s->%s + %s" % (field_object.name, e, offset) for e in shape_names]
         else:
             assert ghost_layers_to_include == 0
             coord_names = ['x', 'y', 'z'][:field_object.spatial_dimensions]
-            return ["{ci}.{coord}Size() + {gl}".format(coord=coord, ci=cell_interval, gl=required_ghost_layers)
+            return ["{ci}.{coord}Size() + {gl}".format(coord=coord, ci=cell_interval, gl=2 * required_ghost_layers)
                     for coord in coord_names]
 
-    for param in ast.parameters:
+    def create_field_shape_code(field, param_name, gpu_copy=True):
+        result = []
+        type_str = get_base_type(Field.SHAPE_DTYPE).base_name
+        shapes = ["%s(%s)" % (type_str, c) for c in get_end_coordinates(field)]
+
+        max_values = ["%s->%sSizeWithGhostLayer()" % (field.name, coord) for coord in ['x', 'y', 'z']]
+        for shape, max_value in zip(shapes, max_values):
+            result.append("WALBERLA_ASSERT_GREATER_EQUAL(%s, %s);" % (max_value, shape))
+
+        if field.index_dimensions == 1:
+            shapes.append("%s(%s->fSize())" % (type_str, field.name))
+        elif field.index_dimensions > 1:
+            shapes.extend(["%s(%d)" % (type_str, e) for e in field.index_shape])
+            result.append("WALBERLA_ASSERT_EQUAL(int(%s->fSize()), %d);" % (field.name, prod(field.index_shape)))
+
+        if is_cpu or not gpu_copy:
+            result.append("const %s %s [] = {%s};" % (type_str, param_name, ", ".join(shapes)))
+        else:
+            result.append("const %s %s_cpu [] = {%s};" % (type_str, param_name, ", ".join(shapes)))
+            result.append("WALBERLA_CUDA_CHECK( cudaMemcpyToSymbolAsync(internal_%s::%s, %s_cpu, %d * sizeof(%s), "
+                          "0, cudaMemcpyHostToDevice, %s) );"
+                          % (ast.function_name, param_name, param_name, len(shapes), type_str, stream))
+        return result
+
+    for param in ast_params:
         if param.is_field_argument and FieldType.is_indexed(fields[param.field_name]):
             continue
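The new create_field_shape_code helper centralizes the shape handling that previously lived inline in the is_field_shape_argument branch (replaced in a later hunk). On the GPU path it takes the generate_call stream argument, so the cudaMemcpyToSymbolAsync uploading the shape array is issued on the caller's stream rather than the default stream. For a hypothetical 3D scalar field pdfs needing one ghost layer, a kernel named mykernel and a shape parameter _size_pdfs (all names invented; int64_t assumed as the base type of pystencils' Field.SHAPE_DTYPE), the emitted lines would look roughly like:

    WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(pdfs->xSize() + 2));
    WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(pdfs->ySize() + 2));
    WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(pdfs->zSize() + 2));
    const int64_t _size_pdfs_cpu [] = {int64_t(pdfs->xSize() + 2), int64_t(pdfs->ySize() + 2), int64_t(pdfs->zSize() + 2)};
    WALBERLA_CUDA_CHECK( cudaMemcpyToSymbolAsync(internal_mykernel::_size_pdfs, _size_pdfs_cpu, 3 * sizeof(int64_t),
                                                 0, cudaMemcpyHostToDevice, stream_) );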
@@ -227,7 +252,7 @@ def generate_call(ctx, kernel_info, ghost_layers_to_include=0, cell_interval=Non
         if field.field_type == FieldType.BUFFER:
             kernel_call_lines.append("%s %s = %s;" % (param.dtype, param.name, param.field_name))
         else:
-            coordinates = get_start_coordinates(param)
+            coordinates = get_start_coordinates(field)
             actual_gls = "int_c(%s->nrOfGhostLayers())" % (param.field_name, )
             for c in set(coordinates):
                 kernel_call_lines.append("WALBERLA_ASSERT_GREATER_EQUAL(%s, -%s);" %
@@ -255,36 +280,32 @@ def generate_call(ctx, kernel_info, ghost_layers_to_include=0, cell_interval=Non
             else:
                 kernel_call_lines.append("const %s %s_cpu [] = {%s};" % (type_str, param.name, ", ".join(strides)))
                 kernel_call_lines.append(
-                    "WALBERLA_CUDA_CHECK( cudaMemcpyToSymbolAsync(internal_%s::%s, %s_cpu, %d * sizeof(%s), 0, cudaMemcpyHostToDevice, %s) );"
+                    "WALBERLA_CUDA_CHECK( cudaMemcpyToSymbolAsync(internal_%s::%s, %s_cpu, %d * sizeof(%s), "
+                    "0, cudaMemcpyHostToDevice, %s) );"
                     % (ast.function_name, param.name, param.name, len(strides), type_str, stream))
         elif param.is_field_shape_argument:
-            field = fields[param.field_name]
-            type_str = get_base_type(param.dtype).base_name
-            shapes = ["%s(%s)" % (type_str, c) for c in get_end_coordinates(param)]
-            spatial_shape_symbols = [sp.Symbol("%s_cpu[%d]" % (param.name, i)) for i in range(field.spatial_dimensions)]
-            max_values = ["%s->%sSizeWithGhostLayer()" % (field.name, coord) for coord in ['x', 'y', 'z']]
-            for shape, max_value in zip(shapes, max_values):
-                kernel_call_lines.append("WALBERLA_ASSERT_GREATER_EQUAL(%s, %s);" % (max_value, shape))
-            if field.index_dimensions == 1:
-                shapes.append("%s(%s->fSize())" % (type_str, field.name))
-            elif field.index_dimensions > 1:
-                shapes.extend(["%s(%d)" % (type_str, e) for e in field.index_shape])
-                kernel_call_lines.append("WALBERLA_ASSERT_EQUAL(int(%s->fSize()), %d);" % (field.name, prod(field.index_shape)))
-            if is_cpu:
-                kernel_call_lines.append("const %s %s [] = {%s};" % (type_str, param.name, ", ".join(shapes)))
-            else:
-                kernel_call_lines.append("const %s %s_cpu [] = {%s};" % (type_str, param.name, ", ".join(shapes)))
-                kernel_call_lines.append("WALBERLA_CUDA_CHECK( cudaMemcpyToSymbolAsync(internal_%s::%s, %s_cpu, %d * sizeof(%s), 0, cudaMemcpyHostToDevice, %s) );"
-                                         % (ast.function_name, param.name, param.name, len(shapes), type_str, stream))
+            kernel_call_lines += create_field_shape_code(fields[param.field_name], param.name)
 
     if not is_cpu:
+        spatial_shape_symbols = []
+        for param in ast_params:
+            if param.is_field_shape_argument:
+                spatial_shape_symbols = [sp.Symbol("%s_cpu[%d]" % (param.name, i))
+                                         for i in range(field.spatial_dimensions)]
+        if not spatial_shape_symbols:
+            for param in ast_params:
+                if not param.is_field_argument:
+                    continue
+                field = fields[param.field_name]
+                if field.field_type == FieldType.GENERIC:
+                    kernel_call_lines += create_field_shape_code(field, '_size', gpu_copy=False)
+                    spatial_shape_symbols = [sp.Symbol("_size[%d]" % (i,)) for i in range(field.spatial_dimensions)]
+                    break
+
         indexing_dict = ast.indexing.call_parameters(spatial_shape_symbols)
 
-    call_parameters = ", ".join([p.name for p in ast.parameters
+    call_parameters = ", ".join([p.name for p in ast_params
                                  if p.is_field_ptr_argument or not p.is_field_argument])
     sp_printer_c = CustomSympyPrinter()
@@ -296,7 +317,7 @@ def generate_call(ctx, kernel_info, ghost_layers_to_include=0, cell_interval=Non
         ]
     else:
         kernel_call_lines.append("internal_%s::%s(%s);" %
-                                 (ast.function_name, ast.function_name, ", ".join([p.name for p in ast.parameters])))
+                                 (ast.function_name, ast.function_name, ", ".join([p.name for p in ast_params])))
 
     return "\n".join(kernel_call_lines)
@@ -339,16 +360,36 @@ def generate_constructor_parameters(kernel_info, parameters_to_ignore=[]):
     return ", ".join(parameter_list + varying_parameters)
 
-def generate_members(kernel_info, parameters_to_ignore=[]):
+@jinja2.contextfilter
+def generate_members(ctx, kernel_info, parameters_to_ignore=None, only_fields=False):
+    if parameters_to_ignore is None:
+        parameters_to_ignore = []
+
     ast = kernel_info.ast
-    parameters_to_ignore += kernel_info.temporary_fields
+    fields = {f.name: f for f in ast.fields_accessed}
+    params_to_skip = parameters_to_ignore + kernel_info.temporary_fields
+    is_gpu = ctx['target'] == 'gpu'
 
     result = []
     for param in ast.parameters:
-        if param.is_field_ptr_argument and param.field_name not in parameters_to_ignore:
+        if only_fields and not param.is_field_argument:
+            continue
+        if param.is_field_ptr_argument and param.field_name not in params_to_skip:
             result.append("BlockDataID %sID;" % (param.field_name, ))
-        elif not param.is_field_argument and param.name not in parameters_to_ignore:
+        elif not param.is_field_argument and param.name not in params_to_skip:
             result.append("%s %s;" % (param.dtype, param.name,))
 
+    for field_name in kernel_info.temporary_fields:
+        f = fields[field_name]
+        if field_name in parameters_to_ignore:
+            continue
+        assert field_name.endswith('_tmp')
+        original_field_name = field_name[:-len('_tmp')]
+        f_size = get_field_fsize(f)
+        field_type = make_field_type(get_base_type(f.dtype), f_size, is_gpu)
+        result.append(temporary_fieldMemberTemplate.format(type=field_type, original_field_name=original_field_name))
+
     return "\n".join(result)
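generate_members now also declares the per-instance temporary-field caches introduced at the top of this file. For a GPU sweep over a hypothetical field pdfs (with temporary pdfs_tmp) and a scalar parameter omega, the rendered member block would look roughly like:

    BlockDataID pdfsID;
    double omega;
    std::set< cuda::GPUField<double> *, field::SwapableCompare< cuda::GPUField<double> * > > cache_pdfs_;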
sweep.py  (+5 −1)
@@ -51,7 +51,8 @@ class Sweep:
         func = functools.partial(kernel_decorator, sweep_function, sweep=sweep)
         cb = functools.partial(Sweep._generate_header_and_source, func, name, target,
-                               namespace, sweep._temporary_fields, sweep._field_swaps, optimization=optimization)
+                               namespace, sweep._temporary_fields, sweep._field_swaps, optimization=optimization,
+                               staggered=False, varying_parameters=[])
         file_names = [name + ".h", name + ('.cpp' if target == 'cpu' else '.cu')]
         codegen.register(file_names, cb)
@@ -95,11 +96,14 @@ class Sweep:
         env = Environment(loader=PackageLoader('pystencils_walberla'))
         add_pystencils_filters_to_jinja_env(env)
 
+        representative_field = {p.field_name for p in ast.parameters if p.is_field_argument}.pop()
+
         context = {
             'kernel': KernelInfo(ast, temporary_fields, field_swaps, varying_parameters),
             'namespace': namespace,
             'class_name': ast.function_name[0].upper() + ast.function_name[1:],
             'target': target,
+            'field': representative_field,
         }
 
         header = env.get_template("Sweep.tmpl.h").render(**context)
templates/Sweep.tmpl.cpp  (+65 −1)
@@ -49,10 +49,74 @@ namespace {{namespace}} {
 void {{class_name}}::operator() ( IBlock * block )
 {
     {{kernel|generate_block_data_to_field_extraction|indent(4)}}
-    {{kernel|generate_call|indent(4)}}
+    {{kernel|generate_call(stream='stream_')|indent(4)}}
     {{kernel|generate_swaps|indent(4)}}
 }
 
+void {{class_name}}::inner( IBlock * block )
+{
+    {{kernel|generate_block_data_to_field_extraction|indent(4)}}
+
+    CellInterval inner = {{field}}->xyzSize();
+    inner.expand(-1);
+
+    {{kernel|generate_call(stream='stream_', cell_interval='inner')|indent(4)}}
+}
+
+void {{class_name}}::outer( IBlock * block )
+{
+    static std::vector<CellInterval> layers;
+    {% if target is equalto 'gpu' %}
+    static std::vector<cudaStream_t> streams;
+    {% endif %}
+
+    {{kernel|generate_block_data_to_field_extraction|indent(4)}}
+
+    if( layers.size() == 0 )
+    {
+        CellInterval ci;
+
+        {{field}}->getSliceBeforeGhostLayer(stencil::T, ci, 1, false);
+        layers.push_back(ci);
+        {{field}}->getSliceBeforeGhostLayer(stencil::B, ci, 1, false);
+        layers.push_back(ci);
+
+        {{field}}->getSliceBeforeGhostLayer(stencil::N, ci, 1, false);
+        ci.expand(Cell(0, 0, -1));
+        layers.push_back(ci);
+        {{field}}->getSliceBeforeGhostLayer(stencil::S, ci, 1, false);
+        ci.expand(Cell(0, 0, -1));
+        layers.push_back(ci);
+
+        {{field}}->getSliceBeforeGhostLayer(stencil::E, ci, 1, false);
+        ci.expand(Cell(0, -1, -1));
+        layers.push_back(ci);
+        {{field}}->getSliceBeforeGhostLayer(stencil::W, ci, 1, false);
+        ci.expand(Cell(0, -1, -1));
+        layers.push_back(ci);
+
+        {% if target is equalto 'gpu' %}
+        for( int i = 0; i < layers.size(); ++i )
+        {
+            streams.push_back(cudaStream_t());
+            WALBERLA_CUDA_CHECK( cudaStreamCreate(&streams.back()) );
+        }
+        {% endif %}
+    }
+
+    for( int i = 0; i < layers.size(); ++i )
+    {
+        {{kernel|generate_call(stream='streams[i]', cell_interval="layers[i]")|indent(8)}}
+        WALBERLA_CUDA_CHECK( cudaStreamSynchronize(streams[i]) ); // TODO move out when no memcpy is needed to setup call
+    }
+
+    {{kernel|generate_swaps|indent(4)}}
+}
+
 } // namespace {{namespace}}
 } // namespace walberla
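outer() splits the one-cell-thick block boundary into six non-overlapping slabs: the top and bottom z-slices are taken whole, the north/south slices are shrunk by one cell at both ends in z, and the east/west slices are shrunk in both y and z, so no boundary cell is updated twice. On the GPU target each slab is launched on its own CUDA stream. For reference, the rendered inner() for a hypothetical class MyKernel over a GPU field pdfs would look roughly like (sketch, names invented):

    void MyKernel::inner( IBlock * block )
    {
        auto pdfs = block->getData< cuda::GPUField<double> >( pdfsID );

        CellInterval inner = pdfs->xyzSize();
        inner.expand(-1);   // peel off the one-cell-thick boundary shell

        // ...generated kernel launch restricted to 'inner', issued on stream_...
    }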
templates/Sweep.tmpl.h  (+8 −2)
@@ -50,14 +50,20 @@ namespace {{namespace}} {
 class {{class_name}}
 {
 public:
-    {{class_name}}( {{kernel|generate_constructor_parameters}} )
+    {{class_name}}( {{kernel|generate_constructor_parameters}}
+                    {% if target is equalto 'gpu' %}, cudaStream_t stream = 0{% endif %} )
         : {{kernel|generate_constructor_initializer_list}}
+          {% if target is equalto 'gpu' %}, stream_(stream){% endif %}
     {};
 
     void operator() ( IBlock * block );
+    void inner( IBlock * block );
+    void outer( IBlock * block );
 
 private:
     {{kernel|generate_members|indent(4)}}
+    {% if target is equalto 'gpu' %}cudaStream_t stream_;{% endif %}
 };
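Put together, the rendered header for a GPU target would now look roughly like this (class and member names hypothetical, following the member sketch above):

    class MyKernel
    {
    public:
        MyKernel( BlockDataID pdfsID_, double omega_, cudaStream_t stream = 0 )
            : pdfsID(pdfsID_), omega(omega_), stream_(stream)
        {};

        void operator() ( IBlock * block );
        void inner( IBlock * block );
        void outer( IBlock * block );

    private:
        BlockDataID pdfsID;
        double omega;
        cudaStream_t stream_;
    };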