Commit 2e07b7b9 authored by Michael Kuron

Merge branch 'MergeSweepGeneration' into 'master'

Merge sweep generation

Closes #142

See merge request walberla/walberla!425
parents 68edd863 2ebe0932
@@ -73,33 +73,22 @@ def generate_sweep(generation_context, class_name, assignments,
env = Environment(loader=PackageLoader('pystencils_walberla'), undefined=StrictUndefined)
add_pystencils_filters_to_jinja_env(env)
if inner_outer_split is False:
jinja_context = {
'kernel': KernelInfo(ast, temporary_fields, field_swaps, varying_parameters),
'namespace': namespace,
'class_name': class_name,
'target': create_kernel_params.get("target", "cpu"),
'headers': get_headers(ast),
'ghost_layers_to_include': ghost_layers_to_include
}
header = env.get_template("Sweep.tmpl.h").render(**jinja_context)
source = env.get_template("Sweep.tmpl.cpp").render(**jinja_context)
else:
main_kernel_info = KernelInfo(ast, temporary_fields, field_swaps, varying_parameters)
representative_field = {p.field_name for p in main_kernel_info.parameters if p.is_field_parameter}
representative_field = sorted(representative_field)[0]
jinja_context = {
'kernel': main_kernel_info,
'namespace': namespace,
'class_name': class_name,
'target': create_kernel_params.get("target", "cpu"),
'field': representative_field,
'headers': get_headers(ast),
'ghost_layers_to_include': 0
}
header = env.get_template("SweepInnerOuter.tmpl.h").render(**jinja_context)
source = env.get_template("SweepInnerOuter.tmpl.cpp").render(**jinja_context)
main_kernel_info = KernelInfo(ast, temporary_fields, field_swaps, varying_parameters)
representative_field = {p.field_name for p in main_kernel_info.parameters if p.is_field_parameter}
representative_field = sorted(representative_field)[0]
jinja_context = {
'kernel': main_kernel_info,
'namespace': namespace,
'class_name': class_name,
'target': create_kernel_params.get("target", "cpu"),
'field': representative_field,
'headers': get_headers(ast),
'ghost_layers_to_include': ghost_layers_to_include,
'inner_outer_split': inner_outer_split
}
header = env.get_template("Sweep.tmpl.h").render(**jinja_context)
source = env.get_template("Sweep.tmpl.cpp").render(**jinja_context)
source_extension = "cpp" if create_kernel_params.get("target", "cpu") == "cpu" else "cu"
generation_context.write_file("{}.h".format(class_name), header)
......
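For context, a minimal generation-script sketch (not part of this commit) of how `generate_sweep` might be called once the two template variants are unified; the fields, the Jacobi-style update rule, and the class name `MySweep` are made up purely for illustration.

```python
import pystencils as ps
from pystencils_walberla import CodeGeneration, generate_sweep

with CodeGeneration() as ctx:
    # Hypothetical fields and update rule, only to illustrate the call.
    src, dst = ps.fields("src, dst: double[3D]", layout="fzyx")
    update = [ps.Assignment(dst.center,
                            (src[1, 0, 0] + src[-1, 0, 0] +
                             src[0, 1, 0] + src[0, -1, 0] +
                             src[0, 0, 1] + src[0, 0, -1]) / 6)]

    # With inner_outer_split enabled, the single Sweep template below also
    # emits the inner()/outer() methods; otherwise only operator() is generated.
    generate_sweep(ctx, 'MySweep', update,
                   field_swaps=[(src, dst)],
                   inner_outer_split=True)
```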
@@ -83,6 +83,69 @@ void {{class_name}}::runOnCellInterval( const shared_ptr<StructuredBlockStorage>
{{kernel|generate_swaps|indent(4)}}
}
{%if inner_outer_split%}
void {{class_name}}::inner( IBlock * block{%if target is equalto 'gpu'%} , cudaStream_t stream{% endif %} )
{
{{kernel|generate_block_data_to_field_extraction|indent(4)}}
CellInterval inner = {{field}}->xyzSize();
inner.expand(Cell(-outerWidth_[0], -outerWidth_[1], -outerWidth_[2]));
{{kernel|generate_refs_for_kernel_parameters(prefix='this->', ignore_fields=True)|indent(4) }}
{{kernel|generate_call(stream='stream', cell_interval='inner')|indent(4)}}
}
void {{class_name}}::outer( IBlock * block{%if target is equalto 'gpu'%} , cudaStream_t stream {% endif %} )
{
{{kernel|generate_block_data_to_field_extraction|indent(4)}}
if( layers_.size() == 0 )
{
CellInterval ci;
{{field}}->getSliceBeforeGhostLayer(stencil::T, ci, outerWidth_[2], false);
layers_.push_back(ci);
{{field}}->getSliceBeforeGhostLayer(stencil::B, ci, outerWidth_[2], false);
layers_.push_back(ci);
{{field}}->getSliceBeforeGhostLayer(stencil::N, ci, outerWidth_[1], false);
ci.expand(Cell(0, 0, -outerWidth_[2]));
layers_.push_back(ci);
{{field}}->getSliceBeforeGhostLayer(stencil::S, ci, outerWidth_[1], false);
ci.expand(Cell(0, 0, -outerWidth_[2]));
layers_.push_back(ci);
{{field}}->getSliceBeforeGhostLayer(stencil::E, ci, outerWidth_[0], false);
ci.expand(Cell(0, -outerWidth_[1], -outerWidth_[2]));
layers_.push_back(ci);
{{field}}->getSliceBeforeGhostLayer(stencil::W, ci, outerWidth_[0], false);
ci.expand(Cell(0, -outerWidth_[1], -outerWidth_[2]));
layers_.push_back(ci);
}
{%if target is equalto 'gpu'%}
{
auto parallelSection_ = parallelStreams_.parallelSection( stream );
for( auto & ci: layers_ )
{
parallelSection_.run([&]( auto s ) {
{{kernel|generate_refs_for_kernel_parameters(prefix='this->', ignore_fields=True)|indent(4) }}
{{kernel|generate_call(stream='s', cell_interval='ci')|indent(16)}}
});
}
}
{% else %}
for( auto & ci: layers_ )
{
{{kernel|generate_refs_for_kernel_parameters(prefix='this->', ignore_fields=True)|indent(8) }}
{{kernel|generate_call(cell_interval='ci')|indent(8)}}
}
{% endif %}
{{kernel|generate_swaps|indent(4)}}
}
{% endif %}
} // namespace {{namespace}}
} // namespace walberla
......
@@ -24,6 +24,9 @@
#include "field/GhostLayerField.h"
{%- elif target is equalto 'gpu' -%}
#include "cuda/GPUField.h"
{% if inner_outer_split -%}
#include "cuda/ParallelStreams.h"
{%- endif %}
{%- endif %}
#include "field/SwapableCompare.h"
#include "domain_decomposition/BlockDataID.h"
@@ -42,6 +45,7 @@
#if ( defined WALBERLA_CXX_COMPILER_IS_GNU ) || ( defined WALBERLA_CXX_COMPILER_IS_CLANG )
# pragma GCC diagnostic push
# pragma GCC diagnostic ignored "-Wunused-parameter"
# pragma GCC diagnostic ignored "-Wreorder"
#endif
namespace walberla {
@@ -51,8 +55,8 @@ namespace {{namespace}} {
class {{class_name}}
{
public:
{{class_name}}( {{kernel|generate_constructor_parameters}})
: {{ kernel|generate_constructor_initializer_list }}
{{class_name}}( {{kernel|generate_constructor_parameters}} {%if inner_outer_split%}, const Cell & outerWidth=Cell(1, 1, 1){% endif %})
: {{ kernel|generate_constructor_initializer_list }}{%if inner_outer_split%}{% if kernel|generate_constructor_initializer_list|length %},{% endif %} outerWidth_(outerWidth){% endif %}
{};
{{ kernel| generate_destructor(class_name) |indent(4) }}
@@ -79,7 +83,29 @@ public:
};
}
{% if inner_outer_split %}
void inner( IBlock * block{%if target is equalto 'gpu'%} , cudaStream_t stream = nullptr{% endif %} );
void outer( IBlock * block{%if target is equalto 'gpu'%} , cudaStream_t stream = nullptr{% endif %} );
void setOuterPriority(int priority ) {
{%if target is equalto 'gpu'%}
parallelStreams_.setStreamPriority(priority);
{%endif%}
}
{{kernel|generate_members|indent(4)}}
private:
{%if target is equalto 'gpu'%}
cuda::ParallelStreams parallelStreams_;
{% endif %}
Cell outerWidth_;
std::vector<CellInterval> layers_;
{%- else -%}
{{ kernel|generate_members|indent(4) }}
{% endif -%}
};
......
//======================================================================================================================
//
// This file is part of waLBerla. waLBerla is free software: you can
// redistribute it and/or modify it under the terms of the GNU General Public
// License as published by the Free Software Foundation, either version 3 of
// the License, or (at your option) any later version.
//
// waLBerla is distributed in the hope that it will be useful, but WITHOUT
// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
// for more details.
//
// You should have received a copy of the GNU General Public License along
// with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
//
//! \\file {{class_name}}.cpp
//! \\ingroup lbm
//! \\author lbmpy
//======================================================================================================================
#include <cmath>
#include "core/DataTypes.h"
#include "core/Macros.h"
#include "{{class_name}}.h"
{% for header in headers %}
#include {{header}}
{% endfor %}
{% if target is equalto 'cpu' -%}
#define FUNC_PREFIX
{%- elif target is equalto 'gpu' -%}
#define FUNC_PREFIX __global__
{%- endif %}
#if ( defined WALBERLA_CXX_COMPILER_IS_GNU ) || ( defined WALBERLA_CXX_COMPILER_IS_CLANG )
# pragma GCC diagnostic push
# pragma GCC diagnostic ignored "-Wfloat-equal"
# pragma GCC diagnostic ignored "-Wshadow"
# pragma GCC diagnostic ignored "-Wconversion"
#endif
using namespace std;
namespace walberla {
namespace {{namespace}} {
{{kernel|generate_definition(target)}}
void {{class_name}}::operator() ( IBlock * block{%if target is equalto 'gpu'%} , cudaStream_t stream{% endif %} )
{
{{kernel|generate_block_data_to_field_extraction|indent(4)}}
{{kernel|generate_refs_for_kernel_parameters(prefix='this->', ignore_fields=True)|indent(4) }}
{{kernel|generate_call(stream='stream')|indent(4)}}
{{kernel|generate_swaps|indent(4)}}
}
void {{class_name}}::runOnCellInterval( const shared_ptr<StructuredBlockStorage> & blocks,
const CellInterval & globalCellInterval,
cell_idx_t ghostLayers,
IBlock * block{%if target is equalto 'gpu'%} , cudaStream_t stream{% endif %} )
{
CellInterval ci = globalCellInterval;
CellInterval blockBB = blocks->getBlockCellBB( *block);
blockBB.expand( ghostLayers );
ci.intersect( blockBB );
blocks->transformGlobalToBlockLocalCellInterval( ci, *block );
if( ci.empty() )
return;
{{kernel|generate_block_data_to_field_extraction|indent(4)}}
{{kernel|generate_refs_for_kernel_parameters(prefix='this->', ignore_fields=True)|indent(4) }}
{{kernel|generate_call(stream='stream', cell_interval='ci')|indent(4)}}
{{kernel|generate_swaps|indent(4)}}
}
void {{class_name}}::inner( IBlock * block{%if target is equalto 'gpu'%} , cudaStream_t stream{% endif %} )
{
{{kernel|generate_block_data_to_field_extraction|indent(4)}}
CellInterval inner = {{field}}->xyzSize();
inner.expand(Cell(-outerWidth_[0], -outerWidth_[1], -outerWidth_[2]));
{{kernel|generate_refs_for_kernel_parameters(prefix='this->', ignore_fields=True)|indent(4) }}
{{kernel|generate_call(stream='stream', cell_interval='inner')|indent(4)}}
}
void {{class_name}}::outer( IBlock * block{%if target is equalto 'gpu'%} , cudaStream_t stream {% endif %} )
{
{{kernel|generate_block_data_to_field_extraction|indent(4)}}
if( layers_.size() == 0 )
{
CellInterval ci;
{{field}}->getSliceBeforeGhostLayer(stencil::T, ci, outerWidth_[2], false);
layers_.push_back(ci);
{{field}}->getSliceBeforeGhostLayer(stencil::B, ci, outerWidth_[2], false);
layers_.push_back(ci);
{{field}}->getSliceBeforeGhostLayer(stencil::N, ci, outerWidth_[1], false);
ci.expand(Cell(0, 0, -outerWidth_[2]));
layers_.push_back(ci);
{{field}}->getSliceBeforeGhostLayer(stencil::S, ci, outerWidth_[1], false);
ci.expand(Cell(0, 0, -outerWidth_[2]));
layers_.push_back(ci);
{{field}}->getSliceBeforeGhostLayer(stencil::E, ci, outerWidth_[0], false);
ci.expand(Cell(0, -outerWidth_[1], -outerWidth_[2]));
layers_.push_back(ci);
{{field}}->getSliceBeforeGhostLayer(stencil::W, ci, outerWidth_[0], false);
ci.expand(Cell(0, -outerWidth_[1], -outerWidth_[2]));
layers_.push_back(ci);
}
{%if target is equalto 'gpu'%}
{
auto parallelSection_ = parallelStreams_.parallelSection( stream );
for( auto & ci: layers_ )
{
parallelSection_.run([&]( auto s ) {
{{kernel|generate_refs_for_kernel_parameters(prefix='this->', ignore_fields=True)|indent(4) }}
{{kernel|generate_call(stream='s', cell_interval='ci')|indent(16)}}
});
}
}
{% else %}
for( auto & ci: layers_ )
{
{{kernel|generate_refs_for_kernel_parameters(prefix='this->', ignore_fields=True)|indent(8) }}
{{kernel|generate_call(cell_interval='ci')|indent(8)}}
}
{% endif %}
{{kernel|generate_swaps|indent(4)}}
}
} // namespace {{namespace}}
} // namespace walberla
#if ( defined WALBERLA_CXX_COMPILER_IS_GNU ) || ( defined WALBERLA_CXX_COMPILER_IS_CLANG )
# pragma GCC diagnostic pop
#endif
//======================================================================================================================
//
// This file is part of waLBerla. waLBerla is free software: you can
// redistribute it and/or modify it under the terms of the GNU General Public
// License as published by the Free Software Foundation, either version 3 of
// the License, or (at your option) any later version.
//
// waLBerla is distributed in the hope that it will be useful, but WITHOUT
// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
// for more details.
//
// You should have received a copy of the GNU General Public License along
// with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
//
//! \\file {{class_name}}.h
//! \\author pystencils
//======================================================================================================================
#pragma once
#include "core/DataTypes.h"
{% if target is equalto 'cpu' -%}
#include "field/GhostLayerField.h"
{%- elif target is equalto 'gpu' -%}
#include "cuda/GPUField.h"
#include "cuda/ParallelStreams.h"
{%- endif %}
#include "field/SwapableCompare.h"
#include "domain_decomposition/BlockDataID.h"
#include "domain_decomposition/IBlock.h"
#include "domain_decomposition/StructuredBlockStorage.h"
#include <set>
#ifdef __GNUC__
#define RESTRICT __restrict__
#elif _MSC_VER
#define RESTRICT __restrict
#else
#define RESTRICT
#endif
#if ( defined WALBERLA_CXX_COMPILER_IS_GNU ) || ( defined WALBERLA_CXX_COMPILER_IS_CLANG )
# pragma GCC diagnostic push
# pragma GCC diagnostic ignored "-Wunused-parameter"
# pragma GCC diagnostic ignored "-Wreorder"
#endif
namespace walberla {
namespace {{namespace}} {
class {{class_name}}
{
public:
{{class_name}}( {{kernel|generate_constructor_parameters}}, const Cell & outerWidth=Cell(1, 1, 1))
: {{ kernel|generate_constructor_initializer_list }}{% if kernel|generate_constructor_initializer_list|length %},{% endif %} outerWidth_(outerWidth)
{};
{{ kernel| generate_destructor(class_name) |indent(4) }}
void operator() ( IBlock * block{%if target is equalto 'gpu'%} , cudaStream_t stream = nullptr{% endif %} );
void runOnCellInterval(const shared_ptr<StructuredBlockStorage> & blocks,
const CellInterval & globalCellInterval, cell_idx_t ghostLayers, IBlock * block
{%if target is equalto 'gpu'%} , cudaStream_t stream = nullptr{% endif %});
static std::function<void (IBlock*)> getSweep(const shared_ptr<{{class_name}}> & kernel) {
return [kernel](IBlock * b) { (*kernel)(b); };
}
static std::function<void (IBlock*{%if target is equalto 'gpu'%} , cudaStream_t {% endif %})>
getSweepOnCellInterval(const shared_ptr<{{class_name}}> & kernel,
const shared_ptr<StructuredBlockStorage> & blocks,
const CellInterval & globalCellInterval,
cell_idx_t ghostLayers=1 )
{
return [kernel, blocks, globalCellInterval, ghostLayers] (IBlock * b{%if target is equalto 'gpu'%} , cudaStream_t stream = nullptr{% endif %}) {
kernel->runOnCellInterval(blocks, globalCellInterval, ghostLayers, b{%if target is equalto 'gpu'%}, stream {% endif %});
};
}
void inner( IBlock * block{%if target is equalto 'gpu'%} , cudaStream_t stream = nullptr{% endif %} );
void outer( IBlock * block{%if target is equalto 'gpu'%} , cudaStream_t stream = nullptr{% endif %} );
void setOuterPriority(int priority ) {
{%if target is equalto 'gpu'%}
parallelStreams_.setStreamPriority(priority);
{%endif%}
}
{{kernel|generate_members|indent(4)}}
private:
{%if target is equalto 'gpu'%}
cuda::ParallelStreams parallelStreams_;
{% endif %}
Cell outerWidth_;
std::vector<CellInterval> layers_;
};
} // namespace {{namespace}}
} // namespace walberla
#if ( defined WALBERLA_CXX_COMPILER_IS_GNU ) || ( defined WALBERLA_CXX_COMPILER_IS_CLANG )
# pragma GCC diagnostic pop
#endif