Skip to content
Snippets Groups Projects
Commit 0df56746 authored by Martin Bauer's avatar Martin Bauer
Browse files

Pystencils waLBerla generation - splitting of sweep in inner/outer part

parent cd1f52e1
Branches
Tags
No related merge requests found
...@@ -61,14 +61,54 @@ class Sweep: ...@@ -61,14 +61,54 @@ class Sweep:
def generate_from_equations(name, function_returning_assignments, temporary_fields=[], field_swaps=[], def generate_from_equations(name, function_returning_assignments, temporary_fields=[], field_swaps=[],
namespace="pystencils", target='cpu', optimization={}, namespace="pystencils", target='cpu', optimization={},
staggered=False, varying_parameters=[], **kwargs): staggered=False, varying_parameters=[], **kwargs):
from pystencils_walberla.cmake_integration import codegen
cb = functools.partial(Sweep._generate_header_and_source, function_returning_assignments, name, target, cb = functools.partial(Sweep._generate_header_and_source, function_returning_assignments, name, target,
namespace, temporary_fields, field_swaps, namespace, temporary_fields, field_swaps,
optimization=optimization, staggered=staggered, optimization=optimization, staggered=staggered,
varying_parameters=varying_parameters, **kwargs) varying_parameters=varying_parameters, **kwargs)
file_names = [name + ".h", name + ('.cpp' if target == 'cpu' else '.cu')] file_names = [name + ".h", name + ('.cpp' if target == 'cpu' else '.cu')]
from pystencils_walberla.cmake_integration import codegen
codegen.register(file_names, cb) codegen.register(file_names, cb)
@staticmethod
def generate_inner_outer_kernel(name, function_returning_assignments, temporary_fields=[], field_swaps=[],
namespace="pystencils", target='cpu', optimization={}, outer_optimization={},
varying_parameters=[], **kwargs):
def generate_callback():
eqs = function_returning_assignments(**kwargs)
ast = create_kernel(eqs, target=target, **optimization)
ast.function_name = name
outer_kernels = {}
for dir_str in ('T', 'B', 'N', 'S', 'E', 'W'):
outer_kernel = create_kernel(eqs, target=target, **outer_optimization)
outer_kernel.function_name = name + "_" + dir_str
outer_kernels[dir_str] = KernelInfo(outer_kernel, temporary_fields, field_swaps, varying_parameters)
env = Environment(loader=PackageLoader('pystencils_walberla'))
add_pystencils_filters_to_jinja_env(env)
representative_field = {p.field_name for p in ast.parameters if p.is_field_argument}.pop()
context = {
'kernel': KernelInfo(ast, temporary_fields, field_swaps, varying_parameters),
'namespace': namespace,
'class_name': ast.function_name[0].upper() + ast.function_name[1:],
'target': target,
'field': representative_field,
'outer_kernels': outer_kernels,
}
header = env.get_template("SweepInnerOuter.tmpl.h").render(**context)
source = env.get_template("SweepInnerOuter.tmpl.cpp").render(**context)
return header, source
file_names = [name + ".h", name + ('.cpp' if target == 'cpu' else '.cu')]
from pystencils_walberla.cmake_integration import codegen
codegen.register(file_names, generate_callback)
@staticmethod @staticmethod
def generate_pack_info(name, function_returning_assignments, target='gpu', **kwargs): def generate_pack_info(name, function_returning_assignments, target='gpu', **kwargs):
from pystencils_walberla.cmake_integration import codegen from pystencils_walberla.cmake_integration import codegen
...@@ -81,11 +121,12 @@ class Sweep: ...@@ -81,11 +121,12 @@ class Sweep:
file_names = [name + ".h", name + ('.cpp' if target == 'cpu' else '.cu')] file_names = [name + ".h", name + ('.cpp' if target == 'cpu' else '.cu')]
codegen.register(file_names, callback) codegen.register(file_names, callback)
@staticmethod @staticmethod
def _generate_header_and_source(function_returning_equations, name, target, namespace, def _generate_header_and_source(function_returning_assignments, name, target, namespace,
temporary_fields, field_swaps, optimization, staggered, temporary_fields, field_swaps, optimization, staggered,
varying_parameters, **kwargs): varying_parameters, **kwargs):
eqs = function_returning_equations(**kwargs) eqs = function_returning_assignments(**kwargs)
if not staggered: if not staggered:
ast = create_kernel(eqs, target=target, **optimization) ast = create_kernel(eqs, target=target, **optimization)
...@@ -96,16 +137,14 @@ class Sweep: ...@@ -96,16 +137,14 @@ class Sweep:
env = Environment(loader=PackageLoader('pystencils_walberla')) env = Environment(loader=PackageLoader('pystencils_walberla'))
add_pystencils_filters_to_jinja_env(env) add_pystencils_filters_to_jinja_env(env)
representative_field = {p.field_name for p in ast.parameters if p.is_field_argument}.pop()
context = { context = {
'kernel': KernelInfo(ast, temporary_fields, field_swaps, varying_parameters), 'kernel': KernelInfo(ast, temporary_fields, field_swaps, varying_parameters),
'namespace': namespace, 'namespace': namespace,
'class_name': ast.function_name[0].upper() + ast.function_name[1:], 'class_name': ast.function_name[0].upper() + ast.function_name[1:],
'target': target, 'target': target,
'field': representative_field,
} }
header = env.get_template("Sweep.tmpl.h").render(**context) header = env.get_template("Sweep.tmpl.h").render(**context)
source = env.get_template("Sweep.tmpl.cpp").render(**context) source = env.get_template("Sweep.tmpl.cpp").render(**context)
return header, source return header, source
...@@ -45,7 +45,6 @@ namespace {{namespace}} { ...@@ -45,7 +45,6 @@ namespace {{namespace}} {
{{kernel|generate_definition}} {{kernel|generate_definition}}
void {{class_name}}::operator() ( IBlock * block ) void {{class_name}}::operator() ( IBlock * block )
{ {
{{kernel|generate_block_data_to_field_extraction|indent(4)}} {{kernel|generate_block_data_to_field_extraction|indent(4)}}
...@@ -53,70 +52,6 @@ void {{class_name}}::operator() ( IBlock * block ) ...@@ -53,70 +52,6 @@ void {{class_name}}::operator() ( IBlock * block )
{{kernel|generate_swaps|indent(4)}} {{kernel|generate_swaps|indent(4)}}
} }
void {{class_name}}::inner( IBlock * block )
{
{{kernel|generate_block_data_to_field_extraction|indent(4)}}
CellInterval inner = {{field}}->xyzSize();
inner.expand(-1);
{{kernel|generate_call(stream='stream_', cell_interval='inner')|indent(4)}}
}
void {{class_name}}::outer( IBlock * block )
{
static std::vector<CellInterval> layers;
{%if target is equalto 'gpu'%}
static std::vector<cudaStream_t> streams;
{% endif %}
{{kernel|generate_block_data_to_field_extraction|indent(4)}}
if( layers.size() == 0 )
{
CellInterval ci;
{{field}}->getSliceBeforeGhostLayer(stencil::T, ci, 1, false);
layers.push_back(ci);
{{field}}->getSliceBeforeGhostLayer(stencil::B, ci, 1, false);
layers.push_back(ci);
{{field}}->getSliceBeforeGhostLayer(stencil::N, ci, 1, false);
ci.expand(Cell(0, 0, -1));
layers.push_back(ci);
{{field}}->getSliceBeforeGhostLayer(stencil::S, ci, 1, false);
ci.expand(Cell(0, 0, -1));
layers.push_back(ci);
{{field}}->getSliceBeforeGhostLayer(stencil::E, ci, 1, false);
ci.expand(Cell(0, -1, -1));
layers.push_back(ci);
{{field}}->getSliceBeforeGhostLayer(stencil::W, ci, 1, false);
ci.expand(Cell(0, -1, -1));
layers.push_back(ci);
{%if target is equalto 'gpu'%}
for( int i=0; i < layers.size(); ++i )
{
streams.push_back(cudaStream_t());
WALBERLA_CUDA_CHECK( cudaStreamCreate(&streams.back() ) );
}
{% endif %}
}
for( int i=0; i < layers.size(); ++i )
{
{{kernel|generate_call(stream='streams[i]', cell_interval="layers[i]")|indent(8)}}
WALBERLA_CUDA_CHECK(cudaStreamSynchronize(streams[i])); // TODO move out when no memcpy is needed to setup call
}
{{kernel|generate_swaps|indent(4)}}
}
} // namespace {{namespace}} } // namespace {{namespace}}
} // namespace walberla } // namespace walberla
......
...@@ -55,10 +55,6 @@ public: ...@@ -55,10 +55,6 @@ public:
{}; {};
void operator() ( IBlock * block ); void operator() ( IBlock * block );
void inner( IBlock * block );
void outer( IBlock * block );
private: private:
{{kernel|generate_members|indent(4)}} {{kernel|generate_members|indent(4)}}
{%if target is equalto 'gpu'%} {%if target is equalto 'gpu'%}
......
//======================================================================================================================
//
// This file is part of waLBerla. waLBerla is free software: you can
// redistribute it and/or modify it under the terms of the GNU General Public
// License as published by the Free Software Foundation, either version 3 of
// the License, or (at your option) any later version.
//
// waLBerla is distributed in the hope that it will be useful, but WITHOUT
// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
// for more details.
//
// You should have received a copy of the GNU General Public License along
// with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
//
//! \\file {{className}}.cpp
//! \\ingroup lbm
//! \\author lbmpy
//======================================================================================================================
#include <cmath>
#include "core/DataTypes.h"
#include "core/Macros.h"
#include "{{class_name}}.h"
{% if target is equalto 'cpu' -%}
#define FUNC_PREFIX
{%- elif target is equalto 'gpu' -%}
#define FUNC_PREFIX __global__
{%- endif %}
#if ( defined WALBERLA_CXX_COMPILER_IS_GNU ) || ( defined WALBERLA_CXX_COMPILER_IS_CLANG )
# pragma GCC diagnostic push
# pragma GCC diagnostic ignored "-Wfloat-equal"
# pragma GCC diagnostic ignored "-Wshadow"
# pragma GCC diagnostic ignored "-Wconversion"
#endif
using namespace std;
namespace walberla {
namespace {{namespace}} {
{{kernel|generate_definition}}
{% for outer_kernel in outer_kernels.values() %}
{{outer_kernel|generate_definition}}
{% endfor %}
void {{class_name}}::operator() ( IBlock * block )
{
{{kernel|generate_block_data_to_field_extraction|indent(4)}}
{{kernel|generate_call(stream='stream_')|indent(4)}}
{{kernel|generate_swaps|indent(4)}}
}
void {{class_name}}::inner( IBlock * block )
{
{{kernel|generate_block_data_to_field_extraction|indent(4)}}
CellInterval inner = {{field}}->xyzSize();
inner.expand(-1);
{{kernel|generate_call(stream='stream_', cell_interval='inner')|indent(4)}}
}
void {{class_name}}::outer( IBlock * block )
{
static std::vector<CellInterval> layers;
{%if target is equalto 'gpu'%}
static std::vector<cudaStream_t> streams;
{% endif %}
{{kernel|generate_block_data_to_field_extraction|indent(4)}}
if( layers.size() == 0 )
{
CellInterval ci;
{{field}}->getSliceBeforeGhostLayer(stencil::T, ci, 1, false);
layers.push_back(ci);
{{field}}->getSliceBeforeGhostLayer(stencil::B, ci, 1, false);
layers.push_back(ci);
{{field}}->getSliceBeforeGhostLayer(stencil::N, ci, 1, false);
ci.expand(Cell(0, 0, -1));
layers.push_back(ci);
{{field}}->getSliceBeforeGhostLayer(stencil::S, ci, 1, false);
ci.expand(Cell(0, 0, -1));
layers.push_back(ci);
{{field}}->getSliceBeforeGhostLayer(stencil::E, ci, 1, false);
ci.expand(Cell(0, -1, -1));
layers.push_back(ci);
{{field}}->getSliceBeforeGhostLayer(stencil::W, ci, 1, false);
ci.expand(Cell(0, -1, -1));
layers.push_back(ci);
{%if target is equalto 'gpu'%}
for( int i=0; i < layers.size(); ++i )
{
streams.push_back(cudaStream_t());
WALBERLA_CUDA_CHECK( cudaStreamCreateWithPriority(&streams.back(), cudaStreamDefault, -1) );
}
{% endif %}
}
{ {{outer_kernels['W']|generate_call(stream='streams[5]', cell_interval="layers[5]")|indent(4)}} }
{ {{outer_kernels['E']|generate_call(stream='streams[4]', cell_interval="layers[4]")|indent(4)}} }
{ {{outer_kernels['S']|generate_call(stream='streams[3]', cell_interval="layers[3]")|indent(4)}} }
{ {{outer_kernels['N']|generate_call(stream='streams[2]', cell_interval="layers[2]")|indent(4)}} }
{ {{outer_kernels['B']|generate_call(stream='streams[1]', cell_interval="layers[1]")|indent(4)}} }
{ {{outer_kernels['T']|generate_call(stream='streams[0]', cell_interval="layers[0]")|indent(4)}} }
for(int i=0; i < layers.size(); ++i )
WALBERLA_CUDA_CHECK( cudaStreamSynchronize(streams[i]) );
{{kernel|generate_swaps|indent(4)}}
}
} // namespace {{namespace}}
} // namespace walberla
#if ( defined WALBERLA_CXX_COMPILER_IS_GNU ) || ( defined WALBERLA_CXX_COMPILER_IS_CLANG )
# pragma GCC diagnostic pop
#endif
//======================================================================================================================
//
// This file is part of waLBerla. waLBerla is free software: you can
// redistribute it and/or modify it under the terms of the GNU General Public
// License as published by the Free Software Foundation, either version 3 of
// the License, or (at your option) any later version.
//
// waLBerla is distributed in the hope that it will be useful, but WITHOUT
// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
// for more details.
//
// You should have received a copy of the GNU General Public License along
// with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
//
//! \\file {{class_name}}.h
//! \\author pystencils
//======================================================================================================================
#include "core/DataTypes.h"
{% if target is equalto 'cpu' -%}
#include "field/GhostLayerField.h"
{%- elif target is equalto 'gpu' -%}
#include "cuda/GPUField.h"
{%- endif %}
#include "field/SwapableCompare.h"
#include "domain_decomposition/BlockDataID.h"
#include "domain_decomposition/IBlock.h"
#include <set>
#ifdef __GNUC__
#define RESTRICT __restrict__
#elif _MSC_VER
#define RESTRICT __restrict
#else
#define RESTRICT
#endif
#if ( defined WALBERLA_CXX_COMPILER_IS_GNU ) || ( defined WALBERLA_CXX_COMPILER_IS_CLANG )
# pragma GCC diagnostic push
# pragma GCC diagnostic ignored "-Wunused-parameter"
#endif
namespace walberla {
namespace {{namespace}} {
class {{class_name}}
{
public:
{{class_name}}( {{kernel|generate_constructor_parameters}}{%if target is equalto 'gpu'%} , cudaStream_t stream = 0{% endif %})
: {{ kernel|generate_constructor_initializer_list }}, stream_(stream)
{};
void operator() ( IBlock * block );
void inner( IBlock * block );
void outer( IBlock * block );
private:
{{kernel|generate_members|indent(4)}}
{%if target is equalto 'gpu'%}
cudaStream_t stream_;
{% endif %}
};
} // namespace {{namespace}}
} // namespace walberla
#if ( defined WALBERLA_CXX_COMPILER_IS_GNU ) || ( defined WALBERLA_CXX_COMPILER_IS_CLANG )
# pragma GCC diagnostic pop
#endif
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment