diff --git a/sweep.py b/sweep.py index 87354a2184b74472b9a62ebb9b61de41fc294cc8..5b0e2aa252e579f316fd862cfdd27818b49e5bb1 100644 --- a/sweep.py +++ b/sweep.py @@ -61,14 +61,54 @@ class Sweep: def generate_from_equations(name, function_returning_assignments, temporary_fields=[], field_swaps=[], namespace="pystencils", target='cpu', optimization={}, staggered=False, varying_parameters=[], **kwargs): - from pystencils_walberla.cmake_integration import codegen + cb = functools.partial(Sweep._generate_header_and_source, function_returning_assignments, name, target, namespace, temporary_fields, field_swaps, optimization=optimization, staggered=staggered, varying_parameters=varying_parameters, **kwargs) + file_names = [name + ".h", name + ('.cpp' if target == 'cpu' else '.cu')] + from pystencils_walberla.cmake_integration import codegen codegen.register(file_names, cb) + @staticmethod + def generate_inner_outer_kernel(name, function_returning_assignments, temporary_fields=[], field_swaps=[], + namespace="pystencils", target='cpu', optimization={}, outer_optimization={}, + varying_parameters=[], **kwargs): + def generate_callback(): + eqs = function_returning_assignments(**kwargs) + + ast = create_kernel(eqs, target=target, **optimization) + ast.function_name = name + + outer_kernels = {} + for dir_str in ('T', 'B', 'N', 'S', 'E', 'W'): + outer_kernel = create_kernel(eqs, target=target, **outer_optimization) + outer_kernel.function_name = name + "_" + dir_str + outer_kernels[dir_str] = KernelInfo(outer_kernel, temporary_fields, field_swaps, varying_parameters) + + env = Environment(loader=PackageLoader('pystencils_walberla')) + add_pystencils_filters_to_jinja_env(env) + + representative_field = {p.field_name for p in ast.parameters if p.is_field_argument}.pop() + + context = { + 'kernel': KernelInfo(ast, temporary_fields, field_swaps, varying_parameters), + 'namespace': namespace, + 'class_name': ast.function_name[0].upper() + ast.function_name[1:], + 'target': target, + 'field': representative_field, + 'outer_kernels': outer_kernels, + } + + header = env.get_template("SweepInnerOuter.tmpl.h").render(**context) + source = env.get_template("SweepInnerOuter.tmpl.cpp").render(**context) + return header, source + + file_names = [name + ".h", name + ('.cpp' if target == 'cpu' else '.cu')] + from pystencils_walberla.cmake_integration import codegen + codegen.register(file_names, generate_callback) + @staticmethod def generate_pack_info(name, function_returning_assignments, target='gpu', **kwargs): from pystencils_walberla.cmake_integration import codegen @@ -81,11 +121,12 @@ class Sweep: file_names = [name + ".h", name + ('.cpp' if target == 'cpu' else '.cu')] codegen.register(file_names, callback) + @staticmethod - def _generate_header_and_source(function_returning_equations, name, target, namespace, + def _generate_header_and_source(function_returning_assignments, name, target, namespace, temporary_fields, field_swaps, optimization, staggered, varying_parameters, **kwargs): - eqs = function_returning_equations(**kwargs) + eqs = function_returning_assignments(**kwargs) if not staggered: ast = create_kernel(eqs, target=target, **optimization) @@ -96,16 +137,14 @@ class Sweep: env = Environment(loader=PackageLoader('pystencils_walberla')) add_pystencils_filters_to_jinja_env(env) - representative_field = {p.field_name for p in ast.parameters if p.is_field_argument}.pop() - context = { 'kernel': KernelInfo(ast, temporary_fields, field_swaps, varying_parameters), 'namespace': namespace, 'class_name': ast.function_name[0].upper() + ast.function_name[1:], 'target': target, - 'field': representative_field, } header = env.get_template("Sweep.tmpl.h").render(**context) source = env.get_template("Sweep.tmpl.cpp").render(**context) return header, source + diff --git a/templates/Sweep.tmpl.cpp b/templates/Sweep.tmpl.cpp index ddb43db7a81d32f51c736d18a96dcf8b989be23d..4dc0b53ace664bf94e380e0578ea38046a8a0cd9 100644 --- a/templates/Sweep.tmpl.cpp +++ b/templates/Sweep.tmpl.cpp @@ -45,7 +45,6 @@ namespace {{namespace}} { {{kernel|generate_definition}} - void {{class_name}}::operator() ( IBlock * block ) { {{kernel|generate_block_data_to_field_extraction|indent(4)}} @@ -53,70 +52,6 @@ void {{class_name}}::operator() ( IBlock * block ) {{kernel|generate_swaps|indent(4)}} } - - -void {{class_name}}::inner( IBlock * block ) -{ - {{kernel|generate_block_data_to_field_extraction|indent(4)}} - - CellInterval inner = {{field}}->xyzSize(); - inner.expand(-1); - - {{kernel|generate_call(stream='stream_', cell_interval='inner')|indent(4)}} -} - - -void {{class_name}}::outer( IBlock * block ) -{ - static std::vector<CellInterval> layers; - {%if target is equalto 'gpu'%} - static std::vector<cudaStream_t> streams; - {% endif %} - - {{kernel|generate_block_data_to_field_extraction|indent(4)}} - - if( layers.size() == 0 ) - { - CellInterval ci; - - {{field}}->getSliceBeforeGhostLayer(stencil::T, ci, 1, false); - layers.push_back(ci); - {{field}}->getSliceBeforeGhostLayer(stencil::B, ci, 1, false); - layers.push_back(ci); - - {{field}}->getSliceBeforeGhostLayer(stencil::N, ci, 1, false); - ci.expand(Cell(0, 0, -1)); - layers.push_back(ci); - {{field}}->getSliceBeforeGhostLayer(stencil::S, ci, 1, false); - ci.expand(Cell(0, 0, -1)); - layers.push_back(ci); - - {{field}}->getSliceBeforeGhostLayer(stencil::E, ci, 1, false); - ci.expand(Cell(0, -1, -1)); - layers.push_back(ci); - {{field}}->getSliceBeforeGhostLayer(stencil::W, ci, 1, false); - ci.expand(Cell(0, -1, -1)); - layers.push_back(ci); - - {%if target is equalto 'gpu'%} - for( int i=0; i < layers.size(); ++i ) - { - streams.push_back(cudaStream_t()); - WALBERLA_CUDA_CHECK( cudaStreamCreate(&streams.back() ) ); - } - {% endif %} - } - - for( int i=0; i < layers.size(); ++i ) - { - {{kernel|generate_call(stream='streams[i]', cell_interval="layers[i]")|indent(8)}} - WALBERLA_CUDA_CHECK(cudaStreamSynchronize(streams[i])); // TODO move out when no memcpy is needed to setup call - } - - {{kernel|generate_swaps|indent(4)}} -} - - } // namespace {{namespace}} } // namespace walberla diff --git a/templates/Sweep.tmpl.h b/templates/Sweep.tmpl.h index bb687f9a4cbd20ea8003e73f7ee9be03ce085e52..8f976215a36075fd9e16bb54019f6a0fc642a8cf 100644 --- a/templates/Sweep.tmpl.h +++ b/templates/Sweep.tmpl.h @@ -55,10 +55,6 @@ public: {}; void operator() ( IBlock * block ); - - void inner( IBlock * block ); - void outer( IBlock * block ); - private: {{kernel|generate_members|indent(4)}} {%if target is equalto 'gpu'%} diff --git a/templates/SweepInnerOuter.tmpl.cpp b/templates/SweepInnerOuter.tmpl.cpp new file mode 100644 index 0000000000000000000000000000000000000000..4f12d3629a25e407630bf8a533423af472d90ac1 --- /dev/null +++ b/templates/SweepInnerOuter.tmpl.cpp @@ -0,0 +1,134 @@ +//====================================================================================================================== +// +// This file is part of waLBerla. waLBerla is free software: you can +// redistribute it and/or modify it under the terms of the GNU General Public +// License as published by the Free Software Foundation, either version 3 of +// the License, or (at your option) any later version. +// +// waLBerla is distributed in the hope that it will be useful, but WITHOUT +// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +// for more details. +// +// You should have received a copy of the GNU General Public License along +// with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>. +// +//! \\file {{className}}.cpp +//! \\ingroup lbm +//! \\author lbmpy +//====================================================================================================================== + +#include <cmath> + +#include "core/DataTypes.h" +#include "core/Macros.h" +#include "{{class_name}}.h" + + +{% if target is equalto 'cpu' -%} +#define FUNC_PREFIX +{%- elif target is equalto 'gpu' -%} +#define FUNC_PREFIX __global__ +{%- endif %} + +#if ( defined WALBERLA_CXX_COMPILER_IS_GNU ) || ( defined WALBERLA_CXX_COMPILER_IS_CLANG ) +# pragma GCC diagnostic push +# pragma GCC diagnostic ignored "-Wfloat-equal" +# pragma GCC diagnostic ignored "-Wshadow" +# pragma GCC diagnostic ignored "-Wconversion" +#endif + +using namespace std; + +namespace walberla { +namespace {{namespace}} { + +{{kernel|generate_definition}} + +{% for outer_kernel in outer_kernels.values() %} +{{outer_kernel|generate_definition}} +{% endfor %} + + +void {{class_name}}::operator() ( IBlock * block ) +{ + {{kernel|generate_block_data_to_field_extraction|indent(4)}} + {{kernel|generate_call(stream='stream_')|indent(4)}} + {{kernel|generate_swaps|indent(4)}} +} + + + +void {{class_name}}::inner( IBlock * block ) +{ + {{kernel|generate_block_data_to_field_extraction|indent(4)}} + + CellInterval inner = {{field}}->xyzSize(); + inner.expand(-1); + + {{kernel|generate_call(stream='stream_', cell_interval='inner')|indent(4)}} +} + + +void {{class_name}}::outer( IBlock * block ) +{ + static std::vector<CellInterval> layers; + {%if target is equalto 'gpu'%} + static std::vector<cudaStream_t> streams; + {% endif %} + + {{kernel|generate_block_data_to_field_extraction|indent(4)}} + + if( layers.size() == 0 ) + { + CellInterval ci; + + {{field}}->getSliceBeforeGhostLayer(stencil::T, ci, 1, false); + layers.push_back(ci); + {{field}}->getSliceBeforeGhostLayer(stencil::B, ci, 1, false); + layers.push_back(ci); + + {{field}}->getSliceBeforeGhostLayer(stencil::N, ci, 1, false); + ci.expand(Cell(0, 0, -1)); + layers.push_back(ci); + {{field}}->getSliceBeforeGhostLayer(stencil::S, ci, 1, false); + ci.expand(Cell(0, 0, -1)); + layers.push_back(ci); + + {{field}}->getSliceBeforeGhostLayer(stencil::E, ci, 1, false); + ci.expand(Cell(0, -1, -1)); + layers.push_back(ci); + {{field}}->getSliceBeforeGhostLayer(stencil::W, ci, 1, false); + ci.expand(Cell(0, -1, -1)); + layers.push_back(ci); + + {%if target is equalto 'gpu'%} + for( int i=0; i < layers.size(); ++i ) + { + streams.push_back(cudaStream_t()); + WALBERLA_CUDA_CHECK( cudaStreamCreateWithPriority(&streams.back(), cudaStreamDefault, -1) ); + } + {% endif %} + } + + { {{outer_kernels['W']|generate_call(stream='streams[5]', cell_interval="layers[5]")|indent(4)}} } + { {{outer_kernels['E']|generate_call(stream='streams[4]', cell_interval="layers[4]")|indent(4)}} } + { {{outer_kernels['S']|generate_call(stream='streams[3]', cell_interval="layers[3]")|indent(4)}} } + { {{outer_kernels['N']|generate_call(stream='streams[2]', cell_interval="layers[2]")|indent(4)}} } + { {{outer_kernels['B']|generate_call(stream='streams[1]', cell_interval="layers[1]")|indent(4)}} } + { {{outer_kernels['T']|generate_call(stream='streams[0]', cell_interval="layers[0]")|indent(4)}} } + + for(int i=0; i < layers.size(); ++i ) + WALBERLA_CUDA_CHECK( cudaStreamSynchronize(streams[i]) ); + + {{kernel|generate_swaps|indent(4)}} +} + + +} // namespace {{namespace}} +} // namespace walberla + + +#if ( defined WALBERLA_CXX_COMPILER_IS_GNU ) || ( defined WALBERLA_CXX_COMPILER_IS_CLANG ) +# pragma GCC diagnostic pop +#endif diff --git a/templates/SweepInnerOuter.tmpl.h b/templates/SweepInnerOuter.tmpl.h new file mode 100644 index 0000000000000000000000000000000000000000..bb687f9a4cbd20ea8003e73f7ee9be03ce085e52 --- /dev/null +++ b/templates/SweepInnerOuter.tmpl.h @@ -0,0 +1,76 @@ +//====================================================================================================================== +// +// This file is part of waLBerla. waLBerla is free software: you can +// redistribute it and/or modify it under the terms of the GNU General Public +// License as published by the Free Software Foundation, either version 3 of +// the License, or (at your option) any later version. +// +// waLBerla is distributed in the hope that it will be useful, but WITHOUT +// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +// for more details. +// +// You should have received a copy of the GNU General Public License along +// with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>. +// +//! \\file {{class_name}}.h +//! \\author pystencils +//====================================================================================================================== + +#include "core/DataTypes.h" + +{% if target is equalto 'cpu' -%} +#include "field/GhostLayerField.h" +{%- elif target is equalto 'gpu' -%} +#include "cuda/GPUField.h" +{%- endif %} +#include "field/SwapableCompare.h" +#include "domain_decomposition/BlockDataID.h" +#include "domain_decomposition/IBlock.h" + +#include <set> + +#ifdef __GNUC__ +#define RESTRICT __restrict__ +#elif _MSC_VER +#define RESTRICT __restrict +#else +#define RESTRICT +#endif + +#if ( defined WALBERLA_CXX_COMPILER_IS_GNU ) || ( defined WALBERLA_CXX_COMPILER_IS_CLANG ) +# pragma GCC diagnostic push +# pragma GCC diagnostic ignored "-Wunused-parameter" +#endif + +namespace walberla { +namespace {{namespace}} { + + +class {{class_name}} +{ +public: + {{class_name}}( {{kernel|generate_constructor_parameters}}{%if target is equalto 'gpu'%} , cudaStream_t stream = 0{% endif %}) + : {{ kernel|generate_constructor_initializer_list }}, stream_(stream) + {}; + + void operator() ( IBlock * block ); + + void inner( IBlock * block ); + void outer( IBlock * block ); + +private: + {{kernel|generate_members|indent(4)}} + {%if target is equalto 'gpu'%} + cudaStream_t stream_; + {% endif %} +}; + + +} // namespace {{namespace}} +} // namespace walberla + + +#if ( defined WALBERLA_CXX_COMPILER_IS_GNU ) || ( defined WALBERLA_CXX_COMPILER_IS_CLANG ) +# pragma GCC diagnostic pop +#endif