Pystencils waLBerla generation - splitting of sweep in inner/outer part

0df56746 · Martin Bauer · cd1f52e1 · 0df56746 · 0df56746 · 0df56746
Commit 0df56746 authored 6 years ago by Martin Bauer
--- a/sweep.py
+++ b/sweep.py
@@ -61,14 +61,54 @@ class Sweep:
    def generate_from_equations(name, function_returning_assignments, temporary_fields=[], field_swaps=[],
                                namespace="pystencils", target='cpu', optimization={},
                                staggered=False, varying_parameters=[], **kwargs):
-        from pystencils_walberla.cmake_integration import codegen
        cb = functools.partial(Sweep._generate_header_and_source, function_returning_assignments, name, target,
                               namespace, temporary_fields, field_swaps,
                               optimization=optimization, staggered=staggered,
                               varying_parameters=varying_parameters, **kwargs)
        file_names = [name + ".h", name + ('.cpp' if target == 'cpu' else '.cu')]
+        from pystencils_walberla.cmake_integration import codegen
        codegen.register(file_names, cb)
+    @staticmethod
+    def generate_inner_outer_kernel(name, function_returning_assignments, temporary_fields=[], field_swaps=[],
+                                    namespace="pystencils", target='cpu', optimization={}, outer_optimization={},
+                                    varying_parameters=[], **kwargs):
+        def generate_callback():
+            eqs = function_returning_assignments(**kwargs)
+            ast = create_kernel(eqs, target=target, **optimization)
+            ast.function_name = name
+            outer_kernels = {}
+            for dir_str in ('T', 'B', 'N', 'S', 'E', 'W'):
+                outer_kernel = create_kernel(eqs, target=target, **outer_optimization)
+                outer_kernel.function_name = name + "_" + dir_str
+                outer_kernels[dir_str] = KernelInfo(outer_kernel, temporary_fields, field_swaps, varying_parameters)
+            env = Environment(loader=PackageLoader('pystencils_walberla'))
+            add_pystencils_filters_to_jinja_env(env)
+            representative_field = {p.field_name for p in ast.parameters if p.is_field_argument}.pop()
+            context = {
+                'kernel': KernelInfo(ast, temporary_fields, field_swaps, varying_parameters),
+                'namespace': namespace,
+                'class_name': ast.function_name[0].upper() + ast.function_name[1:],
+                'target': target,
+                'field': representative_field,
+                'outer_kernels': outer_kernels,
+            }
+            header = env.get_template("SweepInnerOuter.tmpl.h").render(**context)
+            source = env.get_template("SweepInnerOuter.tmpl.cpp").render(**context)
+            return header, source
+        file_names = [name + ".h", name + ('.cpp' if target == 'cpu' else '.cu')]
+        from pystencils_walberla.cmake_integration import codegen
+        codegen.register(file_names, generate_callback)
    @staticmethod
    def generate_pack_info(name, function_returning_assignments, target='gpu', **kwargs):
        from pystencils_walberla.cmake_integration import codegen
@@ -81,11 +121,12 @@ class Sweep:
        file_names = [name + ".h", name + ('.cpp' if target == 'cpu' else '.cu')]
        codegen.register(file_names, callback)
    @staticmethod
-    def _generate_header_and_source(function_returning_equations, name, target, namespace,
+    def _generate_header_and_source(function_returning_assignments, name, target, namespace,
                                    temporary_fields, field_swaps, optimization, staggered,
                                    varying_parameters, **kwargs):
-        eqs = function_returning_equations(**kwargs)
+        eqs = function_returning_assignments(**kwargs)
        if not staggered:
            ast = create_kernel(eqs, target=target, **optimization)
@@ -96,16 +137,14 @@ class Sweep:
        env = Environment(loader=PackageLoader('pystencils_walberla'))
        add_pystencils_filters_to_jinja_env(env)
-        representative_field = {p.field_name for p in ast.parameters if p.is_field_argument}.pop()
        context = {
            'kernel': KernelInfo(ast, temporary_fields, field_swaps, varying_parameters),
            'namespace': namespace,
            'class_name': ast.function_name[0].upper() + ast.function_name[1:],
            'target': target,
-            'field': representative_field,
        }
        header = env.get_template("Sweep.tmpl.h").render(**context)
        source = env.get_template("Sweep.tmpl.cpp").render(**context)
        return header, source
--- a/templates/Sweep.tmpl.cpp
+++ b/templates/Sweep.tmpl.cpp
@@ -45,7 +45,6 @@ namespace {{namespace}} {
 {{kernel|generate_definition}}
 void {{class_name}}::operator() ( IBlock * block )
 {
    {{kernel|generate_block_data_to_field_extraction|indent(4)}}
@@ -53,70 +52,6 @@ void {{class_name}}::operator() ( IBlock * block )
    {{kernel|generate_swaps|indent(4)}}
 }
-void {{class_name}}::inner( IBlock * block )
-{
-    {{kernel|generate_block_data_to_field_extraction|indent(4)}}
-    CellInterval inner = {{field}}->xyzSize();
-    inner.expand(-1);
-    {{kernel|generate_call(stream='stream_', cell_interval='inner')|indent(4)}}
-}
-void {{class_name}}::outer( IBlock * block )
-{
-    static std::vector<CellInterval> layers;
-    {%if target is equalto 'gpu'%}
-    static std::vector<cudaStream_t> streams;
-    {% endif %}
-    {{kernel|generate_block_data_to_field_extraction|indent(4)}}
-    if( layers.size() == 0 )
-    {
-        CellInterval ci;
-        {{field}}->getSliceBeforeGhostLayer(stencil::T, ci, 1, false);
-        layers.push_back(ci);
-        {{field}}->getSliceBeforeGhostLayer(stencil::B, ci, 1, false);
-        layers.push_back(ci);
-        {{field}}->getSliceBeforeGhostLayer(stencil::N, ci, 1, false);
-        ci.expand(Cell(0, 0, -1));
-        layers.push_back(ci);
-        {{field}}->getSliceBeforeGhostLayer(stencil::S, ci, 1, false);
-        ci.expand(Cell(0, 0, -1));
-        layers.push_back(ci);
-        {{field}}->getSliceBeforeGhostLayer(stencil::E, ci, 1, false);
-        ci.expand(Cell(0, -1, -1));
-        layers.push_back(ci);
-        {{field}}->getSliceBeforeGhostLayer(stencil::W, ci, 1, false);
-        ci.expand(Cell(0, -1, -1));
-        layers.push_back(ci);
-        {%if target is equalto 'gpu'%}
-        for( int i=0; i < layers.size(); ++i )
-        {
-            streams.push_back(cudaStream_t());
-            WALBERLA_CUDA_CHECK( cudaStreamCreate(&streams.back() ) );
-        }
-        {% endif %}
-    }
-    for( int i=0; i < layers.size(); ++i )
-    {
-        {{kernel|generate_call(stream='streams[i]', cell_interval="layers[i]")|indent(8)}}
-        WALBERLA_CUDA_CHECK(cudaStreamSynchronize(streams[i])); // TODO move out when no memcpy is needed to setup call
-    }
-    {{kernel|generate_swaps|indent(4)}}
-}
 } // namespace {{namespace}}
 } // namespace walberla

--- a/templates/Sweep.tmpl.h
+++ b/templates/Sweep.tmpl.h
@@ -55,10 +55,6 @@ public:
    {};
    void operator() ( IBlock * block );
-    void inner( IBlock * block );
-    void outer( IBlock * block );
 private:
    {{kernel|generate_members|indent(4)}}
    {%if target is equalto 'gpu'%}

--- a/templates/SweepInnerOuter.tmpl.cpp
+++ b/templates/SweepInnerOuter.tmpl.cpp
+//======================================================================================================================
+//
+//  This file is part of waLBerla. waLBerla is free software: you can
+//  redistribute it and/or modify it under the terms of the GNU General Public
+//  License as published by the Free Software Foundation, either version 3 of
+//  the License, or (at your option) any later version.
+//
+//  waLBerla is distributed in the hope that it will be useful, but WITHOUT
+//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+//  for more details.
+//
+//  You should have received a copy of the GNU General Public License along
+//  with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+//
+//! \\file {{className}}.cpp
+//! \\ingroup lbm
+//! \\author lbmpy
+//======================================================================================================================
+#include <cmath>
+#include "core/DataTypes.h"
+#include "core/Macros.h"
+#include "{{class_name}}.h"
+{% if target is equalto 'cpu' -%}
+#define FUNC_PREFIX
+{%- elif target is equalto 'gpu' -%}
+#define FUNC_PREFIX __global__
+{%- endif %}
+#if ( defined WALBERLA_CXX_COMPILER_IS_GNU ) || ( defined WALBERLA_CXX_COMPILER_IS_CLANG )
+#   pragma GCC diagnostic push
+#   pragma GCC diagnostic ignored "-Wfloat-equal"
+#   pragma GCC diagnostic ignored "-Wshadow"
+#   pragma GCC diagnostic ignored "-Wconversion"
+#endif
+using namespace std;
+namespace walberla {
+namespace {{namespace}} {
+{{kernel|generate_definition}}
+{% for outer_kernel in outer_kernels.values() %}
+{{outer_kernel|generate_definition}}
+{% endfor %}
+void {{class_name}}::operator() ( IBlock * block )
+{
+    {{kernel|generate_block_data_to_field_extraction|indent(4)}}
+    {{kernel|generate_call(stream='stream_')|indent(4)}}
+    {{kernel|generate_swaps|indent(4)}}
+}
+void {{class_name}}::inner( IBlock * block )
+{
+    {{kernel|generate_block_data_to_field_extraction|indent(4)}}
+    CellInterval inner = {{field}}->xyzSize();
+    inner.expand(-1);
+    {{kernel|generate_call(stream='stream_', cell_interval='inner')|indent(4)}}
+}
+void {{class_name}}::outer( IBlock * block )
+{
+    static std::vector<CellInterval> layers;
+    {%if target is equalto 'gpu'%}
+    static std::vector<cudaStream_t> streams;
+    {% endif %}
+    {{kernel|generate_block_data_to_field_extraction|indent(4)}}
+    if( layers.size() == 0 )
+    {
+        CellInterval ci;
+        {{field}}->getSliceBeforeGhostLayer(stencil::T, ci, 1, false);
+        layers.push_back(ci);
+        {{field}}->getSliceBeforeGhostLayer(stencil::B, ci, 1, false);
+        layers.push_back(ci);
+        {{field}}->getSliceBeforeGhostLayer(stencil::N, ci, 1, false);
+        ci.expand(Cell(0, 0, -1));
+        layers.push_back(ci);
+        {{field}}->getSliceBeforeGhostLayer(stencil::S, ci, 1, false);
+        ci.expand(Cell(0, 0, -1));
+        layers.push_back(ci);
+        {{field}}->getSliceBeforeGhostLayer(stencil::E, ci, 1, false);
+        ci.expand(Cell(0, -1, -1));
+        layers.push_back(ci);
+        {{field}}->getSliceBeforeGhostLayer(stencil::W, ci, 1, false);
+        ci.expand(Cell(0, -1, -1));
+        layers.push_back(ci);
+        {%if target is equalto 'gpu'%}
+        for( int i=0; i < layers.size(); ++i )
+        {
+            streams.push_back(cudaStream_t());
+            WALBERLA_CUDA_CHECK( cudaStreamCreateWithPriority(&streams.back(), cudaStreamDefault, -1) );
+        }
+        {% endif %}
+    }
+    { {{outer_kernels['W']|generate_call(stream='streams[5]', cell_interval="layers[5]")|indent(4)}} }
+    { {{outer_kernels['E']|generate_call(stream='streams[4]', cell_interval="layers[4]")|indent(4)}} }
+    { {{outer_kernels['S']|generate_call(stream='streams[3]', cell_interval="layers[3]")|indent(4)}} }
+    { {{outer_kernels['N']|generate_call(stream='streams[2]', cell_interval="layers[2]")|indent(4)}} }
+    { {{outer_kernels['B']|generate_call(stream='streams[1]', cell_interval="layers[1]")|indent(4)}} }
+    { {{outer_kernels['T']|generate_call(stream='streams[0]', cell_interval="layers[0]")|indent(4)}} }
+    for(int i=0; i < layers.size(); ++i )
+        WALBERLA_CUDA_CHECK( cudaStreamSynchronize(streams[i]) );
+    {{kernel|generate_swaps|indent(4)}}
+}
+} // namespace {{namespace}}
+} // namespace walberla
+#if ( defined WALBERLA_CXX_COMPILER_IS_GNU ) || ( defined WALBERLA_CXX_COMPILER_IS_CLANG )
+#   pragma GCC diagnostic pop
+#endif
--- a/templates/SweepInnerOuter.tmpl.h
+++ b/templates/SweepInnerOuter.tmpl.h
+//======================================================================================================================
+//
+//  This file is part of waLBerla. waLBerla is free software: you can
+//  redistribute it and/or modify it under the terms of the GNU General Public
+//  License as published by the Free Software Foundation, either version 3 of
+//  the License, or (at your option) any later version.
+//
+//  waLBerla is distributed in the hope that it will be useful, but WITHOUT
+//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+//  for more details.
+//
+//  You should have received a copy of the GNU General Public License along
+//  with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+//
+//! \\file {{class_name}}.h
+//! \\author pystencils
+//======================================================================================================================
+#include "core/DataTypes.h"
+{% if target is equalto 'cpu' -%}
+#include "field/GhostLayerField.h"
+{%- elif target is equalto 'gpu' -%}
+#include "cuda/GPUField.h"
+{%- endif %}
+#include "field/SwapableCompare.h"
+#include "domain_decomposition/BlockDataID.h"
+#include "domain_decomposition/IBlock.h"
+#include <set>
+#ifdef __GNUC__
+#define RESTRICT __restrict__
+#elif _MSC_VER
+#define RESTRICT __restrict
+#else
+#define RESTRICT
+#endif
+#if ( defined WALBERLA_CXX_COMPILER_IS_GNU ) || ( defined WALBERLA_CXX_COMPILER_IS_CLANG )
+#   pragma GCC diagnostic push
+#   pragma GCC diagnostic ignored "-Wunused-parameter"
+#endif
+namespace walberla {
+namespace {{namespace}} {
+class {{class_name}}
+{
+public:
+    {{class_name}}( {{kernel|generate_constructor_parameters}}{%if target is equalto 'gpu'%} , cudaStream_t stream = 0{% endif %})
+        : {{ kernel|generate_constructor_initializer_list }}, stream_(stream)
+    {};
+    void operator() ( IBlock * block );
+    void inner( IBlock * block );
+    void outer( IBlock * block );
+private:
+    {{kernel|generate_members|indent(4)}}
+    {%if target is equalto 'gpu'%}
+    cudaStream_t stream_;
+    {% endif %}
+};
+} // namespace {{namespace}}
+} // namespace walberla
+#if ( defined WALBERLA_CXX_COMPILER_IS_GNU ) || ( defined WALBERLA_CXX_COMPILER_IS_CLANG )
+#   pragma GCC diagnostic pop
+#endif