From b83706cd983b17ba27a569d45992daa3737fc573 Mon Sep 17 00:00:00 2001 From: Martin Bauer <martin.bauer@fau.de> Date: Thu, 18 Apr 2019 11:14:11 +0200 Subject: [PATCH] Bugfixes in pystencils_walberla and lbmpy_walberla - missing pragma once added - static variable to member in overlap sweep, when called with changing block sizes this led to wrong results --- .../templates/GpuPackInfo.tmpl.h | 1 + .../templates/SweepInnerOuter.tmpl.cpp | 20 +++++++++---------- .../templates/SweepInnerOuter.tmpl.h | 2 ++ 3 files changed, 12 insertions(+), 11 deletions(-) diff --git a/pystencils_walberla/templates/GpuPackInfo.tmpl.h b/pystencils_walberla/templates/GpuPackInfo.tmpl.h index 74bd12b..19d68f5 100644 --- a/pystencils_walberla/templates/GpuPackInfo.tmpl.h +++ b/pystencils_walberla/templates/GpuPackInfo.tmpl.h @@ -1,3 +1,4 @@ +#pragma once #include "stencil/Directions.h" #include "core/cell/CellInterval.h" #include "cuda/GPUField.h" diff --git a/pystencils_walberla/templates/SweepInnerOuter.tmpl.cpp b/pystencils_walberla/templates/SweepInnerOuter.tmpl.cpp index 3cf5069..de1c5b5 100644 --- a/pystencils_walberla/templates/SweepInnerOuter.tmpl.cpp +++ b/pystencils_walberla/templates/SweepInnerOuter.tmpl.cpp @@ -85,38 +85,36 @@ void {{class_name}}::inner( IBlock * block{%if target is equalto 'gpu'%} , cudaS void {{class_name}}::outer( IBlock * block{%if target is equalto 'gpu'%} , cudaStream_t stream {% endif %} ) { - static std::vector<CellInterval> layers; - {{kernel|generate_block_data_to_field_extraction|indent(4)}} - if( layers.size() == 0 ) + if( layers_.size() == 0 ) { CellInterval ci; {{field}}->getSliceBeforeGhostLayer(stencil::T, ci, 1, false); - layers.push_back(ci); + layers_.push_back(ci); {{field}}->getSliceBeforeGhostLayer(stencil::B, ci, 1, false); - layers.push_back(ci); + layers_.push_back(ci); {{field}}->getSliceBeforeGhostLayer(stencil::N, ci, 1, false); ci.expand(Cell(0, 0, -1)); - layers.push_back(ci); + layers_.push_back(ci); 
{{field}}->getSliceBeforeGhostLayer(stencil::S, ci, 1, false); ci.expand(Cell(0, 0, -1)); - layers.push_back(ci); + layers_.push_back(ci); {{field}}->getSliceBeforeGhostLayer(stencil::E, ci, 1, false); ci.expand(Cell(0, -1, -1)); - layers.push_back(ci); + layers_.push_back(ci); {{field}}->getSliceBeforeGhostLayer(stencil::W, ci, 1, false); ci.expand(Cell(0, -1, -1)); - layers.push_back(ci); + layers_.push_back(ci); } {%if target is equalto 'gpu'%} { auto parallelSection_ = parallelStreams_.parallelSection( stream ); - for( auto & ci: layers ) + for( auto & ci: layers_ ) { parallelSection_.run([&]( auto s ) { {{kernel|generate_call(stream='s', cell_interval='ci')|indent(16)}} @@ -124,7 +122,7 @@ void {{class_name}}::outer( IBlock * block{%if target is equalto 'gpu'%} , cudaS } } {% else %} - for( auto & ci: layers ) + for( auto & ci: layers_ ) { {{kernel|generate_call(cell_interval='ci')|indent(8)}} } diff --git a/pystencils_walberla/templates/SweepInnerOuter.tmpl.h b/pystencils_walberla/templates/SweepInnerOuter.tmpl.h index 733083a..c2c1d4d 100644 --- a/pystencils_walberla/templates/SweepInnerOuter.tmpl.h +++ b/pystencils_walberla/templates/SweepInnerOuter.tmpl.h @@ -97,6 +97,8 @@ private: {%if target is equalto 'gpu'%} cuda::ParallelStreams parallelStreams_; {% endif %} + + std::vector<CellInterval> layers_; }; -- GitLab