Skip to content
Snippets Groups Projects
Commit 1b2194cd authored by Christoph Rettinger's avatar Christoph Rettinger
Browse files

Fixed vectorization depening on field layout, added test case to verify it

parent 63357a4c
No related branches found
No related tags found
No related merge requests found
......@@ -122,7 +122,7 @@ def __lattice_model(generation_context, class_name, lb_method, stream_collide_as
generation_context.write_file("{}.cpp".format(class_name), source)
def generate_lattice_model(generation_context, class_name, collision_rule, field_layout='fzyx', refinement_scaling=None,
def generate_lattice_model(generation_context, class_name, collision_rule, field_layout='zyxf', refinement_scaling=None,
**create_kernel_params):
# usually a numpy layout is chosen by default i.e. xyzf - which is bad for waLBerla where at least the spatial
......@@ -134,13 +134,15 @@ def generate_lattice_model(generation_context, class_name, collision_rule, field
q = len(lb_method.stencil)
dim = lb_method.dim
if field_layout == 'fzyx':
create_kernel_params['cpu_vectorize_info']['assume_inner_stride_one'] = True
elif field_layout == 'zyxf':
create_kernel_params['cpu_vectorize_info']['assume_inner_stride_one'] = False
create_kernel_params = default_create_kernel_parameters(generation_context, create_kernel_params)
if create_kernel_params['target'] == 'gpu':
raise ValueError("Lattice Models can only be generated for CPUs. To generate LBM on GPUs use sweeps directly")
if field_layout != 'fzyx' and create_kernel_params['cpu_vectorize_info']['instruction_set'] != None:
raise ValueError("Vectorization is not available for given field layout, use fzyx instead")
src_field = ps.Field.create_generic('pdfs', dim, dtype, index_dimensions=1, layout=field_layout, index_shape=(q,))
dst_field = ps.Field.create_generic('pdfs_tmp', dim, dtype, index_dimensions=1, layout=field_layout, index_shape=(q,))
......
......@@ -2,8 +2,9 @@ from .boundary import generate_staggered_boundary, generate_staggered_flux_bound
from .cmake_integration import CodeGeneration
from .codegen import (
generate_pack_info, generate_pack_info_for_field, generate_pack_info_from_kernel,
generate_mpidtype_info_from_kernel, generate_sweep)
generate_mpidtype_info_from_kernel, generate_sweep, get_vectorize_instruction_set)
__all__ = ['CodeGeneration',
'generate_sweep', 'generate_pack_info_from_kernel', 'generate_pack_info_for_field', 'generate_pack_info',
'generate_mpidtype_info_from_kernel', 'generate_staggered_boundary', 'generate_staggered_flux_boundary']
'generate_mpidtype_info_from_kernel', 'generate_staggered_boundary', 'generate_staggered_flux_boundary',
'get_vectorize_instruction_set']
......@@ -13,7 +13,8 @@ from pystencils.stencil import inverse_direction, offset_to_direction_string
from pystencils_walberla.jinja_filters import add_pystencils_filters_to_jinja_env
__all__ = ['generate_sweep', 'generate_pack_info', 'generate_pack_info_for_field', 'generate_pack_info_from_kernel',
'generate_mpidtype_info_from_kernel', 'default_create_kernel_parameters', 'KernelInfo']
'generate_mpidtype_info_from_kernel', 'default_create_kernel_parameters', 'KernelInfo',
'get_vectorize_instruction_set']
def generate_sweep(generation_context, class_name, assignments,
......@@ -322,26 +323,28 @@ class KernelInfo:
self.parameters = ast.get_parameters() # cache parameters here
def default_create_kernel_parameters(generation_context, params):
default_dtype = "float64" if generation_context.double_accuracy else 'float32'
def get_vectorize_instruction_set(generation_context):
if generation_context.optimize_for_localhost:
supported_instruction_sets = get_supported_instruction_sets()
if supported_instruction_sets:
default_vec_is = get_supported_instruction_sets()[-1]
return get_supported_instruction_sets()[-1]
else: # if cpuinfo package is not installed
default_vec_is = 'sse'
return 'sse'
else:
default_vec_is = None
return None
def default_create_kernel_parameters(generation_context, params):
default_dtype = "float64" if generation_context.double_accuracy else 'float32'
params['target'] = params.get('target', 'cpu')
params['data_type'] = params.get('data_type', default_dtype)
params['cpu_openmp'] = params.get('cpu_openmp', generation_context.openmp)
params['cpu_vectorize_info'] = params.get('cpu_vectorize_info', {})
default_vec_is = get_vectorize_instruction_set(generation_context)
vec = params['cpu_vectorize_info']
vec['instruction_set'] = vec.get('instruction_set', default_vec_is)
vec['assume_inner_stride_one'] = True
vec['assume_inner_stride_one'] = vec.get('assume_inner_stride_one', True)
vec['assume_aligned'] = vec.get('assume_aligned', False)
vec['nontemporal'] = vec.get('nontemporal', False)
return params
......
......@@ -72,13 +72,23 @@ waLBerla_execute_test( NAME SuViscoelasticityTest COMMAND $<TARGET_FILE:SuVisco
# Code Generation
if( WALBERLA_BUILD_WITH_CODEGEN )
waLBerla_generate_target_from_python(NAME LbCodeGenerationExampleGenerated
FILE codegen/LbCodeGenerationExample.py
OUT_FILES LbCodeGenerationExample_LatticeModel.cpp LbCodeGenerationExample_LatticeModel.h
LbCodeGenerationExample_NoSlip.cpp LbCodeGenerationExample_NoSlip.h
LbCodeGenerationExample_UBB.cpp LbCodeGenerationExample_UBB.h )
waLBerla_compile_test( FILES codegen/LbCodeGenerationExample.cpp DEPENDS LbCodeGenerationExampleGenerated)
waLBerla_generate_target_from_python(NAME FluctuatingMRTGenerated FILE codegen/FluctuatingMRT.py
OUT_FILES FluctuatingMRT_LatticeModel.cpp FluctuatingMRT_LatticeModel.h )
waLBerla_compile_test( FILES codegen/FluctuatingMRT.cpp DEPENDS FluctuatingMRTGenerated)
waLBerla_generate_target_from_python(NAME FieldLayoutAndVectorizationTestGenerated FILE codegen/FieldLayoutAndVectorizationTest.py
OUT_FILES FieldLayoutAndVectorizationTest_FZYX_Vec_LatticeModel.cpp FieldLayoutAndVectorizationTest_FZYX_Vec_LatticeModel.h
FieldLayoutAndVectorizationTest_FZYX_NoVec_LatticeModel.cpp FieldLayoutAndVectorizationTest_FZYX_NoVec_LatticeModel.h
FieldLayoutAndVectorizationTest_ZYXF_Vec_LatticeModel.cpp FieldLayoutAndVectorizationTest_ZYXF_Vec_LatticeModel.h
FieldLayoutAndVectorizationTest_ZYXF_NoVec_LatticeModel.cpp FieldLayoutAndVectorizationTest_ZYXF_NoVec_LatticeModel.h )
waLBerla_compile_test( FILES codegen/FieldLayoutAndVectorizationTest.cpp DEPENDS FieldLayoutAndVectorizationTestGenerated)
endif()
\ No newline at end of file
//======================================================================================================================
//
// This file is part of waLBerla. waLBerla is free software: you can
// redistribute it and/or modify it under the terms of the GNU General Public
// License as published by the Free Software Foundation, either version 3 of
// the License, or (at your option) any later version.
//
// waLBerla is distributed in the hope that it will be useful, but WITHOUT
// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
// for more details.
//
// You should have received a copy of the GNU General Public License along
// with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
//
//! \file FieldLayoutAndVectorizationTest.cpp
//! \author Christoph Rettinger <christoph.rettinger@fau.de>
//
//======================================================================================================================
#include "blockforest/Initialization.h"
#include "core/DataTypes.h"
#include "core/Environment.h"
#include "core/debug/Debug.h"
#include "core/debug/TestSubsystem.h"
#include "core/math/all.h"
#include "core/logging/all.h"
#include "lbm/field/AddToStorage.h"
#include "lbm/field/PdfField.h"
#include "FieldLayoutAndVectorizationTest_FZYX_Vec_LatticeModel.h"
#include "FieldLayoutAndVectorizationTest_FZYX_NoVec_LatticeModel.h"
#include "FieldLayoutAndVectorizationTest_ZYXF_Vec_LatticeModel.h"
#include "FieldLayoutAndVectorizationTest_ZYXF_NoVec_LatticeModel.h"
namespace field_layout_vectorization_test {
using namespace walberla;
typedef walberla::uint8_t flag_t;
typedef FlagField<flag_t> FlagField_T;
template<typename LM1_T, typename LM2_T>
void checkEquivalence(const shared_ptr<StructuredBlockStorage> & blocks, BlockDataID pdfField1ID, BlockDataID pdfField2ID, std::string identifier)
{
for(auto blockIt = blocks->begin(); blockIt != blocks->end(); ++blockIt) {
auto pdfField1 = blockIt->getData<lbm::PdfField<LM1_T> >(pdfField1ID);
auto pdfField2 = blockIt->getData<lbm::PdfField<LM2_T> >(pdfField2ID);
WALBERLA_FOR_ALL_CELLS_XYZ(pdfField1,
for( uint_t f = 0; f < pdfField1->fSize(); ++f)
{
WALBERLA_CHECK_FLOAT_EQUAL(pdfField1->get(x,y,z,f), pdfField2->get(x,y,z,f),
identifier << " - mismatch in " << x << " " << y << " " << z << " " << f);
}
)
}
}
int main(int argc, char **argv) {
debug::enterTestMode();
mpi::Environment env( argc, argv );
walberla::Environment walberlaEnv(argc, argv);
auto blocks = blockforest::createUniformBlockGrid( 1, 1, 1,
32, 32, 32, real_t(1),
0, false, false,
true, true, true, //periodicity
false );
real_t omega = real_t(1.6);
Vector3<real_t> initialVel(real_t(0.1), real_t(0), real_t(0));
using LatticeModel1_T = lbm::FieldLayoutAndVectorizationTest_FZYX_Vec_LatticeModel;
LatticeModel1_T latticeModel1(omega);
BlockDataID pdfField1ID = lbm::addPdfFieldToStorage(blocks, "pdf field1", latticeModel1, initialVel, real_t(1), field::fzyx);
for(auto blockIt = blocks->begin(); blockIt != blocks->end(); ++blockIt) {
auto sweep = LatticeModel1_T::Sweep(pdfField1ID);
sweep(&(*blockIt));
}
using LatticeModel2_T = lbm::FieldLayoutAndVectorizationTest_FZYX_NoVec_LatticeModel;
LatticeModel2_T latticeModel2(omega);
BlockDataID pdfField2ID = lbm::addPdfFieldToStorage(blocks, "pdf field2", latticeModel2, initialVel, real_t(1), field::fzyx);
for(auto blockIt = blocks->begin(); blockIt != blocks->end(); ++blockIt) {
auto sweep = LatticeModel2_T::Sweep(pdfField2ID);
sweep(&(*blockIt));
}
checkEquivalence<LatticeModel1_T,LatticeModel2_T>(blocks, pdfField1ID, pdfField2ID, "1 vs 2");
using LatticeModel3_T = lbm::FieldLayoutAndVectorizationTest_ZYXF_Vec_LatticeModel;
LatticeModel3_T latticeModel3(omega);
BlockDataID pdfField3ID = lbm::addPdfFieldToStorage(blocks, "pdf field3", latticeModel3, initialVel, real_t(1), field::zyxf);
for(auto blockIt = blocks->begin(); blockIt != blocks->end(); ++blockIt) {
auto sweep = LatticeModel3_T::Sweep(pdfField3ID);
sweep(&(*blockIt));
}
checkEquivalence<LatticeModel2_T,LatticeModel3_T>(blocks, pdfField2ID, pdfField3ID, "2 vs 3");
using LatticeModel4_T = lbm::FieldLayoutAndVectorizationTest_ZYXF_NoVec_LatticeModel;
LatticeModel4_T latticeModel4(omega);
BlockDataID pdfField4ID = lbm::addPdfFieldToStorage(blocks, "pdf field4", latticeModel4, initialVel, real_t(1), field::zyxf);
for(auto blockIt = blocks->begin(); blockIt != blocks->end(); ++blockIt) {
auto sweep = LatticeModel4_T::Sweep(pdfField4ID);
sweep(&(*blockIt));
}
checkEquivalence<LatticeModel3_T,LatticeModel4_T>(blocks, pdfField3ID, pdfField4ID, "3 vs 4");
return EXIT_SUCCESS;
}
} //namespace field_layout_vectorization_test
int main( int argc, char **argv ){
field_layout_vectorization_test::main(argc, argv);
}
import sympy as sp
from lbmpy.creationfunctions import create_lb_collision_rule
from pystencils_walberla import CodeGeneration
from lbmpy_walberla import generate_lattice_model
from pystencils_walberla import get_vectorize_instruction_set
from collections import namedtuple
with CodeGeneration() as ctx:
omega_shear = sp.symbols("omega")
collision_rule = create_lb_collision_rule( stencil='D2Q9', compressible=False, method='srt' )
SetupDefinition = namedtuple('SetupDefinition', ['name', 'field_layout', 'vectorization_dict'])
default_vectorize_instruction_set = get_vectorize_instruction_set(ctx)
configurations = [SetupDefinition('FZYX_Vec', 'fzyx', {'instruction_set': default_vectorize_instruction_set} ),
SetupDefinition('FZYX_NoVec', 'fzyx', {'instruction_set': None} ),
SetupDefinition('ZYXF_Vec', 'zyxf', {'instruction_set': default_vectorize_instruction_set} ), # does/should not vectorize, but instead yield warning
SetupDefinition('ZYXF_NoVec', 'zyxf', {'instruction_set': None} )]
for conf in configurations:
generate_lattice_model(ctx, 'FieldLayoutAndVectorizationTest_'+conf.name+'_LatticeModel', collision_rule,
field_layout=conf.field_layout, refinement_scaling=None, cpu_vectorize_info=conf.vectorization_dict)
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment