diff --git a/pystencils_walberla/__init__.py b/pystencils_walberla/__init__.py index 767581a754edcf025f743c15338ef2a4b41c7624..a7e42bfbad77181ed8d76771d60758f4d5284a39 100644 --- a/pystencils_walberla/__init__.py +++ b/pystencils_walberla/__init__.py @@ -1,8 +1,11 @@ +from pystencils_walberla.boundary import ( + generate_boundary, generate_staggered_boundary, generate_staggered_flux_boundary) + from .cmake_integration import CodeGeneration from .codegen import ( - generate_pack_info, generate_pack_info_for_field, generate_pack_info_from_kernel, - generate_mpidtype_info_from_kernel, generate_sweep) + generate_mpidtype_info_from_kernel, generate_pack_info, generate_pack_info_for_field, + generate_pack_info_from_kernel, generate_sweep) __all__ = ['CodeGeneration', 'generate_sweep', 'generate_pack_info_from_kernel', 'generate_pack_info_for_field', 'generate_pack_info', - 'generate_mpidtype_info_from_kernel'] + 'generate_mpidtype_info_from_kernel', 'generate_staggered_boundary', 'generate_staggered_flux_boundary', 'generate_boundary'] diff --git a/pystencils_walberla/boundary.py b/pystencils_walberla/boundary.py new file mode 100644 index 0000000000000000000000000000000000000000..e0d411af55cd89fda60811680cffda0faf93349b --- /dev/null +++ b/pystencils_walberla/boundary.py @@ -0,0 +1,140 @@ +from functools import partial + +import numpy as np +from jinja2 import Environment, PackageLoader, StrictUndefined + +from pystencils import Field, FieldType +from pystencils.boundaries.boundaryhandling import create_boundary_kernel +from pystencils.boundaries.createindexlist import ( + boundary_index_array_coordinate_names, direction_member_name, + numpy_data_type_for_boundary_object) +from pystencils.data_types import TypedSymbol, create_type +from pystencils_walberla.codegen import KernelInfo, default_create_kernel_parameters +from pystencils_walberla.jinja_filters import add_pystencils_filters_to_jinja_env + +try: + from pystencils_autodiff import AdjointField + from pystencils_autodiff.lbm.adjoint_boundaryconditions import AdjointBoundaryCondition +except ImportError: + AdjointBoundaryCondition = () + AdjointField = () + + +def generate_boundary(generation_context, + class_name, + boundary_object, + field_name, + neighbor_stencil, + index_shape=(), + field_type=FieldType.GENERIC, + kernel_creation_function=None, + target='cpu', + namespace='pystencils', + **create_kernel_params): + struct_name = "IndexInfo" + boundary_object.name = class_name + dim = len(neighbor_stencil[0]) + + create_kernel_params = default_create_kernel_parameters(generation_context, create_kernel_params) + create_kernel_params["target"] = target + index_struct_dtype = numpy_data_type_for_boundary_object(boundary_object, dim) + + field = Field.create_generic(field_name, dim, + np.float64 if generation_context.double_accuracy else np.float32, + index_dimensions=len(index_shape), layout='fzyx', index_shape=index_shape, + field_type=field_type) + if isinstance(boundary_object, AdjointBoundaryCondition): + field = AdjointField(field) + + index_field = Field('indexVector', FieldType.INDEXED, index_struct_dtype, layout=[0], + shape=(TypedSymbol("indexVectorSize", create_type(np.int64)), 1), strides=(1, 1)) + + if not kernel_creation_function: + kernel_creation_function = create_boundary_kernel + kernel = kernel_creation_function(field, index_field, neighbor_stencil, + boundary_object, openmp=generation_context.openmp, target=target) + kernel.function_name = "boundary_" + boundary_object.name + kernel.assumed_inner_stride_one = False + + # waLBerla is a 3D framework. Therefore, a zero for the z index has to be added if we work in 2D + if dim == 2: + stencil = () + for d in neighbor_stencil: + d = d + (0,) + stencil = stencil + (d,) + else: + stencil = neighbor_stencil + + stencil_info = [(i, d, ", ".join([str(e) for e in d])) for i, d in enumerate(stencil)] + inv_dirs = [] + for direction in stencil: + inverse_dir = tuple([-i for i in direction]) + inv_dirs.append(stencil.index(inverse_dir)) + + context = { + 'class_name': boundary_object.name, + 'StructName': struct_name, + 'StructDeclaration': struct_from_numpy_dtype(struct_name, index_struct_dtype), + 'kernel': KernelInfo(kernel), + 'stencil_info': stencil_info, + 'inverse_directions': inv_dirs, + 'dim': dim, + 'target': target, + 'namespace': namespace, + 'inner_or_boundary': boundary_object.inner_or_boundary, + 'additional_data': '', # list(map(lambda a: a[0], boundary_object.additional_data)), + 'additional_data_functor': '' # 'boundaryDataInitializer' if boundary_object.additional_data else '' + } + + env = Environment(loader=PackageLoader('pystencils_walberla'), undefined=StrictUndefined) + add_pystencils_filters_to_jinja_env(env) + + header = env.get_template('Boundary.tmpl.h').render(**context) + source = env.get_template('Boundary.tmpl.cpp').render(**context) + + source_extension = "cpp" if target == "cpu" else "cu" + generation_context.write_file("{}.h".format(class_name), header) + generation_context.write_file("{}.{}".format(class_name, source_extension), source) + return kernel + + +def _generate_boundary(generation_context, + class_name, + boundary_object, + field_name, + dim, + *args, + **kwargs): + # drop redundant dim parameter + return generate_boundary(generation_context, class_name, boundary_object, field_name, *args, **kwargs) + + +generate_staggered_boundary = partial(_generate_boundary, field_name='field', field_type=FieldType.STAGGERED) +generate_staggered_flux_boundary = partial(_generate_boundary, field_name='flux', field_type=FieldType.STAGGERED_FLUX) + + +def struct_from_numpy_dtype(struct_name, numpy_dtype): + result = "struct %s { \n" % (struct_name,) + + equality_compare = [] + constructor_params = [] + constructor_initializer_list = [] + for name, (sub_type, offset) in numpy_dtype.fields.items(): + pystencils_type = create_type(sub_type) + result += " %s %s;\n" % (pystencils_type, name) + if name in boundary_index_array_coordinate_names or name == direction_member_name: + constructor_params.append("%s %s_" % (pystencils_type, name)) + constructor_initializer_list.append("%s(%s_)" % (name, name)) + else: + constructor_initializer_list.append("%s()" % name) + if pystencils_type.is_float(): + equality_compare.append("floatIsEqual(%s, o.%s)" % (name, name)) + else: + equality_compare.append("%s == o.%s" % (name, name)) + + result += " %s(%s) : %s {}\n" % \ + (struct_name, ", ".join(constructor_params), ", ".join(constructor_initializer_list)) + result += " bool operator==(const %s & o) const {\n return %s;\n }\n" % \ + (struct_name, " && ".join(equality_compare)) + result += "};\n" + return result diff --git a/pystencils_walberla/templates/Boundary.tmpl.cpp b/pystencils_walberla/templates/Boundary.tmpl.cpp new file mode 100644 index 0000000000000000000000000000000000000000..d4b3333b14bbd2939964a90723ad60b1ed465f6e --- /dev/null +++ b/pystencils_walberla/templates/Boundary.tmpl.cpp @@ -0,0 +1,104 @@ +//====================================================================================================================== +// +// This file is part of waLBerla. waLBerla is free software: you can +// redistribute it and/or modify it under the terms of the GNU General Public +// License as published by the Free Software Foundation, either version 3 of +// the License, or (at your option) any later version. +// +// waLBerla is distributed in the hope that it will be useful, but WITHOUT +// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +// for more details. +// +// You should have received a copy of the GNU General Public License along +// with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>. +// +//! \\file {{class_name}}.cpp +//! \\ingroup lbm +//! \\author lbmpy +//====================================================================================================================== + +#include <cmath> + +#include "core/DataTypes.h" +#include "core/Macros.h" +#include "{{class_name}}.h" +{% if target == 'gpu' -%} +#include "cuda/ErrorChecking.h" +{%- endif %} + + +{% if target == 'cpu' -%} +#define FUNC_PREFIX +{%- elif target == 'gpu' -%} +#define FUNC_PREFIX __global__ +{%- endif %} + +using namespace std; + +namespace walberla { +namespace {{namespace}} { + +#ifdef __GNUC__ +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wstrict-aliasing" +#pragma GCC diagnostic ignored "-Wunused-variable" +#pragma GCC diagnostic ignored "-Wconversion" +#endif + +#ifdef __CUDACC__ +#pragma push +#pragma diag_suppress = declared_but_not_referenced +#endif + + +{{kernel|generate_definition}} + +#ifdef __GNUC__ +#pragma GCC diagnostic pop +#endif + +#ifdef __CUDACC__ +#pragma pop +#endif + + +void {{class_name}}::run( IBlock * block, IndexVectors::Type type {% if target == 'gpu'%}, cudaStream_t stream {%endif%}) +{ + auto * indexVectors = block->getData<IndexVectors>(indexVectorID); + int64_t indexVectorSize = int64_c( indexVectors->indexVector(type).size() ); + if( indexVectorSize == 0) + return; + + {% if target == 'gpu' -%} + auto pointer = indexVectors->pointerGpu(type); + {% else %} + auto pointer = indexVectors->pointerCpu(type); + {% endif %} + + uint8_t * _data_indexVector = reinterpret_cast<uint8_t*>(pointer); + + {{kernel|generate_block_data_to_field_extraction(['indexVector', 'indexVectorSize'])|indent(4)}} + {{kernel|generate_call(spatial_shape_symbols=['indexVectorSize'], stream='stream')|indent(4)}} +} + +void {{class_name}}::operator() ( IBlock * block{% if target == 'gpu'%}, cudaStream_t stream {%endif%} ) +{ + run( block, IndexVectors::ALL{% if target == 'gpu'%}, stream {%endif%}); +} + +void {{class_name}}::inner( IBlock * block{% if target == 'gpu'%}, cudaStream_t stream {%endif%} ) +{ + run( block, IndexVectors::INNER{% if target == 'gpu'%}, stream {%endif%} ); +} + +void {{class_name}}::outer( IBlock * block{% if target == 'gpu'%}, cudaStream_t stream {%endif%} ) +{ + run( block, IndexVectors::OUTER{% if target == 'gpu'%}, stream {%endif%} ); +} + + +} // namespace {{namespace}} +} // namespace walberla + + diff --git a/pystencils_walberla/templates/Boundary.tmpl.h b/pystencils_walberla/templates/Boundary.tmpl.h new file mode 100644 index 0000000000000000000000000000000000000000..95ae0dd30e8dcbec5f5f9990dea9dd87a344e672 --- /dev/null +++ b/pystencils_walberla/templates/Boundary.tmpl.h @@ -0,0 +1,196 @@ +//====================================================================================================================== +// +// This file is part of waLBerla. waLBerla is free software: you can +// redistribute it and/or modify it under the terms of the GNU General Public +// License as published by the Free Software Foundation, either version 3 of +// the License, or (at your option) any later version. +// +// waLBerla is distributed in the hope that it will be useful, but WITHOUT +// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +// for more details. +// +// You should have received a copy of the GNU General Public License along +// with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>. +// +//! \\file {{class_name}}.h +//! \\author pystencils +//====================================================================================================================== + + +#include "core/DataTypes.h" + +{% if target is equalto 'cpu' -%} +#include "field/GhostLayerField.h" +{%- elif target is equalto 'gpu' -%} +#include "cuda/GPUField.h" +{%- endif %} +#include "domain_decomposition/BlockDataID.h" +#include "domain_decomposition/IBlock.h" +#include "blockforest/StructuredBlockForest.h" +#include "field/FlagField.h" + +#include <set> +#include <vector> + +#ifdef __GNUC__ +#define RESTRICT __restrict__ +#elif _MSC_VER +#define RESTRICT __restrict +#else +#define RESTRICT +#endif + +namespace walberla { +namespace {{namespace}} { + + +{% if additional_data_functor %} +template < class AdditionalDataFunctor_T > +{% endif %} +class {{class_name}} +{ +public: + {{StructDeclaration|indent(4)}} + + + class IndexVectors + { + public: + using CpuIndexVector = std::vector<{{StructName}}>; + + enum Type { + ALL = 0, + INNER = 1, + OUTER = 2, + NUM_TYPES = 3 + }; + + IndexVectors() : cpuVectors_(NUM_TYPES) {} + bool operator==(IndexVectors & other) { return other.cpuVectors_ == cpuVectors_; } + + {% if target == 'gpu' -%} + ~IndexVectors() { + for( auto & gpuVec: gpuVectors_) + cudaFree( gpuVec ); + } + {% endif %} + + CpuIndexVector & indexVector(Type t) { return cpuVectors_[t]; } + {{StructName}} * pointerCpu(Type t) { return &(cpuVectors_[t][0]); } + + {% if target == 'gpu' -%} + {{StructName}} * pointerGpu(Type t) { return gpuVectors_[t]; } + {% endif %} + + void syncGPU() + { + {% if target == 'gpu' -%} + gpuVectors_.resize( cpuVectors_.size() ); + for(size_t i=0; i < size_t(NUM_TYPES); ++i ) + { + auto & gpuVec = gpuVectors_[i]; + auto & cpuVec = cpuVectors_[i]; + cudaFree( gpuVec ); + cudaMalloc( &gpuVec, sizeof({{StructName}}) * cpuVec.size() ); + cudaMemcpy( gpuVec, &cpuVec[0], sizeof({{StructName}}) * cpuVec.size(), cudaMemcpyHostToDevice ); + } + {%- endif %} + } + + private: + std::vector<CpuIndexVector> cpuVectors_; + + {% if target == 'gpu' -%} + using GpuIndexVector = {{StructName}} *; + std::vector<GpuIndexVector> gpuVectors_; + {% endif %} + }; + + + {{class_name}}( const shared_ptr<StructuredBlockForest> & blocks, + {{kernel|generate_constructor_parameters(['indexVector', 'indexVectorSize'])}} ) + : {{ kernel|generate_constructor_initializer_list(['indexVector', 'indexVectorSize']) }} + { + auto createIdxVector = []( IBlock * const , StructuredBlockStorage * const ) { return new IndexVectors(); }; + indexVectorID = blocks->addStructuredBlockData< IndexVectors >( createIdxVector, "IndexField_{{class_name}}"); + }; + + void operator() ( IBlock * block {% if target == 'gpu'%}, cudaStream_t stream = nullptr {%endif%}); + void inner( IBlock * block {% if target == 'gpu'%}, cudaStream_t stream = nullptr {%endif%}); + void outer( IBlock * block {% if target == 'gpu'%}, cudaStream_t stream = nullptr {%endif%}); + + + template<typename FlagField_T> + void fillFromFlagField( const shared_ptr<StructuredBlockForest> & blocks, ConstBlockDataID flagFieldID, + FlagUID boundaryFlagUID, FlagUID domainFlagUID) + { + for( auto blockIt = blocks->begin(); blockIt != blocks->end(); ++blockIt ) + fillFromFlagField<FlagField_T>( &*blockIt, flagFieldID, boundaryFlagUID, domainFlagUID ); + } + + + template<typename FlagField_T> + void fillFromFlagField( IBlock * block, ConstBlockDataID flagFieldID, + FlagUID boundaryFlagUID, FlagUID domainFlagUID ) + { + auto * indexVectors = block->getData< IndexVectors > ( indexVectorID ); + auto & indexVectorAll = indexVectors->indexVector(IndexVectors::ALL); + auto & indexVectorInner = indexVectors->indexVector(IndexVectors::INNER); + auto & indexVectorOuter = indexVectors->indexVector(IndexVectors::OUTER); + + + auto * flagField = block->getData< FlagField_T > ( flagFieldID ); + + if( !(flagField->flagExists(boundaryFlagUID) && flagField->flagExists(domainFlagUID) )) + return; + + auto boundaryFlag = flagField->getFlag(boundaryFlagUID); + auto domainFlag = flagField->getFlag(domainFlagUID); + + auto inner = flagField->xyzSize(); + inner.expand( cell_idx_t(-1) ); + + + indexVectorAll.clear(); + indexVectorInner.clear(); + indexVectorOuter.clear(); + + for( auto it = flagField->begin(); it != flagField->end(); ++it ) + { + if( ! isFlagSet(it, domainFlag) ) + continue; + + {%- for dirIdx, dirVec, offset in stencil_info %} + if ( isFlagSet( it.neighbor({{offset}} {%if dim == 3%}, 0 {%endif %}), boundaryFlag ) ) + { + {% if inner_or_boundary -%} + auto element = {{StructName}}(it.x(), it.y(), {%if dim == 3%} it.z(), {%endif %} {{dirIdx}} ); + {% else -%} + auto element = {{StructName}}(it.x() + cell_idx_c({{dirVec[0]}}), it.y() + cell_idx_c({{dirVec[1]}}), {%if dim == 3%} it.z() + cell_idx_c({{dirVec[2]}}), {%endif %} {{inverse_directions[dirIdx]}} ); + {% endif -%} + indexVectorAll.push_back( element ); + if( inner.contains( it.x(), it.y(), it.z() ) ) + indexVectorInner.push_back( element ); + else + indexVectorOuter.push_back( element ); + } + {% endfor %} + } + + indexVectors->syncGPU(); + } + +private: + void run( IBlock * block, IndexVectors::Type type{% if target == 'gpu'%}, cudaStream_t stream = nullptr {%endif%}); + +public: + BlockDataID indexVectorID; + {% if additional_data_functor -%}AdditionalDataFunctor_T {{ additional_data_functor }};{%- endif %} + {{kernel|generate_members(('indexVector', 'indexVectorSize'))|indent(4)}} +}; + + + +} // namespace {{namespace}} +} // namespace walberla diff --git a/pystencils_walberla/templates/Sweep.tmpl.h b/pystencils_walberla/templates/Sweep.tmpl.h index ab65351107770245ca02ee418383dc752b8746c4..d6f212b22a9e4e10405d55d02ffbe04f4191368a 100644 --- a/pystencils_walberla/templates/Sweep.tmpl.h +++ b/pystencils_walberla/templates/Sweep.tmpl.h @@ -58,7 +58,7 @@ public: {{ kernel| generate_destructor(class_name) |indent(4) }} - void operator() ( IBlock * block{%if target is equalto 'gpu'%} , cudaStream_t stream = 0{% endif %} ); + void operator() ( IBlock * block{%if target is equalto 'gpu'%}, cudaStream_t stream = nullptr{% endif %} ); void runOnCellInterval(const shared_ptr<StructuredBlockStorage> & blocks, const CellInterval & globalCellInterval, cell_idx_t ghostLayers, IBlock * block {%if target is equalto 'gpu'%} , cudaStream_t stream = 0{% endif %});