diff --git a/apps/benchmarks/TurbulentChannel/TurbulentChannel.cpp b/apps/benchmarks/TurbulentChannel/TurbulentChannel.cpp index a552ea5864bb3c5ee697bb3c15487e198beca9b7..ca78e9f04dad09bce9b12098d2278a1484f146ce 100644 --- a/apps/benchmarks/TurbulentChannel/TurbulentChannel.cpp +++ b/apps/benchmarks/TurbulentChannel/TurbulentChannel.cpp @@ -424,11 +424,11 @@ namespace walberla { ForceCalculator<VectorField_T> const * const forceCalculator, const std::weak_ptr<StructuredBlockStorage> & blocks, const BlockDataID velocityFieldId, const BlockDataID meanVelocityFieldId, - const BlockDataID meanTkeSGSFieldId, Welford_T * velocityWelford, + const BlockDataID meanTkeSGSFieldId, const BlockDataID sosFieldId, Welford_T * velocityWelford, const bool separateFile = false) : parameters_(parameters), forceCalculator_(forceCalculator), timeloop_(timeloop), blocks_(blocks), velocityWelford_(velocityWelford), velocityFieldId_(velocityFieldId), meanVelocityFieldId_(meanVelocityFieldId), - meanTkeSGSFieldId_(meanTkeSGSFieldId), plotFrequency_(parameters->plotFrequency), plotStart_(parameters->plotStart), + meanTkeSGSFieldId_(meanTkeSGSFieldId), sosFieldId_(sosFieldId), plotFrequency_(parameters->plotFrequency), plotStart_(parameters->plotStart), separateFiles_(separateFile) { if(!plotFrequency_) @@ -526,7 +526,7 @@ namespace walberla { const auto * const tkeSGS = block->template getData<ScalarField_T>(meanTkeSGSFieldId_); WALBERLA_CHECK_NOT_NULLPTR(tkeSGS) - const auto * const sos = block->template getData<TensorField_T>(velocityWelford_->sum_of_squares_fieldID); + const auto * const sos = block->template getData<TensorField_T>(sosFieldId_); WALBERLA_CHECK_NOT_NULLPTR(sos) for(uint_t idx = 0; idx < parameters_->channelHalfWidth; ++idx) { @@ -541,7 +541,7 @@ namespace walberla { meanVelocityVector[idx] = meanVelocity->get(localCell, flowAxis); tkeSGSVector[idx] = tkeSGS->get(localCell); for(uint_t i = 0; i < TensorField_T::F_SIZE; ++i) { - reynoldsStressVector[idx*TensorField_T::F_SIZE+i] = sos->get(localCell,i) / velocityWelford_->counter_; + reynoldsStressVector[idx*TensorField_T::F_SIZE+i] = sos->get(localCell,i) / velocityWelford_->getCounter(); } tkeResolvedVector[idx] = real_c(0.5) * ( reynoldsStressVector[idx*TensorField_T::F_SIZE+0] + @@ -622,6 +622,7 @@ namespace walberla { const BlockDataID velocityFieldId_{}; const BlockDataID meanVelocityFieldId_{}; const BlockDataID meanTkeSGSFieldId_{}; + const BlockDataID sosFieldId_{}; const uint_t plotFrequency_{}; const uint_t plotStart_{}; @@ -836,16 +837,15 @@ namespace walberla { communication.addPackInfo(make_shared< PackInfo_T >(pdfFieldId)); auto setNewForce = [&](const real_t newForce) { - streamCollideSweep.F_x_ = newForce; - tkeSgsWriter.F_x_ = newForce; - tkeSgsWriter.F_x_ = newForce; + streamCollideSweep.setF_x(newForce); + tkeSgsWriter.setF_x(newForce); }; // plotting const bool outputSeparateFiles = channelParameter.getParameter<bool>("separate_files", false); const TurbulentChannelPlotter<WelfordSweep_T > plotter(&simulationParameters, &timeloop, &forceCalculator, blocks, velocityFieldId, meanVelocityFieldId, - meanTkeSgsFieldId, &welfordSweep, + meanTkeSgsFieldId, sosFieldId, &welfordSweep, outputSeparateFiles); //NOTE must convert sweeps that are altered to lambdas, otherwise copy and counter will stay 0 @@ -876,11 +876,11 @@ namespace walberla { timeloop.add() << Sweep(wfbLambda, "wall function bounce"); timeloop.add() << Sweep(streamCollideLambda, "stream and collide"); timeloop.add() << BeforeFunction([&](){ - const uint_t velCtr = uint_c(welfordSweep.counter_); + const uint_t velCtr = uint_c(welfordSweep.getCounter()); if((timeloop.getCurrentTimeStep() == simulationParameters.samplingStart) || (timeloop.getCurrentTimeStep() > simulationParameters.samplingStart && simulationParameters.samplingInterval && (velCtr % simulationParameters.samplingInterval == 0))) { - welfordSweep.counter_ = real_t(0); - welfordTKESweep.counter_ = real_t(0); + welfordSweep.setCounter(real_c(0.0)); + welfordTKESweep.setCounter(real_c(0.0)); for(auto & block : *blocks) { auto * sopField = block.template getData<TensorField_T >(sosFieldId); sopField->setWithGhostLayer(0.0); @@ -890,8 +890,8 @@ namespace walberla { } } - welfordSweep.counter_ = welfordSweep.counter_ + real_c(1); - welfordTKESweep.counter_ = welfordTKESweep.counter_ + real_c(1); + welfordSweep.setCounter(welfordSweep.getCounter() + real_c(1.0)); + welfordTKESweep.setCounter(welfordTKESweep.getCounter() + real_c(1.0)); }, "welford sweep") << Sweep(welfordLambda, "welford sweep"); timeloop.add() << Sweep(tkeSgsWriter, "TKE_SGS writer"); diff --git a/python/lbmpy_walberla/templates/LBMSweepCollection.tmpl.h b/python/lbmpy_walberla/templates/LBMSweepCollection.tmpl.h index 9471b6f29bdf7951199b34b87e593fb4622e46d2..a85b9a93bbb676824b7e82ce3a075cbde1979925 100644 --- a/python/lbmpy_walberla/templates/LBMSweepCollection.tmpl.h +++ b/python/lbmpy_walberla/templates/LBMSweepCollection.tmpl.h @@ -450,6 +450,9 @@ class {{class_name}} } {%endif%} + {{kernel_list|generate_getter(parameter_registration=parameter_scaling)|indent(3)}} + {{kernel_list|generate_setter(parameter_registration=parameter_scaling)|indent(3)}} + private: shared_ptr< StructuredBlockForest > blocks_; {{kernel_list|generate_members(parameter_registration=parameter_scaling)|indent(4)}} diff --git a/python/pystencils_walberla/jinja_filters.py b/python/pystencils_walberla/jinja_filters.py index d83ed6bba4197659ae447f773092111bcf7298ec..22ae0eceaf6cd78b5a4f9663c8c20cbe4a915966 100644 --- a/python/pystencils_walberla/jinja_filters.py +++ b/python/pystencils_walberla/jinja_filters.py @@ -106,6 +106,35 @@ def get_field_stride(param): return strides[param.symbol.coordinate] +def normalise_cell_interval(cell_interval, field_object): + if isinstance(cell_interval, str): + return cell_interval + elif isinstance(cell_interval, dict): + return cell_interval[field_object.name] + else: + return None + + +def get_start_coordinates(cell_interval, field_object, ghost_layers_to_include, required_ghost_layers): + ci = normalise_cell_interval(cell_interval, field_object) + if ci is None: + return [-ghost_layers_to_include - required_ghost_layers] * field_object.spatial_dimensions + else: + assert ghost_layers_to_include == 0 + return [sp.Symbol(f"{ci}.{coord_name}Min()") - required_ghost_layers for coord_name in ('x', 'y', 'z')] + +def get_end_coordinates(cell_interval, field_object, ghost_layers_to_include, required_ghost_layers): + ci = normalise_cell_interval(cell_interval, field_object) + if ci is None: + shape_names = ['xSize()', 'ySize()', 'zSize()'][:field_object.spatial_dimensions] + offset = 2 * ghost_layers_to_include + 2 * required_ghost_layers + return [f"int64_c({field_object.name}->{e}) + {offset}" for e in shape_names] + else: + assert ghost_layers_to_include == 0 + return [f"int64_c({ci}.{coord_name}Size()) + {2 * required_ghost_layers}" + for coord_name in ('x', 'y', 'z')] + + def generate_declaration(kernel_info, target=Target.CPU): """Generates the declaration of the kernel function""" target = translate_target(target) @@ -308,34 +337,6 @@ def generate_call(ctx, kernel, ghost_layers_to_include=0, cell_interval=None, st required_ghost_layers = max(max(kernel_ghost_layers)) kernel_call_lines = [] - - def get_cell_interval(field_object): - if isinstance(cell_interval, str): - return cell_interval - elif isinstance(cell_interval, dict): - return cell_interval[field_object.name] - else: - return None - - def get_start_coordinates(field_object): - ci = get_cell_interval(field_object) - if ci is None: - return [-ghost_layers_to_include - required_ghost_layers] * field_object.spatial_dimensions - else: - assert ghost_layers_to_include == 0 - return [sp.Symbol(f"{ci}.{coord_name}Min()") - required_ghost_layers for coord_name in ('x', 'y', 'z')] - - def get_end_coordinates(field_object): - ci = get_cell_interval(field_object) - if ci is None: - shape_names = ['xSize()', 'ySize()', 'zSize()'][:field_object.spatial_dimensions] - offset = 2 * ghost_layers_to_include + 2 * required_ghost_layers - return [f"int64_c({field_object.name}->{e}) + {offset}" for e in shape_names] - else: - assert ghost_layers_to_include == 0 - return [f"int64_c({ci}.{coord_name}Size()) + {2 * required_ghost_layers}" - for coord_name in ('x', 'y', 'z')] - for param in ast_params: if param.is_field_parameter and FieldType.is_indexed(param.fields[0]): continue @@ -345,7 +346,7 @@ def generate_call(ctx, kernel, ghost_layers_to_include=0, cell_interval=None, st if field.field_type == FieldType.BUFFER: kernel_call_lines.append(f"{param.symbol.dtype} {param.symbol.name} = {param.field_name};") else: - coordinates = get_start_coordinates(field) + coordinates = get_start_coordinates(cell_interval, field, ghost_layers_to_include, required_ghost_layers) actual_gls = f"int_c({param.field_name}->nrOfGhostLayers())" coord_set = set(coordinates) coord_set = sorted(coord_set, key=lambda e: str(e)) @@ -373,7 +374,8 @@ def generate_call(ctx, kernel, ghost_layers_to_include=0, cell_interval=None, st coord = param.symbol.coordinate field = param.fields[0] type_str = param.symbol.dtype.c_name - shape = f"{type_str}({get_end_coordinates(field)[coord]})" + end_coord = get_end_coordinates(cell_interval, field, ghost_layers_to_include, required_ghost_layers)[coord] + shape = f"{type_str}({end_coord})" assert coord < 3 max_value = f"{field.name}->{('x', 'y', 'z')[coord]}SizeWithGhostLayer()" kernel_call_lines.append(f"WALBERLA_ASSERT_GREATER_EQUAL({max_value}, {shape})") @@ -711,8 +713,47 @@ def nested_class_method_definition_prefix(ctx, nested_class_name): return f"{outer_class}::{nested_class_name}" -@jinja2_context_decorator -def generate_parameter_registration(ctx, kernel_infos, parameter_registration): +def generate_getter(kernel_infos, parameter_registration=None): + if not isinstance(kernel_infos, Iterable): + kernel_infos = [kernel_infos] + + params_to_skip = [] + result = [] + for kernel_info in kernel_infos: + for param in kernel_info.parameters: + if not param.is_field_parameter and param.symbol.name not in params_to_skip: + if parameter_registration and param.symbol.name in parameter_registration.scaling_info: + continue + dtype = param.symbol.dtype + name = param.symbol.name + code_line = f"inline {dtype} get{name.capitalize()}() const {{ return {name}_; }}" + result.append(code_line) + params_to_skip.append(param.symbol.name) + + return "\n".join(result) + + +def generate_setter(kernel_infos, parameter_registration=None): + if not isinstance(kernel_infos, Iterable): + kernel_infos = [kernel_infos] + + params_to_skip = [] + result = [] + for kernel_info in kernel_infos: + for param in kernel_info.parameters: + if not param.is_field_parameter and param.symbol.name not in params_to_skip: + if parameter_registration and param.symbol.name in parameter_registration.scaling_info: + continue + dtype = param.symbol.dtype + name = param.symbol.name + code_line = f"inline void set{name.capitalize()}(const {dtype} value) {{ {name}_ = value; }}" + result.append(code_line) + params_to_skip.append(param.symbol.name) + + return "\n".join(result) + + +def generate_parameter_registration(kernel_infos, parameter_registration): if parameter_registration is None: return "" if not isinstance(kernel_infos, Iterable): @@ -731,8 +772,7 @@ def generate_parameter_registration(ctx, kernel_infos, parameter_registration): return "\n".join(result) -@jinja2_context_decorator -def generate_constructor(ctx, kernel_infos, parameter_registration): +def generate_constructor(kernel_infos, parameter_registration): if parameter_registration is None: return "" if not isinstance(kernel_infos, Iterable): @@ -752,8 +792,7 @@ def generate_constructor(ctx, kernel_infos, parameter_registration): return "\n".join(result) -@jinja2_context_decorator -def generate_field_strides(ctx, kernel_info): +def generate_field_strides(kernel_info): result = [] for param in kernel_info.parameters: if param.is_field_stride: @@ -835,3 +874,5 @@ def add_pystencils_filters_to_jinja_env(jinja_env): jinja_env.filters['list_of_expressions'] = generate_list_of_expressions jinja_env.filters['field_type'] = field_type jinja_env.filters['generate_field_strides'] = generate_field_strides + jinja_env.filters['generate_getter'] = generate_getter + jinja_env.filters['generate_setter'] = generate_setter diff --git a/python/pystencils_walberla/templates/Sweep.tmpl.h b/python/pystencils_walberla/templates/Sweep.tmpl.h index f0f802288e4dd3a8b5835c7f5ad4960892ad6145..bebbe855c33cf00fc825ca51d5a057f05fa079a1 100644 --- a/python/pystencils_walberla/templates/Sweep.tmpl.h +++ b/python/pystencils_walberla/templates/Sweep.tmpl.h @@ -26,9 +26,7 @@ {%- elif target is equalto 'gpu' -%} #include "gpu/GPUField.h" #include "gpu/GPUWrapper.h" -{% if inner_outer_split -%} -#include "gpu/ParallelStreams.h" -{%- endif %} +{%if inner_outer_split %} #include "gpu/ParallelStreams.h" {% endif %} {%- endif %} #include "field/SwapableCompare.h" #include "domain_decomposition/BlockDataID.h" @@ -132,7 +130,7 @@ public: configured_ = true; } {% else %} - void configure( const shared_ptr<StructuredBlockStorage> & blocks, IBlock * block ){} + void configure( const shared_ptr<StructuredBlockStorage> & /*blocks*/, IBlock * /*block*/ ){} {% endif %} {% if inner_outer_split %} @@ -161,23 +159,23 @@ public: {%endif%} } - {{kernel|generate_members|indent(3)}} + {% endif %} -private: + {{kernel|generate_getter()|indent(3)}} + {{kernel|generate_setter()|indent(3)}} - {%if target is equalto 'gpu' %} gpu::ParallelStreams parallelStreams_; {% endif %} +private: + {%if target is equalto 'gpu' and inner_outer_split %} gpu::ParallelStreams parallelStreams_; {% endif %} + {{kernel|generate_members|indent(3)}} + {% if inner_outer_split %} Cell outerWidth_; std::vector<CellInterval> layers_; - {% if block_offset -%} - bool configured_; {% endif %} - {% else %} - {{ kernel|generate_members|indent(3) }} + {% if block_offset -%} bool configured_; {% endif %} -{% endif %} }; diff --git a/python/pystencils_walberla/templates/SweepCollection.tmpl.h b/python/pystencils_walberla/templates/SweepCollection.tmpl.h index 2a89fef5e4c6c825443db51ed1acc3a6d02ffaa8..004f975b4ed98bf1a0b8026821ed72c2792cc3fb 100644 --- a/python/pystencils_walberla/templates/SweepCollection.tmpl.h +++ b/python/pystencils_walberla/templates/SweepCollection.tmpl.h @@ -287,6 +287,9 @@ class {{class_name}} } {%endif%} + {{kernel_list|generate_getter(parameter_registration=parameter_scaling)|indent(3)}} + {{kernel_list|generate_setter(parameter_registration=parameter_scaling)|indent(3)}} + private: shared_ptr< StructuredBlockStorage > blocks_; {{kernel_list|generate_members(parameter_registration=parameter_scaling)|indent(4)}}