diff --git a/apps/benchmarks/CMakeLists.txt b/apps/benchmarks/CMakeLists.txt index 959f4e369f193fd85bfc97d89c9f5d4608b4bbbd..17ae659d7291558b0392bffe1b6457b82cc60449 100644 --- a/apps/benchmarks/CMakeLists.txt +++ b/apps/benchmarks/CMakeLists.txt @@ -17,6 +17,7 @@ add_subdirectory( PoiseuilleChannel ) add_subdirectory( ProbeVsExtraMessage ) add_subdirectory( SchaeferTurek ) add_subdirectory( UniformGrid ) +add_subdirectory( CompactInterpolation ) if ( WALBERLA_BUILD_WITH_PYTHON ) add_subdirectory( FieldCommunication ) @@ -26,7 +27,6 @@ if ( WALBERLA_BUILD_WITH_PYTHON ) add_subdirectory( PhaseFieldAllenCahn ) add_subdirectory( NonUniformGridCPU ) add_subdirectory( TurbulentChannel ) - add_subdirectory( CompactInterpolation ) endif() if ( WALBERLA_BUILD_WITH_CODEGEN AND WALBERLA_BUILD_WITH_GPU_SUPPORT ) diff --git a/apps/benchmarks/CompactInterpolation/CMakeLists.txt b/apps/benchmarks/CompactInterpolation/CMakeLists.txt index 101c5e3a474ccdeea5744b867208a73d76e142e9..a3333c5b8d3e3853ed9d9a82d09f17b835126afe 100644 --- a/apps/benchmarks/CompactInterpolation/CMakeLists.txt +++ b/apps/benchmarks/CompactInterpolation/CMakeLists.txt @@ -14,4 +14,4 @@ waLBerla_generate_target_from_python(NAME CompactInterpolationGenerated CompactInterpolationInfoHeader.h) waLBerla_add_executable( NAME CompactInterpolation FILES CompactInterpolation.cpp LdcSetup.h - DEPENDS blockforest boundary core gpu domain_decomposition field geometry lbm_generated python_coupling timeloop vtk CompactInterpolationGenerated ) \ No newline at end of file + DEPENDS blockforest boundary core gpu domain_decomposition field geometry lbm_generated timeloop vtk CompactInterpolationGenerated ) \ No newline at end of file diff --git a/apps/benchmarks/CompactInterpolation/CompactInterpolation.cpp b/apps/benchmarks/CompactInterpolation/CompactInterpolation.cpp index bf2d5e602fad990c78c663fa9264b3eff1175ce3..70a34ca4fb822409693d15a2eceff90a1b274701 100644 --- 
a/apps/benchmarks/CompactInterpolation/CompactInterpolation.cpp +++ b/apps/benchmarks/CompactInterpolation/CompactInterpolation.cpp @@ -280,8 +280,8 @@ int main(int argc, char** argv) timeLoop.addFuncBeforeTimeStep(vtk::writeFiles(vtkOutput), "VTK Output"); vtk::writeDomainDecomposition(blocks, "domain_decomposition", "vtk_out", "write_call", true, true, 0); } - LBMMeshRefinement.add2StepRefinementToTimeLoop(timeLoop); - //LBMMeshRefinement.addRefinementToTimeLoop(timeLoop); + //LBMMeshRefinement.add2StepRefinementToTimeLoop(timeLoop); + LBMMeshRefinement.addRefinementToTimeLoop(timeLoop); //LBMMeshRefinement.add2StepCommunicationToTimeLoop(timeLoop); diff --git a/python/lbmpy_walberla/interpolation_methods.py b/python/lbmpy_walberla/interpolation_methods.py index d4d4e346d9d6e3438733912a8ff4cddd3ba12a9b..f597342dd34c94a2f818182a4acd319d79113155 100644 --- a/python/lbmpy_walberla/interpolation_methods.py +++ b/python/lbmpy_walberla/interpolation_methods.py @@ -1,5 +1,6 @@ import sympy as sp from enum import Enum, auto +from itertools import product from pystencils import Assignment, Field from lbmpy.relaxationrates import get_shear_relaxation_rate, relaxation_rate_scaling @@ -59,8 +60,25 @@ class InterpolationConfig: NotImplementedError() - def fine_to_coarse_linear_interpolation_assignments(self, src, buffer, dir_indices): + def get_coarse_to_fine_assignments(self, src, buffer, dir_indices): + if self.coarse_to_fine_interpolation == Interpolation.Explosion: + return self.coarse_to_fine_interpolation_assignments(src, buffer, dir_indices) + #elif self.coarse_to_fine_interpolation == Interpolation.Compact: + # return self.fine_to_coarse_compact_interpolation_assignments(src, buffer, dir_indices) + else: + NotImplementedError() + + + def coarse_to_fine_interpolation_assignments(self, src, buffer, dir_indices): + assignments = [] + for o in range(8): + for i, d in enumerate(dir_indices): + assignments.append(Assignment(buffer(i + o * len(dir_indices)), src[d])) + 
return assignments + + + def fine_to_coarse_linear_interpolation_assignments(self, src, buffer, dir_indices): linear_interpol_offset = [(0,0,0), (1,0,0), (0,0,1), (1,0,1), (0,1,0), (1,1,0), (0,1,1), (1,1,1)] assignments = [] diff --git a/python/lbmpy_walberla/packing_kernels.py b/python/lbmpy_walberla/packing_kernels.py index 20feba6717b578183b28870351547828ce437ae2..8d1bd13de8ef8d7fe5093214e08d1b8d38ab93fd 100644 --- a/python/lbmpy_walberla/packing_kernels.py +++ b/python/lbmpy_walberla/packing_kernels.py @@ -136,10 +136,11 @@ class PackingKernelsCodegen: def create_nonuniform_kernel_families(self, kernels_dict=None): kernels = dict() if kernels_dict is None else kernels_dict - kernels['localCopyRedistribute'] = self.get_local_copy_redistribute_kernel_family() - kernels['localPartialCoalescence'] = self.get_local_copy_partial_coalescence_kernel_family() - kernels['unpackRedistribute'] = self.get_unpack_redistribute_kernel_family() - kernels['packPartialCoalescence'] = self.get_pack_partial_coalescence_kernel_family() + kernels['localCoarseToFine'] = self.get_local_coarse_to_fine_kernel_family() + kernels['localFineToCoarse'] = self.get_local_fine_to_coarse_kernel_family() + kernels['packCoarseToFine'] = self.get_pack_coarse_to_fine_kernel_family() + kernels['unpackCoarseToFine'] = self.get_unpack_coarse_to_fine_kernel_family() + kernels['packFineToCoarse'] = self.get_pack_fine_to_coarse_kernel_family() return kernels # --------------------------- Pack / Unpack / LocalCopy All -------------------------------------------------------- @@ -269,25 +270,50 @@ class PackingKernelsCodegen: # --------------------------- Pack / Unpack / LocalCopy Coarse to Fine --------------------------------------------- - def get_unpack_redistribute_ast(self, comm_dir, timestep): + def get_pack_coarse_to_fine_ast(self, comm_dir, timestep): + + config = replace(self.config, ghost_layers=0) + assert not all(d == 0 for d in comm_dir) + + dir_string = offset_to_direction_string(comm_dir) + 
streaming_dirs = self.get_streaming_dirs(comm_dir) + #dir_indices = sorted(self.stencil.index(d) for d in streaming_dirs) + dir_indices = [i for i in range(self.values_per_cell)] + + buffer = self._buffer(len(dir_indices) * 8) + src, _ = self._stream_out_accs(timestep) + + assignments = self.interpolation_config.get_coarse_to_fine_assignments(src, buffer, dir_indices) + + ast = create_kernel(assignments, config=config) + ast.function_name = f'packCoarseToFine_{dir_string}' + timestep_suffix(timestep) + return ast + + def get_pack_coarse_to_fine_kernel_family(self): + return self._construct_directionwise_kernel_family(self.get_pack_coarse_to_fine_ast) + + + def get_unpack_coarse_to_fine_ast(self, comm_dir, timestep): assert not all(d == 0 for d in comm_dir) dir_string = offset_to_direction_string(comm_dir) streaming_dirs = self.get_streaming_dirs(inverse_direction(comm_dir)) - dir_indices = sorted(self.stencil.index(d) for d in streaming_dirs) + #dir_indices = sorted(self.stencil.index(d) for d in streaming_dirs) + dir_indices = [i for i in range(self.values_per_cell)] if len(dir_indices) == 0: return None - buffer = self._buffer(self.values_per_cell) + + buffer = self._buffer(len(dir_indices) * 8) _, dst = self._stream_out_accs(timestep) offsets = list(product(*((0, 1) for _ in comm_dir))) assignments = [] - for o in offsets: - for i in range(self.values_per_cell): - assignments.append(Assignment(dst[i].get_shifted(*o), buffer(i))) + for o, off in enumerate(offsets): + for i, dir_idx in enumerate(dir_indices): + assignments.append(Assignment(dst[dir_idx].get_shifted(*off), buffer(i + o * len(dir_indices)))) - function_name = f'unpackRedistribute_{dir_string}' + timestep_suffix(timestep) + function_name = f'unpackCoarseToFine_{dir_string}' + timestep_suffix(timestep) iteration_slice = tuple(slice(None, None, 2) for _ in range(self.dim)) config = CreateKernelConfig(function_name=function_name, iteration_slice=iteration_slice, data_type=self.data_type, ghost_layers=0, 
allow_double_writes=True, @@ -295,11 +321,11 @@ class PackingKernelsCodegen: return create_kernel(assignments, config=config) - def get_unpack_redistribute_kernel_family(self): - return self._construct_directionwise_kernel_family(self.get_unpack_redistribute_ast, + def get_unpack_coarse_to_fine_kernel_family(self): + return self._construct_directionwise_kernel_family(self.get_unpack_coarse_to_fine_ast, exclude_time_step=Timestep.EVEN) - def get_local_copy_redistribute_ast(self, comm_dir, timestep): + def get_local_coarse_to_fine_ast(self, comm_dir, timestep): assert not all(d == 0 for d in comm_dir) ctr = [LoopOverCoordinate.get_loop_counter_symbol(i) for i in range(self.stencil.D)] @@ -339,7 +365,7 @@ class PackingKernelsCodegen: src_access = [int_div(ctr[i], 2) + o for i, o in enumerate(src[d].offsets)] assignments.append(Assignment(field_acc, src_abs.absolute_access(src_access, (d, )))) - function_name = f'localCopyRedistribute_{dir_string}' + timestep_suffix(timestep) + function_name = f'localCoarseToFine_{dir_string}' + timestep_suffix(timestep) iteration_slice = tuple(slice(None, None, 2) for _ in range(self.dim)) config = CreateKernelConfig(function_name=function_name, iteration_slice=iteration_slice, data_type=self.data_type, ghost_layers=0, allow_double_writes=True, @@ -347,19 +373,20 @@ class PackingKernelsCodegen: return create_kernel(assignments, config=config) - def get_local_copy_redistribute_kernel_family(self): - return self._construct_directionwise_kernel_family(self.get_local_copy_redistribute_ast) + def get_local_coarse_to_fine_kernel_family(self): + return self._construct_directionwise_kernel_family(self.get_local_coarse_to_fine_ast) # --------------------------- Pack / Unpack / LocalCopy Fine to Coarse --------------------------------------------- - def get_pack_partial_coalescence_ast(self, comm_dir, timestep): + def get_pack_fine_to_coarse_ast(self, comm_dir, timestep): assert not all(d == 0 for d in comm_dir) dir_string = 
offset_to_direction_string(comm_dir) streaming_dirs = self.get_streaming_dirs(comm_dir) - dir_indices = sorted(self.stencil.index(d) for d in streaming_dirs) + #dir_indices = sorted(self.stencil.index(d) for d in streaming_dirs) + dir_indices = [i for i in range(self.values_per_cell)] if len(dir_indices) == 0: return None - buffer = self._buffer(len(streaming_dirs)) + buffer = self._buffer(len(dir_indices)) src, _ = self._stream_out_accs(timestep) assignments = self.interpolation_config.get_fine_to_coarse_assignments(src, buffer, dir_indices) @@ -368,15 +395,15 @@ class PackingKernelsCodegen: config = replace(self.config, iteration_slice=iteration_slice, ghost_layers=0) ast = create_kernel(assignments, config=config) - ast.function_name = f'packPartialCoalescence_{dir_string}' + timestep_suffix(timestep) + ast.function_name = f'packFineToCoarse_{dir_string}' + timestep_suffix(timestep) return ast - def get_pack_partial_coalescence_kernel_family(self): - return self._construct_directionwise_kernel_family(self.get_pack_partial_coalescence_ast, + def get_pack_fine_to_coarse_kernel_family(self): + return self._construct_directionwise_kernel_family(self.get_pack_fine_to_coarse_ast, exclude_time_step=Timestep.ODD) #TODO not working - def get_local_copy_partial_coalescence_ast(self, comm_dir, timestep): + def get_local_fine_to_coarse_ast(self, comm_dir, timestep): assert not all(d == 0 for d in comm_dir) ctr = [LoopOverCoordinate.get_loop_counter_symbol(i) for i in range(self.stencil.D)] @@ -405,11 +432,11 @@ class PackingKernelsCodegen: config = replace(self.config, iteration_slice=iteration_slice, ghost_layers=0) ast = create_kernel(assignments, config=config) - ast.function_name = f'localPartialCoalescence_{dir_string}' + timestep_suffix(timestep) + ast.function_name = f'localFineToCoarse_{dir_string}' + timestep_suffix(timestep) return ast - def get_local_copy_partial_coalescence_kernel_family(self): - return 
self._construct_directionwise_kernel_family(self.get_local_copy_partial_coalescence_ast) + def get_local_fine_to_coarse_kernel_family(self): + return self._construct_directionwise_kernel_family(self.get_local_fine_to_coarse_ast) # ------------------------------------------ Utility --------------------------------------------------------------- diff --git a/python/lbmpy_walberla/templates/LbmStorageSpecification.tmpl.cpp b/python/lbmpy_walberla/templates/LbmStorageSpecification.tmpl.cpp index 0b2db4960c1ff886d5d88b18cc009934670760b3..611850b3c078de6dcb31430ea2414e039fe31463 100644 --- a/python/lbmpy_walberla/templates/LbmStorageSpecification.tmpl.cpp +++ b/python/lbmpy_walberla/templates/LbmStorageSpecification.tmpl.cpp @@ -75,10 +75,11 @@ namespace {{namespace}} { {{ kernels['localCopyDirection'] | generate_definitions }} {% if nonuniform -%} - {{ kernels['localCopyRedistribute'] | generate_definitions }} - {{ kernels['localPartialCoalescence'] | generate_definitions }} - {{ kernels['unpackRedistribute'] | generate_definitions }} - {{ kernels['packPartialCoalescence'] | generate_definitions }} + {{ kernels['localCoarseToFine'] | generate_definitions }} + {{ kernels['localFineToCoarse'] | generate_definitions }} + {{ kernels['packCoarseToFine'] | generate_definitions }} + {{ kernels['unpackCoarseToFine'] | generate_definitions }} + {{ kernels['packFineToCoarse'] | generate_definitions }} {%- endif %} /************************************************************************************* @@ -222,48 +223,60 @@ namespace {{namespace}} { {% if nonuniform -%} - void {{class_name}}::PackKernels::localCopyRedistribute( + void {{class_name}}::PackKernels::localCoarseToFine( {{- [ "PdfField_T * " + src_field.name, "CellInterval & srcInterval", - "PdfField_T * " + dst_field.name, "CellInterval & dstInterval", kernels['localCopyRedistribute'].kernel_selection_parameters, + "PdfField_T * " + dst_field.name, "CellInterval & dstInterval", 
kernels['localCoarseToFine'].kernel_selection_parameters, ["gpuStream_t stream"] if is_gpu else []] | type_identifier_list -}} ) const { - {{kernels['localCopyRedistribute'] | generate_call(cell_interval={src_field.name : 'srcInterval', dst_field.name : 'dstInterval'}, stream='stream') | indent(6) }} + {{kernels['localCoarseToFine'] | generate_call(cell_interval={src_field.name : 'srcInterval', dst_field.name : 'dstInterval'}, stream='stream') | indent(6) }} } - void {{class_name}}::PackKernels::localPartialCoalescence( + void {{class_name}}::PackKernels::localFineToCoarse( {{- [ "PdfField_T * " + src_field.name, "ScalarField_T * " + density.name, "VelocityField_T * " + velocity.name, "StrainRateTensorField_T * " + strain_rate_tensor.name, "real_t omega", "CellInterval & srcInterval", - "PdfField_T * " + dst_field.name, "CellInterval & dstInterval", kernels['localPartialCoalescence'].kernel_selection_parameters, + "PdfField_T * " + dst_field.name, "CellInterval & dstInterval", kernels['localFineToCoarse'].kernel_selection_parameters, ["gpuStream_t stream"] if is_gpu else []] | type_identifier_list -}} ) const { - {{kernels['localPartialCoalescence'] | generate_call(cell_interval={src_field.name : 'srcInterval', dst_field.name : 'dstInterval', density.name : 'srcInterval', velocity.name : 'srcInterval', strain_rate_tensor.name : 'srcInterval' }, stream='stream') | indent(6) }} + {{kernels['localFineToCoarse'] | generate_call(cell_interval={src_field.name : 'srcInterval', dst_field.name : 'dstInterval', density.name : 'srcInterval', velocity.name : 'srcInterval', strain_rate_tensor.name : 'srcInterval' }, stream='stream') | indent(6) }} } - void {{class_name}}::PackKernels::unpackRedistribute( + void {{class_name}}::PackKernels::packCoarseToFine( + {{- [ "PdfField_T * " + src_field.name, "ScalarField_T * " + density.name, "VelocityField_T * " + velocity.name, + "StrainRateTensorField_T * " + strain_rate_tensor.name, "real_t omega", "CellInterval & srcInterval", + 
"unsigned char * outBuffer", kernels['packCoarseToFine'].kernel_selection_parameters, + ["gpuStream_t stream = nullptr"] if is_gpu else []] + | type_identifier_list -}} + ) const + { + {{dtype}} * buffer = reinterpret_cast<{{dtype}}*>(outBuffer); + {{kernels['packCoarseToFine'] | generate_call(cell_interval={src_field.name : 'srcInterval', dst_field.name : 'dstInterval', density.name : 'srcInterval', velocity.name : 'srcInterval', strain_rate_tensor.name : 'srcInterval' }, stream='stream') | indent(6) }} + } + + void {{class_name}}::PackKernels::unpackCoarseToFine( {{- [ "PdfField_T * " + dst_field.name, "CellInterval & ci", - "unsigned char * inBuffer", kernels['unpackDirection'].kernel_selection_parameters, + "unsigned char * inBuffer", kernels['unpackCoarseToFine'].kernel_selection_parameters, ["gpuStream_t stream"] if is_gpu else []] | type_identifier_list -}} ) const { {{dtype}} * buffer = reinterpret_cast<{{dtype}}*>(inBuffer); - {{kernels['unpackRedistribute'] | generate_call(cell_interval='ci', stream='stream') | indent(6) }} + {{kernels['unpackCoarseToFine'] | generate_call(cell_interval='ci', stream='stream') | indent(6) }} } - void {{class_name}}::PackKernels::packPartialCoalescence( + void {{class_name}}::PackKernels::packFineToCoarse( {{- [ "PdfField_T * " + src_field.name, "ScalarField_T * " + density.name, "VelocityField_T * " + velocity.name, "StrainRateTensorField_T * " + strain_rate_tensor.name, "real_t omega", "CellInterval & ci", - "unsigned char * outBuffer", kernels['packPartialCoalescence'].kernel_selection_parameters, + "unsigned char * outBuffer", kernels['packFineToCoarse'].kernel_selection_parameters, ["gpuStream_t stream"] if is_gpu else []] | type_identifier_list -}} ) const { {{dtype}} * buffer = reinterpret_cast<{{dtype}}*>(outBuffer); - {{kernels['packPartialCoalescence'] | generate_call(cell_interval='ci', stream='stream') | indent(6) }} + {{kernels['packFineToCoarse'] | generate_call(cell_interval='ci', stream='stream') | indent(6) 
}} } {%- endif %} } // namespace {{namespace}} diff --git a/python/lbmpy_walberla/templates/LbmStorageSpecification.tmpl.h b/python/lbmpy_walberla/templates/LbmStorageSpecification.tmpl.h index ec17cf6c248b12edefea82b1fd9d36897405c89c..a66e87c2df63b7b08f54151351200aae270a41e8 100644 --- a/python/lbmpy_walberla/templates/LbmStorageSpecification.tmpl.h +++ b/python/lbmpy_walberla/templates/LbmStorageSpecification.tmpl.h @@ -217,9 +217,9 @@ class {{class_name}} /** * Local uniform redistribute. * */ - void localCopyRedistribute( + void localCoarseToFine( {{- [ "PdfField_T * " + src_field.name, "CellInterval & srcInterval", - "PdfField_T * " + dst_field.name, "CellInterval & dstInterval", kernels['localCopyRedistribute'].kernel_selection_parameters, + "PdfField_T * " + dst_field.name, "CellInterval & dstInterval", kernels['localCoarseToFine'].kernel_selection_parameters, ["gpuStream_t stream = nullptr"] if is_gpu else []] | type_identifier_list -}} ) const; @@ -227,20 +227,33 @@ class {{class_name}} /** * Local partial coalescence. * */ - void localPartialCoalescence( + void localFineToCoarse( {{- [ "PdfField_T * " + src_field.name, "ScalarField_T * " + density.name, "VelocityField_T * " + velocity.name, "StrainRateTensorField_T * " + strain_rate_tensor.name, "real_t omega", "CellInterval & srcInterval", - "PdfField_T * " + dst_field.name, "CellInterval & dstInterval", kernels['localPartialCoalescence'].kernel_selection_parameters, + "PdfField_T * " + dst_field.name, "CellInterval & dstInterval", kernels['localFineToCoarse'].kernel_selection_parameters, ["gpuStream_t stream = nullptr"] if is_gpu else []] | type_identifier_list -}} ) const; + + /** + * Packs populations coming from a coarse block onto the fine grid. 
+ * */ + void packCoarseToFine( + {{- [ "PdfField_T * " + src_field.name, "ScalarField_T * " + density.name, "VelocityField_T * " + velocity.name, + "StrainRateTensorField_T * " + strain_rate_tensor.name, "real_t omega", "CellInterval & ci", + "unsigned char * outBuffer", kernels['packCoarseToFine'].kernel_selection_parameters, + ["gpuStream_t stream = nullptr"] if is_gpu else []] + | type_identifier_list -}} + ) const; + + /** * Unpacks and uniformly redistributes populations coming from a coarse block onto the fine grid. * */ - void unpackRedistribute( + void unpackCoarseToFine( {{- [ "PdfField_T * " + dst_field.name, "CellInterval & ci", - "unsigned char * inBuffer", kernels['unpackRedistribute'].kernel_selection_parameters, + "unsigned char * inBuffer", kernels['unpackCoarseToFine'].kernel_selection_parameters, ["gpuStream_t stream = nullptr"] if is_gpu else []] | type_identifier_list -}} ) const; @@ -248,10 +261,10 @@ class {{class_name}} /** * Partially coalesces and packs populations streaming from a fine block into a coarse block * */ - void packPartialCoalescence( + void packFineToCoarse( {{- [ "PdfField_T * " + src_field.name, "ScalarField_T * " + density.name, "VelocityField_T * " + velocity.name, "StrainRateTensorField_T * " + strain_rate_tensor.name, "real_t omega", "CellInterval & ci", - "unsigned char * outBuffer", kernels['packPartialCoalescence'].kernel_selection_parameters, + "unsigned char * outBuffer", kernels['packFineToCoarse'].kernel_selection_parameters, ["gpuStream_t stream = nullptr"] if is_gpu else []] | type_identifier_list -}} ) const; @@ -275,7 +288,7 @@ class {{class_name}} * @return The required size of the buffer, in bytes * */ uint_t partialCoalescenceSize(CellInterval & ci, stencil::Direction dir) const { - return size(ci, dir) >> {{dimension}}; + return size(ci) >> {{dimension}}; } {%- endif %} diff --git a/src/lbm_generated/communication/NonuniformGeneratedPdfPackInfo.h 
b/src/lbm_generated/communication/NonuniformGeneratedPdfPackInfo.h index fc44cdc44e2ddb24b8f2dd9dd4ca4a3a275be05b..ed93f59e081f58ed800447080d3fd0265b0b0484 100644 --- a/src/lbm_generated/communication/NonuniformGeneratedPdfPackInfo.h +++ b/src/lbm_generated/communication/NonuniformGeneratedPdfPackInfo.h @@ -57,13 +57,16 @@ class NonuniformPackingKernelsWrapper void localCopyRedistribute(PdfField_T* srcField, CellInterval srcInterval, PdfField_T* dstField, CellInterval dstInterval, Direction dir) const = 0; - void localPartialCoalescence(PdfField_T* srcField, PartialCoalescenceMaskField* maskField, CellInterval srcInterval, + void localFineToCoarse(PdfField_T* srcField, CellInterval srcInterval, PdfField_T* dstField, CellInterval dstInterval, Direction dir) const = 0; - void unpackRedistribute(PdfField_T* dstField, CellInterval& ci, unsigned char* inBuffer, + void packCoarseToFine(PdfField_T* srcField, CellInterval& ci, + unsigned char* inBuffer, Direction dir) const = 0; + + void unpackCoarseToFine(PdfField_T* dstField, CellInterval& ci, unsigned char* inBuffer, stencil::Direction dir) const = 0; - void packPartialCoalescence(PdfField_T* srcField, PartialCoalescenceMaskField* maskField, CellInterval& ci, + void packFineToCoarse(PdfField_T* srcField, CellInterval& ci, unsigned char* outBuffer, Direction dir) const = 0; uint_t size(CellInterval& ci, Direction dir) const = 0; @@ -114,28 +117,33 @@ class NonuniformPackingKernelsWrapper< PdfField_T, false > kernels_.localCopyDirection(srcField, srcInterval, dstField, dstInterval, dir); } - void localCopyRedistribute(PdfField_T* srcField, CellInterval srcInterval, PdfField_T* dstField, + void localCoarseToFine(PdfField_T* srcField, CellInterval srcInterval, PdfField_T* dstField, CellInterval dstInterval, Direction dir) const { - kernels_.localCopyRedistribute(srcField, srcInterval, dstField, dstInterval, dir); + kernels_.localCoarseToFine(srcField, srcInterval, dstField, dstInterval, dir); } - void 
localPartialCoalescence(PdfField_T* srcField, PartialCoalescenceMaskField* maskField, CellInterval srcInterval, + void localFineToCoarse(PdfField_T* srcField, CellInterval srcInterval, PdfField_T* dstField, CellInterval dstInterval, Direction dir) const { - kernels_.localPartialCoalescence(srcField, maskField, srcInterval, dstField, dstInterval, dir); + kernels_.localFineToCoarse(srcField, srcInterval, dstField, dstInterval, dir); + } + + void packCoarseToFine(PdfField_T* srcField, CellInterval& ci, unsigned char* inBuffer, Direction dir) const + { + kernels_.packCoarseToFine(srcField, nullptr, nullptr, nullptr, 0.0, ci, inBuffer, dir); } - void unpackRedistribute(PdfField_T* dstField, CellInterval& ci, unsigned char* inBuffer, + void unpackCoarseToFine(PdfField_T* dstField, CellInterval& ci, unsigned char* inBuffer, stencil::Direction dir) const { - kernels_.unpackRedistribute(dstField, ci, inBuffer, dir); + kernels_.unpackCoarseToFine(dstField, ci, inBuffer, dir); } - void packPartialCoalescence(PdfField_T* srcField, PartialCoalescenceMaskField* maskField, CellInterval& ci, + void packFineToCoarse(PdfField_T* srcField, CellInterval& ci, unsigned char* outBuffer, Direction dir) const { - kernels_.packPartialCoalescence(srcField, nullptr, nullptr, nullptr, 0.0, ci, outBuffer, dir); + kernels_.packFineToCoarse(srcField, nullptr, nullptr, nullptr, 0.0, ci, outBuffer, dir); } uint_t size(CellInterval& ci, Direction dir) const { return kernels_.size(ci, dir); } @@ -210,46 +218,44 @@ class NonuniformPackingKernelsWrapper< PdfField_T, true > kernels_.localCopyRedistribute(srcField, srcInterval, dstField, dstInterval, dir, timestep); } - void localPartialCoalescence(PdfField_T* srcField, PartialCoalescenceMaskField* maskField, CellInterval srcInterval, + void localFineToCoarse(PdfField_T* srcField, CellInterval srcInterval, PdfField_T* dstField, CellInterval dstInterval, Direction dir) const { uint8_t timestep = dstField->getTimestep(); 
WALBERLA_ASSERT((srcField->getTimestep() & 1) ^ 1, "When the fine to coarse step is executed, the fine Field must " "be on an even timestep, while the source field could either be " "on an even or an odd state.") - kernels_.localPartialCoalescence(srcField, maskField, srcInterval, dstField, dstInterval, dir, timestep); + kernels_.localFineToCoarse(srcField, srcInterval, dstField, dstInterval, dir, timestep); } - void unpackRedistribute(PdfField_T* dstField, CellInterval& ci, unsigned char* inBuffer, + void packCoarseToFine(PdfField_T* srcField, CellInterval& ci, + unsigned char* inBuffer, Direction dir) const + { + uint8_t timestep = srcField->getTimestep(); + WALBERLA_ASSERT(((timestep & 1) ^ 1), "When the coarse to fine step is executed, the coarse Field must " + "be on an even timestep, while the destination field could either be " + "on an even or an odd state.") + kernels_.packCoarseToFine(srcField, nullptr, nullptr, nullptr, 0.0, ci, inBuffer, dir, timestep); + } + + void unpackCoarseToFine(PdfField_T* dstField, CellInterval& ci, unsigned char* inBuffer, stencil::Direction dir) const { uint8_t timestep = dstField->getTimestep(); WALBERLA_ASSERT(!((dstField->getTimestep() & 1) ^ 1), "When the course to fine step is executed, the fine Field must " "be on an odd timestep, while the source field could either be " "on an even or an odd state.") - kernels_.unpackRedistribute(dstField, ci, inBuffer, dir, timestep); + kernels_.unpackCoarseToFine(dstField, ci, inBuffer, dir, timestep); } - void packPartialCoalescence(PdfField_T* srcField, PartialCoalescenceMaskField* maskField, CellInterval& ci, + void packFineToCoarse(PdfField_T* srcField, CellInterval& ci, unsigned char* outBuffer, Direction dir) const { uint8_t timestep = srcField->getTimestep(); WALBERLA_ASSERT((srcField->getTimestep() & 1) ^ 1, "When the fine to coarse step is executed, the fine Field must " "be on an even timestep, while the source field could either be " "on an even or an odd state.") - 
kernels_.packPartialCoalescence(srcField, maskField, ci, outBuffer, dir, timestep); - } - - void zeroCoalescenceRegion(PdfField_T* dstField, CellInterval& ci, Direction dir) const - { - uint8_t timestep = dstField->getTimestep(); - kernels_.zeroCoalescenceRegion(dstField, ci, dir, timestep); - } - - void unpackCoalescence(PdfField_T* dstField, CellInterval& ci, unsigned char* inBuffer, Direction dir) const - { - uint8_t timestep = dstField->getTimestep(); - kernels_.unpackCoalescence(dstField, ci, inBuffer, dir, timestep); + kernels_.packFineToCoarse(srcField, ci, outBuffer, dir, timestep); } uint_t size(CellInterval& ci, Direction dir) const { return kernels_.size(ci, dir); } diff --git a/src/lbm_generated/communication/NonuniformGeneratedPdfPackInfo.impl.h b/src/lbm_generated/communication/NonuniformGeneratedPdfPackInfo.impl.h index f83fb47a14704be9d8f14bd6c8c416ef0b9c6b9a..2d785cdc127e6ef432f33261cf63c65cfe22ebc9 100644 --- a/src/lbm_generated/communication/NonuniformGeneratedPdfPackInfo.impl.h +++ b/src/lbm_generated/communication/NonuniformGeneratedPdfPackInfo.impl.h @@ -120,7 +120,7 @@ void NonuniformGeneratedPdfPackInfo< PdfField_T >::packDataCoarseToFineImpl( { CellInterval ci = t.second; unsigned char* bufferPtr = buffer.forward(kernels_.size(ci)); - kernels_.packAll(field, ci, bufferPtr); + kernels_.packCoarseToFine(field, ci, bufferPtr, dir); } } @@ -139,7 +139,7 @@ void NonuniformGeneratedPdfPackInfo< PdfField_T >::unpackDataCoarseToFine( CellInterval ci = t.second; uint_t size = kernels_.redistributeSize(ci); unsigned char* bufferPtr = buffer.skip(size); - kernels_.unpackRedistribute(field, ci, bufferPtr, d); + kernels_.unpackCoarseToFine(field, ci, bufferPtr, d); } } @@ -174,10 +174,10 @@ void NonuniformGeneratedPdfPackInfo< PdfField_T >::communicateLocalCoarseToFine( // TODO: This is a dirty workaround. Code-generate direct redistribution! 
std::vector< unsigned char > buffer(kernels_.size(srcInterval)); - kernels_.packAll(srcField, srcInterval, &buffer[0]); - kernels_.unpackRedistribute(dstField, dstInterval, &buffer[0], unpackDir); + kernels_.packCoarseToFine(srcField, srcInterval, &buffer[0], dir); + kernels_.unpackCoarseToFine(dstField, dstInterval, &buffer[0], unpackDir); - // kernels_.localCopyRedistribute(srcField, srcInterval, dstField, dstInterval, unpackDir); + // kernels_.localCoarseToFine(srcField, srcInterval, dstField, dstInterval, unpackDir); } } @@ -195,7 +195,7 @@ void walberla::lbm_generated::NonuniformGeneratedPdfPackInfo< PdfField_T >::unpa CellInterval ci = getCoarseBlockCoalescenceInterval(coarseReceiver, fineSender, dir, dstField); uint_t size = kernels_.size(ci, dir); unsigned char* bufferPtr = buffer.skip(size); - kernels_.unpackDirection(dstField, ci, bufferPtr, dir); + kernels_.unpackAll(dstField, ci, bufferPtr); } template< typename PdfField_T> @@ -209,8 +209,6 @@ void walberla::lbm_generated::NonuniformGeneratedPdfPackInfo< PdfField_T >::comm Block * varFineSender = const_cast< Block * >(fineSender); auto srcField = varFineSender->getData< PdfField_T >(pdfFieldID_); - auto srcCommData = varFineSender->getData< CommData_T >(commDataID_); - PartialCoalescenceMaskField * maskField = &(srcCommData->getMaskField()); auto dstField = coarseReceiver->getData<PdfField_T>(pdfFieldID_); CellInterval srcInterval; @@ -226,9 +224,10 @@ void walberla::lbm_generated::NonuniformGeneratedPdfPackInfo< PdfField_T >::comm // TODO: This is a dirty workaround. Code-generate direct redistribution! 
std::vector< unsigned char > buffer(kernels_.partialCoalescenceSize(srcInterval, dir)); - kernels_.packPartialCoalescence(srcField, maskField, srcInterval, &buffer[0], dir); - kernels_.unpackDirection(dstField, dstInterval, &buffer[0], invDir); - // kernels_.localPartialCoalescence(srcField, maskField, srcInterval, dstField, dstInterval, dir); + kernels_.packFineToCoarse(srcField, srcInterval, &buffer[0], dir); + kernels_.unpackAll(dstField, dstInterval, &buffer[0]); + //kernels_.unpackDirection(dstField, dstInterval, &buffer[0], invDir); + // kernels_.localFineToCoarse(srcField, srcInterval, dstField, dstInterval, dir); } template< typename PdfField_T> @@ -238,14 +237,11 @@ void walberla::lbm_generated::NonuniformGeneratedPdfPackInfo< PdfField_T >::pack { Block* varBlock = const_cast< Block* >(fineSender); auto srcField = varBlock->getData< PdfField_T >(pdfFieldID_); - auto commData = varBlock->getData< CommData_T >(commDataID_); - PartialCoalescenceMaskField * maskField = &(commData->getMaskField()); - CellInterval ci; srcField->getSliceBeforeGhostLayer(dir, ci, 2); uint_t size = kernels_.partialCoalescenceSize(ci, dir); unsigned char* bufferPtr = buffer.forward(size); - kernels_.packPartialCoalescence(srcField, maskField, ci, bufferPtr, dir); + kernels_.packFineToCoarse(srcField, ci, bufferPtr, dir); } /***********************************************************************************************************************