From 1e7dfcbdd34ef20e164807c67ff4f5b834105f8c Mon Sep 17 00:00:00 2001 From: Stephan Seitz <stephan.seitz@fau.de> Date: Fri, 31 Jan 2020 13:48:29 +0100 Subject: [PATCH] Make stuff more or less working --- src/pystencils_autodiff/walberla.py | 59 +++++++++-- .../wald_und_wiesen_simulation.py | 98 +++++++------------ tests/test_walberla.py | 2 +- 3 files changed, 89 insertions(+), 70 deletions(-) diff --git a/src/pystencils_autodiff/walberla.py b/src/pystencils_autodiff/walberla.py index 87d2066..0f4ac2e 100644 --- a/src/pystencils_autodiff/walberla.py +++ b/src/pystencils_autodiff/walberla.py @@ -511,10 +511,10 @@ class TimeLoop(JinjaCppFile): class ForLoop(JinjaCppFile): TEMPLATE = jinja2.Template(""" -for( {{node.loop_symbol.dtype}} {{node.loop_symbol}} = {{node.loop_start}}; {{node.loop_symbol}} <= {{node.loop_end}}; {{node.loop_symbol}}+= {{node.loop_increment}} ) { -{%- for c in children %} -{{ c | indent(3) }} -{%- endfor -%} +for( {{loop_symbol.dtype}} {{loop_symbol}} = {{loop_start}}; {{loop_symbol}} < {{loop_end}}; {{loop_symbol}} += {{loop_increment}} ) { + {%- for c in children %} + {{ c | indent(3) -}} + {% endfor %} } """) # noqa @@ -672,6 +672,27 @@ class FieldCopyFunctor(JinjaCppFile): headers = ['"cuda/FieldCopy.h"'] +class DefineKernelObjects(JinjaCppFile): + + TEMPLATE = jinja2.Template(""" +// Kernels +{% for k in kernels -%} +{{ k }} +{% endfor %} +// Execution +{{ block }} +""") # noqa + + def __init__(self, block): + self.sweeps = block.atoms(SweepOverAllBlocks) + self.kernels = [SympyAssignment(k.ast_dict.functor.symbol, k.ast_dict.functor, + is_const=False, use_auto=True) for k in self.sweeps] + ast_dict = {'block': block, + 'kernels': self.kernels, + } + JinjaCppFile.__init__(self, ast_dict) + + class AllocateAllFields(JinjaCppFile): TEMPLATE = jinja2.Template(""" @@ -849,6 +870,7 @@ class SweepCreation(JinjaCppFile): 'parameter_ids': parameter_ids, 'parameter_str': ', '.join(p.name for p in parameter_ids)} super().__init__(ast_dict) + self.symbol = TypedSymbol(self.ast_dict.sweep_class_name.lower(), self.ast_dict.sweep_class_name) @property def headers(self): @@ -858,11 +880,17 @@ class SweepCreation(JinjaCppFile): def undefined_symbols(self): return set(self.ast_dict.parameter_ids) + @property + def symbols_defined(self): + return {self.symbol} + + def __sympy__(self): + return self.symbol + class SweepOverAllBlocks(JinjaCppFile): # TEMPLATE = jinja2.Template("""std::for_each({{block_forest}}->begin(), {{block_forest}}->end(), {{functor}});""") # noqa - TEMPLATE = jinja2.Template("""auto {{sweep_class_name | lower() }} = {{functor}}; -for( auto& block : *{{block_forest}}) {{sweep_class_name | lower() }}(&block);""") # noqa + TEMPLATE = jinja2.Template("""sweep({{block_forest}}, {{sweep_class_name | lower() }});""") def __init__(self, functor: SweepCreation, block_forest): ast_dict = {'functor': functor, @@ -870,9 +898,23 @@ for( auto& block : *{{block_forest}}) {{sweep_class_name | lower() }}(&block);"" 'block_forest': block_forest} super().__init__(ast_dict) + @property + def symbols_undefined(self): + return {self.ast_dict.functor.symbol} + + @property + def required_global_declarations(self): + return ["""template < class BlockStorage_T, class Functor_T > +static inline auto sweep(walberla::shared_ptr<BlockStorage_T> blocks, Functor_T functor) -> void { + for ( auto& block : *blocks ) { + functor(&block); + } +} +"""] + class FieldCopy(JinjaCppFile): - TEMPLATE = jinja2.Template("""cuda::fieldCpy<{{ src_type }}, {{ dst_type }}>({{ block_forest }}, {{ src_id }}, {{ dst_id }});""") # noqa + TEMPLATE = jinja2.Template("""cuda::fieldCpy < {{src_type }}, {{dst_type }} > ({{block_forest }}, {{src_id }}, {{dst_id }}); """) # noqa def __init__(self, block_forest, src_id, src_field, src_gpu, dst_id, dst_field, dst_gpu): src_type = _make_field_type(src_field, src_gpu) @@ -911,10 +953,11 @@ class Communication(JinjaCppFile): Prefer temporary fields in sweeps over this class! Two full fields have higher memory usage. """ - TEMPLATE = jinja2.Template("""communication()""") + TEMPLATE = jinja2.Template("""communication();""") def __init__(self, gpu): ast_dict = {'gpu': gpu} super().__init__(ast_dict) headers = ["<algorithm>"] + headers = ["<algorithm>"] diff --git a/src/pystencils_autodiff/wald_und_wiesen_simulation.py b/src/pystencils_autodiff/wald_und_wiesen_simulation.py index 9100dfb..72f5088 100644 --- a/src/pystencils_autodiff/wald_und_wiesen_simulation.py +++ b/src/pystencils_autodiff/wald_und_wiesen_simulation.py @@ -10,7 +10,6 @@ import itertools from typing import Dict -import sympy as sp from stringcase import camelcase, pascalcase import lbmpy_walberla @@ -18,9 +17,9 @@ import pystencils import pystencils_walberla.codegen from pystencils.astnodes import Block, EmptyLine from pystencils_autodiff.walberla import ( - AllocateAllFields, CMakeLists, DefinitionsHeader, FieldCopy, InitBoundaryHandling, - LbCommunicationSetup, ResolveUndefinedSymbols, RunTimeLoop, SwapFields, SweepCreation, - SweepOverAllBlocks, TimeLoop, UniformBlockforestFromConfig, WalberlaMain, WalberlaModule) + AllocateAllFields, CMakeLists, Communication, DefineKernelObjects, DefinitionsHeader, FieldCopy, + ForLoop, InitBoundaryHandling, LbCommunicationSetup, ResolveUndefinedSymbols, SwapFields, + SweepCreation, SweepOverAllBlocks, UniformBlockforestFromConfig, WalberlaMain, WalberlaModule) class WaldUndWiesenSimulation(): @@ -52,19 +51,21 @@ class WaldUndWiesenSimulation(): self._with_gui_default = False self._boundary_kernels = {} self._boundary_handling_target = boundary_handling_target - pystencils_walberla.codegen.generate_pack_info_for_field( - self._codegen_context, - 'PackInfo', - pystencils.Field.create_generic(graph_data_handling.fields['ldc_pdf'].name, - graph_data_handling.fields['ldc_pdf'].spatial_dimensions, - graph_data_handling.fields['ldc_pdf'].dtype.numpy_dtype, - graph_data_handling.fields['ldc_pdf'].index_dimensions, - index_shape=graph_data_handling.fields['ldc_pdf'].index_shape,), - target=self._boundary_handling_target) + self._data_handling.merge_swaps_with_kernel_calls() self._packinfo_class = 'PackInfo' def _create_helper_files(self) -> Dict[str, str]: + if self._lb_rule: + pystencils_walberla.codegen.generate_pack_info_for_field( + self._codegen_context, + 'PackInfo', + pystencils.Field.create_generic(self._data_handling.fields['ldc_pdf'].name, + self._data_handling.fields['ldc_pdf'].spatial_dimensions, + self._data_handling.fields['ldc_pdf'].dtype.numpy_dtype, + self._data_handling.fields['ldc_pdf'].index_dimensions, + index_shape=self._data_handling.fields['ldc_pdf'].index_shape,), + target=self._boundary_handling_target) lbmpy_walberla.generate_lattice_model(self._codegen_context, self._lb_model_name, self._lb_rule, refinement_scaling=self._refinement_scaling) @@ -100,7 +101,6 @@ class WaldUndWiesenSimulation(): else: pdf_field_id = None - self._data_handling.merge_swaps_with_kernel_calls() call_nodes = filter(lambda x: x, [self._graph_to_sweep(c) for c in self._data_handling.call_queue]) module = WalberlaModule(WalberlaMain(Block([ @@ -109,19 +109,22 @@ class WaldUndWiesenSimulation(): Block([ field_allocations, InitBoundaryHandling(self._block_forest.blocks, - flag_field_id, - pdf_field_id, - self.boundary_conditions, - self._boundary_kernels, - self._field_allocations) + flag_field_id, + pdf_field_id, + self.boundary_conditions, + self._boundary_kernels, + self._field_allocations) if self._boundary_handling else EmptyLine(), LbCommunicationSetup(self._lb_model_name, pdf_field_id, self._packinfo_class, self._boundary_handling_target) if self._lb_rule else EmptyLine(), - *call_nodes - ]), self.parameter_config_block) + DefineKernelObjects( + Block([*call_nodes]) + ) + ]), self.parameter_config_block + ) ]))) self._codegen_context.write_file("main.cpp", str(module)) @@ -155,9 +158,17 @@ class WaldUndWiesenSimulation(): from pystencils_autodiff.graph_datahandling import KernelCall, TimeloopRun, DataTransferKind, DataTransfer if isinstance(c, KernelCall): + + if 'indexField' in [f.name for f in c.kernel.ast.fields_accessed]: + bh = next(self._bh_cycler) + return f'sweep(blocks, {camelcase(bh)});' + sweep_class_name = next(self._kernel_class_generator) + fields_accessed = [f.name for f in c.kernel.ast.fields_accessed] + c.tmp_field_swaps = list(filter( + lambda x: x[0].name in fields_accessed and x[1].name in fields_accessed, c.tmp_field_swaps)) pystencils_walberla.codegen.generate_sweep( - self._codegen_context, sweep_class_name, c.kernel.ast) + self._codegen_context, sweep_class_name, c.kernel.ast, field_swaps=c.tmp_field_swaps) rtn = SweepOverAllBlocks(SweepCreation(sweep_class_name, self._field_allocations, c.kernel.ast, @@ -165,45 +176,8 @@ class WaldUndWiesenSimulation(): self._block_forest.blocks) elif isinstance(c, TimeloopRun): - sweeps = [] - for a in c.timeloop._single_step_asts: - if isinstance(a, KernelCall): - if 'indexField' in [f.name for f in a.kernel.ast.fields_accessed]: - bh = next(self._bh_cycler) - sweeps.append(camelcase(bh)) - continue - sweep_class_name = next(self._kernel_class_generator) - pystencils_walberla.codegen.generate_sweep( - self._codegen_context, sweep_class_name, a.kernel.ast, field_swaps=a.tmp_field_swaps) - sweeps.append(SweepCreation(sweep_class_name, - self._field_allocations, - a.kernel.ast, - parameters_to_ignore=[s[1].name for s in a.tmp_field_swaps])) - - elif isinstance(c, DataTransfer): - if c.kind in (DataTransferKind.HOST_COMMUNICATION, DataTransferKind.DEVICE_COMMUNICATION): - src = self._field_allocations._cpu_allocations[c.field.name].symbol - dst = self._field_allocations._cpu_allocations[c.destination.name].symbol - rtn = SwapFields(src, dst) - elif c.kind == DataTransferKind.DEVICE_SWAP: - src = self._field_allocations._gpu_allocations[c.field.name].symbol - dst = self._field_allocations._gpu_allocations[c.destination.name].symbol - rtn = SwapFields(src, dst) - elif c.kind == DataTransferKind.HOST_TO_DEVICE: - src = self._field_allocations._cpu_allocations[c.field.name].symbol - dst = self._field_allocations._gpu_allocations[c.field.name].symbol - rtn = FieldCopy(self._block_forest.blocks, src, c.field, False, dst, c.field, True) - elif c.kind == DataTransferKind.DEVICE_TO_HOST: - src = self._field_allocations._gpu_allocations[c.field.name].symbol - dst = self._field_allocations._cpu_allocations[c.field.name].symbol - rtn = FieldCopy(self._block_forest.blocks, src, c.field, True, dst, c.field, False) - else: - rtn = None - else: - print(f'time {c}') - - loop = TimeLoop(self._block_forest.blocks, [], sweeps, [], sp.S(c.time_steps)) - rtn = Block([loop, RunTimeLoop(self._block_forest.blocks, loop, self._with_gui, self._with_gui_default)]) + sweeps = [self._graph_to_sweep(s) for s in c.timeloop._single_step_asts] + rtn = ForLoop(0, c.time_steps, sweeps) elif isinstance(c, DataTransfer): if c.kind == DataTransferKind.HOST_SWAP: @@ -222,6 +196,8 @@ class WaldUndWiesenSimulation(): src = self._field_allocations._gpu_allocations[c.field.name].symbol dst = self._field_allocations._cpu_allocations[c.field.name].symbol rtn = FieldCopy(self._block_forest.blocks, src, c.field, True, dst, c.field, False) + elif c.kind in (DataTransferKind.DEVICE_COMMUNICATION, DataTransferKind.HOST_COMMUNICATION): + rtn = Communication(self._boundary_handling_target == 'gpu') else: rtn = None else: diff --git a/tests/test_walberla.py b/tests/test_walberla.py index 40ae8cb..5870763 100644 --- a/tests/test_walberla.py +++ b/tests/test_walberla.py @@ -82,7 +82,7 @@ def test_wald_wiesen_lbm(): lbm_step = ldc_setup(domain_size=(30, 30), optimization=opt_params, fixed_loop_sizes=False, lid_velocity=lid_velocity) - # del lbm_step.data_handling.gpu_arrays.ldc_pdf_tmp + del lbm_step.data_handling.gpu_arrays.ldc_pdf_tmp sim = WaldUndWiesenSimulation(lbm_step.data_handling, ctx, -- GitLab