From 1e7dfcbdd34ef20e164807c67ff4f5b834105f8c Mon Sep 17 00:00:00 2001
From: Stephan Seitz <stephan.seitz@fau.de>
Date: Fri, 31 Jan 2020 13:48:29 +0100
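Subject: [PATCH] Make stuff more or less working

Rework parts of the waLBerla code generation:

* ForLoop: read the template variables directly and iterate with an
  exclusive upper bound (`<` instead of `<=`).
* Add DefineKernelObjects, which emits the sweep functor definitions
  before the block that executes them.
* SweepOverAllBlocks: call a generated `sweep(blocks, functor)` helper
  instead of defining the functor inline.
* Communication: terminate the generated call with a semicolon.
* WaldUndWiesenSimulation: generate the pack info only when an LB rule
  is set, translate TimeloopRun into a ForLoop and map communication
  transfers to Communication.
* test_wald_wiesen_lbm: delete gpu_arrays.ldc_pdf_tmp again.
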
---
 src/pystencils_autodiff/walberla.py           | 59 +++++++++--
 .../wald_und_wiesen_simulation.py             | 98 +++++++------------
 tests/test_walberla.py                        |  2 +-
 3 files changed, 89 insertions(+), 70 deletions(-)

diff --git a/src/pystencils_autodiff/walberla.py b/src/pystencils_autodiff/walberla.py
index 87d2066..0f4ac2e 100644
--- a/src/pystencils_autodiff/walberla.py
+++ b/src/pystencils_autodiff/walberla.py
@@ -511,10 +511,10 @@ class TimeLoop(JinjaCppFile):
 class ForLoop(JinjaCppFile):
 
     TEMPLATE = jinja2.Template("""
-for( {{node.loop_symbol.dtype}} {{node.loop_symbol}} = {{node.loop_start}}; {{node.loop_symbol}} <= {{node.loop_end}}; {{node.loop_symbol}}+= {{node.loop_increment}} )  {
-{%- for c in children %}
-{{ c | indent(3) }}
-{%- endfor -%}
+for( {{loop_symbol.dtype}} {{loop_symbol}} = {{loop_start}}; {{loop_symbol}} < {{loop_end}}; {{loop_symbol}} += {{loop_increment}} ) {
+   {%- for c in children %}
+   {{ c | indent(3) -}}
+   {% endfor %}
 }
 """)  # noqa
 
@@ -672,6 +672,27 @@ class FieldCopyFunctor(JinjaCppFile):
     headers = ['"cuda/FieldCopy.h"']
 
 
+class DefineKernelObjects(JinjaCppFile):
+
+    TEMPLATE = jinja2.Template("""
+// Kernels
+{% for k in kernels -%}
+{{ k }}
+{% endfor %}
+// Execution
+{{ block  }}
+""")  # noqa
+
+    def __init__(self, block):
+        self.sweeps = block.atoms(SweepOverAllBlocks)
+        self.kernels = [SympyAssignment(k.ast_dict.functor.symbol, k.ast_dict.functor,
+                                        is_const=False, use_auto=True) for k in self.sweeps]
+        ast_dict = {'block': block,
+                    'kernels': self.kernels,
+                    }
+        JinjaCppFile.__init__(self, ast_dict)
+
+
 class AllocateAllFields(JinjaCppFile):
 
     TEMPLATE = jinja2.Template("""
@@ -849,6 +870,7 @@ class SweepCreation(JinjaCppFile):
                     'parameter_ids': parameter_ids,
                     'parameter_str': ', '.join(p.name for p in parameter_ids)}
         super().__init__(ast_dict)
+        self.symbol = TypedSymbol(self.ast_dict.sweep_class_name.lower(), self.ast_dict.sweep_class_name)
 
     @property
     def headers(self):
@@ -858,11 +880,17 @@ class SweepCreation(JinjaCppFile):
     def undefined_symbols(self):
         return set(self.ast_dict.parameter_ids)
 
+    @property
+    def symbols_defined(self):
+        return {self.symbol}
+
+    def __sympy__(self):
+        return self.symbol
+
 
 class SweepOverAllBlocks(JinjaCppFile):
     # TEMPLATE = jinja2.Template("""std::for_each({{block_forest}}->begin(), {{block_forest}}->end(), {{functor}});""")  # noqa
-    TEMPLATE = jinja2.Template("""auto {{sweep_class_name | lower() }} = {{functor}};
-for( auto& block : *{{block_forest}}) {{sweep_class_name | lower() }}(&block);""")  # noqa
+    TEMPLATE = jinja2.Template("""sweep({{block_forest}}, {{sweep_class_name | lower() }});""")
 
     def __init__(self, functor: SweepCreation, block_forest):
         ast_dict = {'functor': functor,
@@ -870,9 +898,23 @@ for( auto& block : *{{block_forest}}) {{sweep_class_name | lower() }}(&block);""
                     'block_forest': block_forest}
         super().__init__(ast_dict)
 
+    @property
+    def symbols_undefined(self):
+        return {self.ast_dict.functor.symbol}
+
+    @property
+    def required_global_declarations(self):
+        return ["""template < class BlockStorage_T, class Functor_T >
+static inline auto sweep(walberla::shared_ptr<BlockStorage_T> blocks, Functor_T functor) -> void {
+   for ( auto& block : *blocks ) {
+      functor(&block);
+   }
+}
+"""]
+
 
 class FieldCopy(JinjaCppFile):
-    TEMPLATE = jinja2.Template("""cuda::fieldCpy<{{ src_type }}, {{ dst_type }}>({{ block_forest }}, {{ src_id }}, {{ dst_id }});""")  # noqa
+    TEMPLATE = jinja2.Template("""cuda::fieldCpy < {{src_type }}, {{dst_type }} > ({{block_forest }}, {{src_id }}, {{dst_id }}); """)  # noqa
 
     def __init__(self, block_forest, src_id, src_field, src_gpu, dst_id, dst_field, dst_gpu):
         src_type = _make_field_type(src_field, src_gpu)
@@ -911,10 +953,10 @@ class Communication(JinjaCppFile):
          Prefer temporary fields in sweeps over this class! Two full fields have higher memory usage.
 
     """
-    TEMPLATE = jinja2.Template("""communication()""")
+    TEMPLATE = jinja2.Template("""communication();""")
 
     def __init__(self, gpu):
         ast_dict = {'gpu': gpu}
         super().__init__(ast_dict)
 
     headers = ["<algorithm>"]
diff --git a/src/pystencils_autodiff/wald_und_wiesen_simulation.py b/src/pystencils_autodiff/wald_und_wiesen_simulation.py
index 9100dfb..72f5088 100644
--- a/src/pystencils_autodiff/wald_und_wiesen_simulation.py
+++ b/src/pystencils_autodiff/wald_und_wiesen_simulation.py
@@ -10,7 +10,6 @@
 import itertools
 from typing import Dict
 
-import sympy as sp
 from stringcase import camelcase, pascalcase
 
 import lbmpy_walberla
@@ -18,9 +17,9 @@ import pystencils
 import pystencils_walberla.codegen
 from pystencils.astnodes import Block, EmptyLine
 from pystencils_autodiff.walberla import (
-    AllocateAllFields, CMakeLists, DefinitionsHeader, FieldCopy, InitBoundaryHandling,
-    LbCommunicationSetup, ResolveUndefinedSymbols, RunTimeLoop, SwapFields, SweepCreation,
-    SweepOverAllBlocks, TimeLoop, UniformBlockforestFromConfig, WalberlaMain, WalberlaModule)
+    AllocateAllFields, CMakeLists, Communication, DefineKernelObjects, DefinitionsHeader, FieldCopy,
+    ForLoop, InitBoundaryHandling, LbCommunicationSetup, ResolveUndefinedSymbols, SwapFields,
+    SweepCreation, SweepOverAllBlocks, UniformBlockforestFromConfig, WalberlaMain, WalberlaModule)
 
 
 class WaldUndWiesenSimulation():
@@ -52,19 +51,21 @@ class WaldUndWiesenSimulation():
         self._with_gui_default = False
         self._boundary_kernels = {}
         self._boundary_handling_target = boundary_handling_target
-        pystencils_walberla.codegen.generate_pack_info_for_field(
-            self._codegen_context,
-            'PackInfo',
-            pystencils.Field.create_generic(graph_data_handling.fields['ldc_pdf'].name,
-                                            graph_data_handling.fields['ldc_pdf'].spatial_dimensions,
-                                            graph_data_handling.fields['ldc_pdf'].dtype.numpy_dtype,
-                                            graph_data_handling.fields['ldc_pdf'].index_dimensions,
-                                            index_shape=graph_data_handling.fields['ldc_pdf'].index_shape,),
-            target=self._boundary_handling_target)
+        self._data_handling.merge_swaps_with_kernel_calls()
         self._packinfo_class = 'PackInfo'
 
     def _create_helper_files(self) -> Dict[str, str]:
+
         if self._lb_rule:
+            pystencils_walberla.codegen.generate_pack_info_for_field(
+                self._codegen_context,
+                'PackInfo',
+                pystencils.Field.create_generic(self._data_handling.fields['ldc_pdf'].name,
+                                                self._data_handling.fields['ldc_pdf'].spatial_dimensions,
+                                                self._data_handling.fields['ldc_pdf'].dtype.numpy_dtype,
+                                                self._data_handling.fields['ldc_pdf'].index_dimensions,
+                                                index_shape=self._data_handling.fields['ldc_pdf'].index_shape,),
+                target=self._boundary_handling_target)
             lbmpy_walberla.generate_lattice_model(self._codegen_context, self._lb_model_name,
                                                   self._lb_rule,
                                                   refinement_scaling=self._refinement_scaling)
@@ -100,7 +101,6 @@ class WaldUndWiesenSimulation():
         else:
             pdf_field_id = None
 
-        self._data_handling.merge_swaps_with_kernel_calls()
         call_nodes = filter(lambda x: x, [self._graph_to_sweep(c) for c in self._data_handling.call_queue])
 
         module = WalberlaModule(WalberlaMain(Block([
@@ -109,19 +109,22 @@ class WaldUndWiesenSimulation():
                 Block([
                     field_allocations,
                     InitBoundaryHandling(self._block_forest.blocks,
-                                         flag_field_id,
-                                         pdf_field_id,
-                                         self.boundary_conditions,
-                                         self._boundary_kernels,
-                                         self._field_allocations)
+                                             flag_field_id,
+                                             pdf_field_id,
+                                             self.boundary_conditions,
+                                             self._boundary_kernels,
+                                             self._field_allocations)
                     if self._boundary_handling else EmptyLine(),
                     LbCommunicationSetup(self._lb_model_name,
                                          pdf_field_id,
                                          self._packinfo_class,
                                          self._boundary_handling_target)
                     if self._lb_rule else EmptyLine(),
-                    *call_nodes
-                ]), self.parameter_config_block)
+                    DefineKernelObjects(
+                        Block([*call_nodes])
+                    )
+                ]), self.parameter_config_block
+            )
         ])))
 
         self._codegen_context.write_file("main.cpp", str(module))
@@ -155,9 +158,17 @@ class WaldUndWiesenSimulation():
         from pystencils_autodiff.graph_datahandling import KernelCall, TimeloopRun, DataTransferKind, DataTransfer
 
         if isinstance(c, KernelCall):
+
+            if 'indexField' in [f.name for f in c.kernel.ast.fields_accessed]:
+                bh = next(self._bh_cycler)
+                return f'sweep(blocks, {camelcase(bh)});'
+
             sweep_class_name = next(self._kernel_class_generator)
+            fields_accessed = [f.name for f in c.kernel.ast.fields_accessed]
+            c.tmp_field_swaps = list(filter(
+                lambda x: x[0].name in fields_accessed and x[1].name in fields_accessed, c.tmp_field_swaps))
             pystencils_walberla.codegen.generate_sweep(
-                self._codegen_context, sweep_class_name, c.kernel.ast)
+                self._codegen_context, sweep_class_name, c.kernel.ast, field_swaps=c.tmp_field_swaps)
             rtn = SweepOverAllBlocks(SweepCreation(sweep_class_name,
                                                    self._field_allocations,
                                                    c.kernel.ast,
@@ -165,45 +176,8 @@ class WaldUndWiesenSimulation():
                                      self._block_forest.blocks)
 
         elif isinstance(c, TimeloopRun):
-            sweeps = []
-            for a in c.timeloop._single_step_asts:
-                if isinstance(a, KernelCall):
-                    if 'indexField' in [f.name for f in a.kernel.ast.fields_accessed]:
-                        bh = next(self._bh_cycler)
-                        sweeps.append(camelcase(bh))
-                        continue
-                    sweep_class_name = next(self._kernel_class_generator)
-                    pystencils_walberla.codegen.generate_sweep(
-                        self._codegen_context, sweep_class_name, a.kernel.ast, field_swaps=a.tmp_field_swaps)
-                    sweeps.append(SweepCreation(sweep_class_name,
-                                                self._field_allocations,
-                                                a.kernel.ast,
-                                                parameters_to_ignore=[s[1].name for s in a.tmp_field_swaps]))
-
-                elif isinstance(c, DataTransfer):
-                    if c.kind in (DataTransferKind.HOST_COMMUNICATION, DataTransferKind.DEVICE_COMMUNICATION):
-                        src = self._field_allocations._cpu_allocations[c.field.name].symbol
-                        dst = self._field_allocations._cpu_allocations[c.destination.name].symbol
-                        rtn = SwapFields(src, dst)
-                    elif c.kind == DataTransferKind.DEVICE_SWAP:
-                        src = self._field_allocations._gpu_allocations[c.field.name].symbol
-                        dst = self._field_allocations._gpu_allocations[c.destination.name].symbol
-                        rtn = SwapFields(src, dst)
-                    elif c.kind == DataTransferKind.HOST_TO_DEVICE:
-                        src = self._field_allocations._cpu_allocations[c.field.name].symbol
-                        dst = self._field_allocations._gpu_allocations[c.field.name].symbol
-                        rtn = FieldCopy(self._block_forest.blocks, src, c.field, False, dst, c.field, True)
-                    elif c.kind == DataTransferKind.DEVICE_TO_HOST:
-                        src = self._field_allocations._gpu_allocations[c.field.name].symbol
-                        dst = self._field_allocations._cpu_allocations[c.field.name].symbol
-                        rtn = FieldCopy(self._block_forest.blocks, src, c.field, True, dst, c.field, False)
-                    else:
-                        rtn = None
-                else:
-                    print(f'time {c}')
-
-            loop = TimeLoop(self._block_forest.blocks, [], sweeps, [], sp.S(c.time_steps))
-            rtn = Block([loop, RunTimeLoop(self._block_forest.blocks, loop, self._with_gui, self._with_gui_default)])
+            sweeps = [self._graph_to_sweep(s) for s in c.timeloop._single_step_asts]
+            rtn = ForLoop(0, c.time_steps, sweeps)
 
         elif isinstance(c, DataTransfer):
             if c.kind == DataTransferKind.HOST_SWAP:
@@ -222,6 +196,8 @@ class WaldUndWiesenSimulation():
                 src = self._field_allocations._gpu_allocations[c.field.name].symbol
                 dst = self._field_allocations._cpu_allocations[c.field.name].symbol
                 rtn = FieldCopy(self._block_forest.blocks, src, c.field, True, dst, c.field, False)
+            elif c.kind in (DataTransferKind.DEVICE_COMMUNICATION, DataTransferKind.HOST_COMMUNICATION):
+                rtn = Communication(self._boundary_handling_target == 'gpu')
             else:
                 rtn = None
         else:
diff --git a/tests/test_walberla.py b/tests/test_walberla.py
index 40ae8cb..5870763 100644
--- a/tests/test_walberla.py
+++ b/tests/test_walberla.py
@@ -82,7 +82,7 @@ def test_wald_wiesen_lbm():
         lbm_step = ldc_setup(domain_size=(30, 30), optimization=opt_params,
                              fixed_loop_sizes=False, lid_velocity=lid_velocity)
 
-        # del lbm_step.data_handling.gpu_arrays.ldc_pdf_tmp
+        del lbm_step.data_handling.gpu_arrays.ldc_pdf_tmp
 
         sim = WaldUndWiesenSimulation(lbm_step.data_handling,
                                       ctx,
-- 
GitLab