From cc8dffaa1e12c54225aafca16a3b2457e0c42b7f Mon Sep 17 00:00:00 2001
From: Frederik Hennig <frederik.hennig@fau.de>
Date: Wed, 12 Mar 2025 11:01:46 +0100
Subject: [PATCH] Toward HIP support in the code generator.

 - Set up separate CPU and HIP targets for the LBM test suite
 - Make the target of the codegen script configurable via the command line
 - Depend on the new `Target.HIP` in pystencils
 - Allow a custom requirements file for the codegen venv
 - Add a debug build config to walberla.codegen
---
 .gitignore                                  |  3 +-
 cmake/PrepareSFG.cmake                      | 10 ++-
 codegen-requirements.txt                    |  2 +-
 noxfile.py                                  |  2 +
 src/walberla/codegen/api.py                 |  4 +-
 src/walberla/codegen/boundaries/freeslip.py | 26 +++++---
 src/walberla/codegen/build_config.py        |  6 ++
 src/walberla/codegen/sweep.py               | 67 ++++++++++-----------
 tests/BasicLbmScenarios/CMakeLists.txt      | 24 ++++++--
 tests/BasicLbmScenarios/LbmAlgorithms.py    | 46 ++++++++++++--
 tests/BasicLbmScenarios/SimDomain.hpp       | 39 +++++++++++-
 tests/CMakePresets.json                     | 13 +---
 12 files changed, 168 insertions(+), 74 deletions(-)

diff --git a/.gitignore b/.gitignore
index c970c0e..d26a949 100644
--- a/.gitignore
+++ b/.gitignore
@@ -9,6 +9,7 @@
 
 #   dev environment
 **/.venv
+dev-codegen-requirements.txt
 
 #   build artifacts
 dist
@@ -25,4 +26,4 @@ coverage.xml
 CMakeUserPresets.json
 
 # scratch
-scratch
\ No newline at end of file
+scratch
diff --git a/cmake/PrepareSFG.cmake b/cmake/PrepareSFG.cmake
index 05dc07c..d1c21ac 100644
--- a/cmake/PrepareSFG.cmake
+++ b/cmake/PrepareSFG.cmake
@@ -10,6 +10,14 @@ if( WALBERLA_CODEGEN_PRIVATE_VENV )
     set(WALBERLA_CODEGEN_VENV_PATH ${CMAKE_CURRENT_BINARY_DIR}/codegen-venv CACHE PATH "Location of the virtual environment used for code generation")
     set(_venv_python_exe ${WALBERLA_CODEGEN_VENV_PATH}/bin/python)
 
+    set(
+        WALBERLA_CODEGEN_VENV_REQUIREMENTS
+        ${sfg_walberla_SOURCE_DIR}/codegen-requirements.txt
+        CACHE PATH
+        "Location of the primary requirements file for the codegen virtual environment"
+    )
+    mark_as_advanced(WALBERLA_CODEGEN_VENV_REQUIREMENTS)
+
     find_package( Python COMPONENTS Interpreter REQUIRED )
 
     if(NOT _sfg_private_venv_done)
@@ -25,7 +33,7 @@ if( WALBERLA_CODEGEN_PRIVATE_VENV )
         message( STATUS "Installing required Python packages..." )
 
         execute_process(
-            COMMAND ${_venv_python_exe} -m pip install -r ${sfg_walberla_SOURCE_DIR}/codegen-requirements.txt
+            COMMAND ${_venv_python_exe} -m pip install -r $CACHE{WALBERLA_CODEGEN_VENV_REQUIREMENTS}
             OUTPUT_QUIET
         )
 
diff --git a/codegen-requirements.txt b/codegen-requirements.txt
index a676a8e..b1d01e9 100644
--- a/codegen-requirements.txt
+++ b/codegen-requirements.txt
@@ -4,5 +4,5 @@ git+https://i10git.cs.fau.de/pycodegen/pystencils.git@v2.0-dev
 # lbmpy: feature branch for pystencils-2.0 compatibility
 git+https://i10git.cs.fau.de/pycodegen/lbmpy.git@fhennig/pystencils2.0-compat
 
-# pystencils-sfg: GPU dev branch
+# pystencils-sfg: pinned to fhennig/cuda-invoke (NOTE(review): confirm whether master was intended)
 git+https://i10git.cs.fau.de/pycodegen/pystencils-sfg.git@fhennig/cuda-invoke
diff --git a/noxfile.py b/noxfile.py
index b4db7a5..0d27261 100644
--- a/noxfile.py
+++ b/noxfile.py
@@ -1,5 +1,7 @@
 import nox
 
+nox.options.sessions = ["lint", "typecheck"]
+
 
 def editable_install(session: nox.Session):
     session.install("-r", "codegen-requirements.txt")
diff --git a/src/walberla/codegen/api.py b/src/walberla/codegen/api.py
index 43ce022..53915c9 100644
--- a/src/walberla/codegen/api.py
+++ b/src/walberla/codegen/api.py
@@ -279,8 +279,8 @@ class GhostLayerFieldPtr(GenericWalberlaField):
 
 class GpuFieldPtr(GenericWalberlaField):
     _template = cpptype(
-        "walberla::gpu::GpuField< {element_type} >",
-        "gpu/GpuField.h",
+        "walberla::gpu::GPUField< {element_type} >",
+        "gpu/GPUField.h",
     )
 
     @staticmethod
diff --git a/src/walberla/codegen/boundaries/freeslip.py b/src/walberla/codegen/boundaries/freeslip.py
index 98471c3..699feb5 100644
--- a/src/walberla/codegen/boundaries/freeslip.py
+++ b/src/walberla/codegen/boundaries/freeslip.py
@@ -1,4 +1,4 @@
-from pystencils import Field, Assignment, CreateKernelConfig
+from pystencils import Field, Assignment, CreateKernelConfig, Target
 from pystencils.types import PsStructType
 
 from lbmpy.methods import AbstractLbMethod
@@ -31,11 +31,21 @@ class FreeSlip(CustomGenerator):
         lb_method: AbstractLbMethod,
         pdf_field: Field,
         wall_normal: tuple[int, int, int] | _IrregularSentinel,
+        target: Target | None = None,
     ):
         self._name = name
         self._method = lb_method
         self._pdf_field = pdf_field
         self._wall_normal = wall_normal
+        self._target = target
+
+    @property
+    def target(self) -> Target | None:  # forwarded to CreateKernelConfig
+        return self._target
+
+    @target.setter
+    def target(self, t: Target | None):  # read when the kernel config is built
+        self._target = t
 
     def generate(self, sfg: SfgComposer) -> None:
         if self._wall_normal == self.IRREGULAR:
@@ -46,14 +56,12 @@ class FreeSlip(CustomGenerator):
     def _generate_irregular(self, sfg: SfgComposer):
         sfg.include("walberla/experimental/lbm/IrregularFreeSlip.hpp")
 
-        #   Get waLBerla build config
-        bc_obj = WalberlaIrregularFreeSlip()
-
         #   Get assignments for bc
+        bc_obj = WalberlaIrregularFreeSlip()
         bc_asm = bc_obj.get_assignments(self._method, self._pdf_field)
 
         #   Build generator config
-        bc_cfg = CreateKernelConfig()
+        bc_cfg = CreateKernelConfig(target=self._target)
         bc_cfg.index_dtype = BoundaryIndexType
         index_field = bc_obj.get_index_field()
         bc_cfg.index_field = index_field
@@ -66,7 +74,9 @@ class FreeSlip(CustomGenerator):
 
         #   Build factory
         factory_name = f"{self._name}Factory"
-        factory_crtp_base = f"walberla::experimental::lbm::IrregularFreeSlipFactory< {factory_name} >"
+        factory_crtp_base = (
+            f"walberla::experimental::lbm::IrregularFreeSlipFactory< {factory_name} >"
+        )
         index_vector = SparseIndexList(
             WalberlaIrregularFreeSlip.idx_struct_type, ref=True
         ).var("indexVector")
@@ -93,9 +103,7 @@ class FreeSlip(CustomGenerator):
                     "irregularFromIndexVector",
                 )
                 .returns(sweep_type.get_dtype())
-                .inline()(
-                    sfg.expr("return {};", sweep_type.ctor(**sweep_ctor_args))
-                ),
+                .inline()(sfg.expr("return {};", sweep_type.ctor(**sweep_ctor_args))),
             ),
         )
 
diff --git a/src/walberla/codegen/build_config.py b/src/walberla/codegen/build_config.py
index 8a0fbea..df9bd71 100644
--- a/src/walberla/codegen/build_config.py
+++ b/src/walberla/codegen/build_config.py
@@ -60,6 +60,8 @@ class WalberlaBuildConfig:
 
         if isinstance(ctx.project_info, WalberlaBuildConfig):
             return ctx.project_info
+        elif DEBUG_ENV.BUILD_CONFIG is not None:
+            return DEBUG_ENV.BUILD_CONFIG
         else:
             raise ValueError(
                 "The given SfgContext does not encapsulate a waLBerla build config object."
@@ -85,3 +87,7 @@ class WalberlaBuildConfig:
                 cfg.cpu.openmp.enable = True
 
         return cfg
+
+
+class DEBUG_ENV:  # fallback consulted by WalberlaBuildConfig.from_sfg
+    BUILD_CONFIG: WalberlaBuildConfig | None = None  # None disables the fallback
diff --git a/src/walberla/codegen/sweep.py b/src/walberla/codegen/sweep.py
index 6662be7..66b743f 100644
--- a/src/walberla/codegen/sweep.py
+++ b/src/walberla/codegen/sweep.py
@@ -437,33 +437,22 @@ class Sweep(CustomGenerator):
         else:
             cfg = CreateKernelConfig(ghost_layers=0)
 
-        target = cfg.get_target()
-
-        if target.is_gpu():
-            manual_grid: bool = cfg.gpu.get_option("manual_launch_grid")
-            if manual_grid:
-                raise ValueError(
-                    "Setting `gpu.manual_launch_grid = True` is invalid for waLBerla sweeps."
-                )
+        manual_grid: bool = cfg.gpu.get_option("manual_launch_grid")
+        if manual_grid:
+            raise ValueError(
+                "Setting `gpu.manual_launch_grid = True` is invalid for waLBerla sweeps."
+            )
 
         self._name = name
-        self._target = target
+        self._gen_config = cfg
 
         if isinstance(assignments, AssignmentCollection):
             self._assignments = assignments
         else:
             self._assignments = AssignmentCollection(assignments)  # type: ignore
-        self._gen_config = cfg
 
+        #   Set only later once the full codegen config is known
         self._glfield_type: type[GpuFieldPtr] | type[GhostLayerFieldPtr]
-        if self._target.is_gpu():
-            self._glfield_type = GpuFieldPtr
-        elif self._target.is_cpu():
-            self._glfield_type = GhostLayerFieldPtr
-        else:
-            raise ValueError(
-                f"Cannot generate sweep for target {self._gen_config.target}"
-            )
 
         #   Map from shadow field to shadowed field
         self._shadow_fields: dict[Field, Field] = dict()
@@ -471,13 +460,6 @@ class Sweep(CustomGenerator):
         #   RESULTS - unset at this point
         self._generated_class: type[GeneratedClassWrapperBase] | None = None
 
-    #   READ-ONLY PROPERTIES
-
-    @property
-    def target(self) -> Target:
-        """The target architecture of this sweep"""
-        return self._target
-
     #   CONFIGURATION
 
     @property
@@ -517,6 +499,16 @@ class Sweep(CustomGenerator):
 
     #   CODE GENERATION
 
+    def _set_field_interface(self, target: Target):
+        """Select the waLBerla field wrapper class matching the given target."""
+        if target.is_gpu():
+            self._glfield_type = GpuFieldPtr
+        elif target.is_cpu():
+            self._glfield_type = GhostLayerFieldPtr
+        else:
+            #   Report the target that was actually checked, not the sweep's own config
+            raise ValueError(f"Cannot generate sweep for target {target}")
+
     def _walberla_field(self, f: Field) -> GenericWalberlaField | IndexListBufferPtr:
         match f.field_type:
             case FieldType.GENERIC | FieldType.CUSTOM:
@@ -530,12 +522,12 @@ class Sweep(CustomGenerator):
                 )
 
     def _render_invocation(
-        self, sfg: SfgComposer, khandle: SfgKernelHandle
+        self, sfg: SfgComposer, target: Target, khandle: SfgKernelHandle
     ) -> tuple[SfgCallTreeNode, set[SfgVar]]:
         """Render and return the kernel invocation plus a set of additional parameters required
         at the call site."""
 
-        if self._target.is_gpu():
+        if target.is_gpu():
             # from pystencils.codegen.config import GpuIndexingScheme
 
             #   TODO: Want default values for properties first,
@@ -551,10 +543,19 @@ class Sweep(CustomGenerator):
             return (sfg.call(khandle), set())
 
     def generate(self, sfg: SfgComposer) -> None:
-        if self._target.is_gpu():
-            match self._target:
+        build_config = WalberlaBuildConfig.from_sfg(sfg)
+        gen_config = build_config.get_pystencils_config()
+        gen_config.override(self._gen_config)
+
+        target = gen_config.get_target()
+        self._set_field_interface(target)
+
+        if target.is_gpu():
+            match target:
                 case Target.CUDA:
                     sfg.use_cuda()
+                case Target.HIP:
+                    sfg.use_hip()
                 case _:
                     assert False, "unexpected GPU target"
 
@@ -562,12 +563,10 @@ class Sweep(CustomGenerator):
 
         assignments = BlockforestParameters.process(self._assignments)
 
-        build_config = WalberlaBuildConfig.from_sfg(sfg)
-        gen_config = build_config.get_pystencils_config()
-        gen_config.override(self._gen_config)
-
         khandle = knamespace.create(assignments, self._name, gen_config)
-        ker_invocation, ker_call_site_params = self._render_invocation(sfg, khandle)
+        ker_invocation, ker_call_site_params = self._render_invocation(
+            sfg, target, khandle
+        )
 
         all_fields: dict[str, FieldInfo] = {
             f.name: FieldInfo(
diff --git a/tests/BasicLbmScenarios/CMakeLists.txt b/tests/BasicLbmScenarios/CMakeLists.txt
index 468ce07..3c619ac 100644
--- a/tests/BasicLbmScenarios/CMakeLists.txt
+++ b/tests/BasicLbmScenarios/CMakeLists.txt
@@ -1,6 +1,20 @@
-add_executable( TestBasicLbmScenarios TestBasicLbmScenarios.cpp )
-walberla_generate_sources( TestBasicLbmScenarios SCRIPTS LbmAlgorithms.py )
-target_link_libraries( TestBasicLbmScenarios PRIVATE walberla::core walberla::blockforest walberla::field walberla::geometry walberla::experimental )
-add_test( NAME TestBasicLbmScenarios COMMAND TestBasicLbmScenarios )
+add_executable( TestBasicLbmScenariosCPU TestBasicLbmScenarios.cpp )
+walberla_generate_sources( TestBasicLbmScenariosCPU SCRIPTS LbmAlgorithms.py SCRIPT_ARGS --target=cpu )
+target_link_libraries( TestBasicLbmScenariosCPU PRIVATE walberla::core walberla::blockforest walberla::field walberla::geometry walberla::experimental )
+add_test( NAME TestBasicLbmScenariosCPU COMMAND TestBasicLbmScenariosCPU )
 
-add_dependencies( SfgTests TestBasicLbmScenarios )
+add_dependencies( SfgTests TestBasicLbmScenariosCPU )
+
+
+if( $CACHE{WALBERLA_BUILD_WITH_HIP} )
+    find_package(hip REQUIRED)
+
+    set( _codegen_suffixes hpp cpp )
+
+    add_executable( TestBasicLbmScenariosGPU TestBasicLbmScenarios.cpp )
+    walberla_generate_sources( TestBasicLbmScenariosGPU SCRIPTS LbmAlgorithms.py SCRIPT_ARGS --target=hip FILE_EXTENSIONS ${_codegen_suffixes} )
+    target_link_libraries( TestBasicLbmScenariosGPU PRIVATE walberla::core walberla::blockforest walberla::field walberla::gpu walberla::geometry walberla::experimental hip::host )
+    add_test( NAME TestBasicLbmScenariosGPU COMMAND TestBasicLbmScenariosGPU )
+
+    add_dependencies( SfgTests TestBasicLbmScenariosGPU )
+endif()
diff --git a/tests/BasicLbmScenarios/LbmAlgorithms.py b/tests/BasicLbmScenarios/LbmAlgorithms.py
index fab81e8..fae822e 100644
--- a/tests/BasicLbmScenarios/LbmAlgorithms.py
+++ b/tests/BasicLbmScenarios/LbmAlgorithms.py
@@ -1,7 +1,9 @@
+import argparse
+
 import sympy as sp
 from pystencilssfg import SourceFileGenerator
 
-from pystencils import fields, Field
+from pystencils import fields, Field, CreateKernelConfig, Target
 from lbmpy import (
     LBStencil,
     Stencil,
@@ -15,8 +17,36 @@ from lbmpy.macroscopic_value_kernels import macroscopic_values_setter
 from walberla.codegen import Sweep
 from walberla.codegen.boundaries import FreeSlip
 
+from walberla.codegen.build_config import DEBUG_ENV, WalberlaBuildConfig
+
+DEBUG_ENV.BUILD_CONFIG = WalberlaBuildConfig(
+    "debug", "debug", True, False, True, True, False, True, False
+)
+
+with SourceFileGenerator(keep_unknown_argv=True) as sfg:
+    parser = argparse.ArgumentParser()
+    parser.add_argument("-t", "--target", dest="target", default="cpu", type=str)
+
+    args = parser.parse_args(sfg.context.argv)
+
+    cfg = CreateKernelConfig()
+
+    match args.target:
+        case "cpu":
+            cfg.target = Target.CurrentCPU
+            sfg.code("#define LBM_SCENARIOS_CPU_BUILD true")
+        case "hip":
+            cfg.target = Target.HIP
+            sfg.code("#define LBM_SCENARIOS_GPU_BUILD true")
+            sfg.context.clang_format.skip = (
+                True  # FIXME: Use .hip file extension instead
+            )
+        case "cuda":
+            cfg.target = Target.CUDA
+            sfg.code("#define LBM_SCENARIOS_GPU_BUILD true")
+        case _:
+            raise ValueError(f"Unexpected target id: {args.target}")
 
-with SourceFileGenerator() as sfg:
     sfg.namespace("BasicLbmScenarios::gen")
 
     stencil = LBStencil(Stencil.D3Q19)
@@ -47,7 +77,7 @@ with SourceFileGenerator() as sfg:
     assert lb_update is not None
 
     with sfg.namespace("bulk"):
-        lb_update_sweep = Sweep("LbStreamCollide", lb_update)
+        lb_update_sweep = Sweep("LbStreamCollide", lb_update, cfg)
         lb_update_sweep.swap_fields(f, f_tmp)
         sfg.generate(lb_update_sweep)
 
@@ -57,7 +87,7 @@ with SourceFileGenerator() as sfg:
             velocity=u.center_vector,
             pdfs=f,
         )
-        lb_init_sweep = Sweep("LbInitFromFields", lb_init)
+        lb_init_sweep = Sweep("LbInitFromFields", lb_init, cfg)
         sfg.generate(lb_init_sweep)
 
         lb_init_constant = macroscopic_values_setter(
@@ -66,11 +96,15 @@ with SourceFileGenerator() as sfg:
             velocity=sp.symbols(f"velocity_:{d}"),
             pdfs=f,
         )
-        lb_init_constant_sweep = Sweep("LbInitConstant", lb_init_constant)
+        lb_init_constant_sweep = Sweep("LbInitConstant", lb_init_constant, cfg)
         sfg.generate(lb_init_constant_sweep)
 
     with sfg.namespace("bc_sparse"):
         irreg_freeslip = FreeSlip(
-            "FreeSlipIrregular", lb_update.method, f, wall_normal=FreeSlip.IRREGULAR
+            "FreeSlipIrregular",
+            lb_update.method,
+            f,
+            wall_normal=FreeSlip.IRREGULAR,
+            target=cfg.target,
         )
         sfg.generate(irreg_freeslip)
diff --git a/tests/BasicLbmScenarios/SimDomain.hpp b/tests/BasicLbmScenarios/SimDomain.hpp
index f263374..1777c55 100644
--- a/tests/BasicLbmScenarios/SimDomain.hpp
+++ b/tests/BasicLbmScenarios/SimDomain.hpp
@@ -37,7 +37,7 @@ struct SimDomain
 
    CommScheme comm;
 
-#if defined(WALBERLA_BUILD_WITH_GPU_SUPPORT)
+#if defined(LBM_SCENARIOS_GPU_BUILD)
    struct
    {
       const BlockDataID pdfsId;
@@ -45,6 +45,35 @@ struct SimDomain
       const BlockDataID uId;
    } gpuFields;
 
+   void initFromFields(const Vector3< real_t > force)
+   { // Runs the generated LbInitFromFields sweep, bound to the GPU field IDs, on every block.
+      gen::bulk::LbInitFromFields initialize{ gpuFields.pdfsId, gpuFields.rhoId, gpuFields.uId, force };
+
+      for (auto& b : *blocks)
+      {
+         initialize(&b);
+      }
+   }
+
+   void initConstant(const real_t rho, const Vector3< real_t > u, const Vector3< real_t > force)
+   { // Runs the generated LbInitConstant sweep (GPU pdfs field, force, rho, u) on every block.
+      gen::bulk::LbInitConstant initialize{ gpuFields.pdfsId, force, rho, u };
+
+      for (auto& b : *blocks)
+      {
+         initialize(&b);
+      }
+   }
+
+   gen::bulk::LbStreamCollide streamCollideSweep(const real_t omega, const Vector3< real_t > force)
+   { // Returns the generated LbStreamCollide sweep constructed from the GPU field IDs.
+      return { gpuFields.pdfsId, gpuFields.rhoId, gpuFields.uId, force, omega };
+   }
+
+   void sync() {
+      /* TODO: not implemented for the GPU build yet; the CPU variant of sync() is a NOP */
+   }
+
 #else
 
    void initFromFields(const Vector3< real_t > force)
@@ -74,6 +103,8 @@ struct SimDomain
 
    void sync() { /* NOP */ }
 
+#endif
+
    void forAllBlocks(std::function< void(IBlock&) > func)
    {
       for (auto& block : *blocks)
@@ -87,8 +118,6 @@ struct SimDomain
             for (uint_t x = 0; x < blocks->getNumberOfXCellsPerBlock(); ++x)
                func({ x, y, z });
    }
-
-#endif
 };
 
 struct SimDomainBuilder
@@ -108,6 +137,10 @@ struct SimDomainBuilder
       const BlockDataID uId         = field::addToStorage< VectorField_T >(sbfs, "u", real_c(0.0));
       const BlockDataID flagFieldId = field::addFlagFieldToStorage< FlagField_T >(sbfs, "flagField");
 
+#if defined(LBM_SCENARIOS_GPU_BUILD)
+      static_assert(false, "TODO: Create GPU fields");
+#endif
+
       CommScheme comm{ sbfs };
       auto pdfsPackInfo = std::make_shared< PdfsPackInfo >(pdfsId);
       comm.addPackInfo(pdfsPackInfo);
diff --git a/tests/CMakePresets.json b/tests/CMakePresets.json
index 48449c2..b187c21 100644
--- a/tests/CMakePresets.json
+++ b/tests/CMakePresets.json
@@ -15,16 +15,5 @@
                 "WALBERLA_BUILD_TESTS": false
             }
         }
-    ],
-   "testPresets": [
-    {
-        "name": "SFG Testsuite",
-        "configurePreset": "testsuite-dbg",
-        "filter": {
-            "include": {
-                "name": "TestSparseSweeps"
-            }
-        }
-    }
-   ]
+    ]
 }
-- 
GitLab