diff --git a/src/walberla/codegen/sweep.py b/src/walberla/codegen/sweep.py index 789fa0382860206622e923b2d87719d2f290909b..9aad9ceb1910fd569fb83ded620b0ee414f7ece1 100644 --- a/src/walberla/codegen/sweep.py +++ b/src/walberla/codegen/sweep.py @@ -32,6 +32,7 @@ from pystencilssfg.lang import ( strip_ptr_ref, SupportsVectorExtraction, ) +from pystencilssfg.ir import SfgKernelHandle, SfgCallTreeNode from .build_config import WalberlaBuildConfig from .reflection import GeneratedClassWrapperBase from .api import ( @@ -422,21 +423,31 @@ class Sweep(CustomGenerator): ): if config is not None: cfg = config.copy() + else: + cfg = CreateKernelConfig(ghost_layers=0) + + if cfg.get_option("ghost_layers") is not None: + raise ValueError( + "Specifying `ghost_layers` in your codegen config is invalid when generating a waLBerla sweep." + ) - if cfg.get_option("ghost_layers") is not None: + if ( + cfg.get_option("iteration_slice") is None + and cfg.get_option("index_field") is None + ): + cfg.ghost_layers = 0 + + target = cfg.get_target() + + if target.is_gpu(): + manual_grid: bool = cfg.gpu.get_option("manual_launch_grid") + if manual_grid: raise ValueError( - "Specifying `ghost_layers` in your codegen config is invalid when generating a waLBerla sweep." + "Setting `gpu.manual_launch_grid = True` is invalid for waLBerla sweeps." ) - if ( - cfg.get_option("iteration_slice") is None - and cfg.get_option("index_field") is None - ): - cfg.ghost_layers = 0 - else: - cfg = CreateKernelConfig(ghost_layers=0) - self._name = name + self._target = target if isinstance(assignments, AssignmentCollection): self._assignments = assignments @@ -445,9 +456,9 @@ class Sweep(CustomGenerator): self._gen_config = cfg self._glfield_type: type[GpuFieldPtr] | type[GhostLayerFieldPtr] - if self._gen_config.get_target() == Target.CUDA: + if self._target.is_gpu(): self._glfield_type = GpuFieldPtr - elif self._gen_config.get_target().is_cpu(): + elif self._target.is_cpu(): self._glfield_type = GhostLayerFieldPtr else: raise ValueError( @@ -460,6 +471,13 @@ class Sweep(CustomGenerator): # RESULTS - unset at this point self._generated_class: type[GeneratedClassWrapperBase] | None = None + # READ-ONLY PROPERTIES + + @property + def target(self) -> Target: + """The target architecture of this sweep""" + return self._target + # CONFIGURATION @property @@ -511,7 +529,35 @@ class Sweep(CustomGenerator): f"Unable to map field {f} of type {f.field_type} to a waLBerla field." ) + def _render_invocation( + self, sfg: SfgComposer, khandle: SfgKernelHandle + ) -> tuple[SfgCallTreeNode, set[SfgVar]]: + """Render and return the kernel invocation plus a set of additional parameters required + at the call site.""" + + if self._target.is_gpu(): + # from pystencils.codegen.config import GpuIndexingScheme + + # TODO: Want default values for properties first, + # to define default stream and block size values + # indexing_scheme = self._gen_config.gpu.get_option("indexing_scheme") + # if indexing_scheme == GpuIndexingScheme.Linear3D: + # block_size = sfg.gpu_api.dim3(const=True).var("gpuBlockSize") + # return (sfg.gpu_invoke(khandle, block_size=block_size), {block_size}) + # else: + return (sfg.gpu_invoke(khandle), set()) + + else: + return (sfg.call(khandle), set()) + def generate(self, sfg: SfgComposer) -> None: + if self._target.is_gpu(): + match self._target: + case Target.CUDA: + sfg.use_cuda() + case _: + assert False, "unexpected GPU target" + knamespace = sfg.kernel_namespace(f"{self._name}_kernels") assignments = BlockforestParameters.process(self._assignments) @@ -521,6 +567,7 @@ class Sweep(CustomGenerator): gen_config.override(self._gen_config) khandle = knamespace.create(assignments, self._name, gen_config) + ker_invocation, ker_call_site_params = self._render_invocation(sfg, khandle) all_fields: dict[str, FieldInfo] = { f.name: FieldInfo( @@ -544,7 +591,7 @@ class Sweep(CustomGenerator): props = SweepClassProperties() - parameters = khandle.scalar_parameters + parameters = khandle.scalar_parameters | ker_call_site_params blockforest_params = BlockforestParameters(props, block, None) parameters = blockforest_params.filter_params(parameters) @@ -597,7 +644,7 @@ class Sweep(CustomGenerator): # Extract geometry information *(blockforest_params.render_extractions(sfg)), # Invoke the kernel - sfg.call(khandle), + ker_invocation, # Perform field swaps *( shadows_cache.perform_swap(orig_name, shadow_info)