Commit 1d2747e4 authored by Michael Zikeli

Apply auto formatting to config helper script and improve output style.

parent ccfde436
Pipeline #77843 failed
@@ -3,8 +3,11 @@ from math import floor, ceil, log2, prod
from typing import Tuple, List

import numpy as np


# FIXME: Probably discard this method as it is not helpful to find problem sizes unequal to the initial one.
def partition_domain_approximate_N(
    N, Dx, Dy, Dz, verbose: bool = False
) -> Tuple[int, int, int]:
    """
    Partitions N data points among a 3D domain split into Dx, Dy, Dz subdomains.
@@ -28,14 +31,16 @@ def partition_domain_approximate_N(N, Dx, Dy, Dz, verbose: bool = False) -> Tupl
    N_sub = N / D_total  # This may not be an integer

    # Compute approximate cube root distribution
    cube_root = (N_sub) ** (1 / 3)
    n = np.array([max(1, round(cube_root * (d / D_sum))) for d in D])

    # Adjust for rounding errors to ensure total points remain close to N
    total_points = np.prod(n * D)
    if verbose:
        print(
            f"ESTIMATE: I want to have {N} points, but I get {total_points} points. So a difference of {N-total_points} points."
        )

    while total_points > N:
        # Reduce the largest dimension slightly to fit within N
@@ -44,15 +49,32 @@ def partition_domain_approximate_N(N, Dx, Dy, Dz, verbose: bool = False) -> Tupl
        total_points = np.prod(n * D)
        if verbose:
            print(
                f"REDUCE: I want to have {N} points, but I get {total_points} points. So a difference of {N-total_points} points."
            )

    stay = True
    while stay and total_points < N:
        # Increase a dimension to reach N
        sorted_indices = np.argsort(n)
        condition0 = (
            D_total
            * n[sorted_indices[1]]
            * n[sorted_indices[2]]
            * (n[sorted_indices[0]] + 1)
        )
        condition1 = (
            D_total
            * n[sorted_indices[0]]
            * n[sorted_indices[2]]
            * (n[sorted_indices[1]] + 1)
        )
        condition2 = (
            D_total
            * n[sorted_indices[0]]
            * n[sorted_indices[1]]
            * (n[sorted_indices[2]] + 1)
        )
        if condition0 <= N:
            n[sorted_indices[0]] += 1
        elif condition1 <= N:
@@ -64,17 +86,23 @@ def partition_domain_approximate_N(N, Dx, Dy, Dz, verbose: bool = False) -> Tupl
        total_points = np.prod(n * D)
        if verbose:
            print(
                f"FILL-UP: I want to have {N} points, but I get {total_points} points. So a difference of {N-total_points} points."
            )

    if N - total_points != 0:
        print(
            f"The domain could not ideally be decomposed.\nThere are {N-total_points} fewer points than requested."
        )

    n = tuple(sorted(n, reverse=True))  # descending order
    return int(n[0]), int(n[1]), int(n[2])
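
The partitioning helper above can be exercised on its own. A minimal usage sketch follows; only the function signature is taken from this diff, while the module name `config_helper` is an assumption since the file name is not shown here:

    # Hedged usage sketch; module name is hypothetical.
    from config_helper import partition_domain_approximate_N

    # Distribute roughly one million points over a 4 x 2 x 2 process grid.
    nx, ny, nz = partition_domain_approximate_N(1_000_000, 4, 2, 2, verbose=True)
    print(nx, ny, nz, "cells per subdomain along x, y, z")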

def suggest_scaling_appropriate_sizes_around_N(
    N: int, search_range: int = 2
) -> Tuple[int]:
    """
    Provides a set of numbers around target number `N` that are especially suited for scaling runs, in descending order.
    For scaling, multiples of powers of two are favorable, so that the problem can be bisected as often as possible.
@@ -94,25 +122,26 @@ def suggest_scaling_appropriate_sizes_around_N(N: int, search_range: int = 2) ->
    Returns:
        Tuple[int]: List of candidate problem sizes.
    """
    first_candidate = 2 ** max(1, floor(log2(N)) - search_range)
    candidates = [
        first_candidate * factor for factor in range(1, 2 ** (search_range + 1))
    ]
    return tuple(reversed(candidates))
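
The candidate construction is exactly the two lines above, so its behaviour can be checked by hand. A small standalone re-implementation mirroring this hunk:

    from math import floor, log2

    def scaling_sizes(N: int, search_range: int = 2):
        # Mirror of the hunk above: take the power of two `search_range`
        # halvings below N, then all its multiples up to just under
        # 2 ** (floor(log2(N)) + 1).
        first_candidate = 2 ** max(1, floor(log2(N)) - search_range)
        candidates = [
            first_candidate * factor for factor in range(1, 2 ** (search_range + 1))
        ]
        return tuple(reversed(candidates))

    # N = 1000, search_range = 2: floor(log2(1000)) = 9, so the base is 2**7 = 128
    # and its multiples 128..896 are returned in descending order.
    print(scaling_sizes(1000))  # (896, 768, 640, 512, 384, 256, 128)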

def suggest_best_3d_problem_decompositions_for_initial_single_node_performance_test(
    max_number_of_nodes: int,
    processes_per_node: int,
    memory: float,
    precision: int,
    size_q: int,
    safety_factor: float,
    name: str,
    prioritize_balanced_decompositions: bool = False,
    max_number_of_candidates: int = 6,
    min_distance_between_candidates: float = 2.0,
    debug: bool = False,
) -> Tuple[np.ndarray, np.ndarray]:
    """
    Suggests a set of 3D problem decompositions that are especially suited for single node performance tests.
@@ -147,7 +176,10 @@ def suggest_best_3d_problem_decompositions_for_initial_single_node_performance_t
    # - create all possible combinations of candidates, i.e., the number of combinations is binomial_coefficient(k=3, n=len(n_j_candidates))
    # decomposition_candidates = np.argsort(np.array(combinations_with_replacement(n_j_candidates, 3)), axis=1)
    list_of_scaling_processes = processes_per_node * 2 ** np.arange(
        ceil(log2(max_number_of_nodes + 1))
    )

    def valid_candidate_generator():
        # - check which candidate combinations suffice the memory limit
        # - check if I can bisect my problem until P_max
@@ -165,11 +197,15 @@ def suggest_best_3d_problem_decompositions_for_initial_single_node_performance_t
    # [candidate for _, candidate in zip(range(max_number_of_candidates), valid_candidate_generator())]
    # )
    valid_n_j_candidates = np.array(list(valid_candidate_generator()))
    reordered_candidates_for_scaling = valid_n_j_candidates[
        np.argsort(np.ptp(valid_n_j_candidates, axis=1))
    ]
    # valid_candidates_for_scaling = valid_n_j_candidates[np.argsort(np.prod(valid_n_j_candidates, axis=1))[::-1][:max_number_of_candidates]]

    def thin_out_candidates(
        candidates: np.ndarray, min_distance_between_candidates: float
    ) -> np.ndarray:
        """
        Thin out candidates by removing those that are too close to each other based on the min_distance_between_candidates.
        Prioritize keeping more equally distributed candidates.
@@ -177,78 +213,179 @@ def suggest_best_3d_problem_decompositions_for_initial_single_node_performance_t
        # FIXME what I actually want is not to have a relative change from one to the other. I want the memory utilization to be uniformly distributed or rather normally with the peak at the max utilization.
        thinned_candidates = []
        for candidate in candidates:
            if all(
                np.prod(candidate) / np.prod(existing) > min_distance_between_candidates
                or np.prod(existing) / np.prod(candidate)
                > min_distance_between_candidates
                for existing in thinned_candidates
            ):
                thinned_candidates.append(candidate)
        return np.array(thinned_candidates)

    # Thin out valid_candidates_for_scaling
    reordered_candidates_for_scaling = thin_out_candidates(
        reordered_candidates_for_scaling, min_distance_between_candidates
    )

    # Reorder the thinned candidates to prioritize balance
    # - discard all but the first `max_number_of_candidates` combinations whose product is closest to N_max
    valid_candidates_for_scaling = reordered_candidates_for_scaling[
        np.argsort(np.prod(reordered_candidates_for_scaling, axis=1))[::-1][
            :max_number_of_candidates
        ]
    ]
    reordered_candidates_for_scaling = valid_candidates_for_scaling[
        np.argsort(np.ptp(valid_candidates_for_scaling, axis=1))
    ]
    # - sort them to be as equally distributed as possible
    # reordered_candidates_for_scaling = valid_candidates_for_scaling[np.argsort(np.ptp(valid_candidates_for_scaling, axis=1))]
    reordered_candidates_for_scaling = valid_candidates_for_scaling[
        np.argsort(np.ptp(valid_candidates_for_scaling, axis=1))
    ]
    # reordered_candidates_for_scaling = valid_n_j_candidates[np.argsort(np.ptp(valid_n_j_candidates, axis=1))[:max_number_of_candidates]]

    # - denote if P_max can be included or not
    index_of_scaling_candidates_that_allow_to_include_P_max = np.where(
        np.prod(valid_candidates_for_scaling, axis=1)
        % (processes_per_node * max_number_of_nodes)
        == 0
    )[0]
    index_of_scaling_candidates_that_allow_to_include_P_max_reordered = np.where(
        np.prod(reordered_candidates_for_scaling, axis=1)
        % (processes_per_node * max_number_of_nodes)
        == 0
    )[0]

    # DEBUG block
    if debug:
        relation_factor_n = np.max(valid_candidates_for_scaling[0])
        print("\n", 50 * "-", "\nInformation for best suited candidate on ", name)
        print(f"It is possible to allocate a total of N={N_max:.3e} cells.")
        print(f"It is possible to allocate a total of n={n_max:.3e} cells per process.")
        if not prioritize_balanced_decompositions:
            utilization = (
                processes_per_node * np.prod(valid_candidates_for_scaling, axis=1)
            ) / N_max
            unbalance_factor = np.maximum(
                0.01, np.ptp(valid_candidates_for_scaling, axis=1) / relation_factor_n
            )
            # size_order_score = utilization / unbalance_factor
            # print("Utilization [without reordering] is ", utilization, " %.")
            # print("Unbalance factor [without reordering] is ", unbalance_factor, " %.")
            # print("Total score [without reordering] is ", size_order_score, "." )
            # print(
            #     "All available nodes can be included for candidates of index [without reordering]: ",
            #     index_of_scaling_candidates_that_allow_to_include_P_max,
            # )
            print(
                *[
                    f"""{i:2}: {tuple(elem)},\t"""
                    f"""# N={processes_per_node * np.prod(elem):.3e} ~ {util:5.1%} | """
                    f"""{balance:5.1%}"""
                    f"""\t{'# ' + str(max_number_of_nodes) + ' nodes included' if i in index_of_scaling_candidates_that_allow_to_include_P_max else ''}"""
                    for i, (util, balance, elem) in enumerate(
                        zip(utilization, unbalance_factor, valid_candidates_for_scaling)
                    )
                ],
                sep="\n",
            )
        elif prioritize_balanced_decompositions:
            utilization_reordered = (
                processes_per_node * np.prod(reordered_candidates_for_scaling, axis=1)
            ) / N_max
            unbalance_factor_reordered = np.maximum(
                0.01,
                np.ptp(reordered_candidates_for_scaling, axis=1) / relation_factor_n,
            )
            # balance_order_score = utilization_reordered / unbalance_factor_reordered
            # print(
            #     "Utilization [with reordering] is ",
            #     utilization_reordered,
            #     " %.",
            # )
            # print(
            #     "Unbalance factor [with reordering] is ",
            #     unbalance_factor_reordered,
            #     " %.",
            # )
            # print("Total score [with reordering] is ", balance_order_score, "." )
            # print(
            #     "All available nodes can be included for candidates of index [with reordering]: ",
            #     index_of_scaling_candidates_that_allow_to_include_P_max_reordered,
            # )
            print(
                *[
                    f"""{i:2}: {tuple(elem)},\t"""
                    f"""# N={processes_per_node * np.prod(elem):.3e} ~ {util:5.1%} | """
                    f"""{balance:5.1%}"""
                    f"""\t{'# ' + str(max_number_of_nodes) + ' nodes included' if i in index_of_scaling_candidates_that_allow_to_include_P_max_reordered else ''}"""
                    for i, (util, balance, elem) in enumerate(
                        zip(
                            utilization_reordered,
                            unbalance_factor_reordered,
                            reordered_candidates_for_scaling,
                        )
                    )
                ],
                sep="\n",
            )
        print(
            "Use 3-5 of these candidates for single node performance tests. Then use the best one for scaling."
        )
    # 100 nodes included

    # TODO create functions out of the sub-steps
    if prioritize_balanced_decompositions:
        return (
            reordered_candidates_for_scaling,
            index_of_scaling_candidates_that_allow_to_include_P_max_reordered,
        )
    else:
        return (
            valid_candidates_for_scaling,
            index_of_scaling_candidates_that_allow_to_include_P_max,
        )
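
The thinning criterion of `thin_out_candidates` is fully visible in the hunk above, so it can be checked in isolation. A small standalone re-implementation with made-up candidate boxes (illustrative values only):

    import numpy as np

    def thin_out(candidates, min_distance):
        # Keep a candidate only if its cell count differs from every kept one
        # by more than the given factor (mirrors the comprehension above).
        kept = []
        for candidate in candidates:
            if all(
                np.prod(candidate) / np.prod(existing) > min_distance
                or np.prod(existing) / np.prod(candidate) > min_distance
                for existing in kept
            ):
                kept.append(candidate)
        return np.array(kept)

    boxes = [(8, 8, 8), (8, 8, 7), (8, 6, 6), (4, 4, 4)]  # products: 512, 448, 288, 64
    print(thin_out(boxes, 1.5))
    # (8, 8, 7) is dropped: 512/448 ~ 1.14 lies within a factor of 1.5 of (8, 8, 8);
    # the other boxes survive because their products differ by more than 1.5x.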

if __name__ == "__main__":
    # Example usage
    for (
        max_number_of_nodes,
        processes_per_node,
        memory,
        precision,
        size_q,
        safety_factor,
        name,
    ) in [
        # ( 100, 112, 2.56e11, 8, 27, 1.4, "MareNostrum5-GPP"),
        # ( 100, 1, 6.40e10, 8, 19, 1.2, "MareNostrum5-ACC"),  # NOTE: you have 4 times the GPUs but each one has its own memory
        # ( 100, 1, 6.40e10, 8, 27, 1.2, "MareNostrum5-ACC"),  # NOTE: you have 4 times the GPUs but each one has its own memory
        # ( 32, 48, 3.20e10, 8, 27, 1.4, "MareNostrum5-ACC"),  # NOTE: you have 4 times the GPUs but each one has its own memory
        # ( 512, 128, 2.56e11, 8, 27, 1.4, "Q27-LUMI-C"),
        # ( 512, 128, 2.56e11, 8, 19, 1.4, "Q19-LUMI-C"),
        # ( 512, 128, 2.56e11, 8, 15, 1.4, "Q15-LUMI-C"),
        # ( 128, 128, 5.12e11, 8, 27, 1.4, "LUMI-C-big"),
        # (1024, 2, 1.28e11, 8, 19, 1.2, "LUMI-G"),  # NOTE: you have 8 times the GPUs but each one has its own memory
        # (1024, 2, 1.28e11, 8, 26, 1.2, "MTW-LUMI-G"),  # NOTE: you have 8 times the GPUs but each one has its own memory
        (128, 128, 1.00e09, 8, 19, 1.2, "Deucalion-ARM"),  # FIXME add the right parameters for Deucalion
    ]:
        _, _ = (
            suggest_best_3d_problem_decompositions_for_initial_single_node_performance_test(
                max_number_of_nodes,
                processes_per_node,
                memory,
                precision,
                size_q,
                safety_factor,
                name,
                max_number_of_candidates=20,
                min_distance_between_candidates=1.2,
                prioritize_balanced_decompositions=False,
                debug=True,
            )
        )
    # Fluid Q19
    # Temperature Q7
\ No newline at end of file
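
For a single machine outside the loop above, the suggestion helper can also be called directly with keyword arguments. A hedged sketch; the keyword names follow the signature shown in this diff, while the module name `config_helper`, the concrete parameter values, and the commented interpretations are illustrative assumptions only, since the internal memory computation is not part of the shown hunks:

    # Illustrative only: module name and parameter values are assumptions.
    from config_helper import (
        suggest_best_3d_problem_decompositions_for_initial_single_node_performance_test,
    )

    candidates, full_node_indices = (
        suggest_best_3d_problem_decompositions_for_initial_single_node_performance_test(
            max_number_of_nodes=128,
            processes_per_node=128,
            memory=2.56e11,     # usable memory per node (assumed meaning)
            precision=8,        # bytes per value, e.g. double precision
            size_q=19,          # stencil size, e.g. Q19 as in the list above
            safety_factor=1.4,  # head room factor (assumed meaning)
            name="ExampleMachine",
            max_number_of_candidates=6,
            debug=True,
        )
    )
    # `candidates` holds the suggested (nx, ny, nz) cells-per-process boxes;
    # `full_node_indices` marks those whose total size is divisible by the
    # full machine's process count, so the largest node count can be included.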