Commit 1d2747e4 authored by Michael Zikeli

Apply auto formatting to config helper script and improve output style.

parent ccfde436
Pipeline #77843 failed
@@ -3,8 +3,11 @@ from math import floor, ceil, log2, prod
from typing import Tuple, List

import numpy as np


# FIXME: Probably discard this method as it is not helpful to find problem sizes unequal to the initial one.
def partition_domain_approximate_N(
    N, Dx, Dy, Dz, verbose: bool = False
) -> Tuple[int, int, int]:
    """
    Partitions N data points among a 3D domain split into Dx, Dy, Dz subdomains.
@@ -28,14 +31,16 @@ def partition_domain_approximate_N(N, Dx, Dy, Dz, verbose: bool = False) -> Tupl
    N_sub = N / D_total  # This may not be an integer

    # Compute approximate cube root distribution
    cube_root = (N_sub) ** (1 / 3)
    n = np.array([max(1, round(cube_root * (d / D_sum))) for d in D])

    # Adjust for rounding errors to ensure total points remain close to N
    total_points = np.prod(n * D)
    if verbose:
        print(
            f"ESTIMATE: I want to have {N} points, but I get {total_points} points. So a difference of {N-total_points} points."
        )

    while total_points > N:
        # Reduce the largest dimension slightly to fit within N
@@ -44,15 +49,32 @@ def partition_domain_approximate_N(N, Dx, Dy, Dz, verbose: bool = False) -> Tupl
        total_points = np.prod(n * D)
        if verbose:
            print(
                f"REDUCE: I want to have {N} points, but I get {total_points} points. So a difference of {N-total_points} points."
            )

    stay = True
    while stay and total_points < N:
        # Increase a dimension to reach N
        sorted_indices = np.argsort(n)
        condition0 = (
            D_total
            * n[sorted_indices[1]]
            * n[sorted_indices[2]]
            * (n[sorted_indices[0]] + 1)
        )
        condition1 = (
            D_total
            * n[sorted_indices[0]]
            * n[sorted_indices[2]]
            * (n[sorted_indices[1]] + 1)
        )
        condition2 = (
            D_total
            * n[sorted_indices[0]]
            * n[sorted_indices[1]]
            * (n[sorted_indices[2]] + 1)
        )
        if condition0 <= N:
            n[sorted_indices[0]] += 1
        elif condition1 <= N:
@@ -64,17 +86,23 @@ def partition_domain_approximate_N(N, Dx, Dy, Dz, verbose: bool = False) -> Tupl
        total_points = np.prod(n * D)
        if verbose:
            print(
                f"FILL-UP: I want to have {N} points, but I get {total_points} points. So a difference of {N-total_points} points."
            )

    if N - total_points != 0:
        print(
            f"The domain could not ideally be decomposed.\nThere are {N-total_points} fewer points than requested."
        )

    n = tuple(sorted(n, reverse=True))  # descending order
    return int(n[0]), int(n[1]), int(n[2])
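
The partitioning helper above can be exercised on its own. A minimal usage sketch follows; only the function signature is taken from this diff, while the module name `config_helper` is an assumption since the file name is not shown here:

    # Hedged usage sketch; module name is hypothetical.
    from config_helper import partition_domain_approximate_N

    # Distribute roughly one million points over a 4 x 2 x 2 process grid.
    nx, ny, nz = partition_domain_approximate_N(1_000_000, 4, 2, 2, verbose=True)
    print(nx, ny, nz, "cells per subdomain along x, y, z")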

def suggest_scaling_appropriate_sizes_around_N(
    N: int, search_range: int = 2
) -> Tuple[int]:
    """
    Provides a set of numbers around target number `N` that are especially suited for scaling runs, in descending order.
    For scaling, multiples of powers of two are favorable, so that the problem can be bisected as often as possible.
@@ -94,25 +122,26 @@ def suggest_scaling_appropriate_sizes_around_N(N: int, search_range: int = 2) ->
    Returns:
        Tuple[int]: List of candidate problem sizes.
    """
    first_candidate = 2 ** max(1, floor(log2(N)) - search_range)
    candidates = [
        first_candidate * factor for factor in range(1, 2 ** (search_range + 1))
    ]
    return tuple(reversed(candidates))
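
The candidate construction is exactly the two lines above, so its behaviour can be checked by hand. A small standalone re-implementation mirroring this hunk:

    from math import floor, log2

    def scaling_sizes(N: int, search_range: int = 2):
        # Mirror of the hunk above: take the power of two `search_range`
        # halvings below N, then all its multiples up to just under
        # 2 ** (floor(log2(N)) + 1).
        first_candidate = 2 ** max(1, floor(log2(N)) - search_range)
        candidates = [
            first_candidate * factor for factor in range(1, 2 ** (search_range + 1))
        ]
        return tuple(reversed(candidates))

    # N = 1000, search_range = 2: floor(log2(1000)) = 9, so the base is 2**7 = 128
    # and its multiples 128..896 are returned in descending order.
    print(scaling_sizes(1000))  # (896, 768, 640, 512, 384, 256, 128)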

def suggest_best_3d_problem_decompositions_for_initial_single_node_performance_test(
    max_number_of_nodes: int,
    processes_per_node: int,
    memory: float,
    precision: int,
    size_q: int,
    safety_factor: float,
    name: str,
    prioritize_balanced_decompositions: bool = False,
    max_number_of_candidates: int = 6,
    min_distance_between_candidates: float = 2.0,
    debug: bool = False,
) -> Tuple[np.ndarray, np.ndarray]:
    """
    Suggests a set of 3D problem decompositions that are especially suited for single node performance tests.
@@ -147,7 +176,10 @@ def suggest_best_3d_problem_decompositions_for_initial_single_node_performance_t
    # - create all possible combinations of candidates, i.e., the number of combinations is binomial_coefficient(k=3, n=len(n_j_candidates))
    # decomposition_candidates = np.argsort(np.array(combinations_with_replacement(n_j_candidates, 3)), axis=1)
    list_of_scaling_processes = processes_per_node * 2 ** np.arange(
        ceil(log2(max_number_of_nodes + 1))
    )

    def valid_candidate_generator():
        # - check which candidate combinations suffice the memory limit
        # - check if I can bisect my problem until P_max
@@ -165,11 +197,15 @@ def suggest_best_3d_problem_decompositions_for_initial_single_node_performance_t
    # [candidate for _, candidate in zip(range(max_number_of_candidates), valid_candidate_generator())]
    # )
    valid_n_j_candidates = np.array(list(valid_candidate_generator()))
    reordered_candidates_for_scaling = valid_n_j_candidates[
        np.argsort(np.ptp(valid_n_j_candidates, axis=1))
    ]
    # valid_candidates_for_scaling = valid_n_j_candidates[np.argsort(np.prod(valid_n_j_candidates, axis=1))[::-1][:max_number_of_candidates]]

    def thin_out_candidates(
        candidates: np.ndarray, min_distance_between_candidates: float
    ) -> np.ndarray:
        """
        Thin out candidates by removing those that are too close to each other based on the min_distance_between_candidates.
        Prioritize keeping more equally distributed candidates.
@@ -177,78 +213,179 @@ def suggest_best_3d_problem_decompositions_for_initial_single_node_performance_t
        # FIXME what I actually want is not to have a relative change from one to the other. I want the memory utilization to be uniformly distributed or rather normally with the peak at the max utilization.
        thinned_candidates = []
        for candidate in candidates:
            if all(
                np.prod(candidate) / np.prod(existing) > min_distance_between_candidates
                or np.prod(existing) / np.prod(candidate)
                > min_distance_between_candidates
                for existing in thinned_candidates
            ):
                thinned_candidates.append(candidate)
        return np.array(thinned_candidates)

    # Thin out valid_candidates_for_scaling
    reordered_candidates_for_scaling = thin_out_candidates(
        reordered_candidates_for_scaling, min_distance_between_candidates
    )

    # Reorder the thinned candidates to prioritize balance
    # - discard all but the first `max_number_of_candidates` combinations whose product is closest to N_max
    valid_candidates_for_scaling = reordered_candidates_for_scaling[
        np.argsort(np.prod(reordered_candidates_for_scaling, axis=1))[::-1][
            :max_number_of_candidates
        ]
    ]
    reordered_candidates_for_scaling = valid_candidates_for_scaling[
        np.argsort(np.ptp(valid_candidates_for_scaling, axis=1))
    ]
    # - sort them to be as equally distributed as possible
    # reordered_candidates_for_scaling = valid_candidates_for_scaling[np.argsort(np.ptp(valid_candidates_for_scaling, axis=1))]
    reordered_candidates_for_scaling = valid_candidates_for_scaling[
        np.argsort(np.ptp(valid_candidates_for_scaling, axis=1))
    ]
    # reordered_candidates_for_scaling = valid_n_j_candidates[np.argsort(np.ptp(valid_n_j_candidates, axis=1))[:max_number_of_candidates]]

    # - denote if P_max can be included or not
    index_of_scaling_candidates_that_allow_to_include_P_max = np.where(
        np.prod(valid_candidates_for_scaling, axis=1)
        % (processes_per_node * max_number_of_nodes)
        == 0
    )[0]
    index_of_scaling_candidates_that_allow_to_include_P_max_reordered = np.where(
        np.prod(reordered_candidates_for_scaling, axis=1)
        % (processes_per_node * max_number_of_nodes)
        == 0
    )[0]

    # DEBUG block
    if debug:
        relation_factor_n = np.max(valid_candidates_for_scaling[0])
        print("\n", 50 * "-", "\nInformation for best suited candidate on ", name)
        print(f"It is possible to allocate a total of N={N_max:.3e} cells.")
        print(f"It is possible to allocate a total of n={n_max:.3e} cells per process.")
        if not prioritize_balanced_decompositions:
            utilization = (
                processes_per_node * np.prod(valid_candidates_for_scaling, axis=1)
            ) / N_max
            unbalance_factor = np.maximum(
                0.01, np.ptp(valid_candidates_for_scaling, axis=1) / relation_factor_n
            )
            # size_order_score = utilization / unbalance_factor
            # print("Utilization [without reordering] is ", utilization, " %.")
            # print("Unbalance factor [without reordering] is ", unbalance_factor, " %.")
            # print("Total score [without reordering] is ", size_order_score, "." )
            # print(
            #     "All available nodes can be included for candidates of index [without reordering]: ",
            #     index_of_scaling_candidates_that_allow_to_include_P_max,
            # )
            print(
                *[
                    f"""{i:2}: {tuple(elem)},\t"""
                    f"""# N={processes_per_node * np.prod(elem):.3e} ~ {util:5.1%} | """
                    f"""{balance:5.1%}"""
                    f"""\t{'# ' + str(max_number_of_nodes) + ' nodes included' if i in index_of_scaling_candidates_that_allow_to_include_P_max else ''}"""
                    for i, (util, balance, elem) in enumerate(
                        zip(utilization, unbalance_factor, valid_candidates_for_scaling)
                    )
                ],
                sep="\n",
            )
        elif prioritize_balanced_decompositions:
            utilization_reordered = (
                processes_per_node * np.prod(reordered_candidates_for_scaling, axis=1)
            ) / N_max
            unbalance_factor_reordered = np.maximum(
                0.01,
                np.ptp(reordered_candidates_for_scaling, axis=1) / relation_factor_n,
            )
            # balance_order_score = utilization_reordered / unbalance_factor_reordered
            # print(
            #     "Utilization [with reordering] is ",
            #     utilization_reordered,
            #     " %.",
            # )
            # print(
            #     "Unbalance factor [with reordering] is ",
            #     unbalance_factor_reordered,
            #     " %.",
            # )
            # print("Total score [with reordering] is ", balance_order_score, "." )
            # print(
            #     "All available nodes can be included for candidates of index [with reordering]: ",
            #     index_of_scaling_candidates_that_allow_to_include_P_max_reordered,
            # )
            print(
                *[
                    f"""{i:2}: {tuple(elem)},\t"""
                    f"""# N={processes_per_node * np.prod(elem):.3e} ~ {util:5.1%} | """
                    f"""{balance:5.1%}"""
                    f"""\t{'# ' + str(max_number_of_nodes) + ' nodes included' if i in index_of_scaling_candidates_that_allow_to_include_P_max_reordered else ''}"""
                    for i, (util, balance, elem) in enumerate(
                        zip(
                            utilization_reordered,
                            unbalance_factor_reordered,
                            reordered_candidates_for_scaling,
                        )
                    )
                ],
                sep="\n",
            )
        print(
            "Use 3-5 of these candidates for single node performance tests. Then use the best one for scaling."
        )
    # 100 nodes included

    # TODO create functions out of the sub-steps
    if prioritize_balanced_decompositions:
        return (
            reordered_candidates_for_scaling,
            index_of_scaling_candidates_that_allow_to_include_P_max_reordered,
        )
    else:
        return (
            valid_candidates_for_scaling,
            index_of_scaling_candidates_that_allow_to_include_P_max,
        )
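
The thinning criterion of `thin_out_candidates` is fully visible in the hunk above, so it can be checked in isolation. A small standalone re-implementation with made-up candidate boxes (illustrative values only):

    import numpy as np

    def thin_out(candidates, min_distance):
        # Keep a candidate only if its cell count differs from every kept one
        # by more than the given factor (mirrors the comprehension above).
        kept = []
        for candidate in candidates:
            if all(
                np.prod(candidate) / np.prod(existing) > min_distance
                or np.prod(existing) / np.prod(candidate) > min_distance
                for existing in kept
            ):
                kept.append(candidate)
        return np.array(kept)

    boxes = [(8, 8, 8), (8, 8, 7), (8, 6, 6), (4, 4, 4)]  # products: 512, 448, 288, 64
    print(thin_out(boxes, 1.5))
    # (8, 8, 7) is dropped: 512/448 ~ 1.14 lies within a factor of 1.5 of (8, 8, 8);
    # the other boxes survive because their products differ by more than 1.5x.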

if __name__ == "__main__":
    # Example usage
    for (
        max_number_of_nodes,
        processes_per_node,
        memory,
        precision,
        size_q,
        safety_factor,
        name,
    ) in [
        # ( 100, 112, 2.56e11, 8, 27, 1.4, "MareNostrum5-GPP"),
        # ( 100, 1, 6.40e10, 8, 19, 1.2, "MareNostrum5-ACC"),  # NOTE: you have 4 times the GPUs but each one has its own memory
        # ( 100, 1, 6.40e10, 8, 27, 1.2, "MareNostrum5-ACC"),  # NOTE: you have 4 times the GPUs but each one has its own memory
        # ( 32, 48, 3.20e10, 8, 27, 1.4, "MareNostrum5-ACC"),  # NOTE: you have 4 times the GPUs but each one has its own memory
        # ( 512, 128, 2.56e11, 8, 27, 1.4, "Q27-LUMI-C"),
        # ( 512, 128, 2.56e11, 8, 19, 1.4, "Q19-LUMI-C"),
        # ( 512, 128, 2.56e11, 8, 15, 1.4, "Q15-LUMI-C"),
        # ( 128, 128, 5.12e11, 8, 27, 1.4, "LUMI-C-big"),
        # (1024, 2, 1.28e11, 8, 19, 1.2, "LUMI-G"),  # NOTE: you have 8 times the GPUs but each one has its own memory
        # (1024, 2, 1.28e11, 8, 26, 1.2, "MTW-LUMI-G"),  # NOTE: you have 8 times the GPUs but each one has its own memory
        (128, 128, 1.00e09, 8, 19, 1.2, "Deucalion-ARM"),  # FIXME add the right parameters for Deucalion
    ]:
        _, _ = (
            suggest_best_3d_problem_decompositions_for_initial_single_node_performance_test(
                max_number_of_nodes,
                processes_per_node,
                memory,
                precision,
                size_q,
                safety_factor,
                name,
                max_number_of_candidates=20,
                min_distance_between_candidates=1.2,
                prioritize_balanced_decompositions=False,
                debug=True,
            )
        )
    # Fluid Q19
    # Temperature Q7
\ No newline at end of file
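
For a single machine outside the loop above, the suggestion helper can also be called directly with keyword arguments. A hedged sketch; the keyword names follow the signature shown in this diff, while the module name `config_helper`, the concrete parameter values, and the commented interpretations are illustrative assumptions only, since the internal memory computation is not part of the shown hunks:

    # Illustrative only: module name and parameter values are assumptions.
    from config_helper import (
        suggest_best_3d_problem_decompositions_for_initial_single_node_performance_test,
    )

    candidates, full_node_indices = (
        suggest_best_3d_problem_decompositions_for_initial_single_node_performance_test(
            max_number_of_nodes=128,
            processes_per_node=128,
            memory=2.56e11,     # usable memory per node (assumed meaning)
            precision=8,        # bytes per value, e.g. double precision
            size_q=19,          # stencil size, e.g. Q19 as in the list above
            safety_factor=1.4,  # head room factor (assumed meaning)
            name="ExampleMachine",
            max_number_of_candidates=6,
            debug=True,
        )
    )
    # `candidates` holds the suggested (nx, ny, nz) cells-per-process boxes;
    # `full_node_indices` marks those whose total size is divisible by the
    # full machine's process count, so the largest node count can be included.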