From 991e906dc29b758f4dbc8bafd401d0dac718b541 Mon Sep 17 00:00:00 2001
From: Behzad Safaei <iwia103h@a0221.nhr.fau.de>
Date: Thu, 20 Mar 2025 01:23:20 +0100
Subject: [PATCH] Add benchmark case for load balancing

---
 examples/benchmarks/sd_static.cpp            |  25 +--
 examples/benchmarks/sd_static_triangular.cpp | 164 +++++++++++++++++++
 examples/benchmarks/spring_dashpot_no_pbc.py |  96 +++++++++++
 runtime/utility/dem_sc_grid.cpp              |  11 +-
 runtime/utility/dem_sc_grid.hpp              |   6 +-
 5 files changed, 278 insertions(+), 24 deletions(-)
 create mode 100644 examples/benchmarks/sd_static_triangular.cpp
 create mode 100644 examples/benchmarks/spring_dashpot_no_pbc.py

diff --git a/examples/benchmarks/sd_static.cpp b/examples/benchmarks/sd_static.cpp
index c526bf1..4396fb6 100644
--- a/examples/benchmarks/sd_static.cpp
+++ b/examples/benchmarks/sd_static.cpp
@@ -1,22 +1,8 @@
 #include <iostream>
 #include <memory>
-
-#include "spring_dashpot.hpp"
 #include <chrono>
 
-void set_feature_properties(std::shared_ptr<PairsAccessor> &ac){
-    ac->setTypeStiffness(0,0, 1e6);
-    ac->syncTypeStiffness();
-
-    ac->setTypeDampingNorm(0,0, 300);
-    ac->syncTypeDampingNorm();
-
-    ac->setTypeFriction(0,0, 1.2);
-    ac->syncTypeFriction();
-
-    ac->setTypeDampingTan(0,0, 300);
-    ac->syncTypeDampingTan();
-}
+#include "spring_dashpot.hpp"
 
 int main(int argc, char **argv) {
     if(argc!=5){
@@ -30,16 +16,13 @@ int main(int argc, char **argv) {
     auto pairs_sim = std::make_shared<PairsSimulation>();
     pairs_sim->initialize();
 
-    auto ac = std::make_shared<PairsAccessor>(pairs_sim.get());
-    set_feature_properties(ac);     // Arbitray properties (all forces cancel out due to PBC in all dims)
-
     auto pairs_runtime = pairs_sim->getPairsRuntime();
 
     pairs_runtime->initDomain(&argc, &argv, 0, 0, 0, domain_size[0], domain_size[1], domain_size[2], false);
 
     double particle_spacing = 1.0;
 
-    // Particle overlap is required for force calculation
+    // Particle overlap is required for force calculation (but forces remain zero since stiffnesses are zero by default)
     double peneration_depth = 0.01;  
     double diameter = particle_spacing + peneration_depth;
     
@@ -99,8 +82,8 @@ int main(int argc, char **argv) {
         std::cout << "PUPS: " << pups << std::endl;
     }
     
-    // if (rank==1) pairs::vtk_write_data(pairs_runtime, "output/ghost_spheres", pairs_sim->nlocal(), pairs_sim->size(), 0);
-    // if (rank==2) pairs::vtk_write_data(pairs_runtime, "output/ghost_spheres", pairs_sim->nlocal(), pairs_sim->size(), 0);
+    pairs::vtk_write_data(pairs_runtime, "output/local_spheres", 0, pairs_sim->nlocal(), 0);
+    pairs::vtk_write_data(pairs_runtime, "output/ghost_spheres", pairs_sim->nlocal(), pairs_sim->size(), 0);
     
     pairs_sim->end();
 }
\ No newline at end of file
diff --git a/examples/benchmarks/sd_static_triangular.cpp b/examples/benchmarks/sd_static_triangular.cpp
new file mode 100644
index 0000000..05323a2
--- /dev/null
+++ b/examples/benchmarks/sd_static_triangular.cpp
@@ -0,0 +1,164 @@
+#include <iostream>
+#include <memory>
+#include <chrono>
+
+#include "spring_dashpot_no_pbc.hpp"
+
+template<typename Type>
+void print_global_stats(std::string name, Type value, MPI_Datatype mpi_type, MPI_Comm comm){
+    int rank, world_size;
+    MPI_Comm_rank(comm, &rank);
+    MPI_Comm_size(comm, &world_size);
+
+    std::vector<Type> all_values(world_size);
+    MPI_Gather(&value, 1, mpi_type, all_values.data(), 1, mpi_type, 0, comm);
+
+    if(rank == 0){
+        std::sort(all_values.begin(), all_values.end());
+        Type sum = std::accumulate(all_values.begin(), all_values.end(), 0.0);
+        double avg = static_cast<double>(sum)/world_size;
+
+        // Standard deviation ------------------
+        double sum_sq = 0.0;
+        for(const auto &v : all_values){
+            sum_sq += (static_cast<double>(v) - avg) * (static_cast<double>(v) - avg);
+        }
+        double std_dev = std::sqrt(sum_sq / world_size);
+
+        // Median ------------------------------
+        double median = 0.0;
+        if(world_size%2 == 0){
+            median = (all_values[world_size/2 - 1] + all_values[world_size/2]) / 2.0;   
+        } else{
+            median = all_values[world_size/2];
+        }
+
+        std::cout << "-----------------------------------" << std::endl;
+        std::cout << name + "_MIN: " << all_values[0] << std::endl;
+        std::cout << name + "_MAX: " << all_values[world_size - 1] << std::endl;
+        std::cout << name + "_SUM: " << sum << std::endl;
+        std::cout << name + "_AVG: " << avg << std::endl;
+        std::cout << name + "_MED: " << median << std::endl;
+        std::cout << name + "_STDDEV: " << std_dev << std::endl;
+        std::cout << "-----------------------------------" << std::endl;
+    }
+}
+
+void print_usage(){
+    std::cerr << "Invalid args." << std::endl;
+    std::cerr << "Required args: X Y Z #Timesteps LoadBalanced(bool)" << std::endl;
+    std::cerr << "If LoadBalanced, 3 addition args are required: Algorithm(hilbert/morton) RegridMin RegridMax." << std::endl;
+    exit(-1);
+}
+
+int main(int argc, char **argv) {
+    if(argc<6) print_usage();
+
+    double domain_size[3] = {std::stod(argv[1]), std::stod(argv[2]), std::stod(argv[3])};
+    uint64_t num_timesteps = std::stoull(argv[4]);
+
+    auto pairs_sim = std::make_shared<PairsSimulation>();
+    pairs_sim->initialize();
+
+    auto pairs_runtime = pairs_sim->getPairsRuntime();
+
+    bool load_balanced = (std::stoi(argv[5]) != 0);
+    pairs_runtime->initDomain(&argc, &argv, 0, 0, 0, domain_size[0], domain_size[1], domain_size[2], load_balanced);
+
+    if(load_balanced){
+        if(argc!=9) print_usage();
+
+        std::string alg_str = argv[6];
+        LoadBalancingAlgorithms alg;
+
+        if(alg_str == "hilbert") alg = pairs::Hilbert;
+        else if (alg_str == "morton") alg = pairs::Morton;
+        else {std::cerr << "Invalid rebalancing algorithm." << std::endl; print_usage();}
+
+        int regrid_min = std::stoi(argv[7]);
+        int regrid_max = std::stoi(argv[8]);
+        pairs_runtime->getDomainPartitioner()->initWorkloadBalancer(alg, regrid_min, regrid_max);
+    } else {
+        if(argc!=6) print_usage();
+    }
+
+    double particle_spacing = 1.0;
+
+    // Particle overlap is required for force calculation (but stiffnesses are zero by default, so forces remain zero)
+    double peneration_depth = 0.01;  
+    double diameter = particle_spacing + peneration_depth;
+    
+    double initial_velocity = 0.0;  // Stationary 
+    double density = 1000;          // Arbitrary
+    double lower_tirangular = true;
+    
+    pairs::dem_sc_grid(pairs_runtime,   domain_size[0], domain_size[1], domain_size[2],
+                                        particle_spacing, 
+                                        diameter, diameter, diameter,
+                                        initial_velocity, density, 1, lower_tirangular);
+    
+    double cell_width = diameter;
+    pairs_sim->setup_cells(cell_width, cell_width, cell_width, cell_width);
+
+    // Inertia update is required for euler updates to be valid (but particles remain stationary)
+    pairs_sim->update_mass_and_inertia(); 
+    double dt = 0.001;  // Arbitrary
+    int print_interval = (num_timesteps >= 5) ? (num_timesteps / 5) : 1;
+    
+    // Update domain so stats become available (does rebalancing if rebalance is ture)
+    pairs_sim->update_domain();
+
+    // Stats
+    // ------------------------------------------------------------------------------
+    int rank = pairs_sim->rank();
+    int world_size = pairs_runtime->getDomainPartitioner()->getWorldSize();
+
+    int num_neigh_ranks = pairs_runtime->getDomainPartitioner()->getNumberOfNeighborRanks();
+    int num_neigh_aabbs = pairs_runtime->getDomainPartitioner()->getNumberOfNeighborAABBs();
+    uint64_t nlocal = pairs_sim->nlocal();
+    uint64_t nghost = pairs_sim->nghost();
+    print_global_stats("NUM_NEIGH_RANKS", num_neigh_ranks, MPI_INT, MPI_COMM_WORLD);
+    print_global_stats("NUM_NEIGH_AABBS", num_neigh_aabbs, MPI_INT, MPI_COMM_WORLD);
+    print_global_stats("NLOCAL", nlocal, MPI_UINT64_T, MPI_COMM_WORLD);
+    print_global_stats("NGHOST", nghost, MPI_UINT64_T, MPI_COMM_WORLD);
+
+    if(rank==0){
+        std::cout << "NUM_PROC: " << world_size << std::endl;
+        std::cout << "NUM_TIMESTEPS: " << num_timesteps << std::endl;
+    }
+
+    // ------------------------------------------------------------------------------
+    // Timestep Loop
+    // ------------------------------------------------------------------------------
+    MPI_Barrier(MPI_COMM_WORLD);
+    auto start = std::chrono::high_resolution_clock::now();
+
+    for (int t=0; t<num_timesteps; ++t){
+        if ((t%print_interval==0) && rank==0) std::cout << "Timestep: " << t << std::endl;
+        pairs_sim->communicate(t);
+        pairs_sim->update_cells(t);
+        pairs_sim->spring_dashpot();
+        pairs_sim->euler(dt);
+    }
+
+    auto end = std::chrono::high_resolution_clock::now();
+    // ------------------------------------------------------------------------------
+    
+    uint64_t global_nparticles;
+    MPI_Reduce(&nlocal, &global_nparticles, 1, MPI_UINT64_T, MPI_SUM, 0, MPI_COMM_WORLD);
+    if(rank==0) {
+        auto duration = std::chrono::duration<double>(end - start);
+        double total_runtime = duration.count(); // seconds
+        std::cout << "TOTAL_RUNTIME: " << total_runtime << std::endl;
+        std::cout << "GLOBAL_NPARTICLES: " << global_nparticles << std::endl;
+        
+        double pups = global_nparticles * num_timesteps / total_runtime;    // particle updates per second
+        std::cout << "PUPS: " << pups << std::endl;
+    }
+    
+    // pairs::vtk_write_subdom(pairs_runtime, "output/sd_subdom", 0);
+    // pairs::vtk_write_data(pairs_runtime, "output/sd_local", 0, pairs_sim->nlocal(), 0);
+    // pairs::vtk_write_data(pairs_runtime, "output/sd_ghost", pairs_sim->nlocal(), pairs_sim->size(), 0);
+
+    pairs_sim->end();
+}
\ No newline at end of file
diff --git a/examples/benchmarks/spring_dashpot_no_pbc.py b/examples/benchmarks/spring_dashpot_no_pbc.py
new file mode 100644
index 0000000..d0d43f6
--- /dev/null
+++ b/examples/benchmarks/spring_dashpot_no_pbc.py
@@ -0,0 +1,96 @@
+import math
+import pairs
+import sys
+import os
+
+def update_mass_and_inertia(i):
+    rotation_matrix[i] = diagonal_matrix(1.0)
+    rotation[i] = default_quaternion()
+
+    if is_sphere(i):
+        inv_inertia[i] = inversed(diagonal_matrix(0.4 * mass[i] * radius[i] * radius[i]))
+
+    else:
+        mass[i] = infinity
+        inv_inertia[i] = 0.0
+
+def spring_dashpot(i, j):
+    delta_ij = -penetration_depth(i, j)
+    skip_when(delta_ij < 0.0)     
+    
+    velocity_wf_i = linear_velocity[i] + cross(angular_velocity[i], contact_point(i, j) - position[i])
+    velocity_wf_j = linear_velocity[j] + cross(angular_velocity[j], contact_point(i, j) - position[j])
+    
+    rel_vel = -(velocity_wf_i - velocity_wf_j)
+    rel_vel_n = dot(rel_vel, contact_normal(i, j))
+    rel_vel_t = rel_vel - rel_vel_n * contact_normal(i, j)
+
+    fNabs = stiffness[i,j] * delta_ij + damping_norm[i,j] * rel_vel_n
+    fN = fNabs * contact_normal(i, j)
+
+    fTabs = min(damping_tan[i,j] * length(rel_vel_t), friction[i, j] * fNabs)
+    fT = fTabs * normalized(rel_vel_t)
+
+    partial_force = fN + fT
+    apply(force, partial_force)
+    apply(torque, cross(contact_point(i, j) - position, partial_force))
+
+def euler(i):
+    skip_when(is_fixed(i) or is_infinite(i))
+    inv_mass = 1.0 / mass[i]
+    position[i] +=  0.5 * inv_mass * force[i] * dt * dt + linear_velocity[i] * dt
+    linear_velocity[i] += inv_mass * force[i] * dt
+    wdot = rotation_matrix[i] * (inv_inertia[i] * torque[i]) * transposed(rotation_matrix[i])
+    phi = angular_velocity[i] * dt + 0.5 * wdot * dt * dt
+    rotation[i] = quaternion(phi, length(phi)) * rotation[i]
+    rotation_matrix[i] = quaternion_to_rotation_matrix(rotation[i])
+    angular_velocity[i] += wdot * dt
+
+
+file_name = os.path.basename(__file__)
+file_name_without_extension = os.path.splitext(file_name)[0]
+
+psim = pairs.simulation(
+    file_name_without_extension,
+    [pairs.sphere()],
+    double_prec=True,
+    particle_capacity=10000000,
+    debug=True)
+
+target = sys.argv[1] if len(sys.argv[1]) > 1 else "none"
+if target == 'gpu':
+    psim.target(pairs.target_gpu())
+elif target == 'cpu':
+    psim.target(pairs.target_cpu())
+else:
+    print(f"Invalid target, use {sys.argv[0]} <cpu/gpu>")
+
+psim.add_position('position')
+psim.add_property('mass', pairs.real())
+psim.add_property('linear_velocity', pairs.vector())
+psim.add_property('angular_velocity', pairs.vector())
+psim.add_property('force', pairs.vector(), volatile=True)
+psim.add_property('torque', pairs.vector(), volatile=True)
+psim.add_property('radius', pairs.real())
+psim.add_property('inv_inertia', pairs.matrix())
+psim.add_property('rotation_matrix', pairs.matrix())
+psim.add_property('rotation', pairs.quaternion())
+
+ntypes = 1
+psim.add_feature('type', ntypes)
+psim.add_feature_property('type', 'stiffness', pairs.real())
+psim.add_feature_property('type', 'damping_norm', pairs.real())
+psim.add_feature_property('type', 'damping_tan', pairs.real())
+psim.add_feature_property('type', 'friction', pairs.real())
+
+# psim.set_domain_partitioner(pairs.regular_domain_partitioner())
+psim.set_domain_partitioner(pairs.block_forest())
+psim.pbc([False, False, False])
+psim.build_cell_lists(use_halo_cells=False)
+
+psim.compute(update_mass_and_inertia, symbols={'infinity': math.inf })
+psim.compute(spring_dashpot, profile=False)
+psim.compute(euler, parameters={'dt': pairs.real()})
+
+psim.generate()
+
diff --git a/runtime/utility/dem_sc_grid.cpp b/runtime/utility/dem_sc_grid.cpp
index ffee437..9754145 100644
--- a/runtime/utility/dem_sc_grid.cpp
+++ b/runtime/utility/dem_sc_grid.cpp
@@ -21,7 +21,10 @@ bool point_within_aabb(double point[], double aabb[]) {
            point[2] >= aabb[2] && point[2] < aabb[5];
 }
 
-int dem_sc_grid(PairsRuntime *ps, double xmax, double ymax, double zmax, double spacing, double diameter, double min_diameter, double max_diameter, double initial_velocity, double particle_density, int ntypes) {
+int dem_sc_grid(PairsRuntime *ps, double xmax, double ymax, double zmax, 
+    double spacing, double diameter, double min_diameter, double max_diameter, 
+    double initial_velocity, double particle_density, int ntypes, bool lower_triangular) {
+        
     auto uids = ps->getAsUInt64Property(ps->getPropertyByName("uid"));
     auto shapes = ps->getAsIntegerProperty(ps->getPropertyByName("shape"));
     auto types = ps->getAsIntegerProperty(ps->getPropertyByName("type"));
@@ -59,7 +62,11 @@ int dem_sc_grid(PairsRuntime *ps, double xmax, double ymax, double zmax, double
     while(point_within_aabb(point, gen_domain)) {
         auto pdiam = realRandom<real_t>(min_diameter, max_diameter);
 
-        if(ps->getDomainPartitioner()->isWithinSubdomain(point[0], point[1], point[2])) {
+        if(ps->getDomainPartitioner()->isWithinSubdomain(point[0], point[1], point[2]) &&
+            
+            // If lower_triangular is true, only particles below the diagonal are accepted (in the x-z planes) 
+            (lower_triangular ? ((point[2] <= (-zmax/xmax*point[0]+zmax))) : true)) {
+
             real_t rad = pdiam * 0.5;
             if(nparticles >= particle_capacity) {
                 std::cerr << "Number of particles exceeded capacity (" << particle_capacity << ") in rank " << ps->getDomainPartitioner()->getRank() << std::endl;
diff --git a/runtime/utility/dem_sc_grid.hpp b/runtime/utility/dem_sc_grid.hpp
index 9eb3462..f231a44 100644
--- a/runtime/utility/dem_sc_grid.hpp
+++ b/runtime/utility/dem_sc_grid.hpp
@@ -54,6 +54,10 @@ private:
 
 bool point_within_aabb(double point[], double aabb[]);
 
-int dem_sc_grid(PairsRuntime *ps, double xmax, double ymax, double zmax, double spacing, double diameter, double min_diameter, double max_diameter, double initial_velocity, double particle_density, int ntypes);
+int dem_sc_grid(PairsRuntime *ps, double xmax, double ymax, double zmax, 
+    double spacing, 
+    double diameter, double min_diameter, double max_diameter, 
+    double initial_velocity, double particle_density, int ntypes,
+    bool lower_triangular=false);
 
 }
-- 
GitLab