[submodule "external/walberla"]
path = external/walberla
url = https://i10git.cs.fau.de/walberla/walberla.git
......@@ -9,14 +9,9 @@ set(CMAKE_CXX_FLAGS_RELEASE "-O3")
set(CMAKE_CXX_FLAGS_DEBUG "-g -O0 -DDEBUG")
option(USE_MPI "USE_MPI" ON)
option(USE_WALBERLA "Enable waLBerla support for using BlockForest domain partitioning and dynamic load balancing" OFF)
option(COMPILE_CUDA "COMPILE_CUDA" OFF)
option(GENERATE_WHOLE_PROGRAM "Generate the whole program (i.e. including the 'main' function). No additional source files are needed." OFF)
option(BUILD_APP "Build a stand-alone app which uses the P4IRS modular interface. Provide your source files with -DUSER_SOURCE_FILES" OFF)
if(GENERATE_WHOLE_PROGRAM AND BUILD_APP)
message(FATAL_ERROR "You must choose either GENERATE_WHOLE_PROGRAM or BUILD_APP or neither.\n
Choose neither if you only want to use the P4IRS library in your project (in a separate build system).")
endif()
set(USER_SOURCE_FILES "" CACHE STRING "List of source files to compile (semicolon-separated)")
set(INPUT_SCRIPT ${INPUT_SCRIPT} CACHE PATH "The input python script triggering code generation")
if(NOT EXISTS ${INPUT_SCRIPT})
......@@ -42,7 +37,7 @@ file(MAKE_DIRECTORY ${OUTPUT_DIR})
#================================================================================
# TODO: Unify all interfaces
set(GEN_INTERFACE_DIR ${CMAKE_CURRENT_BINARY_DIR}/internal_interfaces)
set(GEN_INTERFACE_HEADER ${CMAKE_CURRENT_BINARY_DIR}/last_generated.hpp)
set(GEN_INTERFACE_HEADER ${GEN_INTERFACE_DIR}/last_generated.hpp)
file(MAKE_DIRECTORY ${GEN_INTERFACE_DIR})
#================================================================================
......@@ -50,14 +45,14 @@ file(MAKE_DIRECTORY ${GEN_INTERFACE_DIR})
#================================================================================
set(RUNTIME_COMMON_FILES
runtime/pairs.cpp
runtime/copper_fcc_lattice.cpp
runtime/create_body.cpp
runtime/dem_sc_grid.cpp
runtime/read_from_file.cpp
runtime/stats.cpp
runtime/thermo.cpp
runtime/timing.cpp
runtime/vtk.cpp
runtime/utility/copper_fcc_lattice.cpp
runtime/utility/create_body.cpp
runtime/utility/dem_sc_grid.cpp
runtime/utility/read_from_file.cpp
runtime/utility/stats.cpp
runtime/utility/thermo.cpp
runtime/utility/timing.cpp
runtime/utility/vtk.cpp
runtime/domain/regular_6d_stencil.cpp)
#================================================================================
......@@ -71,7 +66,7 @@ set(PAIRS_LINK_DIRS ${CMAKE_CURRENT_BINARY_DIR})
set(PAIRS_INCLUDE_DIRS ${CMAKE_CURRENT_BINARY_DIR})
# The target can either be an executable or a static library
if(GENERATE_WHOLE_PROGRAM OR BUILD_APP)
if(USER_SOURCE_FILES)
add_executable(${PAIRS_TARGET} ${RUNTIME_COMMON_FILES})
else()
add_library(${PAIRS_TARGET} STATIC ${RUNTIME_COMMON_FILES})
......@@ -90,13 +85,7 @@ set_target_properties(${PAIRS_TARGET} PROPERTIES
#================================================================================
# USER_SOURCE_FILES =============================================================
#================================================================================
if(BUILD_APP)
set(USER_SOURCE_FILES "" CACHE STRING "List of source files to compile (semicolon-separated)")
if(NOT USER_SOURCE_FILES)
message(FATAL_ERROR "BUILD_APP is ON. You have to specify source files like this:\n
-DUSER_SOURCE_FILES=src/main.cpp;src/helper.cpp")
endif()
if(USER_SOURCE_FILES)
foreach(file ${USER_SOURCE_FILES})
if(NOT EXISTS ${file})
message(FATAL_ERROR "File '${file}' does not exist!")
......@@ -108,14 +97,8 @@ endif()
#================================================================================
# waLBerla ======================================================================
#================================================================================
set(WALBERLA_DIR ${WALBERLA_DIR} CACHE PATH "Path to waLBerla source directory (required only when using BlockForest partitioning).")
if(WALBERLA_DIR)
if(EXISTS "${WALBERLA_DIR}")
target_compile_definitions(${PAIRS_TARGET} PUBLIC USE_WALBERLA)
else()
message(FATAL_ERROR "Invalid WALBERLA_DIR: '${WALBERLA_DIR}' does not exist.")
endif()
if(USE_WALBERLA)
target_compile_definitions(${PAIRS_TARGET} PUBLIC USE_WALBERLA)
set(RUNTIME_WALBERLA_FILES
runtime/domain/block_forest.cpp
......@@ -123,18 +106,19 @@ if(WALBERLA_DIR)
# TODO: Generate the host/device functions for computing weights
if(COMPILE_CUDA)
list(APPEND RUNTIME_WALBERLA_FILES runtime/boundary_weights.cu)
list(APPEND RUNTIME_WALBERLA_FILES runtime/domain/boundary_weights.cu)
else()
list(APPEND RUNTIME_WALBERLA_FILES runtime/boundary_weights.cpp)
list(APPEND RUNTIME_WALBERLA_FILES runtime/domain/boundary_weights.cpp)
endif()
target_sources(${PAIRS_TARGET} PRIVATE ${RUNTIME_WALBERLA_FILES})
## Linking walberla modules
set(PAIRS_WALBERLA_DEPENDENCIES blockforest core pe)
find_package(waLBerla REQUIRED)
set(WALBERLA_LINK_LIBRARIES_KEYWORD PUBLIC)
target_link_modules(${PAIRS_TARGET} ${PAIRS_WALBERLA_DEPENDENCIES}) # This is a walberla helper function
add_subdirectory(external/walberla EXCLUDE_FROM_ALL)
waLBerla_import()
# Recent issue from the waLBerla side: warnings about empty CUDA_ARCHITECTURES for walberla_gpu
set(PAIRS_WALBERLA_DEPENDENCIES walberla::blockforest walberla::core)
target_link_libraries(${PAIRS_TARGET} PUBLIC ${PAIRS_WALBERLA_DEPENDENCIES})
## TODO: PAIRS_LINK_DIRS and PAIRS_LINK_LIBRARIES for walberla modules *AND* their dependencies
## This implementation only works if the consumer of the library is itself a walberla app (made within the build system of walberla)
......@@ -250,10 +234,12 @@ endif()
# LIKWID ========================================================================
#================================================================================
if(LIKWID_DIR)
# Example: -DLIKWID_DIR=/apps/likwid/5.4.0
target_compile_options(${PAIRS_TARGET} PRIVATE -DLIKWID_PERFMON -pthread)
target_link_libraries(${PAIRS_TARGET} PRIVATE ${LIKWID_DIR}/lib/liblikwid.a)
list(APPEND PAIRS_LINK_LIBRARIES ${LIKWID_DIR}/lib/liblikwid.a)
target_link_libraries(${PAIRS_TARGET} PRIVATE ${LIKWID_DIR}/lib/liblikwid.so)
list(APPEND PAIRS_LINK_LIBRARIES ${LIKWID_DIR}/lib/liblikwid.so)
include_directories(${LIKWID_DIR}/include)
list(APPEND PAIRS_INCLUDE_DIRS "${LIKWID_DIR}/include")
......
......@@ -100,57 +100,34 @@ else:
psim.generate()
```
## Build instructions
## Build Instructions
P4IRS can be built in 3 different modes using the CMake build system. Before we demonstrate each mode, ensure you have CMake, MPI and CUDA (if targeting GPU execution) available in your environment.
P4IRS can be built in two different modes using the CMake build system. Before we demonstrate each mode, ensure you have CMake, MPI and CUDA (if targeting GPU execution) available in your environment.
In the following, we assume we have created and navigated to a build directory: `mkdir build; cd build`
**General CMake flags (applicable to all 3 modes):**
**Basic CMake flags:**
* Pass your input script to CMake using `-DINPUT_SCRIPT=path/to/script.py`
* Enable CUDA with `-DCOMPILE_CUDA=ON`
* Enable support for BlockForest domain partitioning and dynamic load balancing by providing the path to waLBerla source directory `-DWALBERLA_DIR=path/to/walberla` (TODO: waLBerla as a submodule)
* Enable waLBerla support with `-DUSE_WALBERLA=ON` for using BlockForest domain partitioning and dynamic load balancing
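As an illustration, a single configure invocation combining these flags (paths are placeholders; adjust them to your checkout) could look like:
```
cmake -DINPUT_SCRIPT=../examples/modular/spring_dashpot.py -DCOMPILE_CUDA=ON -DUSE_WALBERLA=ON ..
```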
### 1. Whole-program generation:
### 1. Stand-Alone P4IRS Application
---------------------
In this mode, everything including the `main` function is generated by P4IRS.
To build a C++ application using P4IRS, provide the list of your source files to CMake using the `-DUSER_SOURCE_FILES` flag (semicolon-separated).
1. Set `generate_whole_program=True` in the input script
2. Set the CMake flag `-DGENERATE_WHOLE_PROGRAM=ON`
**Example**: Build the application [sd_1.cpp](examples/modular/sd_1.cpp) using [spring_dashpot.py](examples/modular/spring_dashpot.py) as the input script.
Example: Build [md.py](examples/whole-program-generation/md.py)
```
cmake -DINPUT_SCRIPT=../examples/whole-program-generation/md.py -DGENERATE_WHOLE_PROGRAM=ON ..
```
Now call `make` and an **executable** is built.
### 2. Modular stand-alone app
---------------------
You can build a stand-alone C++ app which uses the P4IRS modular interface.
1. Set `generate_whole_program=False` in the input script
2. Set the CMake flag `-DBUILD_APP=ON`
3. Provide the list of your source files to CMake (semicolon-separated): `-DUSER_SOURCE_FILES=path/to/main.cpp;path/to/helper.cpp`
Example: Build the application [sd_1.cpp](examples/modular/sd_1.cpp) with [spring_dashpot.py](examples/modular/spring_dashpot.py)
Note: In this example we assume waLBerla has already been cloned next to the P4IRS directory.
```
cmake -DINPUT_SCRIPT=../examples/modular/spring_dashpot.py -DBUILD_APP=ON -DUSER_SOURCE_FILES=../examples/modular/sd_1.cpp -DWALBERLA_DIR=../../walberla ..
cmake -DINPUT_SCRIPT=../examples/modular/spring_dashpot.py -DUSER_SOURCE_FILES=../examples/modular/sd_1.cpp -DUSE_WALBERLA=ON ..
```
Now call `make` and an **executable** is built.
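For orientation, the following is a condensed sketch of what such a user source file contains, based on the examples/modular/sphere_box_global.cpp added in this revision; the generated header name and all numeric arguments are illustrative only.
```cpp
#include <memory>
#include "spring_dashpot.hpp"   // header generated from the input script (name assumed)

int main(int argc, char **argv) {
    auto pairs_sim = std::make_shared<PairsSimulation>();
    pairs_sim->initialize();

    auto pairs_runtime = pairs_sim->getPairsRuntime();
    pairs_runtime->initDomain(&argc, &argv, 0, 0, 0, 30, 30, 30, false, false, false, true);

    // Create bodies, then set up the cell lists once all bodies exist
    pairs::create_sphere(pairs_runtime, 15, 20, 15, 0, 4, 0, 50, 4, 0, 0);
    double lcw = 1.0;                            // linked-cell width
    pairs_sim->setup_cells(lcw, lcw, lcw, lcw);
    pairs_sim->update_mass_and_inertia();

    double dt = 0.001;
    for (int t = 0; t < 2000; ++t) {
        pairs_sim->update_cells(t);
        pairs_sim->gravity();
        pairs_sim->spring_dashpot();
        pairs_sim->euler(dt);
        pairs_sim->communicate(t);
    }
    pairs_sim->end();
}
```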
### 3. P4IRS as a library
### 2. P4IRS as a Library
---------------------
In this mode, P4IRS is compiled as a library that can be integrated into other projects.
1. Set `generate_whole_program=False` in the input script
2. Ensure both `BUILD_APP` and `GENERATE_WHOLE_PROGRAM` are `OFF` (they are OFF by default)
Configure CMake and call `make` as usual, and a **static library** is built. You can then include P4IRS and its dependencies in your build system as follows:
P4IRS can also be compiled as a library for integration into larger projects.
To compile P4IRS as a library, simply do not pass any `USER_SOURCE_FILES` to CMake. Configure CMake and call `make` as usual, and a **static library** is built. You can then include P4IRS and its dependencies in your build system as follows:
```cmake
find_package(pairs REQUIRED HINTS "path/to/pairs/build" NO_DEFAULT_PATH)
target_include_directories(my_app PUBLIC ${PAIRS_INCLUDE_DIRS})
......
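For reference, a fuller consumer-side sketch might look like the following. This is only a sketch: it assumes the package config exports the `PAIRS_INCLUDE_DIRS`, `PAIRS_LINK_DIRS` and `PAIRS_LINK_LIBRARIES` variables set in the CMakeLists.txt above, and `my_app` stands in for your own target.
```cmake
# Hypothetical consumer CMakeLists.txt (variable names taken from this revision's CMakeLists.txt)
find_package(pairs REQUIRED HINTS "path/to/pairs/build" NO_DEFAULT_PATH)

add_executable(my_app main.cpp)
target_include_directories(my_app PUBLIC ${PAIRS_INCLUDE_DIRS})
target_link_directories(my_app PUBLIC ${PAIRS_LINK_DIRS})
target_link_libraries(my_app PUBLIC ${PAIRS_LINK_LIBRARIES})
```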
if ( WALBERLA_DIR )
# WALBERLA_DIR has to point to the waLBerla source directory
# this command builds waLBerla (again) in the current build directory in the subfolder "walberla" (second argument)
add_subdirectory( ${WALBERLA_DIR} walberla EXCLUDE_FROM_ALL)
waLBerla_import()
# Adds the 'src' and 'tests' directory of current app
list( APPEND WALBERLA_MODULE_DIRS "${CMAKE_SOURCE_DIR}/src" "${CMAKE_SOURCE_DIR}/tests" )
list( REMOVE_DUPLICATES WALBERLA_MODULE_DIRS )
set ( WALBERLA_MODULE_DIRS ${WALBERLA_MODULE_DIRS} CACHE INTERNAL "All folders that contain modules or tests" )
else()
message( FATAL_ERROR "waLBerla not found - Use 'cmake -DWALBERLA_DIR=path_to_waLBerla_sources pathToApplicationSources' " )
endif()
......@@ -15,8 +15,8 @@ int main(int argc, char **argv) {
// Create bodies
pairs::id_t pUid = pairs::create_sphere(pairs_runtime, 0.0499, 0.0499, 0.07, 0.5, 0.5, 0 , 1000, 0.0045, 0, 0);
// setup_sim after creating all bodies
pairs_sim->setup_sim();
// setup_cells after creating all bodies
pairs_sim->setup_cells();
pairs_sim->update_mass_and_inertia();
// Track particle
......
......@@ -58,8 +58,7 @@ psim = pairs.simulation(
double_prec=True,
particle_capacity=1000000,
neighbor_capacity=20,
debug=True,
generate_whole_program=False)
debug=True)
target = sys.argv[1] if len(sys.argv) > 1 else "none"
......
......@@ -21,7 +21,7 @@ int main(int argc, char **argv) {
pairs::create_sphere(pairs_runtime, 0.6, 0.6, 0.7, -2, -2, 0, 1000, 0.05, 0, 0);
pairs::create_sphere(pairs_runtime, 0.4, 0.4, 0.68, 2, 2, 0, 1000, 0.05, 0, 0);
pairs_sim->setup_sim(0.1, 0.1, 0.1, 0.1);
pairs_sim->setup_cells(0.1, 0.1, 0.1, 0.1);
pairs_sim->update_mass_and_inertia();
int num_timesteps = 2000;
......
......@@ -42,7 +42,7 @@ int main(int argc, char **argv) {
pairs::create_sphere(pairs_runtime, 0.6, 0.6, 0.7, -2, -2, 0, 1000, 0.05, 0, 0);
pairs::create_sphere(pairs_runtime, 0.4, 0.4, 0.68, 2, 2, 0, 1000, 0.05, 0, 0);
pairs_sim->setup_sim(0.1, 0.1, 0.1, 0.1);
pairs_sim->setup_cells(0.1, 0.1, 0.1, 0.1);
pairs_sim->update_mass_and_inertia();
int num_timesteps = 2000;
......
......@@ -65,7 +65,7 @@ int main(int argc, char **argv) {
auto pIsLocalInMyRank = [&](pairs::id_t uid){return ac->uidToIdxLocal(uid) != ac->getInvalidIdx();};
pairs_sim->setup_sim(0.1, 0.1, 0.1, 0.1);
pairs_sim->setup_cells(0.1, 0.1, 0.1, 0.1);
pairs_sim->update_mass_and_inertia();
pairs_sim->communicate(0);
......
......@@ -41,7 +41,6 @@ int main(int argc, char **argv) {
pairs_runtime->initDomain(&argc, &argv,
0, 0, 0, 20, 20, 20, // Domain bounds
false, false, false, // PBCs --------------> TODO: runtime pbc
true // Enable dynamic load balancing (does initial refinement on a <1,1,1> blockforest)
);
......@@ -61,7 +60,7 @@ int main(int argc, char **argv) {
double lcw = diameter_max * 1.01; // Linked-cell width
double interaction_radius = diameter_max;
pairs_sim->setup_sim(lcw, lcw, lcw, interaction_radius);
pairs_sim->setup_cells(lcw, lcw, lcw, interaction_radius);
pairs_sim->update_mass_and_inertia();
......
#include <iostream>
#include <memory>
#include <iomanip>
#include "sphere_box_global.hpp"
// cmake -DINPUT_SCRIPT=../examples/modular/sphere_box_global.py -DUSE_WALBERLA=ON -DUSER_SOURCE_FILES=../examples/modular/sphere_box_global.cpp -DCOMPILE_CUDA=ON ..
void set_feature_properties(std::shared_ptr<PairsAccessor> &ac){
ac->setTypeStiffness(0,0, 1e6);
ac->setTypeStiffness(0,1, 1e6);
ac->setTypeStiffness(1,0, 1e6);
ac->setTypeStiffness(1,1, 1e6);
ac->syncTypeStiffness();
ac->setTypeDampingNorm(0,0, 300);
ac->setTypeDampingNorm(0,1, 300);
ac->setTypeDampingNorm(1,0, 300);
ac->setTypeDampingNorm(1,1, 300);
ac->syncTypeDampingNorm();
ac->setTypeFriction(0,0, 1.2);
ac->setTypeFriction(0,1, 1.2);
ac->setTypeFriction(1,0, 1.2);
ac->setTypeFriction(1,1, 1.2);
ac->syncTypeFriction();
ac->setTypeDampingTan(0,0, 300);
ac->setTypeDampingTan(0,1, 300);
ac->setTypeDampingTan(1,0, 300);
ac->setTypeDampingTan(1,1, 300);
ac->syncTypeDampingTan();
}
int main(int argc, char **argv) {
auto pairs_sim = std::make_shared<PairsSimulation>();
pairs_sim->initialize();
auto ac = std::make_shared<PairsAccessor>(pairs_sim.get());
set_feature_properties(ac);
auto pairs_runtime = pairs_sim->getPairsRuntime();
pairs_runtime->initDomain(&argc, &argv, 0, 0, 0, 30, 30, 30, false, false, false, true);
pairs_runtime->getDomainPartitioner()->initWorkloadBalancer(pairs::Hilbert, 100, 800);
pairs::create_halfspace(pairs_runtime, 0,0,0, 1, 0, 0, 0, pairs::flags::INFINITE | pairs::flags::FIXED);
pairs::create_halfspace(pairs_runtime, 0,0,0, 0, 1, 0, 0, pairs::flags::INFINITE | pairs::flags::FIXED);
pairs::create_halfspace(pairs_runtime, 0,0,0, 0, 0, 1, 0, pairs::flags::INFINITE | pairs::flags::FIXED);
pairs::create_halfspace(pairs_runtime, 30,30,30, -1, 0, 0, 0, pairs::flags::INFINITE | pairs::flags::FIXED);
pairs::create_halfspace(pairs_runtime, 30,30,30, 0, -1, 0, 0, pairs::flags::INFINITE | pairs::flags::FIXED);
pairs::create_halfspace(pairs_runtime, 30,30,30, 0, 0, -1, 0, pairs::flags::INFINITE | pairs::flags::FIXED);
double radius = 0.5;
// Create a bed of small particles
pairs::dem_sc_grid(pairs_runtime, 30, 20, 5, radius*2 , radius*2 , radius*2, radius*2, 2, 250, 2);
// Create 3 global bodies, one of which is fixed
pairs::create_box(pairs_runtime, 12, 12, 13.5, 0, 0, 0, 15, 2, 13, 20, 0, pairs::flags::GLOBAL);
pairs::create_sphere(pairs_runtime, 15, 20, 15, 0, 4, 0, 50, 4, 0, pairs::flags::GLOBAL);
pairs::create_sphere(pairs_runtime, 15, 25, 4, 0, 0, 0, 50, 4, 0, pairs::flags::GLOBAL | pairs::flags::FIXED);
// Use the diameter of small particles to set up the cell list
double lcw = radius * 2;
pairs_sim->setup_sim(lcw, lcw, lcw, lcw);
pairs_sim->update_mass_and_inertia();
pairs_sim->communicate(0);
int num_timesteps = 20000;
int vtk_freq = 100;
int rebalance_freq = 2000;
double dt = 0.001;
pairs::vtk_write_subdom(pairs_runtime, "output/subdom_init", 0);
for (int t=0; t<num_timesteps; ++t){
if ((t % vtk_freq==0) && pairs_sim->rank()==0) std::cout << "Timestep: " << t << std::endl;
if (t % rebalance_freq == 0){
pairs_sim->update_domain();
}
pairs_sim->update_cells(t);
pairs_sim->gravity();
// All global and local interactions are contained within the 'spring_dashpot' module
// You have the option to call spring_dashpot before or after 'gravity' or any other force-update module
pairs_sim->spring_dashpot();
pairs_sim->euler(dt);
pairs_sim->communicate(t);
if (t % vtk_freq==0){
pairs::vtk_with_rotation(pairs_runtime, pairs::Shapes::Box, "output/local_boxes", 0, pairs_sim->nlocal(), t);
pairs::vtk_with_rotation(pairs_runtime, pairs::Shapes::Sphere, "output/local_spheres", 0, pairs_sim->nlocal(), t);
}
}
pairs_sim->end();
}
\ No newline at end of file
import math
import pairs
import sys
import os
def update_mass_and_inertia(i):
rotation_matrix[i] = diagonal_matrix(1.0)
rotation[i] = default_quaternion()
if is_sphere(i):
inv_inertia[i] = inversed(diagonal_matrix(0.4 * mass[i] * radius[i] * radius[i]))
elif is_box(i):
inv_inertia[i] = inversed(diagonal_matrix (
edge_length[i][1]*edge_length[i][1] + edge_length[i][2]*edge_length[i][2],
edge_length[i][0]*edge_length[i][0] + edge_length[i][2]*edge_length[i][2],
edge_length[i][0]*edge_length[i][0] + edge_length[i][1]*edge_length[i][1]) * (mass[i] / 12.0))
axis = vector(1,0.5,1)
angle = -3.1415/6.0
rotation[i] = quaternion(axis, angle) * rotation[i]
rotation_matrix[i] = quaternion_to_rotation_matrix(rotation[i])
elif is_halfspace(i):
mass[i] = infinity
inv_inertia[i] = 0.0
def spring_dashpot(i, j):
delta_ij = -penetration_depth(i, j)
skip_when(delta_ij < 0.0)
velocity_wf_i = linear_velocity[i] + cross(angular_velocity[i], contact_point(i, j) - position[i])
velocity_wf_j = linear_velocity[j] + cross(angular_velocity[j], contact_point(i, j) - position[j])
rel_vel = -(velocity_wf_i - velocity_wf_j)
rel_vel_n = dot(rel_vel, contact_normal(i, j))
rel_vel_t = rel_vel - rel_vel_n * contact_normal(i, j)
fNabs = stiffness[i,j] * delta_ij + max(damping_norm[i,j] * rel_vel_n, 0.0)
fN = fNabs * contact_normal(i, j)
fTabs = min(damping_tan[i,j] * length(rel_vel_t), friction[i, j] * fNabs)
fT = fTabs * normalized(rel_vel_t)
partial_force = fN + fT
apply(force, partial_force)
apply(torque, cross(contact_point(i, j) - position[i], partial_force))
def euler(i):
skip_when(is_fixed(i) or is_infinite(i))
inv_mass = 1.0 / mass[i]
position[i] += 0.5 * inv_mass * force[i] * dt * dt + linear_velocity[i] * dt
linear_velocity[i] += inv_mass * force[i] * dt
wdot = rotation_matrix[i] * (inv_inertia[i] * torque[i]) * transposed(rotation_matrix[i])
phi = angular_velocity[i] * dt + 0.5 * wdot * dt * dt
rotation[i] = quaternion(phi, length(phi)) * rotation[i]
rotation_matrix[i] = quaternion_to_rotation_matrix(rotation[i])
angular_velocity[i] += wdot * dt
def gravity(i):
force[i][2] -= mass[i] * gravity_SI
file_name = os.path.basename(__file__)
file_name_without_extension = os.path.splitext(file_name)[0]
psim = pairs.simulation(
file_name_without_extension,
[pairs.sphere(), pairs.halfspace(), pairs.box()],
double_prec=True,
particle_capacity=1000000,
neighbor_capacity=20,
debug=True)
target = sys.argv[1] if len(sys.argv) > 1 else "none"
if target == 'gpu':
psim.target(pairs.target_gpu())
elif target == 'cpu':
psim.target(pairs.target_cpu())
else:
print(f"Invalid target, use {sys.argv[0]} <cpu/gpu>")
psim.add_position('position')
psim.add_property('mass', pairs.real())
psim.add_property('linear_velocity', pairs.vector())
psim.add_property('angular_velocity', pairs.vector())
psim.add_property('force', pairs.vector(), volatile=True)
psim.add_property('torque', pairs.vector(), volatile=True)
psim.add_property('radius', pairs.real())
psim.add_property('normal', pairs.vector())
psim.add_property('inv_inertia', pairs.matrix())
psim.add_property('rotation_matrix', pairs.matrix())
psim.add_property('rotation', pairs.quaternion())
psim.add_property('edge_length', pairs.vector())
ntypes = 2
psim.add_feature('type', ntypes)
psim.add_feature_property('type', 'stiffness', pairs.real(), [3000 for i in range(ntypes * ntypes)])
psim.add_feature_property('type', 'damping_norm', pairs.real(), [10.0 for i in range(ntypes * ntypes)])
psim.add_feature_property('type', 'damping_tan', pairs.real())
psim.add_feature_property('type', 'friction', pairs.real())
psim.set_domain_partitioner(pairs.block_forest())
psim.pbc([False, False, False])
psim.build_cell_lists()
psim.compute(update_mass_and_inertia, symbols={'infinity': math.inf })
# 'compute_globals' enables computation of forces on global bodies
psim.compute(spring_dashpot, compute_globals=True)
psim.compute(euler, parameters={'dt': pairs.real()})
gravity_SI = 9.81
psim.compute(gravity, symbols={'gravity_SI': gravity_SI })
psim.generate()
......@@ -36,6 +36,7 @@ def spring_dashpot(i, j):
apply(torque, cross(contact_point(i, j) - position, partial_force))
def euler(i):
skip_when(is_fixed(i) or is_infinite(i))
inv_mass = 1.0 / mass[i]
position[i] += 0.5 * inv_mass * force[i] * dt * dt + linear_velocity[i] * dt
linear_velocity[i] += inv_mass * force[i] * dt
......@@ -58,8 +59,7 @@ psim = pairs.simulation(
double_prec=True,
particle_capacity=1000000,
neighbor_capacity=20,
debug=True,
generate_whole_program=False)
debug=True)
target = sys.argv[1] if len(sys.argv) > 1 else "none"
......@@ -97,7 +97,7 @@ psim.build_cell_lists()
# The order of user-defined functions is not important here since
# they are not used by other subroutines and are only callable individually
psim.compute(update_mass_and_inertia, symbols={'infinity': math.inf })
psim.compute(spring_dashpot)
psim.compute(spring_dashpot, profile=True)
psim.compute(euler, parameters={'dt': pairs.real()})
gravity_SI = 9.81
......
Subproject commit 65fcc780c8843f982b23e3b1f14de6408982c5d8
......@@ -13,7 +13,7 @@
#include <blockforest/loadbalancing/weight_assignment/MetisAssignmentFunctor.h>
#include <blockforest/loadbalancing/weight_assignment/WeightAssignmentFunctor.h>
//---
#include "../boundary_weights.hpp"
#include "boundary_weights.hpp"
#include "../pairs_common.hpp"
#include "../devices/device.hpp"
#include "regular_6d_stencil.hpp"
......@@ -24,8 +24,8 @@ namespace pairs {
BlockForest::BlockForest(
PairsRuntime *ps_,
real_t xmin, real_t xmax, real_t ymin, real_t ymax, real_t zmin, real_t zmax, bool pbcx, bool pbcy, bool pbcz, bool balance_workload_) :
DomainPartitioner(xmin, xmax, ymin, ymax, zmin, zmax), ps(ps_), globalPBC{pbcx, pbcy, pbcz}, balance_workload(balance_workload_) {
real_t xmin, real_t xmax, real_t ymin, real_t ymax, real_t zmin, real_t zmax, bool balance_workload_) :
DomainPartitioner(xmin, xmax, ymin, ymax, zmin, zmax), ps(ps_), balance_workload(balance_workload_) {
subdom = new real_t[ndims * 2];
}
......@@ -35,8 +35,7 @@ BlockForest::BlockForest(PairsRuntime *ps_, const std::shared_ptr<walberla::bloc
DomainPartitioner(bf->getDomain().xMin(), bf->getDomain().xMax(),
bf->getDomain().yMin(), bf->getDomain().yMax(),
bf->getDomain().zMin(), bf->getDomain().zMax()),
ps(ps_),
globalPBC{bf->isXPeriodic(), bf->isYPeriodic(), bf->isZPeriodic()} {
ps(ps_) {
subdom = new real_t[ndims * 2];
mpiManager = walberla::mpi::MPIManager::instance();
world_size = mpiManager->numProcesses();
......@@ -60,10 +59,8 @@ void BlockForest::updateNeighborhood() {
for(uint neigh = 0; neigh < block->getNeighborhoodSize(); ++neigh) {
auto neighbor_rank = walberla::int_c(block->getNeighborProcess(neigh));
// Neighbor blocks that belong to the same rank should be added to
// neighborhood only if there's PBC along any dim, otherwise they should be skipped.
// TODO: Make PBCs work with runtime load balancing
if((neighbor_rank != me) || globalPBC[0] || globalPBC[1] || globalPBC[2]) {
// if(neighbor_rank != me) {
const walberla::BlockID& neighbor_id = block->getNeighborId(neigh);
walberla::math::AABB neighbor_aabb = block->getNeighborAABB(neigh);
auto begin = blocks_pushed[neighbor_rank].begin();
......@@ -73,7 +70,7 @@ void BlockForest::updateNeighborhood() {
neighborhood[neighbor_rank].push_back(neighbor_aabb);
blocks_pushed[neighbor_rank].push_back(neighbor_id);
}
}
// }
}
}
......@@ -182,38 +179,41 @@ void BlockForest::updateWeights() {
}
}
walberla::Vector3<int> BlockForest::getBlockConfig(int num_processes, int nx, int ny, int nz) {
    const int bx_factor = 1;
    const int by_factor = 1;
    const int bz_factor = 1;
    const int ax = nx * ny;
    const int ay = nx * nz;
    const int az = ny * nz;
    int bestsurf = 2 * (ax + ay + az);
    int x = 1;
    int y = 1;
    int z = 1;

    for(int i = 1; i < num_processes; ++i) {
        if(num_processes % i == 0) {
            const int rem_yz = num_processes / i;

            for(int j = 1; j < rem_yz; ++j) {
                if(rem_yz % j == 0) {
                    const int k = rem_yz / j;
                    const int surf = (ax / i / j) + (ay / i / k) + (az / j / k);

                    if(surf < bestsurf) {
                        x = i, y = j, z = k;
                        bestsurf = surf;
                    }
                }
            }
        }
    }

    return walberla::Vector3<int>(x * bx_factor, y * by_factor, z * bz_factor);
}

walberla::Vector3<int> BlockForest::getBlockConfig() {
    real_t area[3];
    real_t best_surf = 0.0;
    int ndims = 3;
    int d = 0;
    int nranks[3] = {1, 1, 1};

    for(int d1 = 0; d1 < ndims; d1++) {
        for(int d2 = d1 + 1; d2 < ndims; d2++) {
            area[d] = (this->grid_max[d1] - this->grid_min[d1]) *
                      (this->grid_max[d2] - this->grid_min[d2]);
            best_surf += 2.0 * area[d];
            d++;
        }
    }

    for (int i = 1; i <= world_size; i++) {
        if (world_size % i == 0) {
            const int rem_yz = world_size / i;

            for (int j = 1; j <= rem_yz; j++) {
                if (rem_yz % j == 0) {
                    const int k = rem_yz / j;
                    const real_t surf = (area[0] / i / j) + (area[1] / i / k) + (area[2] / j / k);

                    if (surf < best_surf) {
                        nranks[0] = i;
                        nranks[1] = j;
                        nranks[2] = k;
                        best_surf = surf;
                    }
                }
            }
        }
    }

    return walberla::Vector3<int>(nranks[0], nranks[1], nranks[2]);
}
int BlockForest::getInitialRefinementLevel(int num_processes) {
......@@ -253,20 +253,14 @@ void BlockForest::initialize(int *argc, char ***argv) {
mpiManager->useWorldComm();
world_size = mpiManager->numProcesses();
rank = mpiManager->rank();
walberla::math::AABB domain(
grid_min[0], grid_min[1], grid_min[2], grid_max[0], grid_max[1], grid_max[2]);
int gridsize[3] = {32, 32, 32};
auto procs = mpiManager->numProcesses();
auto block_config = balance_workload ? walberla::Vector3<int>(1, 1, 1) :
getBlockConfig(procs, gridsize[0], gridsize[1], gridsize[2]);
auto ref_level = balance_workload ? getInitialRefinementLevel(procs) : 0;
walberla::Vector3<bool> pbc(globalPBC[0], globalPBC[1], globalPBC[2]);
forest = walberla::blockforest::createBlockForest(domain, block_config, pbc, procs, ref_level);
auto block_config = balance_workload ? walberla::Vector3<int>(1, 1, 1) : getBlockConfig();
auto ref_level = balance_workload ? getInitialRefinementLevel(world_size) : 0;
// PBCs are forced to true here and separately handled when determining ghosts
walberla::Vector3<bool> pbc(true, true, true);
walberla::math::AABB domain(grid_min[0], grid_min[1], grid_min[2], grid_max[0], grid_max[1], grid_max[2]);
forest = walberla::blockforest::createBlockForest(domain, block_config, pbc, world_size, ref_level);
this->info = make_shared<walberla::blockforest::InfoCollection>();
......@@ -298,7 +292,7 @@ void BlockForest::update() {
// PAIRS_DEBUG("Rebalance\n");
if (rank==0) std::cout << "Rebalance" << std::endl;
forest->refresh();
}
}
this->updateNeighborhood();
this->setBoundingBox();
......
......@@ -40,14 +40,13 @@ private:
std::vector<double> aabbs;
PairsRuntime *ps;
real_t *subdom;
const bool globalPBC[3];
int world_size, rank, nranks, total_aabbs;
bool balance_workload = false;
public:
BlockForest(
PairsRuntime *ps_,
real_t xmin, real_t xmax, real_t ymin, real_t ymax, real_t zmin, real_t zmax, bool pbcx, bool pbcy, bool pbcz, bool balance_workload_);
real_t xmin, real_t xmax, real_t ymin, real_t ymax, real_t zmin, real_t zmax, bool balance_workload_);
BlockForest(PairsRuntime *ps_, const std::shared_ptr<walberla::blockforest::BlockForest> &bf);
......@@ -69,7 +68,7 @@ public:
void updateNeighborhood();
void updateWeights();
walberla::math::Vector3<int> getBlockConfig(int num_processes, int nx, int ny, int nz);
walberla::math::Vector3<int> getBlockConfig();
int getInitialRefinementLevel(int num_processes);
void setBoundingBox();
void rebalance();
......
......@@ -6,8 +6,8 @@
#include <fstream>
#include <sstream>
//---
#include "pairs.hpp"
#include "pairs_common.hpp"
#include "../pairs.hpp"
#include "../pairs_common.hpp"
namespace pairs {
......
......@@ -15,7 +15,6 @@ namespace pairs {
void PairsRuntime::initDomain(
int *argc, char ***argv,
real_t xmin, real_t ymin, real_t zmin, real_t xmax, real_t ymax, real_t zmax,
bool pbcx, bool pbcy, bool pbcz,
bool balance_workload) {
int mpi_initialized=0;
......@@ -40,7 +39,7 @@ void PairsRuntime::initDomain(
#ifdef USE_WALBERLA
else if(dom_part_type == BlockForestPartitioning) {
dom_part = new BlockForest(this, xmin, xmax, ymin, ymax, zmin, zmax, pbcx, pbcy, pbcz, balance_workload);
dom_part = new BlockForest(this, xmin, xmax, ymin, ymax, zmin, zmax, balance_workload);
}
#endif
......@@ -188,12 +187,15 @@ void PairsRuntime::copyArraySliceToDevice(
if(action == Ignore || action == WriteAfterRead || action == ReadOnly) {
if(action == Ignore || !array_flags->isDeviceFlagSet(array_id)) {
if(!array.isStatic()) {
PAIRS_DEBUG(
"Copying array %s to device (offset=%lu, n=%lu)\n",
array.getName().c_str(), offset, size);
// PAIRS_DEBUG(
// "Copying array %s to device (offset=%lu, n=%lu)\n",
// array.getName().c_str(), offset, size);
this->getTimers()->start(TimerMarkers::DeviceTransfers);
pairs::copy_slice_to_device(
array.getHostPointer(), array.getDevicePointer(), offset, size);
this->getTimers()->stop(TimerMarkers::DeviceTransfers);
}
}
}
......@@ -210,20 +212,22 @@ void PairsRuntime::copyArrayToDevice(Array &array, action_t action, size_t size)
if(action == Ignore || action == WriteAfterRead || action == ReadOnly) {
if(action == Ignore || !array_flags->isDeviceFlagSet(array_id)) {
this->getTimers()->start(TimerMarkers::DeviceTransfers);
if(array.isStatic()) {
PAIRS_DEBUG(
"Copying static array %s to device (n=%lu)\n",
array.getName().c_str(), size);
// PAIRS_DEBUG(
// "Copying static array %s to device (n=%lu)\n",
// array.getName().c_str(), size);
pairs::copy_static_symbol_to_device(
array.getHostPointer(), array.getDevicePointer(), size);
} else {
PAIRS_DEBUG(
"Copying array %s to device (n=%lu)\n",
array.getName().c_str(), size);
// PAIRS_DEBUG(
// "Copying array %s to device (n=%lu)\n",
// array.getName().c_str(), size);
pairs::copy_to_device(array.getHostPointer(), array.getDevicePointer(), size);
}
this->getTimers()->stop(TimerMarkers::DeviceTransfers);
}
}
......@@ -240,12 +244,15 @@ void PairsRuntime::copyArraySliceToHost(Array &array, action_t action, size_t of
if(action == Ignore || action == WriteAfterRead || action == ReadOnly) {
if(action == Ignore || !array_flags->isHostFlagSet(array_id)) {
if(!array.isStatic()) {
PAIRS_DEBUG(
"Copying array %s to host (offset=%lu, n=%lu)\n",
array.getName().c_str(), offset, size);
// PAIRS_DEBUG(
// "Copying array %s to host (offset=%lu, n=%lu)\n",
// array.getName().c_str(), offset, size);
this->getTimers()->start(TimerMarkers::DeviceTransfers);
pairs::copy_slice_to_host(
array.getDevicePointer(), array.getHostPointer(), offset, size);
this->getTimers()->stop(TimerMarkers::DeviceTransfers);
}
}
}
......@@ -262,16 +269,19 @@ void PairsRuntime::copyArrayToHost(Array &array, action_t action, size_t size) {
if(action == Ignore || action == WriteAfterRead || action == ReadOnly) {
if(action == Ignore || !array_flags->isHostFlagSet(array_id)) {
this->getTimers()->start(TimerMarkers::DeviceTransfers);
if(array.isStatic()) {
PAIRS_DEBUG(
"Copying static array %s to host (n=%lu)\n", array.getName().c_str(), size);
// PAIRS_DEBUG(
// "Copying static array %s to host (n=%lu)\n", array.getName().c_str(), size);
pairs::copy_static_symbol_to_host(
array.getDevicePointer(), array.getHostPointer(), size);
} else {
PAIRS_DEBUG("Copying array %s to host (n=%lu)\n", array.getName().c_str(), size);
// PAIRS_DEBUG("Copying array %s to host (n=%lu)\n", array.getName().c_str(), size);
pairs::copy_to_host(array.getDevicePointer(), array.getHostPointer(), size);
}
this->getTimers()->stop(TimerMarkers::DeviceTransfers);
}
}
......@@ -287,8 +297,10 @@ void PairsRuntime::copyPropertyToDevice(Property &prop, action_t action, size_t
if(action == Ignore || action == WriteAfterRead || action == ReadOnly) {
if(action == Ignore || !prop_flags->isDeviceFlagSet(prop_id)) {
PAIRS_DEBUG("Copying property %s to device (n=%lu)\n", prop.getName().c_str(), size);
// PAIRS_DEBUG("Copying property %s to device (n=%lu)\n", prop.getName().c_str(), size);
this->getTimers()->start(TimerMarkers::DeviceTransfers);
pairs::copy_to_device(prop.getHostPointer(), prop.getDevicePointer(), size);
this->getTimers()->stop(TimerMarkers::DeviceTransfers);
}
}
......@@ -304,8 +316,10 @@ void PairsRuntime::copyPropertyToHost(Property &prop, action_t action, size_t si
if(action == Ignore || action == WriteAfterRead || action == ReadOnly) {
if(action == Ignore || !prop_flags->isHostFlagSet(prop_id)) {
PAIRS_DEBUG("Copying property %s to host (n=%lu)\n", prop.getName().c_str(), size);
// PAIRS_DEBUG("Copying property %s to host (n=%lu)\n", prop.getName().c_str(), size);
this->getTimers()->start(TimerMarkers::DeviceTransfers);
pairs::copy_to_host(prop.getDevicePointer(), prop.getHostPointer(), size);
this->getTimers()->stop(TimerMarkers::DeviceTransfers);
}
}
......@@ -323,11 +337,14 @@ void PairsRuntime::copyContactPropertyToDevice(
if(action == Ignore || action == WriteAfterRead || action == ReadOnly) {
if(action == Ignore || !contact_prop_flags->isDeviceFlagSet(prop_id)) {
PAIRS_DEBUG("Copying contact property %s to device (n=%lu)\n",
contact_prop.getName().c_str(), size);
// PAIRS_DEBUG("Copying contact property %s to device (n=%lu)\n",
// contact_prop.getName().c_str(), size);
this->getTimers()->start(TimerMarkers::DeviceTransfers);
pairs::copy_to_device(
contact_prop.getHostPointer(), contact_prop.getDevicePointer(), size);
this->getTimers()->stop(TimerMarkers::DeviceTransfers);
contact_prop_flags->setDeviceFlag(prop_id);
}
......@@ -345,11 +362,12 @@ void PairsRuntime::copyContactPropertyToHost(
if(action == Ignore || action == WriteAfterRead || action == ReadOnly) {
if(!contact_prop_flags->isHostFlagSet(contact_prop.getId())) {
PAIRS_DEBUG("Copying contact property %s to host (n=%lu)\n",
contact_prop.getName().c_str(), size);
// PAIRS_DEBUG("Copying contact property %s to host (n=%lu)\n",
// contact_prop.getName().c_str(), size);
this->getTimers()->start(TimerMarkers::DeviceTransfers);
pairs::copy_to_host(
contact_prop.getDevicePointer(), contact_prop.getHostPointer(), size);
this->getTimers()->stop(TimerMarkers::DeviceTransfers);
contact_prop_flags->setHostFlag(prop_id);
}
......@@ -363,26 +381,27 @@ void PairsRuntime::copyContactPropertyToHost(
void PairsRuntime::copyFeaturePropertyToDevice(FeatureProperty &feature_prop) {
const size_t n = feature_prop.getArraySize();
PAIRS_DEBUG("Copying feature property %s to device (n=%lu)\n",
feature_prop.getName().c_str(), n);
// PAIRS_DEBUG("Copying feature property %s to device (n=%lu)\n",
// feature_prop.getName().c_str(), n);
this->getTimers()->start(TimerMarkers::DeviceTransfers);
pairs::copy_static_symbol_to_device(
feature_prop.getHostPointer(), feature_prop.getDevicePointer(), n);
this->getTimers()->stop(TimerMarkers::DeviceTransfers);
}
void PairsRuntime::communicateSizes(int dim, const int *send_sizes, int *recv_sizes) {
auto nsend_id = getArrayByHostPointer(send_sizes).getId();
auto nrecv_id = getArrayByHostPointer(recv_sizes).getId();
this->getTimers()->start(DeviceTransfers);
copyArrayToHost(nsend_id, ReadOnly);
array_flags->setHostFlag(nrecv_id);
array_flags->clearDeviceFlag(nrecv_id);
this->getTimers()->stop(DeviceTransfers);
this->getTimers()->start(Communication);
this->getTimers()->start(TimerMarkers::MPI);
this->getDomainPartitioner()->communicateSizes(dim, send_sizes, recv_sizes);
this->getTimers()->stop(Communication);
this->getTimers()->stop(TimerMarkers::MPI);
}
void PairsRuntime::communicateData(
......@@ -401,7 +420,6 @@ void PairsRuntime::communicateData(
auto nsend_id = getArrayByHostPointer(nsend).getId();
auto nrecv_id = getArrayByHostPointer(nrecv).getId();
this->getTimers()->start(DeviceTransfers);
copyArrayToHost(send_offsets_id, ReadOnly);
copyArrayToHost(recv_offsets_id, ReadOnly);
copyArrayToHost(nsend_id, ReadOnly);
......@@ -434,17 +452,13 @@ void PairsRuntime::communicateData(
array_flags->clearDeviceFlag(recv_buf_id);
#endif
this->getTimers()->stop(DeviceTransfers);
this->getTimers()->start(Communication);
this->getTimers()->start(TimerMarkers::MPI);
this->getDomainPartitioner()->communicateData(
dim, elem_size, send_buf_ptr, send_offsets, nsend, recv_buf_ptr, recv_offsets, nrecv);
this->getTimers()->stop(Communication);
this->getTimers()->stop(TimerMarkers::MPI);
#ifndef ENABLE_CUDA_AWARE_MPI
this->getTimers()->start(DeviceTransfers);
copyArrayToDevice(recv_buf_id, Ignore, nrecv_all * elem_size * sizeof(real_t));
this->getTimers()->stop(DeviceTransfers);
#endif
}
......@@ -464,7 +478,6 @@ void PairsRuntime::communicateDataReverse(
auto nsend_id = getArrayByHostPointer(nsend).getId();
auto nrecv_id = getArrayByHostPointer(nrecv).getId();
this->getTimers()->start(DeviceTransfers);
copyArrayToHost(send_offsets_id, ReadOnly);
copyArrayToHost(recv_offsets_id, ReadOnly);
copyArrayToHost(nsend_id, ReadOnly);
......@@ -497,17 +510,13 @@ void PairsRuntime::communicateDataReverse(
array_flags->clearDeviceFlag(recv_buf_id);
#endif
this->getTimers()->stop(DeviceTransfers);
this->getTimers()->start(Communication);
this->getTimers()->start(TimerMarkers::MPI);
this->getDomainPartitioner()->communicateDataReverse(
dim, elem_size, send_buf_ptr, send_offsets, nsend, recv_buf_ptr, recv_offsets, nrecv);
this->getTimers()->stop(Communication);
this->getTimers()->stop(TimerMarkers::MPI);
#ifndef ENABLE_CUDA_AWARE_MPI
this->getTimers()->start(DeviceTransfers);
copyArrayToDevice(recv_buf_id, Ignore, nrecv_all * elem_size * sizeof(real_t));
this->getTimers()->stop(DeviceTransfers);
#endif
}
......@@ -527,7 +536,6 @@ void PairsRuntime::communicateAllData(
auto nsend_id = getArrayByHostPointer(nsend).getId();
auto nrecv_id = getArrayByHostPointer(nrecv).getId();
this->getTimers()->start(DeviceTransfers);
copyArrayToHost(send_offsets_id, ReadOnly);
copyArrayToHost(recv_offsets_id, ReadOnly);
copyArrayToHost(nsend_id, ReadOnly);
......@@ -560,17 +568,13 @@ void PairsRuntime::communicateAllData(
array_flags->clearDeviceFlag(recv_buf_id);
#endif
this->getTimers()->stop(DeviceTransfers);
this->getTimers()->start(Communication);
this->getTimers()->start(TimerMarkers::MPI);
this->getDomainPartitioner()->communicateAllData(
ndims, elem_size, send_buf_ptr, send_offsets, nsend, recv_buf_ptr, recv_offsets, nrecv);
this->getTimers()->stop(Communication);
this->getTimers()->stop(TimerMarkers::MPI);
#ifndef ENABLE_CUDA_AWARE_MPI
this->getTimers()->start(DeviceTransfers);
copyArrayToDevice(recv_buf_id, Ignore, nrecv_all * elem_size * sizeof(real_t));
this->getTimers()->stop(DeviceTransfers);
#endif
}
......@@ -590,7 +594,6 @@ void PairsRuntime::communicateContactHistoryData(
auto nsend_contact_id = getArrayByHostPointer(nsend_contact).getId();
auto nrecv_contact_id = getArrayByHostPointer(nrecv_contact).getId();
this->getTimers()->start(DeviceTransfers);
copyArrayToHost(contact_soffsets_id, ReadOnly);
copyArrayToHost(nsend_contact_id, ReadOnly);
......@@ -611,9 +614,7 @@ void PairsRuntime::communicateContactHistoryData(
array_flags->clearDeviceFlag(recv_buf_id);
#endif
this->getTimers()->stop(DeviceTransfers);
this->getTimers()->start(Communication);
this->getTimers()->start(TimerMarkers::MPI);
this->getDomainPartitioner()->communicateSizes(dim, nsend_contact, nrecv_contact);
contact_roffsets[dim * 2 + 0] = 0;
......@@ -630,13 +631,11 @@ void PairsRuntime::communicateContactHistoryData(
send_buf_ptr, contact_soffsets, nsend_contact,
recv_buf_ptr, contact_roffsets, nrecv_contact);
this->getTimers()->stop(Communication);
this->getTimers()->stop(TimerMarkers::MPI);
#ifndef ENABLE_CUDA_AWARE_MPI
this->getTimers()->start(DeviceTransfers);
copyArrayToDevice(recv_buf_id, Ignore, nrecv_all * sizeof(real_t));
copyArrayToDevice(contact_roffsets_id, Ignore);
this->getTimers()->stop(DeviceTransfers);
#endif
}
......@@ -644,4 +643,22 @@ void PairsRuntime::copyRuntimeArray(const std::string& name, void *dest, const i
this->getDomainPartitioner()->copyRuntimeArray(name, dest, size);
}
void PairsRuntime::allReduceInplaceSum(real_t *red_buffer, int num_elems){
real_t *buff_ptr = red_buffer;
auto buff_array = getArrayByHostPointer(red_buffer);
#ifdef ENABLE_CUDA_AWARE_MPI
buff_ptr = (real_t *) buff_array.getDevicePointer();
#else
copyArrayToHost(buff_array, Ignore, num_elems * sizeof(real_t));
#endif
MPI_Allreduce(MPI_IN_PLACE, buff_ptr, num_elems, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
#ifndef ENABLE_CUDA_AWARE_MPI
copyArrayToDevice(buff_array, Ignore, num_elems * sizeof(real_t));
#endif
}
}