diff --git a/.gitmodules b/.gitmodules new file mode 100644 index 0000000000000000000000000000000000000000..a42fb023f2ab03da6df441e4871a05ef9e4843b7 --- /dev/null +++ b/.gitmodules @@ -0,0 +1,3 @@ +[submodule "external/walberla"] + path = external/walberla + url = https://i10git.cs.fau.de/walberla/walberla.git diff --git a/CMakeLists.txt b/CMakeLists.txt index 0ecd4d90006027630096f1c57c268bb5d494e7da..12dcd69019fc3901d1aaff74af9957ad05817145 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -9,14 +9,9 @@ set(CMAKE_CXX_FLAGS_RELEASE "-O3") set(CMAKE_CXX_FLAGS_DEBUG "-g -O0 -DDEBUG") option(USE_MPI "USE_MPI" ON) +option(USE_WALBERLA "Enable waLBerla support for using BlockForest domain partitioning and dynamic load balancing" OFF) option(COMPILE_CUDA "COMPILE_CUDA" OFF) -option(GENERATE_WHOLE_PROGRAM "Generate the whole program (i.e. including the 'main' function). No additional source files are needed." OFF) -option(BUILD_APP "Build a stand-alone app which uses the P4IRS modular interface. Provide your source files with -DUSER_SOURCE_FILES" OFF) - -if(GENERATE_WHOLE_PROGRAM AND BUILD_APP) - message(FATAL_ERROR "You must choose either GENERATE_WHOLE_PROGRAM or BUILD_APP or neither.\n - Choose neither if you only want to use the P4IRS library in your project (in a seperate build system).") -endif() +set(USER_SOURCE_FILES "" CACHE STRING "List of source files to compile (semicolon-separated)") set(INPUT_SCRIPT ${INPUT_SCRIPT} CACHE PATH "The input python script triggering code generation") if(NOT EXISTS ${INPUT_SCRIPT}) @@ -42,7 +37,7 @@ file(MAKE_DIRECTORY ${OUTPUT_DIR}) #================================================================================ # TODO: Unify all interfaces set(GEN_INTERFACE_DIR ${CMAKE_CURRENT_BINARY_DIR}/internal_interfaces) -set(GEN_INTERFACE_HEADER ${CMAKE_CURRENT_BINARY_DIR}/last_generated.hpp) +set(GEN_INTERFACE_HEADER ${GEN_INTERFACE_DIR}/last_generated.hpp) file(MAKE_DIRECTORY ${GEN_INTERFACE_DIR}) #================================================================================ @@ -50,14 +45,14 @@ file(MAKE_DIRECTORY ${GEN_INTERFACE_DIR}) #================================================================================ set(RUNTIME_COMMON_FILES runtime/pairs.cpp - runtime/copper_fcc_lattice.cpp - runtime/create_body.cpp - runtime/dem_sc_grid.cpp - runtime/read_from_file.cpp - runtime/stats.cpp - runtime/thermo.cpp - runtime/timing.cpp - runtime/vtk.cpp + runtime/utility/copper_fcc_lattice.cpp + runtime/utility/create_body.cpp + runtime/utility/dem_sc_grid.cpp + runtime/utility/read_from_file.cpp + runtime/utility/stats.cpp + runtime/utility/thermo.cpp + runtime/utility/timing.cpp + runtime/utility/vtk.cpp runtime/domain/regular_6d_stencil.cpp) #================================================================================ @@ -71,7 +66,7 @@ set(PAIRS_LINK_DIRS ${CMAKE_CURRENT_BINARY_DIR}) set(PAIRS_INCLUDE_DIRS ${CMAKE_CURRENT_BINARY_DIR}) # The target can either be an executable or a static library -if(GENERATE_WHOLE_PROGRAM OR BUILD_APP) +if(USER_SOURCE_FILES) add_executable(${PAIRS_TARGET} ${RUNTIME_COMMON_FILES}) else() add_library(${PAIRS_TARGET} STATIC ${RUNTIME_COMMON_FILES}) @@ -90,13 +85,7 @@ set_target_properties(${PAIRS_TARGET} PROPERTIES #================================================================================ # USER_SOURCE_FILES ============================================================= #================================================================================ -if(BUILD_APP) - set(USER_SOURCE_FILES "" CACHE STRING "List of source files to compile (semicolon-separated)") - if(NOT USER_SOURCE_FILES) - message(FATAL_ERROR "BUILD_APP is ON. You have to specify source files like this:\n - -DUSER_SOURCE_FILES=src/main.cpp;src/helper.cpp") - endif() - +if(USER_SOURCE_FILES) foreach(file ${USER_SOURCE_FILES}) if(NOT EXISTS ${file}) message(FATAL_ERROR "File '${file}' does not exist!") @@ -108,14 +97,8 @@ endif() #================================================================================ # waLBerla ====================================================================== #================================================================================ -set(WALBERLA_DIR ${WALBERLA_DIR} CACHE PATH "Path to waLBerla source directory (required only when using BlockForest partitioning).") - -if(WALBERLA_DIR) - if(EXISTS "${WALBERLA_DIR}") - target_compile_definitions(${PAIRS_TARGET} PUBLIC USE_WALBERLA) - else() - message(FATAL_ERROR "Invalid WALBERLA_DIR: '${WALBERLA_DIR}' does not exist.") - endif() +if(USE_WALBERLA) + target_compile_definitions(${PAIRS_TARGET} PUBLIC USE_WALBERLA) set(RUNTIME_WALBERLA_FILES runtime/domain/block_forest.cpp @@ -123,18 +106,19 @@ if(WALBERLA_DIR) # TODO: Generate the host/device functions for computing weights if(COMPILE_CUDA) - list(APPEND RUNTIME_WALBERLA_FILES runtime/boundary_weights.cu) + list(APPEND RUNTIME_WALBERLA_FILES runtime/domain/boundary_weights.cu) else() - list(APPEND RUNTIME_WALBERLA_FILES runtime/boundary_weights.cpp) + list(APPEND RUNTIME_WALBERLA_FILES runtime/domain/boundary_weights.cpp) endif() - + target_sources(${PAIRS_TARGET} PRIVATE ${RUNTIME_WALBERLA_FILES}) - ## Linking walberla modules - set(PAIRS_WALBERLA_DEPENDENCIES blockforest core pe) - find_package(waLBerla REQUIRED) - set(WALBERLA_LINK_LIBRARIES_KEYWORD PUBLIC) - target_link_modules(${PAIRS_TARGET} ${PAIRS_WALBERLA_DEPENDENCIES}) # This is a walberla helper function + add_subdirectory(external/walberla EXCLUDE_FROM_ALL) + waLBerla_import() + + # Recent issue from the waLBerla side: warnings about empty CUDA_ARCHITECTURES for walberla_gpu + set(PAIRS_WALBERLA_DEPENDENCIES walberla::blockforest walberla::core) + target_link_libraries(${PAIRS_TARGET} PUBLIC ${PAIRS_WALBERLA_DEPENDENCIES}) ## TODO: PAIRS_LINK_DIRS and PAIRS_LINK_LIBRARIES for walberla modules *AND* their dependencies ## This implemention only works if the consumer of the library is itself a walberla app (made within the build system of walberla) @@ -250,10 +234,12 @@ endif() # LIKWID ======================================================================== #================================================================================ if(LIKWID_DIR) + # Example: -DLIKWID_DIR=/apps/likwid/5.4.0 + target_compile_options(${PAIRS_TARGET} PRIVATE -DLIKWID_PERFMON -pthread) - target_link_libraries(${PAIRS_TARGET} PRIVATE ${LIKWID_DIR}/lib/liblikwid.a) - list(APPEND PAIRS_LINK_LIBRARIES ${LIKWID_DIR}/lib/liblikwid.a) + target_link_libraries(${PAIRS_TARGET} PRIVATE ${LIKWID_DIR}/lib/liblikwid.so) + list(APPEND PAIRS_LINK_LIBRARIES ${LIKWID_DIR}/lib/liblikwid.so) include_directories(${LIKWID_DIR}/include) list(APPEND PAIRS_INCLUDE_DIRS "${LIKWID_DIR}/include") diff --git a/README.md b/README.md index 882644c488aa6495080216615ba203927e6d0dd6..4d62ccb2b2b5e6d15b93875ac311a3ebc0597994 100644 --- a/README.md +++ b/README.md @@ -100,57 +100,34 @@ else: psim.generate() ``` -## Build instructions +## Build Instructions -P4IRS can be built in 3 different modes using the CMake build system. Before we demostrate each mode, ensure you have CMake, MPI and CUDA (if targeting GPU execution) available in your environment. +P4IRS can be built in two different modes using the CMake build system. Before we demostrate each mode, ensure you have CMake, MPI and CUDA (if targeting GPU execution) available in your environment. In the following, we assume we have created and navigated to a build directory: `mkdir build; cd build` -**General CMake flags (applicable to all 3 modes):** +**Basic CMake flags:** * Pass your input script to CMake using `-DINPUT_SCRIPT=path/to/script.py` * Enable CUDA with `-DCOMPILE_CUDA=ON` -* Enable support for BlockForest domain partitioning and dynamic load balancing by providing the path to waLBerla source directory `-DWALBERLA_DIR=path/to/walberla` (TODO: waLBerla as a submodule) +* Enable waLBerla support with `-DUSE_WALBERLA=ON` for using BlockForest domain partitioning and dynamic load balancing -### 1. Whole-program generation: +### 1. Stand-Alone P4IRS Application --------------------- -In this mode, everything including the `main` function is generated by P4IRS. +To build a C++ application using P4IRS, provide the list of your source files to CMake using the `-DUSER_SOURCE_FILES` flag (semicolon-seperated). -1. Set `generate_whole_program=True` in the input script -2. Set the CMake flag `-DGENERATE_WHOLE_PROGRAM=ON` +**Example**: Build the application [sd_1.cpp](examples/modular/sd_1.cpp) using [spring_dashpot.py](examples/modular/spring_dashpot.py) as the input script. -Example: Build [md.py](examples/whole-program-generation/md.py) ``` -cmake -DINPUT_SCRIPT=../examples/whole-program-generation/md.py -DGENERATE_WHOLE_PROGRAM=ON .. -``` -Now call `make` and an **executable** is built. - - -### 2. Modular stand-alone app ---------------------- -You can build a stand-alone C++ app which uses the P4IRS modular interface. - -1. Set `generate_whole_program=False` in the input script -2. Set the CMake flag `-DBUILD_APP=ON` -3. Provide the list of your source files to CMake (semicolon-seperated):`-DUSER_SOURCE_FILES=path/to/main.cpp;path/to/helper.cpp` - -Example: Build the application [sd_1.cpp](examples/modular/sd_1.cpp) with [spring_dashpot.py](examples/modular/spring_dashpot.py) -Note: In this example we assume waLBerla has been already cloned next to the P4IRS directory. - -``` -cmake -DINPUT_SCRIPT=../examples/modular/spring_dashpot.py -DBUILD_APP=ON -DUSER_SOURCE_FILES=../examples/modular/sd_1.cpp -DWALBERLA_DIR=../../walberla .. +cmake -DINPUT_SCRIPT=../examples/modular/spring_dashpot.py -DUSER_SOURCE_FILES=../examples/modular/sd_1.cpp -DUSE_WALBERLA=ON .. ``` Now call `make` and an **executable** is built. -### 3. P4IRS as a library +### 2. P4IRS as a Library --------------------- -In this mode, P4IRS is compiled as a library that can be integrated into other projects. - -1. Set `generate_whole_program=False` in the input script -2. Ensure both `BUILD_APP` and `GENERATE_WHOLE_PROGRAM` are `OFF` (they are OFF by default) - -Configure CMake and call `make` as usual, and a **static library** is built. You can then include P4IRS and its dependencies in your build system as follows: +P4IRS can also be compiled as a library for integration into larger projects. +To compile P4IRS as a library, simply do not pass any `USER_SOURCE_FILES` to CMake. Configure CMake and call `make` as usual, and a **static library** is built. You can then include P4IRS and its dependencies in your build system as follows: ```cmake find_package(pairs REQUIRED HINTS "path/to/pairs/build" NO_DEFAULT_PATH) target_include_directories(my_app PUBLIC ${PAIRS_INCLUDE_DIRS}) diff --git a/cmake/FindwaLBerla.cmake b/cmake/FindwaLBerla.cmake deleted file mode 100644 index 8f87e88a03902f1c1af3900a3d4d38b921996682..0000000000000000000000000000000000000000 --- a/cmake/FindwaLBerla.cmake +++ /dev/null @@ -1,13 +0,0 @@ -if ( WALBERLA_DIR ) - # WALBERLA_DIR has to point to the waLBerla source directory - # this command builds waLBerla (again) in the current build directory in the subfolder "walberla" (second argument) - add_subdirectory( ${WALBERLA_DIR} walberla EXCLUDE_FROM_ALL) - - waLBerla_import() - # Adds the 'src' and 'tests' directory of current app - list( APPEND WALBERLA_MODULE_DIRS "${CMAKE_SOURCE_DIR}/src" "${CMAKE_SOURCE_DIR}/tests" ) - list( REMOVE_DUPLICATES WALBERLA_MODULE_DIRS ) - set ( WALBERLA_MODULE_DIRS ${WALBERLA_MODULE_DIRS} CACHE INTERNAL "All folders that contain modules or tests" ) -else() - message( FATAL_ERROR "waLBerla not found - Use 'cmake -DWALBERLA_DIR=path_to_waLBerla_sources pathToApplicationSources' " ) -endif() diff --git a/examples/modular/force_reduction.cpp b/examples/modular/force_reduction.cpp index e884b3d0acab91189ffd4465364d9e24319e8992..7e770a58704c2ff928a395f9111094557406af43 100644 --- a/examples/modular/force_reduction.cpp +++ b/examples/modular/force_reduction.cpp @@ -15,8 +15,8 @@ int main(int argc, char **argv) { // Create bodies pairs::id_t pUid = pairs::create_sphere(pairs_runtime, 0.0499, 0.0499, 0.07, 0.5, 0.5, 0 , 1000, 0.0045, 0, 0); - // setup_sim after creating all bodies - pairs_sim->setup_sim(); + // setup_cells after creating all bodies + pairs_sim->setup_cells(); pairs_sim->update_mass_and_inertia(); // Track particle diff --git a/examples/modular/force_reduction.py b/examples/modular/force_reduction.py index af9cea7058190b7fda485cc1ec3a6fd6cde8b4f1..8f15dd6d417428133345f0a6010ab475baa9ef98 100644 --- a/examples/modular/force_reduction.py +++ b/examples/modular/force_reduction.py @@ -58,8 +58,7 @@ psim = pairs.simulation( double_prec=True, particle_capacity=1000000, neighbor_capacity=20, - debug=True, - generate_whole_program=False) + debug=True) target = sys.argv[1] if len(sys.argv[1]) > 1 else "none" diff --git a/examples/modular/sd_1.cpp b/examples/modular/sd_1.cpp index e56cfc5943c7626d97d6b872524655b9dd0dca0b..d7e0dcae798ee4b8e46013bd9e872ad97ea76eb4 100644 --- a/examples/modular/sd_1.cpp +++ b/examples/modular/sd_1.cpp @@ -21,7 +21,7 @@ int main(int argc, char **argv) { pairs::create_sphere(pairs_runtime, 0.6, 0.6, 0.7, -2, -2, 0, 1000, 0.05, 0, 0); pairs::create_sphere(pairs_runtime, 0.4, 0.4, 0.68, 2, 2, 0, 1000, 0.05, 0, 0); - pairs_sim->setup_sim(0.1, 0.1, 0.1, 0.1); + pairs_sim->setup_cells(0.1, 0.1, 0.1, 0.1); pairs_sim->update_mass_and_inertia(); int num_timesteps = 2000; diff --git a/examples/modular/sd_2.cpp b/examples/modular/sd_2.cpp index 4bdb4c85a37efef0391ccecc9d2a1e1df6e3313d..3ae126949f934aa2d1eff1eb3d0d895a26afec9d 100644 --- a/examples/modular/sd_2.cpp +++ b/examples/modular/sd_2.cpp @@ -42,7 +42,7 @@ int main(int argc, char **argv) { pairs::create_sphere(pairs_runtime, 0.6, 0.6, 0.7, -2, -2, 0, 1000, 0.05, 0, 0); pairs::create_sphere(pairs_runtime, 0.4, 0.4, 0.68, 2, 2, 0, 1000, 0.05, 0, 0); - pairs_sim->setup_sim(0.1, 0.1, 0.1, 0.1); + pairs_sim->setup_cells(0.1, 0.1, 0.1, 0.1); pairs_sim->update_mass_and_inertia(); int num_timesteps = 2000; diff --git a/examples/modular/sd_3_GPU.cu b/examples/modular/sd_3_GPU.cu index b44af846643ed9cf0a730b07c5f60543560e29b8..bf95b2037f366cb417320f22a34c24591a1f7ebc 100644 --- a/examples/modular/sd_3_GPU.cu +++ b/examples/modular/sd_3_GPU.cu @@ -65,7 +65,7 @@ int main(int argc, char **argv) { auto pIsLocalInMyRank = [&](pairs::id_t uid){return ac->uidToIdxLocal(uid) != ac->getInvalidIdx();}; - pairs_sim->setup_sim(0.1, 0.1, 0.1, 0.1); + pairs_sim->setup_cells(0.1, 0.1, 0.1, 0.1); pairs_sim->update_mass_and_inertia(); pairs_sim->communicate(0); diff --git a/examples/modular/sd_4.cpp b/examples/modular/sd_4.cpp index 80ec40313e13692d1b41fcee01f6b5ce7d4ef91b..e83e66d1d4e29f2d281b2673ace6047cba7a3eb2 100644 --- a/examples/modular/sd_4.cpp +++ b/examples/modular/sd_4.cpp @@ -41,7 +41,6 @@ int main(int argc, char **argv) { pairs_runtime->initDomain(&argc, &argv, 0, 0, 0, 20, 20, 20, // Domain bounds - false, false, false, // PBCs --------------> TODO: runtime pbc true // Enable dynamic load balancing (does initial refinement on a <1,1,1> blockforest) ); @@ -61,7 +60,7 @@ int main(int argc, char **argv) { double lcw = diameter_max * 1.01; // Linked-cell width double interaction_radius = diameter_max; - pairs_sim->setup_sim(lcw, lcw, lcw, interaction_radius); + pairs_sim->setup_cells(lcw, lcw, lcw, interaction_radius); pairs_sim->update_mass_and_inertia(); diff --git a/examples/modular/sphere_box_global.cpp b/examples/modular/sphere_box_global.cpp new file mode 100644 index 0000000000000000000000000000000000000000..783bbaa691b4a9ba29804b421a77daf6b18099b7 --- /dev/null +++ b/examples/modular/sphere_box_global.cpp @@ -0,0 +1,101 @@ +#include <iostream> +#include <memory> +#include <iomanip> + +#include "sphere_box_global.hpp" + +// cmake -DINPUT_SCRIPT=../examples/modular/sphere_box_global.py -DWALBERLA_DIR=../../walberla -DBUILD_APP=ON -DUSER_SOURCE_FILES=../examples/modular/sphere_box_global.cpp -DCOMPILE_CUDA=ON .. + +void set_feature_properties(std::shared_ptr<PairsAccessor> &ac){ + ac->setTypeStiffness(0,0, 1e6); + ac->setTypeStiffness(0,1, 1e6); + ac->setTypeStiffness(1,0, 1e6); + ac->setTypeStiffness(1,1, 1e6); + ac->syncTypeStiffness(); + + ac->setTypeDampingNorm(0,0, 300); + ac->setTypeDampingNorm(0,1, 300); + ac->setTypeDampingNorm(1,0, 300); + ac->setTypeDampingNorm(1,1, 300); + ac->syncTypeDampingNorm(); + + ac->setTypeFriction(0,0, 1.2); + ac->setTypeFriction(0,1, 1.2); + ac->setTypeFriction(1,0, 1.2); + ac->setTypeFriction(1,1, 1.2); + ac->syncTypeFriction(); + + ac->setTypeDampingTan(0,0, 300); + ac->setTypeDampingTan(0,1, 300); + ac->setTypeDampingTan(1,0, 300); + ac->setTypeDampingTan(1,1, 300); + ac->syncTypeDampingTan(); +} + +int main(int argc, char **argv) { + auto pairs_sim = std::make_shared<PairsSimulation>(); + pairs_sim->initialize(); + + auto ac = std::make_shared<PairsAccessor>(pairs_sim.get()); + set_feature_properties(ac); + + auto pairs_runtime = pairs_sim->getPairsRuntime(); + + pairs_runtime->initDomain(&argc, &argv, 0, 0, 0, 30, 30, 30, false, false, false, true); + pairs_runtime->getDomainPartitioner()->initWorkloadBalancer(pairs::Hilbert, 100, 800); + + pairs::create_halfspace(pairs_runtime, 0,0,0, 1, 0, 0, 0, pairs::flags::INFINITE | pairs::flags::FIXED); + pairs::create_halfspace(pairs_runtime, 0,0,0, 0, 1, 0, 0, pairs::flags::INFINITE | pairs::flags::FIXED); + pairs::create_halfspace(pairs_runtime, 0,0,0, 0, 0, 1, 0, pairs::flags::INFINITE | pairs::flags::FIXED); + pairs::create_halfspace(pairs_runtime, 30,30,30, -1, 0, 0, 0, pairs::flags::INFINITE | pairs::flags::FIXED); + pairs::create_halfspace(pairs_runtime, 30,30,30, 0, -1, 0, 0, pairs::flags::INFINITE | pairs::flags::FIXED); + pairs::create_halfspace(pairs_runtime, 30,30,30, 0, 0, -1, 0, pairs::flags::INFINITE | pairs::flags::FIXED); + + double radius = 0.5; + // Create a bed of small particles + pairs::dem_sc_grid(pairs_runtime, 30, 20, 5, radius*2 , radius*2 , radius*2, radius*2, 2, 250, 2); + + // Create 3 global bodies, one of which is fixed + pairs::create_box(pairs_runtime, 12, 12, 13.5, 0, 0, 0, 15, 2, 13, 20, 0, pairs::flags::GLOBAL); + pairs::create_sphere(pairs_runtime, 15, 20, 15, 0, 4, 0, 50, 4, 0, pairs::flags::GLOBAL); + pairs::create_sphere(pairs_runtime, 15, 25, 4, 0, 0, 0, 50, 4, 0, pairs::flags::GLOBAL | pairs::flags::FIXED); + + // Use the diameter of small particles to set up the cell list + double lcw = radius * 2; + pairs_sim->setup_sim(lcw, lcw, lcw, lcw); + pairs_sim->update_mass_and_inertia(); + pairs_sim->communicate(0); + + int num_timesteps = 20000; + int vtk_freq = 100; + int rebalance_freq = 2000; + double dt = 0.001; + + pairs::vtk_write_subdom(pairs_runtime, "output/subdom_init", 0); + + for (int t=0; t<num_timesteps; ++t){ + if ((t % vtk_freq==0) && pairs_sim->rank()==0) std::cout << "Timestep: " << t << std::endl; + + if (t % rebalance_freq == 0){ + pairs_sim->update_domain(); + } + + pairs_sim->update_cells(t); + + pairs_sim->gravity(); + + // All global and local interactions are contained within the 'spring_dashpot' module + // You have the option to call spring_dashpot before or after 'gravity' or any other force-update module + pairs_sim->spring_dashpot(); + + pairs_sim->euler(dt); + pairs_sim->communicate(t); + + if (t % vtk_freq==0){ + pairs::vtk_with_rotation(pairs_runtime, pairs::Shapes::Box, "output/local_boxes", 0, pairs_sim->nlocal(), t); + pairs::vtk_with_rotation(pairs_runtime, pairs::Shapes::Sphere, "output/local_spheres", 0, pairs_sim->nlocal(), t); + } + } + + pairs_sim->end(); +} \ No newline at end of file diff --git a/examples/modular/sphere_box_global.py b/examples/modular/sphere_box_global.py new file mode 100644 index 0000000000000000000000000000000000000000..53722f0d32d363f3113d914607e6d5c8f7d18c8a --- /dev/null +++ b/examples/modular/sphere_box_global.py @@ -0,0 +1,120 @@ +import math +import pairs +import sys +import os + +def update_mass_and_inertia(i): + rotation_matrix[i] = diagonal_matrix(1.0) + rotation[i] = default_quaternion() + + if is_sphere(i): + inv_inertia[i] = inversed(diagonal_matrix(0.4 * mass[i] * radius[i] * radius[i])) + + elif is_box(i): + inv_inertia[i] = inversed(diagonal_matrix ( + edge_length[i][1]*edge_length[i][1] + edge_length[i][2]*edge_length[i][2], + edge_length[i][0]*edge_length[i][0] + edge_length[i][2]*edge_length[i][2], + edge_length[i][0]*edge_length[i][0] + edge_length[i][1]*edge_length[i][1]) * (mass[i] / 12.0)) + + axis = vector(1,0.5,1) + angle = -3.1415/6.0 + rotation[i] = quaternion(axis, angle) * rotation[i] + rotation_matrix[i] = quaternion_to_rotation_matrix(rotation[i]) + + elif is_halfspace(i): + mass[i] = infinity + inv_inertia[i] = 0.0 + + +def spring_dashpot(i, j): + delta_ij = -penetration_depth(i, j) + skip_when(delta_ij < 0.0) + + velocity_wf_i = linear_velocity[i] + cross(angular_velocity[i], contact_point(i, j) - position[i]) + velocity_wf_j = linear_velocity[j] + cross(angular_velocity[j], contact_point(i, j) - position[j]) + + rel_vel = -(velocity_wf_i - velocity_wf_j) + rel_vel_n = dot(rel_vel, contact_normal(i, j)) + rel_vel_t = rel_vel - rel_vel_n * contact_normal(i, j) + + fNabs = stiffness[i,j] * delta_ij + max(damping_norm[i,j] * rel_vel_n, 0.0) + fN = fNabs * contact_normal(i, j) + + fTabs = min(damping_tan[i,j] * length(rel_vel_t), friction[i, j] * fNabs) + fT = fTabs * normalized(rel_vel_t) + + partial_force = fN + fT + apply(force, partial_force) + apply(torque, cross(contact_point(i, j) - position[i], partial_force)) + +def euler(i): + skip_when(is_fixed(i) or is_infinite(i)) + inv_mass = 1.0 / mass[i] + position[i] += 0.5 * inv_mass * force[i] * dt * dt + linear_velocity[i] * dt + linear_velocity[i] += inv_mass * force[i] * dt + wdot = rotation_matrix[i] * (inv_inertia[i] * torque[i]) * transposed(rotation_matrix[i]) + phi = angular_velocity[i] * dt + 0.5 * wdot * dt * dt + rotation[i] = quaternion(phi, length(phi)) * rotation[i] + rotation_matrix[i] = quaternion_to_rotation_matrix(rotation[i]) + angular_velocity[i] += wdot * dt + +def gravity(i): + force[i][2] -= mass[i] * gravity_SI + + +file_name = os.path.basename(__file__) +file_name_without_extension = os.path.splitext(file_name)[0] + +psim = pairs.simulation( + file_name_without_extension, + [pairs.sphere(), pairs.halfspace(), pairs.box()], + double_prec=True, + particle_capacity=1000000, + neighbor_capacity=20, + debug=True) + + +target = sys.argv[1] if len(sys.argv[1]) > 1 else "none" + +if target == 'gpu': + psim.target(pairs.target_gpu()) +elif target == 'cpu': + psim.target(pairs.target_cpu()) +else: + print(f"Invalid target, use {sys.argv[0]} <cpu/gpu>") + +psim.add_position('position') +psim.add_property('mass', pairs.real()) +psim.add_property('linear_velocity', pairs.vector()) +psim.add_property('angular_velocity', pairs.vector()) +psim.add_property('force', pairs.vector(), volatile=True) +psim.add_property('torque', pairs.vector(), volatile=True) +psim.add_property('radius', pairs.real()) +psim.add_property('normal', pairs.vector()) +psim.add_property('inv_inertia', pairs.matrix()) +psim.add_property('rotation_matrix', pairs.matrix()) +psim.add_property('rotation', pairs.quaternion()) +psim.add_property('edge_length', pairs.vector()) + +ntypes = 2 +psim.add_feature('type', ntypes) +psim.add_feature_property('type', 'stiffness', pairs.real(), [3000 for i in range(ntypes * ntypes)]) +psim.add_feature_property('type', 'damping_norm', pairs.real(), [10.0 for i in range(ntypes * ntypes)]) +psim.add_feature_property('type', 'damping_tan', pairs.real()) +psim.add_feature_property('type', 'friction', pairs.real()) + +psim.set_domain_partitioner(pairs.block_forest()) +psim.pbc([False, False, False]) +psim.build_cell_lists() + +psim.compute(update_mass_and_inertia, symbols={'infinity': math.inf }) + +# 'compute_globals' enables computation of forces on global bodies +psim.compute(spring_dashpot, compute_globals=True) +psim.compute(euler, parameters={'dt': pairs.real()}) + +gravity_SI = 9.81 +psim.compute(gravity, symbols={'gravity_SI': gravity_SI }) + +psim.generate() + diff --git a/examples/modular/spring_dashpot.py b/examples/modular/spring_dashpot.py index 191c000ca61962af8ebae77ab4ec1b97b433d399..94f4bfe9183ac43ab61d6ab83ee95cdae8fed08d 100644 --- a/examples/modular/spring_dashpot.py +++ b/examples/modular/spring_dashpot.py @@ -36,6 +36,7 @@ def spring_dashpot(i, j): apply(torque, cross(contact_point(i, j) - position, partial_force)) def euler(i): + skip_when(is_fixed(i) or is_infinite(i)) inv_mass = 1.0 / mass[i] position[i] += 0.5 * inv_mass * force[i] * dt * dt + linear_velocity[i] * dt linear_velocity[i] += inv_mass * force[i] * dt @@ -58,8 +59,7 @@ psim = pairs.simulation( double_prec=True, particle_capacity=1000000, neighbor_capacity=20, - debug=True, - generate_whole_program=False) + debug=True) target = sys.argv[1] if len(sys.argv[1]) > 1 else "none" @@ -97,7 +97,7 @@ psim.build_cell_lists() # The order of user-defined functions is not important here since # they are not used by other subroutines and are only callable individually psim.compute(update_mass_and_inertia, symbols={'infinity': math.inf }) -psim.compute(spring_dashpot) +psim.compute(spring_dashpot, profile=True) psim.compute(euler, parameters={'dt': pairs.real()}) gravity_SI = 9.81 diff --git a/external/walberla b/external/walberla new file mode 160000 index 0000000000000000000000000000000000000000..65fcc780c8843f982b23e3b1f14de6408982c5d8 --- /dev/null +++ b/external/walberla @@ -0,0 +1 @@ +Subproject commit 65fcc780c8843f982b23e3b1f14de6408982c5d8 diff --git a/runtime/domain/block_forest.cpp b/runtime/domain/block_forest.cpp index 0851f2c1733243792909dff5e15b8a8ddc9bb2e6..1ad9922658bd11ca9f90731b68198810bcb674c6 100644 --- a/runtime/domain/block_forest.cpp +++ b/runtime/domain/block_forest.cpp @@ -13,7 +13,7 @@ #include <blockforest/loadbalancing/weight_assignment/MetisAssignmentFunctor.h> #include <blockforest/loadbalancing/weight_assignment/WeightAssignmentFunctor.h> //--- -#include "../boundary_weights.hpp" +#include "boundary_weights.hpp" #include "../pairs_common.hpp" #include "../devices/device.hpp" #include "regular_6d_stencil.hpp" @@ -24,8 +24,8 @@ namespace pairs { BlockForest::BlockForest( PairsRuntime *ps_, - real_t xmin, real_t xmax, real_t ymin, real_t ymax, real_t zmin, real_t zmax, bool pbcx, bool pbcy, bool pbcz, bool balance_workload_) : - DomainPartitioner(xmin, xmax, ymin, ymax, zmin, zmax), ps(ps_), globalPBC{pbcx, pbcy, pbcz}, balance_workload(balance_workload_) { + real_t xmin, real_t xmax, real_t ymin, real_t ymax, real_t zmin, real_t zmax, bool balance_workload_) : + DomainPartitioner(xmin, xmax, ymin, ymax, zmin, zmax), ps(ps_), balance_workload(balance_workload_) { subdom = new real_t[ndims * 2]; } @@ -35,8 +35,7 @@ BlockForest::BlockForest(PairsRuntime *ps_, const std::shared_ptr<walberla::bloc DomainPartitioner(bf->getDomain().xMin(), bf->getDomain().xMax(), bf->getDomain().yMin(), bf->getDomain().yMax(), bf->getDomain().zMin(), bf->getDomain().zMax()), - ps(ps_), - globalPBC{bf->isXPeriodic(), bf->isYPeriodic(), bf->isZPeriodic()} { + ps(ps_) { subdom = new real_t[ndims * 2]; mpiManager = walberla::mpi::MPIManager::instance(); world_size = mpiManager->numProcesses(); @@ -60,10 +59,8 @@ void BlockForest::updateNeighborhood() { for(uint neigh = 0; neigh < block->getNeighborhoodSize(); ++neigh) { auto neighbor_rank = walberla::int_c(block->getNeighborProcess(neigh)); - // Neighbor blocks that belong to the same rank should be added to - // neighboorhood only if there's PBC along any dim, otherwise they should be skipped. // TODO: Make PBCs work with runtime load balancing - if((neighbor_rank != me) || globalPBC[0] || globalPBC[1] || globalPBC[2]) { + // if(neighbor_rank != me) { const walberla::BlockID& neighbor_id = block->getNeighborId(neigh); walberla::math::AABB neighbor_aabb = block->getNeighborAABB(neigh); auto begin = blocks_pushed[neighbor_rank].begin(); @@ -73,7 +70,7 @@ void BlockForest::updateNeighborhood() { neighborhood[neighbor_rank].push_back(neighbor_aabb); blocks_pushed[neighbor_rank].push_back(neighbor_id); } - } + // } } } @@ -182,38 +179,41 @@ void BlockForest::updateWeights() { } } -walberla::Vector3<int> BlockForest::getBlockConfig(int num_processes, int nx, int ny, int nz) { - const int bx_factor = 1; - const int by_factor = 1; - const int bz_factor = 1; - const int ax = nx * ny; - const int ay = nx * nz; - const int az = ny * nz; - - int bestsurf = 2 * (ax + ay + az); - int x = 1; - int y = 1; - int z = 1; - - for(int i = 1; i < num_processes; ++i) { - if(num_processes % i == 0) { - const int rem_yz = num_processes / i; - - for(int j = 1; j < rem_yz; ++j) { - if(rem_yz % j == 0) { - const int k = rem_yz / j; - const int surf = (ax / i / j) + (ay / i / k) + (az / j / k); +walberla::Vector3<int> BlockForest::getBlockConfig() { + real_t area[3]; + real_t best_surf = 0.0; + int ndims = 3; + int d = 0; + int nranks[3] = {1, 1, 1}; + + for(int d1 = 0; d1 < ndims; d1++) { + for(int d2 = d1 + 1; d2 < ndims; d2++) { + area[d] = (this->grid_max[d1] - this->grid_min[d1]) * + (this->grid_max[d2] - this->grid_min[d2]); + best_surf += 2.0 * area[d]; + d++; + } + } + + for (int i = 1; i <= world_size; i++) { + if (world_size % i == 0) { + const int rem_yz = world_size / i; - if(surf < bestsurf) { - x = i, y = j, z = k; - bestsurf = surf; + for (int j = 1; j <= rem_yz; j++) { + if (rem_yz % j == 0) { + const int k = rem_yz / j; + const real_t surf = (area[0] / i / j) + (area[1] / i / k) + (area[2] / j / k); + if (surf < best_surf) { + nranks[0] = i; + nranks[1] = j; + nranks[2] = k; + best_surf = surf; } - } + } } } } - - return walberla::Vector3<int>(x * bx_factor, y * by_factor, z * bz_factor); + return walberla::Vector3<int>(nranks[0], nranks[1], nranks[2]); } int BlockForest::getInitialRefinementLevel(int num_processes) { @@ -253,20 +253,14 @@ void BlockForest::initialize(int *argc, char ***argv) { mpiManager->useWorldComm(); world_size = mpiManager->numProcesses(); rank = mpiManager->rank(); - - walberla::math::AABB domain( - grid_min[0], grid_min[1], grid_min[2], grid_max[0], grid_max[1], grid_max[2]); - - int gridsize[3] = {32, 32, 32}; - auto procs = mpiManager->numProcesses(); - auto block_config = balance_workload ? walberla::Vector3<int>(1, 1, 1) : - getBlockConfig(procs, gridsize[0], gridsize[1], gridsize[2]); - - auto ref_level = balance_workload ? getInitialRefinementLevel(procs) : 0; - - walberla::Vector3<bool> pbc(globalPBC[0], globalPBC[1], globalPBC[2]); - - forest = walberla::blockforest::createBlockForest(domain, block_config, pbc, procs, ref_level); + + auto block_config = balance_workload ? walberla::Vector3<int>(1, 1, 1) : getBlockConfig(); + auto ref_level = balance_workload ? getInitialRefinementLevel(world_size) : 0; + + // PBC's are forced to true here and sperately handled when determining ghosts + walberla::Vector3<bool> pbc(true, true, true); + walberla::math::AABB domain(grid_min[0], grid_min[1], grid_min[2], grid_max[0], grid_max[1], grid_max[2]); + forest = walberla::blockforest::createBlockForest(domain, block_config, pbc, world_size, ref_level); this->info = make_shared<walberla::blockforest::InfoCollection>(); @@ -298,7 +292,7 @@ void BlockForest::update() { // PAIRS_DEBUG("Rebalance\n"); if (rank==0) std::cout << "Rebalance" << std::endl; forest->refresh(); -} + } this->updateNeighborhood(); this->setBoundingBox(); diff --git a/runtime/domain/block_forest.hpp b/runtime/domain/block_forest.hpp index d814d02c423358b99f11622bbe2ed7f88b557f97..039f86769c8eb3b20c39aee7dd22a179d9221707 100644 --- a/runtime/domain/block_forest.hpp +++ b/runtime/domain/block_forest.hpp @@ -40,14 +40,13 @@ private: std::vector<double> aabbs; PairsRuntime *ps; real_t *subdom; - const bool globalPBC[3]; int world_size, rank, nranks, total_aabbs; bool balance_workload = false; public: BlockForest( PairsRuntime *ps_, - real_t xmin, real_t xmax, real_t ymin, real_t ymax, real_t zmin, real_t zmax, bool pbcx, bool pbcy, bool pbcz, bool balance_workload_); + real_t xmin, real_t xmax, real_t ymin, real_t ymax, real_t zmin, real_t zmax, bool balance_workload_); BlockForest(PairsRuntime *ps_, const std::shared_ptr<walberla::blockforest::BlockForest> &bf); @@ -69,7 +68,7 @@ public: void updateNeighborhood(); void updateWeights(); - walberla::math::Vector3<int> getBlockConfig(int num_processes, int nx, int ny, int nz); + walberla::math::Vector3<int> getBlockConfig(); int getInitialRefinementLevel(int num_processes); void setBoundingBox(); void rebalance(); diff --git a/runtime/boundary_weights.cpp b/runtime/domain/boundary_weights.cpp similarity index 100% rename from runtime/boundary_weights.cpp rename to runtime/domain/boundary_weights.cpp diff --git a/runtime/boundary_weights.cu b/runtime/domain/boundary_weights.cu similarity index 100% rename from runtime/boundary_weights.cu rename to runtime/domain/boundary_weights.cu diff --git a/runtime/boundary_weights.hpp b/runtime/domain/boundary_weights.hpp similarity index 86% rename from runtime/boundary_weights.hpp rename to runtime/domain/boundary_weights.hpp index e84348a0c8438255ca0090b765b56fd95f7deb10..3b7fd22e4132b243531e07ed885bc1883b229820 100644 --- a/runtime/boundary_weights.hpp +++ b/runtime/domain/boundary_weights.hpp @@ -6,8 +6,8 @@ #include <fstream> #include <sstream> //--- -#include "pairs.hpp" -#include "pairs_common.hpp" +#include "../pairs.hpp" +#include "../pairs_common.hpp" namespace pairs { diff --git a/runtime/pairs.cpp b/runtime/pairs.cpp index 6efead8d8cb598c1fcd481beb35c12464c235830..a2382820e441daae0163511d12038919aae329f4 100644 --- a/runtime/pairs.cpp +++ b/runtime/pairs.cpp @@ -15,7 +15,6 @@ namespace pairs { void PairsRuntime::initDomain( int *argc, char ***argv, real_t xmin, real_t ymin, real_t zmin, real_t xmax, real_t ymax, real_t zmax, - bool pbcx, bool pbcy, bool pbcz, bool balance_workload) { int mpi_initialized=0; @@ -40,7 +39,7 @@ void PairsRuntime::initDomain( #ifdef USE_WALBERLA else if(dom_part_type == BlockForestPartitioning) { - dom_part = new BlockForest(this, xmin, xmax, ymin, ymax, zmin, zmax, pbcx, pbcy, pbcz, balance_workload); + dom_part = new BlockForest(this, xmin, xmax, ymin, ymax, zmin, zmax, balance_workload); } #endif @@ -188,12 +187,15 @@ void PairsRuntime::copyArraySliceToDevice( if(action == Ignore || action == WriteAfterRead || action == ReadOnly) { if(action == Ignore || !array_flags->isDeviceFlagSet(array_id)) { if(!array.isStatic()) { - PAIRS_DEBUG( - "Copying array %s to device (offset=%lu, n=%lu)\n", - array.getName().c_str(), offset, size); + // PAIRS_DEBUG( + // "Copying array %s to device (offset=%lu, n=%lu)\n", + // array.getName().c_str(), offset, size); + this->getTimers()->start(TimerMarkers::DeviceTransfers); pairs::copy_slice_to_device( array.getHostPointer(), array.getDevicePointer(), offset, size); + + this->getTimers()->stop(TimerMarkers::DeviceTransfers); } } } @@ -210,20 +212,22 @@ void PairsRuntime::copyArrayToDevice(Array &array, action_t action, size_t size) if(action == Ignore || action == WriteAfterRead || action == ReadOnly) { if(action == Ignore || !array_flags->isDeviceFlagSet(array_id)) { + this->getTimers()->start(TimerMarkers::DeviceTransfers); if(array.isStatic()) { - PAIRS_DEBUG( - "Copying static array %s to device (n=%lu)\n", - array.getName().c_str(), size); + // PAIRS_DEBUG( + // "Copying static array %s to device (n=%lu)\n", + // array.getName().c_str(), size); pairs::copy_static_symbol_to_device( array.getHostPointer(), array.getDevicePointer(), size); } else { - PAIRS_DEBUG( - "Copying array %s to device (n=%lu)\n", - array.getName().c_str(), size); + // PAIRS_DEBUG( + // "Copying array %s to device (n=%lu)\n", + // array.getName().c_str(), size); pairs::copy_to_device(array.getHostPointer(), array.getDevicePointer(), size); } + this->getTimers()->stop(TimerMarkers::DeviceTransfers); } } @@ -240,12 +244,15 @@ void PairsRuntime::copyArraySliceToHost(Array &array, action_t action, size_t of if(action == Ignore || action == WriteAfterRead || action == ReadOnly) { if(action == Ignore || !array_flags->isHostFlagSet(array_id)) { if(!array.isStatic()) { - PAIRS_DEBUG( - "Copying array %s to host (offset=%lu, n=%lu)\n", - array.getName().c_str(), offset, size); + // PAIRS_DEBUG( + // "Copying array %s to host (offset=%lu, n=%lu)\n", + // array.getName().c_str(), offset, size); + this->getTimers()->start(TimerMarkers::DeviceTransfers); pairs::copy_slice_to_host( array.getDevicePointer(), array.getHostPointer(), offset, size); + + this->getTimers()->stop(TimerMarkers::DeviceTransfers); } } } @@ -262,16 +269,19 @@ void PairsRuntime::copyArrayToHost(Array &array, action_t action, size_t size) { if(action == Ignore || action == WriteAfterRead || action == ReadOnly) { if(action == Ignore || !array_flags->isHostFlagSet(array_id)) { + this->getTimers()->start(TimerMarkers::DeviceTransfers); + if(array.isStatic()) { - PAIRS_DEBUG( - "Copying static array %s to host (n=%lu)\n", array.getName().c_str(), size); + // PAIRS_DEBUG( + // "Copying static array %s to host (n=%lu)\n", array.getName().c_str(), size); pairs::copy_static_symbol_to_host( array.getDevicePointer(), array.getHostPointer(), size); } else { - PAIRS_DEBUG("Copying array %s to host (n=%lu)\n", array.getName().c_str(), size); + // PAIRS_DEBUG("Copying array %s to host (n=%lu)\n", array.getName().c_str(), size); pairs::copy_to_host(array.getDevicePointer(), array.getHostPointer(), size); } + this->getTimers()->stop(TimerMarkers::DeviceTransfers); } } @@ -287,8 +297,10 @@ void PairsRuntime::copyPropertyToDevice(Property &prop, action_t action, size_t if(action == Ignore || action == WriteAfterRead || action == ReadOnly) { if(action == Ignore || !prop_flags->isDeviceFlagSet(prop_id)) { - PAIRS_DEBUG("Copying property %s to device (n=%lu)\n", prop.getName().c_str(), size); + // PAIRS_DEBUG("Copying property %s to device (n=%lu)\n", prop.getName().c_str(), size); + this->getTimers()->start(TimerMarkers::DeviceTransfers); pairs::copy_to_device(prop.getHostPointer(), prop.getDevicePointer(), size); + this->getTimers()->stop(TimerMarkers::DeviceTransfers); } } @@ -304,8 +316,10 @@ void PairsRuntime::copyPropertyToHost(Property &prop, action_t action, size_t si if(action == Ignore || action == WriteAfterRead || action == ReadOnly) { if(action == Ignore || !prop_flags->isHostFlagSet(prop_id)) { - PAIRS_DEBUG("Copying property %s to host (n=%lu)\n", prop.getName().c_str(), size); + // PAIRS_DEBUG("Copying property %s to host (n=%lu)\n", prop.getName().c_str(), size); + this->getTimers()->start(TimerMarkers::DeviceTransfers); pairs::copy_to_host(prop.getDevicePointer(), prop.getHostPointer(), size); + this->getTimers()->stop(TimerMarkers::DeviceTransfers); } } @@ -323,11 +337,14 @@ void PairsRuntime::copyContactPropertyToDevice( if(action == Ignore || action == WriteAfterRead || action == ReadOnly) { if(action == Ignore || !contact_prop_flags->isDeviceFlagSet(prop_id)) { - PAIRS_DEBUG("Copying contact property %s to device (n=%lu)\n", - contact_prop.getName().c_str(), size); + // PAIRS_DEBUG("Copying contact property %s to device (n=%lu)\n", + // contact_prop.getName().c_str(), size); + this->getTimers()->start(TimerMarkers::DeviceTransfers); pairs::copy_to_device( contact_prop.getHostPointer(), contact_prop.getDevicePointer(), size); + + this->getTimers()->stop(TimerMarkers::DeviceTransfers); contact_prop_flags->setDeviceFlag(prop_id); } @@ -345,11 +362,12 @@ void PairsRuntime::copyContactPropertyToHost( if(action == Ignore || action == WriteAfterRead || action == ReadOnly) { if(!contact_prop_flags->isHostFlagSet(contact_prop.getId())) { - PAIRS_DEBUG("Copying contact property %s to host (n=%lu)\n", - contact_prop.getName().c_str(), size); - + // PAIRS_DEBUG("Copying contact property %s to host (n=%lu)\n", + // contact_prop.getName().c_str(), size); + this->getTimers()->start(TimerMarkers::DeviceTransfers); pairs::copy_to_host( contact_prop.getDevicePointer(), contact_prop.getHostPointer(), size); + this->getTimers()->stop(TimerMarkers::DeviceTransfers); contact_prop_flags->setHostFlag(prop_id); } @@ -363,26 +381,27 @@ void PairsRuntime::copyContactPropertyToHost( void PairsRuntime::copyFeaturePropertyToDevice(FeatureProperty &feature_prop) { const size_t n = feature_prop.getArraySize(); - PAIRS_DEBUG("Copying feature property %s to device (n=%lu)\n", - feature_prop.getName().c_str(), n); + // PAIRS_DEBUG("Copying feature property %s to device (n=%lu)\n", + // feature_prop.getName().c_str(), n); + this->getTimers()->start(TimerMarkers::DeviceTransfers); pairs::copy_static_symbol_to_device( feature_prop.getHostPointer(), feature_prop.getDevicePointer(), n); + + this->getTimers()->stop(TimerMarkers::DeviceTransfers); } void PairsRuntime::communicateSizes(int dim, const int *send_sizes, int *recv_sizes) { auto nsend_id = getArrayByHostPointer(send_sizes).getId(); auto nrecv_id = getArrayByHostPointer(recv_sizes).getId(); - this->getTimers()->start(DeviceTransfers); copyArrayToHost(nsend_id, ReadOnly); array_flags->setHostFlag(nrecv_id); array_flags->clearDeviceFlag(nrecv_id); - this->getTimers()->stop(DeviceTransfers); - this->getTimers()->start(Communication); + this->getTimers()->start(TimerMarkers::MPI); this->getDomainPartitioner()->communicateSizes(dim, send_sizes, recv_sizes); - this->getTimers()->stop(Communication); + this->getTimers()->stop(TimerMarkers::MPI); } void PairsRuntime::communicateData( @@ -401,7 +420,6 @@ void PairsRuntime::communicateData( auto nsend_id = getArrayByHostPointer(nsend).getId(); auto nrecv_id = getArrayByHostPointer(nrecv).getId(); - this->getTimers()->start(DeviceTransfers); copyArrayToHost(send_offsets_id, ReadOnly); copyArrayToHost(recv_offsets_id, ReadOnly); copyArrayToHost(nsend_id, ReadOnly); @@ -434,17 +452,13 @@ void PairsRuntime::communicateData( array_flags->clearDeviceFlag(recv_buf_id); #endif - this->getTimers()->stop(DeviceTransfers); - - this->getTimers()->start(Communication); + this->getTimers()->start(TimerMarkers::MPI); this->getDomainPartitioner()->communicateData( dim, elem_size, send_buf_ptr, send_offsets, nsend, recv_buf_ptr, recv_offsets, nrecv); - this->getTimers()->stop(Communication); + this->getTimers()->stop(TimerMarkers::MPI); #ifndef ENABLE_CUDA_AWARE_MPI - this->getTimers()->start(DeviceTransfers); copyArrayToDevice(recv_buf_id, Ignore, nrecv_all * elem_size * sizeof(real_t)); - this->getTimers()->stop(DeviceTransfers); #endif } @@ -464,7 +478,6 @@ void PairsRuntime::communicateDataReverse( auto nsend_id = getArrayByHostPointer(nsend).getId(); auto nrecv_id = getArrayByHostPointer(nrecv).getId(); - this->getTimers()->start(DeviceTransfers); copyArrayToHost(send_offsets_id, ReadOnly); copyArrayToHost(recv_offsets_id, ReadOnly); copyArrayToHost(nsend_id, ReadOnly); @@ -497,17 +510,13 @@ void PairsRuntime::communicateDataReverse( array_flags->clearDeviceFlag(recv_buf_id); #endif - this->getTimers()->stop(DeviceTransfers); - - this->getTimers()->start(Communication); + this->getTimers()->start(TimerMarkers::MPI); this->getDomainPartitioner()->communicateDataReverse( dim, elem_size, send_buf_ptr, send_offsets, nsend, recv_buf_ptr, recv_offsets, nrecv); - this->getTimers()->stop(Communication); + this->getTimers()->stop(TimerMarkers::MPI); #ifndef ENABLE_CUDA_AWARE_MPI - this->getTimers()->start(DeviceTransfers); copyArrayToDevice(recv_buf_id, Ignore, nrecv_all * elem_size * sizeof(real_t)); - this->getTimers()->stop(DeviceTransfers); #endif } @@ -527,7 +536,6 @@ void PairsRuntime::communicateAllData( auto nsend_id = getArrayByHostPointer(nsend).getId(); auto nrecv_id = getArrayByHostPointer(nrecv).getId(); - this->getTimers()->start(DeviceTransfers); copyArrayToHost(send_offsets_id, ReadOnly); copyArrayToHost(recv_offsets_id, ReadOnly); copyArrayToHost(nsend_id, ReadOnly); @@ -560,17 +568,13 @@ void PairsRuntime::communicateAllData( array_flags->clearDeviceFlag(recv_buf_id); #endif - this->getTimers()->stop(DeviceTransfers); - - this->getTimers()->start(Communication); + this->getTimers()->start(TimerMarkers::MPI); this->getDomainPartitioner()->communicateAllData( ndims, elem_size, send_buf_ptr, send_offsets, nsend, recv_buf_ptr, recv_offsets, nrecv); - this->getTimers()->stop(Communication); + this->getTimers()->stop(TimerMarkers::MPI); #ifndef ENABLE_CUDA_AWARE_MPI - this->getTimers()->start(DeviceTransfers); copyArrayToDevice(recv_buf_id, Ignore, nrecv_all * elem_size * sizeof(real_t)); - this->getTimers()->stop(DeviceTransfers); #endif } @@ -590,7 +594,6 @@ void PairsRuntime::communicateContactHistoryData( auto nsend_contact_id = getArrayByHostPointer(nsend_contact).getId(); auto nrecv_contact_id = getArrayByHostPointer(nrecv_contact).getId(); - this->getTimers()->start(DeviceTransfers); copyArrayToHost(contact_soffsets_id, ReadOnly); copyArrayToHost(nsend_contact_id, ReadOnly); @@ -611,9 +614,7 @@ void PairsRuntime::communicateContactHistoryData( array_flags->clearDeviceFlag(recv_buf_id); #endif - this->getTimers()->stop(DeviceTransfers); - - this->getTimers()->start(Communication); + this->getTimers()->start(TimerMarkers::MPI); this->getDomainPartitioner()->communicateSizes(dim, nsend_contact, nrecv_contact); contact_roffsets[dim * 2 + 0] = 0; @@ -630,13 +631,11 @@ void PairsRuntime::communicateContactHistoryData( send_buf_ptr, contact_soffsets, nsend_contact, recv_buf_ptr, contact_roffsets, nrecv_contact); - this->getTimers()->stop(Communication); + this->getTimers()->stop(TimerMarkers::MPI); #ifndef ENABLE_CUDA_AWARE_MPI - this->getTimers()->start(DeviceTransfers); copyArrayToDevice(recv_buf_id, Ignore, nrecv_all * sizeof(real_t)); copyArrayToDevice(contact_roffsets_id, Ignore); - this->getTimers()->stop(DeviceTransfers); #endif } @@ -644,4 +643,22 @@ void PairsRuntime::copyRuntimeArray(const std::string& name, void *dest, const i this->getDomainPartitioner()->copyRuntimeArray(name, dest, size); } +void PairsRuntime::allReduceInplaceSum(real_t *red_buffer, int num_elems){ + real_t *buff_ptr = red_buffer; + auto buff_array = getArrayByHostPointer(red_buffer); + + #ifdef ENABLE_CUDA_AWARE_MPI + buff_ptr = (real_t *) buff_array.getDevicePointer(); + #else + copyArrayToHost(buff_array, Ignore, num_elems * sizeof(real_t)); + #endif + + MPI_Allreduce(MPI_IN_PLACE, buff_ptr, num_elems, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD); + + #ifndef ENABLE_CUDA_AWARE_MPI + copyArrayToDevice(buff_array, Ignore, num_elems * sizeof(real_t)); + #endif +} + + } diff --git a/runtime/pairs.hpp b/runtime/pairs.hpp index e87dec06224d830f8f15fdc2e593278c00d100e6..853e8ef49408ccceb309a31083c6786550eb07d1 100644 --- a/runtime/pairs.hpp +++ b/runtime/pairs.hpp @@ -321,7 +321,7 @@ public: void initDomain( int *argc, char ***argv, real_t xmin, real_t ymin, real_t zmin, real_t xmax, real_t ymax, real_t zmax, - bool pbcx = 0, bool pbcy = 0, bool pbcz = 0, bool balance_workload = 0); + bool balance_workload = false); template<typename Domain_T> void useDomain(const std::shared_ptr<Domain_T> &domain_ptr); @@ -355,16 +355,24 @@ public: int getNumberOfNeighborRanks() { return this->getDomainPartitioner()->getNumberOfNeighborRanks(); } int getNumberOfNeighborAABBs() { return this->getDomainPartitioner()->getNumberOfNeighborAABBs(); } + void allReduceInplaceSum(real_t *red_buffer, int num_elems); + // Device functions void sync() { device_synchronize(); } // Timers Timers<double> *getTimers() { return timers; } + void printTimers() { if(this->getDomainPartitioner()->getRank() == 0) { this->getTimers()->print(); } } + + void logTimers() { + this->getTimers()->writeToFile(this->getDomainPartitioner()->getRank(), + this->getDomainPartitioner()->getWorldSize()); + } }; template<typename Domain_T> diff --git a/runtime/pairs_common.hpp b/runtime/pairs_common.hpp index 74237423ee5d3de07462bcc40739484ef4fd9781..af651893c9bdae1751cf7f962b5049b67578ae97 100644 --- a/runtime/pairs_common.hpp +++ b/runtime/pairs_common.hpp @@ -22,13 +22,13 @@ namespace flags{ constexpr int GLOBAL = 1 << 3 ; } -namespace shapes{ - enum Type { - Sphere = 0, - Halfspace = 1, - PointMass = 2 - }; -} +enum Shapes { + Sphere = 0, + Halfspace = 1, + PointMass = 2, + Box = 3 +}; + //#ifdef USE_DOUBLE_PRECISION typedef double real_t; //#else @@ -79,10 +79,9 @@ enum Actions { }; enum TimerMarkers { - All = 0, - Communication = 1, - DeviceTransfers = 2, - Offset = 3 + MPI = 0, + DeviceTransfers = 1, + Offset = 2 }; enum DomainPartitioners { @@ -127,6 +126,7 @@ constexpr const char* getAlgorithmName(LoadBalancingAlgorithms alg) { # define PAIRS_ASSERT(a) assert(a) # define PAIRS_EXCEPTION(a) #else +// # define PAIRS_DEBUG(...) {printf(__VA_ARGS__);} # define PAIRS_DEBUG(...) # define PAIRS_ASSERT(a) # define PAIRS_EXCEPTION(a) @@ -135,5 +135,6 @@ constexpr const char* getAlgorithmName(LoadBalancingAlgorithms alg) { #define PAIRS_ERROR(...) fprintf(stderr, __VA_ARGS__) #define MIN(a,b) ((a) < (b) ? (a) : (b)) #define MAX(a,b) ((a) > (b) ? (a) : (b)) +#define SIGN(a) ((a) < 0 ? -1 : 1) } \ No newline at end of file diff --git a/runtime/timers.hpp b/runtime/timers.hpp index c4cdc943aa5faeed57b4971684277e0844d3e7da..e82fd3214f311df9ba7c4f79fae524828c9f76b2 100644 --- a/runtime/timers.hpp +++ b/runtime/timers.hpp @@ -2,6 +2,8 @@ #include <chrono> #include <iostream> #include <unordered_map> +#include <sstream> +#include <iomanip> #pragma once @@ -17,61 +19,127 @@ public: void add(size_t id, std::string name) { counter_names.resize(id + 1); - counters.resize(id + 1); + time_counters.resize(id + 1); + call_counters.resize(id + 1); clocks.resize(id + 1); counter_names[id] = name; } - void start(size_t id) { clocks[id] = std::chrono::high_resolution_clock::now(); } + void start(size_t id) { clocks[id] = std::chrono::high_resolution_clock::now(); ++call_counters[id];} void stop(size_t id) { auto current_clock = std::chrono::high_resolution_clock::now(); - counters[id] += static_cast<TimeType>( + time_counters[id] += static_cast<TimeType>( std::chrono::duration_cast<TimeUnit>(current_clock - clocks[id]).count()) * time_factor; } - void print() { - std::unordered_map<std::string, TimeType> categorySums; + void writeToFile(int rank, int world_size){ + std::string filename = "timers_" + std::to_string(world_size) + ".txt"; + if (rank==0) std::cout << "Writing timers log to: " << filename << std::endl; + + MPI_File file; + MPI_File_open(MPI_COMM_WORLD, filename.c_str(), MPI_MODE_WRONLY | MPI_MODE_CREATE, MPI_INFO_NULL, &file); + + std::ostringstream ss; + ss << "Rank: " << rank << "\n"; + ss << std::left << std::setw(80) << "Timer" + << std::left << std::setw(15) << "Total [ms]" + << std::left << std::setw(15) << "Count" << "\n"; + ss << "--------------------------------------------------------------------------------------------------------\n"; + + // Modules + for (size_t i = TimerMarkers::Offset; i < time_counters.size(); ++i) { + const std::string& counterName = counter_names[i]; + if(counterName.length() > 0) { + ss << std::left << std::setw(80) << counter_names[i] + << std::left << std::setw(15) << std::fixed << std::setprecision(2) << time_counters[i] + << std::left << std::setw(15) << call_counters[i] + << "\n"; + } + } + + // Markers + for (size_t i = 0; i < TimerMarkers::Offset; ++i) { + ss << std::left << std::setw(80) << counter_names[i] + << std::left << std::setw(15) << std::fixed << std::setprecision(2) << time_counters[i] + << std::left << std::setw(15) << 1 + << "\n"; + } + + computeCategories(); + + // Categories + for (const auto& cs : categorySums) {; + ss << std::left << std::setw(80) << cs.first + << std::left << std::setw(15) << std::fixed << std::setprecision(2) << cs.second + << std::left << std::setw(15) << 1 + << "\n"; + } + ss << "\n\n"; + + std::string output = ss.str(); + MPI_File_write_ordered(file, output.c_str(), output.size(), MPI_CHAR, MPI_STATUS_IGNORE); + MPI_File_close(&file); + } - std::cout << "all: " << counters[0] << std::endl; - for (size_t i = 1; i < counters.size(); ++i) { + void print(){ + std::cout << "--------------------------------------------------------------------------------------------------------\n"; + std::cout << std::left << std::setw(80) << "Timer (MPI rank: 0)" + << std::left << std::setw(15) << "Total [ms]" + << std::left << std::setw(15) << "Count" << "\n"; + std::cout << "--------------------------------------------------------------------------------------------------------\n"; + + // Modules + for (size_t i = TimerMarkers::Offset; i < time_counters.size(); ++i) { const std::string& counterName = counter_names[i]; - TimeType counterValue = counters[i]; - - if(counterName.find("pack_") == 0 || - counterName.find("unpack_") == 0 || - counterName.find("determine_") == 0 || - counterName.find("set_communication_") == 0 || - counterName.find("remove_exchanged_particles") == 0 || - counterName.find("change_size_after_exchange") == 0) { - - categorySums["communication"] += counterValue; - - } else if(counterName.find("build_cell_lists") == 0 || - counterName.find("build_cell_lists_stencil") == 0 || - counterName.find("partition_cell_lists") == 0 || - counterName.find("build_neighbor_lists") == 0) { - - categorySums["neighbors"] += counterValue; - - } else { - if(counterName.length() > 0) { - categorySums[counterName] += counterValue; - } else { - categorySums["other"] += counterValue; - } + if(counterName.find("INTERFACE_MODULES::") == 0) { + std::cout << std::left << std::setw(80) << counter_names[i] + << std::left << std::setw(15) << std::fixed << std::setprecision(2) << time_counters[i] + << std::left << std::setw(15) << call_counters[i] + << "\n"; } } - // Print the accumulated sums for each category - for(const auto& category: categorySums) { - std::cout << category.first << ": " << category.second << std::endl; + // Markers + for (size_t i = 0; i < TimerMarkers::Offset; ++i) { + std::cout << std::left << std::setw(80) << counter_names[i] + << std::left << std::setw(15) << std::fixed << std::setprecision(2) << time_counters[i] + << std::left << std::setw(15) << 1 + << "\n"; + } + + std::cout << "--------------------------------------------------------------------------------------------------------\n"; + } + + void computeCategories() { + for (size_t i = 0; i < time_counters.size(); ++i) { + const std::string& counterName = counter_names[i]; + TimeType counterValue = time_counters[i]; + + if(counterName.find("INTERNAL_MODULES::pack_") == 0 || + counterName.find("INTERNAL_MODULES::unpack_") == 0 || + counterName.find("INTERNAL_MODULES::determine_") == 0 || + counterName.find("INTERNAL_MODULES::set_communication_") == 0 || + counterName.find("INTERNAL_MODULES::remove_exchanged_particles") == 0 || + counterName.find("INTERNAL_MODULES::change_size_after_exchange") == 0) { + + categorySums["INTERNAL_CATEGORIES::COMMUNICATION"] += counterValue; + + } else if(counterName.find("INTERNAL_MODULES::build_cell_lists") == 0 || + counterName.find("INTERNAL_MODULES::build_cell_lists_stencil") == 0 || + counterName.find("INTERNAL_MODULES::partition_cell_lists") == 0 || + counterName.find("INTERNAL_MODULES::build_neighbor_lists") == 0) { + + categorySums["INTERNAL_CATEGORIES::NEIGHBORS"] += counterValue; + } } } private: std::vector<std::string> counter_names; - std::vector<TimeType> counters; + std::vector<TimeType> time_counters; + std::vector<int> call_counters; + std::unordered_map<std::string, TimeType> categorySums; std::vector<std::chrono::high_resolution_clock::time_point> clocks; TimeType time_factor; }; diff --git a/runtime/copper_fcc_lattice.cpp b/runtime/utility/copper_fcc_lattice.cpp similarity index 100% rename from runtime/copper_fcc_lattice.cpp rename to runtime/utility/copper_fcc_lattice.cpp diff --git a/runtime/copper_fcc_lattice.hpp b/runtime/utility/copper_fcc_lattice.hpp similarity index 100% rename from runtime/copper_fcc_lattice.hpp rename to runtime/utility/copper_fcc_lattice.hpp diff --git a/runtime/create_body.cpp b/runtime/utility/create_body.cpp similarity index 53% rename from runtime/create_body.cpp rename to runtime/utility/create_body.cpp index 6c431f646db037670fa29b0d4e73ab3e35a420ae..5e23c89b43b12e29a2815de740d8efc7963b8c1e 100644 --- a/runtime/create_body.cpp +++ b/runtime/utility/create_body.cpp @@ -18,7 +18,7 @@ id_t create_halfspace(PairsRuntime *pr, if(pr->getDomainPartitioner()->isWithinSubdomain(x, y, z) || flag & (flags::INFINITE | flags::GLOBAL) ){ int n = pr->getTrackedVariableAsInteger("nlocal"); - uid = (flag & flags::GLOBAL) ? UniqueID::createGlobal(pr) : UniqueID::create(pr); + uid = (flag & (flags::INFINITE | flags::GLOBAL)) ? UniqueID::createGlobal(pr) : UniqueID::create(pr); uids(n) = uid; positions(n, 0) = x; positions(n, 1) = y; @@ -28,7 +28,7 @@ id_t create_halfspace(PairsRuntime *pr, normals(n, 2) = nz; types(n) = type; flags(n) = flag; - shapes(n) = 1; // halfspace + shapes(n) = Shapes::Halfspace; pr->setTrackedVariableAsInteger("nlocal", n + 1); } @@ -50,10 +50,11 @@ id_t create_sphere(PairsRuntime *pr, auto radii = pr->getAsFloatProperty(pr->getPropertyByName("radius")); auto positions = pr->getAsVectorProperty(pr->getPropertyByName("position")); auto velocities = pr->getAsVectorProperty(pr->getPropertyByName("linear_velocity")); + auto angular_velocity = pr->getAsVectorProperty(pr->getPropertyByName("angular_velocity")); - if(pr->getDomainPartitioner()->isWithinSubdomain(x, y, z)) { + if(pr->getDomainPartitioner()->isWithinSubdomain(x, y, z) || flag & (flags::INFINITE | flags::GLOBAL)) { int n = pr->getTrackedVariableAsInteger("nlocal"); - uid = (flag & flags::GLOBAL) ? UniqueID::createGlobal(pr) : UniqueID::create(pr); + uid = (flag & (flags::INFINITE | flags::GLOBAL)) ? UniqueID::createGlobal(pr) : UniqueID::create(pr); uids(n) = uid; radii(n) = radius; masses(n) = ((4.0 / 3.0) * M_PI) * radius * radius * radius * density; @@ -65,11 +66,57 @@ id_t create_sphere(PairsRuntime *pr, velocities(n, 2) = vz; types(n) = type; flags(n) = flag; - shapes(n) = 0; // sphere + shapes(n) = Shapes::Sphere; + angular_velocity(n, 0) = 0; + angular_velocity(n, 1) = 0; + angular_velocity(n, 2) = 0; pr->setTrackedVariableAsInteger("nlocal", n + 1); } return uid; } +id_t create_box(PairsRuntime *pr, + double x, double y, double z, + double vx, double vy, double vz, + double ex, double ey, double ez, + double density, int type, int flag){ + // TODO: increase capacity if exceeded + id_t uid = 0; + auto uids = pr->getAsUInt64Property(pr->getPropertyByName("uid")); + auto shapes = pr->getAsIntegerProperty(pr->getPropertyByName("shape")); + auto types = pr->getAsIntegerProperty(pr->getPropertyByName("type")); + auto flags = pr->getAsIntegerProperty(pr->getPropertyByName("flags")); + auto masses = pr->getAsFloatProperty(pr->getPropertyByName("mass")); + auto positions = pr->getAsVectorProperty(pr->getPropertyByName("position")); + auto velocities = pr->getAsVectorProperty(pr->getPropertyByName("linear_velocity")); + auto edge_length = pr->getAsVectorProperty(pr->getPropertyByName("edge_length")); + auto angular_velocity = pr->getAsVectorProperty(pr->getPropertyByName("angular_velocity")); + + if(pr->getDomainPartitioner()->isWithinSubdomain(x, y, z) || flag & (flags::INFINITE | flags::GLOBAL)) { + int n = pr->getTrackedVariableAsInteger("nlocal"); + uid = (flag & (flags::INFINITE | flags::GLOBAL)) ? UniqueID::createGlobal(pr) : UniqueID::create(pr); + uids(n) = uid; + edge_length(n, 0) = ex; + edge_length(n, 1) = ey; + edge_length(n, 2) = ez; + masses(n) = ex * ey * ez * density; + positions(n, 0) = x; + positions(n, 1) = y; + positions(n, 2) = z; + velocities(n, 0) = vx; + velocities(n, 1) = vy; + velocities(n, 2) = vz; + types(n) = type; + flags(n) = flag; + shapes(n) = Shapes::Box; + angular_velocity(n, 0) = 0; + angular_velocity(n, 1) = 0; + angular_velocity(n, 2) = 0; + pr->setTrackedVariableAsInteger("nlocal", n + 1); + } + + return uid; +} + } \ No newline at end of file diff --git a/runtime/create_body.hpp b/runtime/utility/create_body.hpp similarity index 65% rename from runtime/create_body.hpp rename to runtime/utility/create_body.hpp index 995b1f6998940c09d484fad159ba0a382640a82b..299416408044955277cea10e8dbe0f31cf433eea 100644 --- a/runtime/create_body.hpp +++ b/runtime/utility/create_body.hpp @@ -15,4 +15,10 @@ id_t create_sphere(PairsRuntime *pr, double vx, double vy, double vz, double density, double radius, int type, int flag); +id_t create_box(PairsRuntime *pr, + double x, double y, double z, + double vx, double vy, double vz, + double ex, double ey, double ez, + double density, int type, int flag); + } \ No newline at end of file diff --git a/runtime/dem_sc_grid.cpp b/runtime/utility/dem_sc_grid.cpp similarity index 99% rename from runtime/dem_sc_grid.cpp rename to runtime/utility/dem_sc_grid.cpp index 119ec78d75cf50dbbc73a9033750bb791353e6b5..30b2cb017d7a93fae2ff8df56758677c220393af 100644 --- a/runtime/dem_sc_grid.cpp +++ b/runtime/utility/dem_sc_grid.cpp @@ -71,7 +71,7 @@ int dem_sc_grid(PairsRuntime *ps, double xmax, double ymax, double zmax, double velocities(nparticles, 2) = 0.1 * realRandom<real_t>(-initial_velocity, initial_velocity); types(nparticles) = rand() % ntypes; flags(nparticles) = 0; - shapes(nparticles) = shapes::Sphere; + shapes(nparticles) = Shapes::Sphere; /* std::cout << uid(nparticles) << "," << types(nparticles) << "," << masses(nparticles) << "," << radius(nparticles) << "," diff --git a/runtime/dem_sc_grid.hpp b/runtime/utility/dem_sc_grid.hpp similarity index 100% rename from runtime/dem_sc_grid.hpp rename to runtime/utility/dem_sc_grid.hpp diff --git a/runtime/read_from_file.cpp b/runtime/utility/read_from_file.cpp similarity index 100% rename from runtime/read_from_file.cpp rename to runtime/utility/read_from_file.cpp diff --git a/runtime/read_from_file.hpp b/runtime/utility/read_from_file.hpp similarity index 100% rename from runtime/read_from_file.hpp rename to runtime/utility/read_from_file.hpp diff --git a/runtime/stats.cpp b/runtime/utility/stats.cpp similarity index 100% rename from runtime/stats.cpp rename to runtime/utility/stats.cpp diff --git a/runtime/stats.hpp b/runtime/utility/stats.hpp similarity index 100% rename from runtime/stats.hpp rename to runtime/utility/stats.hpp diff --git a/runtime/thermo.cpp b/runtime/utility/thermo.cpp similarity index 100% rename from runtime/thermo.cpp rename to runtime/utility/thermo.cpp diff --git a/runtime/thermo.hpp b/runtime/utility/thermo.hpp similarity index 100% rename from runtime/thermo.hpp rename to runtime/utility/thermo.hpp diff --git a/runtime/timing.cpp b/runtime/utility/timing.cpp similarity index 86% rename from runtime/timing.cpp rename to runtime/utility/timing.cpp index 0068d8117b89d2529e3f8dc10b24e1d2483672e8..6addb69e12f532cab74aea533f04d41bc6fffade 100644 --- a/runtime/timing.cpp +++ b/runtime/utility/timing.cpp @@ -20,4 +20,8 @@ void print_timers(PairsRuntime *ps) { ps->printTimers(); } +void log_timers(PairsRuntime *ps) { + ps->logTimers(); +} + } diff --git a/runtime/timing.hpp b/runtime/utility/timing.hpp similarity index 88% rename from runtime/timing.hpp rename to runtime/utility/timing.hpp index f7544603549232cce89c00e7071948da32511693..80662e9d886965cd0669f0716a5fbb5e87e61264 100644 --- a/runtime/timing.hpp +++ b/runtime/utility/timing.hpp @@ -10,5 +10,6 @@ void register_timer(PairsRuntime *ps, int id, std::string name); void start_timer(PairsRuntime *ps, int id); void stop_timer(PairsRuntime *ps, int id); void print_timers(PairsRuntime *ps); +void log_timers(PairsRuntime *ps); } diff --git a/runtime/vtk.cpp b/runtime/utility/vtk.cpp similarity index 66% rename from runtime/vtk.cpp rename to runtime/utility/vtk.cpp index b6235725c9d7d10cb38e4033a1e456b6b50ab32a..57de0eb5489594c6099969d881187c98989f3bb4 100644 --- a/runtime/vtk.cpp +++ b/runtime/utility/vtk.cpp @@ -6,6 +6,107 @@ namespace pairs { + +void vtk_with_rotation( + PairsRuntime *ps, Shapes shape, const char *filename, int start, int end, int timestep, int frequency) { + + std::string output_filename(filename); + auto masses = ps->getAsFloatProperty(ps->getPropertyByName("mass")); + auto positions = ps->getAsVectorProperty(ps->getPropertyByName("position")); + auto radius = ps->getAsFloatProperty(ps->getPropertyByName("radius")); + auto rotation_matrix = ps->getAsMatrixProperty(ps->getPropertyByName("rotation_matrix")); + auto shapes = ps->getAsIntegerProperty(ps->getPropertyByName("shape")); + const int prec = 8; + int n = end - start; + std::ostringstream filename_oss; + + if(frequency != 0 && timestep % frequency != 0) { + return; + } + + filename_oss << filename << "_"; + if(ps->getDomainPartitioner()->getWorldSize() > 1) { + filename_oss << "r" << ps->getDomainPartitioner()->getRank() << "_"; + } + + filename_oss << timestep << ".vtk"; + std::ofstream out_file(filename_oss.str()); + + ps->copyPropertyToHost(masses, ReadOnly); + ps->copyPropertyToHost(positions, ReadOnly); + ps->copyPropertyToHost(radius, ReadOnly); + ps->copyPropertyToHost(rotation_matrix, ReadOnly); + ps->copyPropertyToHost(shapes, ReadOnly); + + for(int i = start; i < end; i++) { + if(shapes(i) != shape) { + n--; + } + } + + if(out_file.is_open()) { + out_file << "# vtk DataFile Version 2.0\n"; + out_file << "Particle data\n"; + out_file << "ASCII\n"; + out_file << "DATASET POLYDATA\n"; + out_file << "POINTS " << n << " double\n"; + + out_file << std::fixed << std::setprecision(prec); + for(int i = start; i < end; i++) { + if (shapes(i) == shape) { + out_file << positions(i, 0) << " "; + out_file << positions(i, 1) << " "; + out_file << positions(i, 2) << "\n"; + } + } + + out_file << "\n\n"; + out_file << "POINT_DATA " << n << "\n"; + out_file << "SCALARS mass double 1\n"; + out_file << "LOOKUP_TABLE default\n"; + for(int i = start; i < end; i++) { + if (shapes(i) == shape) { + out_file << masses(i) << "\n"; + } + } + + out_file << "\n\n"; + out_file << "SCALARS radius double 1\n"; + out_file << "LOOKUP_TABLE default\n"; + for(int i = start; i < end; i++) { + if (shapes(i) == shape) { + out_file << radius(i) << "\n"; + } + } + + out_file << "\n\n"; + out_file << "TENSORS rotation float\n"; + for(int i = start; i < end; i++) { + if (shapes(i) == shape) { + out_file << rotation_matrix(i, 0) << " " + << rotation_matrix(i, 3) << " " + << rotation_matrix(i, 6) << "\n"; + + out_file << rotation_matrix(i, 1) << " " + << rotation_matrix(i, 4) << " " + << rotation_matrix(i, 7) << "\n"; + + out_file << rotation_matrix(i, 2) << " " + << rotation_matrix(i, 5) << " " + << rotation_matrix(i, 8) << "\n"; + } + } + + out_file << "\n\n"; + out_file.close(); + } + else { + std::cerr << "Failed to open " << filename_oss.str() << std::endl; + exit(-1); + } +} + + void vtk_write_aabb(PairsRuntime *ps, const char *filename, int num, double xmin, double xmax, double ymin, double ymax, diff --git a/runtime/vtk.hpp b/runtime/utility/vtk.hpp similarity index 76% rename from runtime/vtk.hpp rename to runtime/utility/vtk.hpp index dcd97c020f1b49cdf82df548083687e1874f7c51..ac369d5c776f06ae626ab142e16e79dd74b81c7e 100644 --- a/runtime/vtk.hpp +++ b/runtime/utility/vtk.hpp @@ -4,6 +4,9 @@ namespace pairs { +void vtk_with_rotation( + PairsRuntime *ps, Shapes shape, const char *filename, int start, int end, int timestep, int frequency=1); + void vtk_write_aabb(PairsRuntime *ps, const char *filename, int num, double xmin, double xmax, double ymin, double ymax, diff --git a/src/pairs/__init__.py b/src/pairs/__init__.py index 6525a9814b0fc4729c0cf3e24e5a748c5db3ae4b..cbde4f855380c08ad5ecf5a5e996cdfc88c22e90 100644 --- a/src/pairs/__init__.py +++ b/src/pairs/__init__.py @@ -2,7 +2,6 @@ from pairs.ir.types import Types from pairs.code_gen.cgen import CGen from pairs.code_gen.target import Target from pairs.sim.domain_partitioners import DomainPartitioners -from pairs.sim.load_balancing_algorithms import LoadBalancingAlgorithms from pairs.sim.shapes import Shapes from pairs.sim.simulation import Simulation @@ -11,17 +10,14 @@ def simulation( ref, shapes, dims=3, - timesteps=100, double_prec=False, use_contact_history=False, particle_capacity=800000, neighbor_capacity=100, - debug=False, - generate_whole_program=False): + debug=False): return Simulation( - CGen(ref, debug), shapes, dims, timesteps, double_prec, use_contact_history, - particle_capacity, neighbor_capacity, generate_whole_program) + CGen(ref, debug), shapes, dims, double_prec, use_contact_history, particle_capacity, neighbor_capacity) def target_cpu(parallel=False): if parallel: @@ -59,6 +55,9 @@ def point_mass(): def sphere(): return Shapes.Sphere +def box(): + return Shapes.Box + def halfspace(): return Shapes.Halfspace @@ -70,15 +69,3 @@ def regular_domain_partitioner_xy(): def block_forest(): return DomainPartitioners.BlockForest - -def morton(): - return LoadBalancingAlgorithms.Morton - -def hilbert(): - return LoadBalancingAlgorithms.Hilbert - -def metis(): - return LoadBalancingAlgorithms.Metis - -def diffusive(): - return LoadBalancingAlgorithms.Diffusive \ No newline at end of file diff --git a/src/pairs/analysis/__init__.py b/src/pairs/analysis/__init__.py index 7b200b201ef6b1126275c6656c98419b36e2d89a..a7abf13a4babd0e07acb740fb361c9cce868b3eb 100644 --- a/src/pairs/analysis/__init__.py +++ b/src/pairs/analysis/__init__.py @@ -1,7 +1,7 @@ import time from pairs.analysis.expressions import DetermineExpressionsTerminals, ResetInPlaceOperations, DetermineInPlaceOperations, ListDeclaredExpressions from pairs.analysis.blocks import DiscoverBlockVariants, DetermineExpressionsOwnership, DetermineParentBlocks -from pairs.analysis.devices import FetchKernelReferences, MarkCandidateLoops +from pairs.analysis.devices import FetchKernelReferences, MarkCandidateLoops, FetchDeviceCopies from pairs.analysis.modules import FetchModulesReferences, InferModulesReturnTypes @@ -53,4 +53,7 @@ class Analysis: self.apply(MarkCandidateLoops()) def infer_modules_return_types(self): - self.apply(InferModulesReturnTypes()) \ No newline at end of file + self.apply(InferModulesReturnTypes()) + + def fetch_device_copies(self): + self.apply(FetchDeviceCopies()) \ No newline at end of file diff --git a/src/pairs/analysis/devices.py b/src/pairs/analysis/devices.py index 29e554e4606776693cdfd2dd784fc24d0b6995ea..d8c6c103f8ec6bbc05ff4f4efe6c4e1ba2e872bf 100644 --- a/src/pairs/analysis/devices.py +++ b/src/pairs/analysis/devices.py @@ -9,6 +9,31 @@ from pairs.ir.visitor import Visitor from pairs.ir.vectors import VectorOp +class FetchDeviceCopies(Visitor): + def __init__(self, ast=None): + super().__init__(ast) + self.module_stack = [] + + def visit_Module(self, ast_node): + self.module_stack.append(ast_node) + self.visit_children(ast_node) + self.module_stack.pop() + + def visit_CopyArray(self, ast_node): + self.module_stack[-1].add_device_copy(ast_node.array()) + + def visit_CopyProperty(self, ast_node): + self.module_stack[-1].add_device_copy(ast_node.prop()) + + def visit_CopyFeatureProperty(self, ast_node): + self.module_stack[-1].add_device_copy(ast_node.prop()) + + def visit_CopyContactProperty(self, ast_node): + self.module_stack[-1].add_device_copy(ast_node.contact_prop()) + + def visit_CopyVar(self, ast_node): + self.module_stack[-1].add_device_copy(ast_node.variable()) + class MarkCandidateLoops(Visitor): def __init__(self, ast=None): super().__init__(ast) diff --git a/src/pairs/code_gen/accessor.py b/src/pairs/code_gen/accessor.py index 34421cb64cde53f7a75a9079b39ee90c25b7b235..6f866bebae1f509539c43b5397ae84ff33fa486f 100644 --- a/src/pairs/code_gen/accessor.py +++ b/src/pairs/code_gen/accessor.py @@ -253,7 +253,7 @@ class PairsAcessor: self.print(f"{self.host_device_attr}{tkw} get{funcname}({params}) const{{") - if self.target.is_gpu(): + if self.target.is_gpu() and prop.device_flag: self.ifdef_else("__CUDA_ARCH__", self.getter_body, [prop, True], self.getter_body, [prop, False]) else: self.getter_body(prop, False) diff --git a/src/pairs/code_gen/cgen.py b/src/pairs/code_gen/cgen.py index 76e22283330f531eed2f449a30eee798bf62c00e..9a07c9444bb1a16d1c7628b3a3d6decd898d11af 100644 --- a/src/pairs/code_gen/cgen.py +++ b/src/pairs/code_gen/cgen.py @@ -31,8 +31,6 @@ from pairs.ir.variables import Var, DeclareVariable, Deref from pairs.ir.parameters import Parameter from pairs.ir.vectors import Vector, VectorAccess, VectorOp, ZeroVector from pairs.ir.ret import Return -from pairs.sim.domain_partitioners import DomainPartitioners -from pairs.sim.timestep import Timestep from pairs.code_gen.printer import Printer from pairs.code_gen.accessor import PairsAcessor @@ -45,6 +43,7 @@ class CGen: self.target = None self.print = None self.kernel_context = False + self.loop_scope = False self.generate_full_object_names = False self.ref = ref self.debug = debug @@ -58,15 +57,6 @@ class CGen: def real_type(self): return Types.c_keyword(self.sim, Types.Real) - # def generate_cmake_config_file(self): - # self.print = Printer("pairs_cmake_params.txt") - # self.print.start() - # self.print(f"PAIRS_TARGET={self.ref}") - # self.print(f"GENERATE_WHOLE_PROGRAM={'ON' if self.sim._generate_whole_program else 'OFF'}") - # self.print(f"USE_WALBERLA={'ON' if self.sim._partitioner == DomainPartitioners.BlockForest else 'OFF'}") - # # self.print(f"COMPILE_CUDA={'ON' if self.target.is_gpu() else 'OFF'}") - # self.print.end() - def generate_object_reference(self, obj, device=False, index=None): if device and (not self.target.is_gpu() or not obj.device_flag): # Ideally this should never be called @@ -150,8 +140,6 @@ class CGen: self.print("}") def generate_preamble(self): - # self.print(f"#define APPLICATION_REFERENCE \"{self.ref}\"") - if self.target.is_gpu(): self.print("#include <math_constants.h>") @@ -166,15 +154,15 @@ class CGen: self.print("#include <stdlib.h>") self.print("//---") self.print("#include \"likwid-marker.h\"") - self.print("#include \"copper_fcc_lattice.hpp\"") - self.print("#include \"create_body.hpp\"") - self.print("#include \"dem_sc_grid.hpp\"") self.print("#include \"pairs.hpp\"") - self.print("#include \"read_from_file.hpp\"") - self.print("#include \"stats.hpp\"") - self.print("#include \"timing.hpp\"") - self.print("#include \"thermo.hpp\"") - self.print("#include \"vtk.hpp\"") + self.print("#include \"utility/copper_fcc_lattice.hpp\"") + self.print("#include \"utility/create_body.hpp\"") + self.print("#include \"utility/dem_sc_grid.hpp\"") + self.print("#include \"utility/read_from_file.hpp\"") + self.print("#include \"utility/stats.hpp\"") + self.print("#include \"utility/timing.hpp\"") + self.print("#include \"utility/thermo.hpp\"") + self.print("#include \"utility/vtk.hpp\"") self.print("") self.print("using namespace pairs;") self.print("") @@ -185,12 +173,6 @@ class CGen: if not module.interface: module_params += ["PairsRuntime *pairs_runtime", "struct PairsObjects *pobj"] - if module.name=="initialize" and self.sim.create_domain_at_initialization: - module_params += ["int argc", "char **argv"] - - if module.name=="set_domain": - module_params += ["int argc", "char **argv"] - module_params += [f"{Types.c_keyword(self.sim, param.type())} {param.name()}" for param in module.parameters()] print_params = ", ".join(module_params) @@ -203,9 +185,8 @@ class CGen: self.print("namespace pairs::internal {") self.print.add_indent(4) - # All modules except the interface ones are declared in the pairs::internal scope - for module in self.sim.modules() + self.sim.udf_modules(): - assert not module.interface + # All internal modules are declared in the pairs::internal scope + for module in self.sim.modules(): self.generate_module_header(module, definition=False) self.print.add_indent(-4) @@ -214,7 +195,7 @@ class CGen: def generate_pairs_object_structure(self): self.print("") - externkw = "" if self.sim._generate_whole_program else "extern " + externkw = "extern " if self.target.is_gpu(): for array in self.sim.arrays.statics(): if array.device_flag: @@ -290,34 +271,6 @@ class CGen: self.print("};") self.print("") - def generate_program(self, ast_node): - self.generate_interfaces() - ext = ".cu" if self.target.is_gpu() else ".cpp" - self.print = Printer(self.ref + ext) - self.print.start() - self.generate_preamble() - self.generate_pairs_object_structure() - self.generate_module_decls() - - self.print("namespace pairs::internal {") - self.print.add_indent(4) - - for kernel in self.sim.kernels(): - self.generate_kernel(kernel) - - for module in self.sim.modules(): - if module.name!='main': - self.generate_module(module) - - self.print.add_indent(-4) - self.print("}") - - for module in self.sim.modules(): - if module.name=='main': - self.generate_main(module) - - self.print.end() - def generate_library(self): self.generate_interfaces() # Generate CUDA/CPP file with modules @@ -351,9 +304,8 @@ class CGen: for kernel in self.sim.kernels(): self.generate_kernel(kernel) - # All modules except the interface ones are defined in the pairs::internal scope - for module in self.sim.modules() + self.sim.udf_modules(): - assert not module.interface + # All internal modules are defined in the pairs::internal scope + for module in self.sim.modules(): self.generate_module(module) self.print.add_indent(-4) @@ -452,42 +404,12 @@ class CGen: if feature_prop in module.host_references(): self.print(f"{type_kw} *{feature_prop.name()}_h = pobj->{feature_prop.name()};") - def generate_main(self, module): - assert module.name=='main' - - ndims = module.sim.ndims() - nprops = module.sim.properties.nprops() - ncontactprops = module.sim.contact_properties.nprops() - narrays = module.sim.arrays.narrays() - part = DomainPartitioners.c_keyword(module.sim.partitioner()) - - self.generate_full_object_names = True - self.print("int main(int argc, char **argv) {") - self.print(f" PairsRuntime *pairs_runtime = new PairsRuntime({nprops}, {ncontactprops}, {narrays}, {part});") - self.print(f" struct PairsObjects *pobj = new PairsObjects();") - - if module.sim._enable_profiler: - self.print(" LIKWID_MARKER_INIT;") - - self.generate_statement(module.block) - - if module.sim._enable_profiler: - self.print(" LIKWID_MARKER_CLOSE;") - - self.print(" pairs::print_timers(pairs_runtime);") - self.print(" pairs::print_stats(pairs_runtime, pobj->nlocal, pobj->nghost);") - self.print(" delete pobj;") - self.print(" delete pairs_runtime;") - self.print(" return 0;") - self.print("}") - self.generate_full_object_names = False - def generate_module(self, module): self.generate_module_header(module, definition=True) self.print.add_indent(4) - if self.debug: - self.print(f"PAIRS_DEBUG(\"\\n{module.name}\\n\");") + # if self.debug: + # self.print(f"PAIRS_DEBUG(\"\\n{module.name}\\n\");") if not module.interface: self.generate_module_declerations(module) @@ -603,9 +525,7 @@ class CGen: if ast_node.check_for_resize(): resize = self.generate_expression(ast_node.resize) capacity = self.generate_expression(ast_node.capacity) - # self.print(f"printf (\" %d -- before AtomicInc: nsend = %d -- send_capacity = %d -- resizes[0] = %d\\n\", {Printer.line_id}, {elem}, {capacity}, {resize});") self.print(f"pairs::{prefix}atomic_add_resize_check(&({elem}), {value}, &({resize}), {capacity});") - # self.print(f"printf (\" %d -- after AtomicInc: nsend = %d -- send_capacity = %d -- resizes[0] = %d\\n\", {Printer.line_id}, {elem}, {capacity}, {resize});") else: self.print(f"pairs::{prefix}atomic_add(&({elem}), {value});") @@ -617,7 +537,10 @@ class CGen: self.print.add_indent(-4) if isinstance(ast_node, Continue): - self.print("continue;") + if self.loop_scope: + self.print("continue;") + else: + self.print("return;") # TODO: Why there are Decls for other types? if isinstance(ast_node, Decl): @@ -755,7 +678,7 @@ class CGen: for i in matrix_op.indexes_to_generate(): lhs = self.generate_expression(matrix_op.lhs, matrix_op.mem, index=i) rhs = self.generate_expression(matrix_op.rhs, index=i) - operator = vector_op.operator() + operator = matrix_op.operator() if operator.is_unary(): self.print(f"const {self.real_type()} {matrix_op.name()}_{dim} = {operator.symbol()}({lhs});") @@ -806,9 +729,7 @@ class CGen: self.print(f"pairs_runtime->copyArrayTo{ctx_suffix}({array_id}, {action}, {size}); // {array_name}") else: - # self.print(f"std::cout<< \"{Printer.line_id} -- before {array_name} copyArrayTo{ctx_suffix}({action}) === \" << pobj->{array_name}[0] << \" \" << pobj->{array_name}[1] << \" \" << pobj->{array_name}[2] << std::endl;") self.print(f"pairs_runtime->copyArrayTo{ctx_suffix}({array_id}, {action}); // {array_name}") - # self.print(f"std::cout<< \"{Printer.line_id} -- after {array_name} copyArrayTo{ctx_suffix}({action}) === \" << pobj->{array_name}[0] << \" \" << pobj->{array_name}[1] << \" \" << pobj->{array_name}[2] << std::endl;") if isinstance(ast_node, CopyContactProperty): prop_id = ast_node.contact_prop().id() @@ -848,7 +769,9 @@ class CGen: self.print("#pragma omp parallel for") self.print(f"for(int {iterator} = {lower_range}; {iterator} < {upper_range}; {iterator}++) {{") + self.loop_scope = True self.generate_statement(ast_node.block) + self.loop_scope = False self.print("}") @@ -917,9 +840,6 @@ class CGen: if isinstance(ast_node, ModuleCall): module_params = ["pairs_runtime", "pobj"] - if ast_node.module.name=="init_domain": - module_params += ["argc", "argv"] - module_params += [f"{param.name()}" for param in ast_node.module.parameters()] print_params = ", ".join(module_params) @@ -1015,9 +935,6 @@ class CGen: if self.target.is_gpu() and fp.device_flag: self.print(f"pairs_runtime->copyFeaturePropertyToDevice({fp.id()}); // {fp.name()}") - if isinstance(ast_node, Timestep): - self.generate_statement(ast_node.block) - if isinstance(ast_node, ReallocProperty): p = ast_node.property() ptr_addr = self.generate_object_address(p) @@ -1060,7 +977,9 @@ class CGen: if isinstance(ast_node, While): cond = self.generate_expression(ast_node.cond) self.print(f"while({cond}) {{") + self.loop_scope = True self.generate_statement(ast_node.block) + self.loop_scope = False self.print("}") if isinstance(ast_node, Return): @@ -1098,9 +1017,6 @@ class CGen: if ast_node.name().startswith("pairs::"): extra_params += ["pairs_runtime"] - if ast_node.name() == "pairs_runtime->initDomain": - extra_params += ["&argc", "&argv"] - params = ", ".join(extra_params + [str(self.generate_expression(p)) for p in ast_node.parameters()]) return f"{ast_node.name()}({params})" diff --git a/src/pairs/code_gen/interface.py b/src/pairs/code_gen/interface.py index 6ed3f7f2b511137000e87f8db64c4a70894efd35..1f15c49f1001d7d04c209307eec6ee1083747207 100644 --- a/src/pairs/code_gen/interface.py +++ b/src/pairs/code_gen/interface.py @@ -20,7 +20,6 @@ from pairs.sim.domain_partitioners import DomainPartitioners from pairs.ir.print import PrintCode from pairs.ir.assign import Assign from pairs.sim.contact_history import BuildContactHistory, ClearUnusedContactHistory, ResetContactHistoryUsageStatus -from pairs.sim.thermo import ComputeThermo class InterfaceModules: def __init__(self, sim): @@ -28,7 +27,7 @@ class InterfaceModules: def create_all(self): self.initialize() - self.setup_sim() + self.setup_cells() self.update_domain() self.update_cells(self.sim.reneighbor_frequency) self.communicate(self.sim.reneighbor_frequency) @@ -40,9 +39,6 @@ class InterfaceModules: self.build_contact_history(self.sim.reneighbor_frequency) self.reset_contact_history() - if self.sim._compute_thermo != 0: - self.compute_thermo(self.sim._compute_thermo) - self.rank() self.nlocal() self.nghost() @@ -60,26 +56,27 @@ class InterfaceModules: PrintCode(self.sim, f"pairs_runtime = new PairsRuntime({nprops}, {ncontactprops}, {narrays}, {part});") PrintCode(self.sim, f"pobj = new PairsObjects();") + if self.sim.grid is None: + self.sim.grid = MutableGrid(self.sim, self.sim.dims) + inits = Block.from_list(self.sim, [ + RegisterTimers(self.sim), + RegisterMarkers(self.sim), DeclareVariables(self.sim), DeclareArrays(self.sim), AllocateProperties(self.sim), AllocateContactProperties(self.sim), AllocateFeatureProperties(self.sim), - RegisterTimers(self.sim), - RegisterMarkers(self.sim) ]) - if self.sim.create_domain_at_initialization: - self.sim.add_statement(Block.merge_blocks(inits, self.sim.create_domain)) - else: - assert self.sim.grid is None, "A grid already exists" - self.sim.grid = MutableGrid(self.sim, self.sim.dims) - self.sim.add_statement(inits) + if self.sim._enable_profiler: + PrintCode(self.sim, "LIKWID_MARKER_INIT;") + + self.sim.add_statement(inits) @pairs_interface_block - def setup_sim(self): - self.sim.module_name('setup_sim') + def setup_cells(self): + self.sim.module_name('setup_cells') if self.sim.cell_lists.runtime_spacing: for d in range(self.sim.dims): @@ -88,7 +85,6 @@ class InterfaceModules: if self.sim.cell_lists.runtime_cutoff_radius: Assign(self.sim, self.sim.cell_lists.cutoff_radius, Parameter(self.sim, 'cutoff_radius', Types.Real)) - self.sim.add_statement(self.sim.setup_particles) # This update assumes all particles have been created exactly in the rank that contains them self.sim.add_statement(UpdateDomain(self.sim)) self.sim.add_statement(BuildCellListsStencil(self.sim, self.sim.cell_lists)) @@ -164,11 +160,6 @@ class InterfaceModules: self.sim.add_statement(ResetContactHistoryUsageStatus(self.sim, self.sim._contact_history)) self.sim.add_statement(ClearUnusedContactHistory(self.sim, self.sim._contact_history)) - @pairs_interface_block - def compute_thermo(self): - self.sim.module_name('compute_thermo') - self.sim.add_statement(ComputeThermo(self.sim)) - @pairs_interface_block def rank(self): self.sim.module_name('rank') @@ -189,63 +180,15 @@ class InterfaceModules: self.sim.module_name('size') Return(self.sim, ScalarOp.inline(self.sim.nlocal + self.sim.nghost)) - @pairs_interface_block - def create_sphere(self): - self.sim.module_name('create_sphere') - x = Parameter(self.sim, 'x', Types.Real) - y = Parameter(self.sim, 'y', Types.Real) - z = Parameter(self.sim, 'z', Types.Real) - vx = Parameter(self.sim, 'vx', Types.Real) - vy = Parameter(self.sim, 'vy', Types.Real) - vz = Parameter(self.sim, 'vz', Types.Real) - density = Parameter(self.sim, 'density', Types.Real) - radius = Parameter(self.sim, 'radius', Types.Real) - ptype = Parameter(self.sim, 'type', Types.Real) - flag = Parameter(self.sim, 'flag', Types.Real) - - Return(self.sim, Call(self.sim, "pairs::create_sphere", - [x, y, z, vx, vy, vz, - density, radius, ptype, flag], Types.UInt64)) - - @pairs_interface_block - def create_halfspace(self): - self.sim.module_name('create_halfspace') - x = Parameter(self.sim, 'x', Types.Real) - y = Parameter(self.sim, 'y', Types.Real) - z = Parameter(self.sim, 'z', Types.Real) - nx = Parameter(self.sim, 'nx', Types.Real) - ny = Parameter(self.sim, 'ny', Types.Real) - nz = Parameter(self.sim, 'nz', Types.Real) - ptype = Parameter(self.sim, 'type', Types.Real) - flag = Parameter(self.sim, 'flag', Types.Real) - - Return(self.sim, Call(self.sim, "pairs::create_halfspace", - [x, y, z, nx, ny, nz, ptype, flag], Types.UInt64)) - - @pairs_interface_block - def dem_sc_grid(self): - self.sim.module_name('dem_sc_grid') - xmax = Parameter(self.sim, 'xmax', Types.Real) - ymax = Parameter(self.sim, 'ymax', Types.Real) - zmax = Parameter(self.sim, 'zmax', Types.Real) - spacing = Parameter(self.sim, 'spacing', Types.Real) - diameter = Parameter(self.sim, 'diameter', Types.Real) - min_diameter = Parameter(self.sim, 'min_diameter', Types.Real) - max_diameter = Parameter(self.sim, 'max_diameter', Types.Real) - initial_velocity = Parameter(self.sim, 'initial_velocity', Types.Real) - particle_density = Parameter(self.sim, 'particle_density', Types.Real) - ntypes = Parameter(self.sim, 'ntypes', Types.Int32) - - Assign(self.sim, self.sim.nlocal, - Call_Int(self.sim, "pairs::dem_sc_grid", - [xmax, ymax, zmax, spacing, diameter, min_diameter, max_diameter, - initial_velocity, particle_density, ntypes])) - Return(self.sim, self.sim.nlocal) - @pairs_interface_block def end(self): self.sim.module_name('end') + + if self.sim._enable_profiler: + PrintCode(self.sim, "LIKWID_MARKER_CLOSE;") + Call_Void(self.sim, "pairs::print_timers", []) + Call_Void(self.sim, "pairs::log_timers", []) Call_Void(self.sim, "pairs::print_stats", [self.sim.nlocal, self.sim.nghost]) PrintCode(self.sim, "delete pobj;") PrintCode(self.sim, "delete pairs_runtime;") diff --git a/src/pairs/ir/block.py b/src/pairs/ir/block.py index 2a14ea2776dcf3651a8f9d03b397bf9db1bd1fb6..5883d4157730380ff40dab9b9318f682bebc2cb8 100644 --- a/src/pairs/ir/block.py +++ b/src/pairs/ir/block.py @@ -12,9 +12,20 @@ def pairs_inline(func): return inner +def pairs_block(func): + """This decorator needs the owning class of the method being decorated to have a 'run_on_device' member. + If the variable doesn't exist it defaults to False here, hence pairs_host_block gets used.""" + def wrapper(*args, **kwargs): + self = args[0] + decorator = pairs_device_block if getattr(self, "run_on_device", False) else pairs_host_block + return decorator(func)(*args, **kwargs) + return wrapper + + def pairs_host_block(func): def inner(*args, **kwargs): sim = args[0].sim # self.sim + profile = getattr(args[0], "profile", False) sim.init_block() func(*args, **kwargs) return Module(sim, @@ -22,7 +33,8 @@ def pairs_host_block(func): block=Block(sim, sim._block), resizes_to_check=sim._resizes_to_check, check_properties_resize=sim._check_properties_resize, - run_on_device=False) + run_on_device=False, + profile=profile) return inner @@ -30,6 +42,7 @@ def pairs_host_block(func): def pairs_device_block(func): def inner(*args, **kwargs): sim = args[0].sim # self.sim + profile = getattr(args[0], "profile", False) sim.init_block() func(*args, **kwargs) return Module(sim, @@ -37,7 +50,8 @@ def pairs_device_block(func): block=Block(sim, sim._block), resizes_to_check=sim._resizes_to_check, check_properties_resize=sim._check_properties_resize, - run_on_device=True) + run_on_device=True, + profile=profile) return inner diff --git a/src/pairs/ir/loops.py b/src/pairs/ir/loops.py index 8842818627c7da514ec9a24ae85c84a5f08cd747..7d90c3c97e3fac7d4cb7cdf48b9d8b1d40720808 100644 --- a/src/pairs/ir/loops.py +++ b/src/pairs/ir/loops.py @@ -88,8 +88,8 @@ class For(ASTNode): class ParticleFor(For): - def __init__(self, sim, block=None, local_only=True): - super().__init__(sim, 0, sim.nlocal if local_only else sim.nlocal + sim.nghost, block) + def __init__(self, sim, block=None, local_only=True, not_kernel=False): + super().__init__(sim, 0, sim.nlocal if local_only else sim.nlocal + sim.nghost, block, not_kernel) self.local_only = local_only def __str__(self): diff --git a/src/pairs/ir/math.py b/src/pairs/ir/math.py index a6a156a4986a6a8b122dc70d8ca920ad18d29269..d9bd5730230d1f209b5c598f8b3ff55254f36496 100644 --- a/src/pairs/ir/math.py +++ b/src/pairs/ir/math.py @@ -83,7 +83,48 @@ class Abs(MathFunction): def type(self): return self._params[0].type() +class Min(MathFunction): + def __init__(self, sim, a, b): + super().__init__(sim) + self._params = [a, b] + + def __str__(self): + return f"Min<{self._params}>" + + def function_name(self): + return "MIN" + + def type(self): + return self._params[0].type() + +class Max(MathFunction): + def __init__(self, sim, a, b): + super().__init__(sim) + self._params = [a, b] + + def __str__(self): + return f"Max<{self._params}>" + + def function_name(self): + return "MAX" + def type(self): + return self._params[0].type() + +class Sign(MathFunction): + def __init__(self, sim, expr): + super().__init__(sim) + self._params = [expr] + + def __str__(self): + return f"Sign<{self._params}>" + + def function_name(self): + return "SIGN" + + def type(self): + return self._params[0].type() + class Sin(MathFunction): def __init__(self, sim, expr): super().__init__(sim) diff --git a/src/pairs/ir/module.py b/src/pairs/ir/module.py index ded67ac6f4448590c346f51177b17fe364b729d0..c7937ed9b478f2420a44633af040976884a18c58 100644 --- a/src/pairs/ir/module.py +++ b/src/pairs/ir/module.py @@ -16,8 +16,8 @@ class Module(ASTNode): block=None, resizes_to_check={}, check_properties_resize=False, - run_on_device=False, - user_defined=False, + run_on_device=False, + profile=False, interface=False): super().__init__(sim) self._id = Module.last_module @@ -29,24 +29,22 @@ class Module(ASTNode): self._contact_properties = {} self._feature_properties = {} self._host_references = set() + self._device_copies = set() self._block = block self._resizes_to_check = resizes_to_check self._check_properties_resize = check_properties_resize self._run_on_device = run_on_device - self._user_defined = user_defined self._interface = interface self._return_type = Types.Void - self._profile = False + self._profile = profile + + if profile: + self.sim.enable_profiler() - if user_defined: - assert not interface, ("User-defined modules can't be part of the interface directly." - "Wrap them inside seperate interface modules.") - sim.add_udf_module(self) + if interface: + sim.add_interface_module(self) else: - if interface: - sim.add_interface_module(self) - else: - sim.add_module(self) + sim.add_module(self) Module.last_module += 1 @@ -69,10 +67,6 @@ class Module(ASTNode): def run_on_device(self): return self._run_on_device - @property - def user_defined(self): - return self._user_defined - @property def interface(self): return self._interface @@ -115,6 +109,9 @@ class Module(ASTNode): def host_references(self): return self._host_references + def device_copies(self): + return self._device_copies + def add_array(self, array, write=False): array_list = array if isinstance(array, list) else [array] new_op = 'w' if write else 'r' @@ -185,6 +182,9 @@ class Module(ASTNode): def add_host_reference(self, elem): self._host_references.add(elem) + def add_device_copy(self, elem): + self._device_copies.add(elem) + def children(self): return [self._block] diff --git a/src/pairs/ir/symbols.py b/src/pairs/ir/symbols.py index f7ba470282bc9a8e1f0ef88a4a30437bdc711fbe..0c313c715074954eb8c149ea1df6db7125804575 100644 --- a/src/pairs/ir/symbols.py +++ b/src/pairs/ir/symbols.py @@ -7,16 +7,20 @@ from pairs.ir.types import Types class Symbol(ASTTerm): - def __init__(self, sim, sym_type): + def __init__(self, sim, sym_type, name=None): super().__init__(sim, OperatorClass.from_type(sym_type)) self.sym_type = sym_type self.assign_to = None + self.name = name def __str__(self): return f"Symbol<{Types.c_keyword(self.sim, self.sym_type)}>" def assign(self, node): self.assign_to = node + + def get_assigned_node(self): + return self.assign_to def type(self): return self.sym_type diff --git a/src/pairs/ir/timers.py b/src/pairs/ir/timers.py index b1cd21c0c80a675491bef0c10bfc98806b81e542..928a668cd80dcb0d180fe1fc8fa4386cd42ff5b6 100644 --- a/src/pairs/ir/timers.py +++ b/src/pairs/ir/timers.py @@ -1,12 +1,10 @@ class Timers: Invalid = -1 - All = 0 - Communication = 1 - DeviceTransfers = 2 - Offset = 3 + Communication = 0 + DeviceTransfers = 1 + Offset = 2 def name(timer): - return "all" if timer == Timers.All else \ - "mpi" if timer == Timers.Communication else \ - "transfers" if timer == Timers.DeviceTransfers else \ - "invalid" + return "MARKERS::MPI" if timer == Timers.Communication else \ + "MARKERS::DEVICE_TRANSFERS" if timer == Timers.DeviceTransfers else \ + "INVALID" diff --git a/src/pairs/ir/variables.py b/src/pairs/ir/variables.py index 00bcc698214a3938b5ae5c3b9b6917114f596c5c..049496c10c32eac0cec29bdbeab84de169c8b27e 100644 --- a/src/pairs/ir/variables.py +++ b/src/pairs/ir/variables.py @@ -4,6 +4,8 @@ from pairs.ir.assign import Assign from pairs.ir.scalars import ScalarOp from pairs.ir.lit import Lit from pairs.ir.operator_class import OperatorClass +from pairs.ir.types import Types +from pairs.ir.accessor_class import AccessorClass class Variables: @@ -23,10 +25,10 @@ class Variables: self.vars.append(var) return var - def add_temp(self, init): + def add_temp(self, init, type): lit = Lit.cvt(self.sim, init) tmp_id = Variables.new_temp_id() - tmp_var = Var(self.sim, f"tmp{tmp_id}", lit.type(), temp=True) + tmp_var = Var(self.sim, f"tmp{tmp_id}", lit.type() if type is None else type, temp=True) Assign(self.sim, tmp_var, lit) return tmp_var @@ -57,6 +59,11 @@ class Var(ASTTerm): def __str__(self): return f"Var<{self.var_name}>" + def __getitem__(self, index): + assert not Types.is_scalar(self.var_type) + _acc_class = AccessorClass.from_type(self.var_type) + return _acc_class(self.sim, self, Lit.cvt(self.sim, index)) + def copy(self, deep=False): # Terminal copies are just themselves return self diff --git a/src/pairs/mapping/funcs.py b/src/pairs/mapping/funcs.py index b4ec2f8f5fcb083a92f3eb5a43f6f33cc49093a5..f2b1e185fafeca9c1dca55d89ab6d710735c0134 100644 --- a/src/pairs/mapping/funcs.py +++ b/src/pairs/mapping/funcs.py @@ -13,7 +13,10 @@ from pairs.ir.types import Types from pairs.mapping.keywords import Keywords from pairs.sim.flags import Flags from pairs.sim.interaction import ParticleInteraction - +from pairs.sim.global_interaction import GlobalLocalInteraction, GlobalGlobalInteraction, GlobalReduction, SortGlobals, PackGlobals, ResetReductionProps, ReduceGlobals, UnpackGlobals +from pairs.ir.module import Module +from pairs.ir.block import Block, pairs_block +from pairs.sim.lowerable import Lowerable class UndefinedSymbol(): def __init__(self, symbol_id): @@ -284,12 +287,30 @@ class BuildParticleIR(ast.NodeVisitor): op_class = OperatorClass.from_type(operand.type()) return op_class(self.sim, operand, None, BuildParticleIR.get_unary_op(node.op)) - -def compute(sim, func, cutoff_radius=None, symbols={}, parameters={}, pre_step=False, skip_first=False): - if sim._generate_whole_program: - assert not parameters, "Compute functions can't take custom parameters when generating whole program." +class OneBodyKernel(Lowerable): + def __init__(self, sim, module_name, run_on_device, profile): + super().__init__(sim) + self.block = Block(sim, []) + self.module_name = module_name + self.run_on_device = run_on_device + self.profile = profile + + def add_statement(self, stmt): + self.block.add_statement(stmt) + + def __iter__(self): + self.sim.add_statement(self) + self.sim.enter(self) + for i in ParticleFor(self.sim): + yield i + self.sim.leave() + @pairs_block + def lower(self): + self.sim.module_name(self.module_name) + self.sim.add_statement(self.block) +def compute(sim, func, cutoff_radius=None, symbols={}, parameters={}, compute_globals=False, run_on_device=True, profile=False): src = inspect.getsource(func) tree = ast.parse(src, mode='exec') #print(ast.dump(ast.parse(src, mode='exec'))) @@ -311,14 +332,15 @@ def compute(sim, func, cutoff_radius=None, symbols={}, parameters={}, pre_step=F sim.module_name(func.__name__) if nparams == 1: - for i in ParticleFor(sim): - for _ in Filter(sim, ScalarOp.cmp(sim.particle_flags[i] & Flags.Fixed, 0)): - ir = BuildParticleIR(sim, symbols, parameters) - ir.add_symbols({params[0]: i}) - ir.visit(tree) + for i in OneBodyKernel(sim, func.__name__, run_on_device=run_on_device, profile=profile): + ir = BuildParticleIR(sim, symbols, parameters) + ir.add_symbols({params[0]: i}) + ir.visit(tree) else: - for interaction_data in ParticleInteraction(sim, nparams, cutoff_radius): + # Compute local-local and local-global interactions + particle_interaction = ParticleInteraction(sim, func.__name__, nparams, cutoff_radius, run_on_device=run_on_device, profile=profile) + for interaction_data in particle_interaction: # Start building IR ir = BuildParticleIR(sim, symbols, parameters) ir.add_symbols({ @@ -332,43 +354,59 @@ def compute(sim, func, cutoff_radius=None, symbols={}, parameters={}, pre_step=F '__contact_point__': interaction_data.contact_point(), '__contact_normal__': interaction_data.contact_normal() }) - ir.visit(tree) - if sim._generate_whole_program: - if pre_step: - sim.build_pre_step_module_with_statements(skip_first=skip_first, profile=True) - else: - sim.build_module_with_statements(skip_first=skip_first, profile=True) - else: - sim.build_user_defined_function() - -def setup(sim, func, symbols={}): - src = inspect.getsource(func) - tree = ast.parse(src, mode='exec') - - # Fetch function info - info = FetchParticleFuncInfo() - info.visit(tree) - params = info.params() - nparams = info.nparams() - - # Compute functions must have parameters - assert nparams == 1, "Number of parameters from setup functions must be one!" + if compute_globals: + # If compute_globals is enabled, global interactions and reductions become + # part of the same module as local interactions + global_reduction = GlobalReduction(sim, func.__name__, particle_interaction) + + SortGlobals(global_reduction) # Sort global bodies with respect to their uid + PackGlobals(global_reduction) # Pack reduction properties in uid order in an intermediate buffer + ResetReductionProps(global_reduction) # Reset reduction properties to be prepared for local reduction + + # Compute local contirbutions on global bodies + global_local_interactions = GlobalLocalInteraction(sim, func.__name__, nparams) + for interaction_data in global_local_interactions: + ir = BuildParticleIR(sim, symbols, parameters) + ir.add_symbols({ + params[0]: interaction_data.i(), + params[1]: interaction_data.j(), + '__i__': interaction_data.i(), + '__j__': interaction_data.j(), + '__delta__': interaction_data.delta(), + '__squared_distance__': interaction_data.squared_distance(), + '__penetration_depth__': interaction_data.penetration_depth(), + '__contact_point__': interaction_data.contact_point(), + '__contact_normal__': interaction_data.contact_normal() + }) + ir.visit(tree) - # Convert literal symbols - symbols = {symbol: Lit.cvt(sim, value) for symbol, value in symbols.items()} - sim.init_block() - sim.module_name(func.__name__) + PackGlobals(global_reduction, False) # Pack local contributions in reduction buffer in uid order + ReduceGlobals(global_reduction) # Inplace reduce local contributions over global bodies in the reduction buffer + UnpackGlobals(global_reduction) # Add the reduced properties with the intermediate buffer and unpack into global bodies - for i in ParticleFor(sim): - ir = BuildParticleIR(sim, symbols) - ir.add_symbols({params[0]: i}) - ir.visit(tree) + # Compute global-global interactions + global_global_interactions = GlobalGlobalInteraction(sim, func.__name__, nparams) + for interaction_data in global_global_interactions: + ir = BuildParticleIR(sim, symbols, parameters) + ir.add_symbols({ + params[0]: interaction_data.i(), + params[1]: interaction_data.j(), + '__i__': interaction_data.i(), + '__j__': interaction_data.j(), + '__delta__': interaction_data.delta(), + '__squared_distance__': interaction_data.squared_distance(), + '__penetration_depth__': interaction_data.penetration_depth(), + '__contact_point__': interaction_data.contact_point(), + '__contact_normal__': interaction_data.contact_normal() + }) + ir.visit(tree) - if sim._generate_whole_program: - sim.build_setup_module_with_statements() - else: - sim.build_user_defined_function() + + + # User defined functions are wrapped inside seperate interface modules here. + # The udf's have the same name as their interface module but they get implemented in the pairs::internal scope. + sim.build_interface_module_with_statements(run_on_device) diff --git a/src/pairs/mapping/keywords.py b/src/pairs/mapping/keywords.py index 51c255de3767f567182afa8dc5616e5f85151604..f3959e0b1c746b9a82f6df072927cc3852c9b13f 100644 --- a/src/pairs/mapping/keywords.py +++ b/src/pairs/mapping/keywords.py @@ -13,7 +13,7 @@ from pairs.ir.types import Types from pairs.ir.print import Print from pairs.ir.vectors import Vector, ZeroVector from pairs.sim.shapes import Shapes - +from pairs.sim.flags import Flags class Keywords: def __init__(self, sim): @@ -47,12 +47,42 @@ class Keywords: assert particle_id.type() == Types.Int32, "Particle ID must be an integer." return ScalarOp.cmp(self.sim.particle_shape[particle_id], Shapes.Sphere) + def keyword_is_box(self, args): + assert len(args) == 1, "is_box() keyword requires one parameter." + particle_id = args[0] + assert particle_id.type() == Types.Int32, "Particle ID must be an integer." + return ScalarOp.cmp(self.sim.particle_shape[particle_id], Shapes.Box) + def keyword_is_halfspace(self, args): assert len(args) == 1, "is_sphere() keyword requires one parameter." particle_id = args[0] assert particle_id.type() == Types.Int32, "Particle ID must be an integer." return ScalarOp.cmp(self.sim.particle_shape[particle_id], Shapes.Halfspace) + def keyword_is_infinite(self, args): + assert len(args) == 1, "is_infinite() keyword requires one parameter." + particle_id = args[0] + assert particle_id.type() == Types.Int32, "Particle ID must be an integer." + return self.sim.particle_flags[particle_id] & Flags.Infinite + + def keyword_is_ghost(self, args): + assert len(args) == 1, "is_ghost() keyword requires one parameter." + particle_id = args[0] + assert particle_id.type() == Types.Int32, "Particle ID must be an integer." + return self.sim.particle_flags[particle_id] & Flags.Ghost + + def keyword_is_fixed(self, args): + assert len(args) == 1, "is_fixed() keyword requires one parameter." + particle_id = args[0] + assert particle_id.type() == Types.Int32, "Particle ID must be an integer." + return self.sim.particle_flags[particle_id] & Flags.Fixed + + def keyword_is_global(self, args): + assert len(args) == 1, "is_global() keyword requires one parameter." + particle_id = args[0] + assert particle_id.type() == Types.Int32, "Particle ID must be an integer." + return self.sim.particle_flags[particle_id] & Flags.Global + def keyword_select(self, args): assert len(args) == 3, "select() keyword requires three parameters!" return Select(self.sim, args[0], args[1], args[2]) @@ -125,6 +155,10 @@ class Keywords: assert len(args) == 0, "zero_vector() keyword requires no parameter." return ZeroVector(self.sim) + def keyword_vector(self, args): + assert len(args) == self.sim.ndims(), "vector()" + return Vector(self.sim, [args[i] for i in range(self.sim.ndims())]) + def keyword_transposed(self, args): assert len(args) == 1, "transposed() keyword requires one parameter." matrix = args[0] @@ -151,13 +185,18 @@ class Keywords: inv_det * ((matrix[3] * matrix[7]) - (matrix[4] * matrix[6])), inv_det * ((matrix[6] * matrix[1]) - (matrix[7] * matrix[0])), inv_det * ((matrix[0] * matrix[4]) - (matrix[1] * matrix[3])) ]) - + def keyword_diagonal_matrix(self, args): - assert len(args) == 1, "diagonal_matrix() keyword requires one parameter!" - value = args[0] - nelems = Types.number_of_elements(self.sim, Types.Matrix) - return Matrix(self.sim, [value if i % (self.sim.ndims() + 1) == 0 else 0.0 \ - for i in range(nelems)]) + assert len(args) == 1 or len(args) == self.sim.ndims(), f"diagonal_matrix() keyword requires 1 or {self.sim.ndims()} parameters!" + if len(args) == 1: + value = args[0] + nelems = Types.number_of_elements(self.sim, Types.Matrix) + return Matrix(self.sim, [value if i % (self.sim.ndims() + 1) == 0 else 0.0 \ + for i in range(nelems)]) + elif len(args) == self.sim.ndims(): + nelems = Types.number_of_elements(self.sim, Types.Matrix) + return Matrix(self.sim, [args[i % self.sim.ndims()] if i % (self.sim.ndims() + 1) == 0 else 0.0 \ + for i in range(nelems)]) def keyword_matrix_multiplication(self, args): assert len(args) == 2, "matrix_multiplication() keyword requires two parameters!" diff --git a/src/pairs/sim/cell_lists.py b/src/pairs/sim/cell_lists.py index 4038f48fb628f6c00c55c5cf535b43e8ce1a9af3..850198494d377fc09725b287efc0fd838049d17c 100644 --- a/src/pairs/sim/cell_lists.py +++ b/src/pairs/sim/cell_lists.py @@ -27,7 +27,6 @@ class CellLists: self.spacing = spacing if isinstance(spacing, list) else [spacing for d in range(sim.ndims())] self.runtime_spacing = False else: - assert self.sim._generate_whole_program==False, "Cell spacing needs to be defined when generating whole program." self.spacing = self.sim.add_array('spacing', self.sim.ndims(), Types.Real) self.runtime_spacing = True @@ -35,7 +34,6 @@ class CellLists: self.cutoff_radius = cutoff_radius self.runtime_cutoff_radius = False else: - assert self.sim._generate_whole_program==False, "cutoff_radius needs to be defined when generating whole program." self.cutoff_radius = self.sim.add_var('cutoff_radius', Types.Real) self.runtime_cutoff_radius = True @@ -132,7 +130,7 @@ class BuildCellLists(Lowerable): for i in ParticleFor(self.sim, local_only=False): flat_index = self.sim.add_temp_var(0) - for _ in Filter(self.sim, ASTTerm.not_op(particle_flags[i] & Flags.Infinite)): + for _ in Filter(self.sim, ASTTerm.not_op(particle_flags[i] & (Flags.Infinite | Flags.Global))): cell_index = [ Cast.int(self.sim, (positions[i][dim] - (dom_part.min(dim) - spacing[dim])) / spacing[dim]) \ diff --git a/src/pairs/sim/copper_fcc_lattice.py b/src/pairs/sim/copper_fcc_lattice.py deleted file mode 100644 index ba966a5f91970332a041d1595c7ccdbccf70d64e..0000000000000000000000000000000000000000 --- a/src/pairs/sim/copper_fcc_lattice.py +++ /dev/null @@ -1,34 +0,0 @@ -from pairs.ir.assign import Assign -from pairs.ir.block import pairs_inline -from pairs.ir.functions import Call_Int, Call_Void -from pairs.ir.particle_attributes import ParticleAttributeList -from pairs.ir.types import Types -from pairs.sim.grid import Grid3D -from pairs.sim.lowerable import Lowerable - - -class CopperFCCLattice(Lowerable): - def __init__(self, sim, nx, ny, nz, rho, temperature, ntypes): - super().__init__(sim) - self._nx = nx - self._ny = ny - self._nz = nz - self._rho = rho - self._temperature = temperature - self._ntypes = ntypes - lattice = pow((4.0 / rho), (1.0 / 3.0)) - self._xprd = nx * lattice - self._yprd = ny * lattice - self._zprd = nz * lattice - sim.set_domain([0.0, 0.0, 0.0, self._xprd, self._yprd, self._zprd]) - - @pairs_inline - def lower(self): - Assign(self.sim, self.sim.nlocal, - Call_Int(self.sim, "pairs::copper_fcc_lattice", - [self._nx, self._ny, self._nz, - self._xprd, self._yprd, self._zprd, - self._rho, self._ntypes])) - - Call_Void(self.sim, "pairs::adjust_thermo", - [self.sim.nlocal, self._xprd, self._yprd, self._zprd, self._temperature]) diff --git a/src/pairs/sim/dem_sc_grid.py b/src/pairs/sim/dem_sc_grid.py deleted file mode 100644 index c7ef027bf8308d748f139a06afc0953ac649b650..0000000000000000000000000000000000000000 --- a/src/pairs/sim/dem_sc_grid.py +++ /dev/null @@ -1,29 +0,0 @@ -from pairs.ir.assign import Assign -from pairs.ir.block import pairs_inline -from pairs.ir.functions import Call_Int -from pairs.sim.lowerable import Lowerable - - -class DEMSCGrid(Lowerable): - def __init__( - self, sim, xmax, ymax, zmax, spacing, diameter, min_diameter, max_diameter, initial_velocity, particle_density, ntypes): - - super().__init__(sim) - self._xmax = xmax - self._ymax = ymax - self._zmax = zmax - self._spacing = spacing - self._diameter = diameter - self._min_diameter = min_diameter - self._max_diameter = max_diameter - self._initial_velocity = initial_velocity - self._particle_density = particle_density - self._ntypes = ntypes - #sim.set_domain([0.0, 0.0, 0.0, self._xprd, self._yprd, self._zprd]) - - @pairs_inline - def lower(self): - Assign(self.sim, self.sim.nlocal, - Call_Int(self.sim, "pairs::dem_sc_grid", - [self._xmax, self._ymax, self._zmax, self._spacing, self._diameter, self._min_diameter, self._max_diameter, - self._initial_velocity, self._particle_density, self._ntypes])) diff --git a/src/pairs/sim/domain.py b/src/pairs/sim/domain.py index 2560f1a8759727a4d7c0167da12955a88c67aa63..dfa73de79e07c2deb4476c3ae4e4f8902c685b63 100644 --- a/src/pairs/sim/domain.py +++ b/src/pairs/sim/domain.py @@ -1,14 +1,6 @@ from pairs.ir.block import pairs_inline from pairs.sim.lowerable import Lowerable -class InitializeDomain(Lowerable): - def __init__(self, sim): - super().__init__(sim) - - @pairs_inline - def lower(self): - self.sim.domain_partitioning().initialize() - class UpdateDomain(Lowerable): def __init__(self, sim): super().__init__(sim) diff --git a/src/pairs/sim/domain_partitioning.py b/src/pairs/sim/domain_partitioning.py index 485e1a6cbb8c8fe54dabdadd7efd0e7368c3d23b..f90f61f5fe7a35bb4e55db116680b383314e7a17 100644 --- a/src/pairs/sim/domain_partitioning.py +++ b/src/pairs/sim/domain_partitioning.py @@ -11,8 +11,7 @@ from pairs.sim.grid import MutableGrid from pairs.ir.device import CopyArray from pairs.ir.contexts import Contexts from pairs.ir.actions import Actions -from pairs.sim.load_balancing_algorithms import LoadBalancingAlgorithms -from pairs.ir.print import PrintCode + class DimensionRanges: def __init__(self, sim): self.sim = sim @@ -45,10 +44,6 @@ class DimensionRanges: def reduce_sum_step_indexes(self, step, array): return sum([array[i] for i in self.step_indexes(step)]) - def initialize(self): - grid_array = [self.sim.grid.min(d) for d in range(self.sim.ndims())] + [self.sim.grid.max(d) for d in range(self.sim.ndims())] - Call_Void(self.sim, "pairs_runtime->initDomain", grid_array) - def update(self): Call_Void(self.sim, "pairs_runtime->updateDomain", []) Assign(self.sim, self.rank, Call_Int(self.sim, "pairs_runtime->getDomainPartitioner()->getRank", [])) @@ -140,48 +135,33 @@ class BlockForest: return self.reduce_step - def initialize(self): - grid_array = [self.sim.grid.min(d) for d in range(self.sim.ndims())] + [self.sim.grid.max(d) for d in range(self.sim.ndims())] - - Call_Void(self.sim, "pairs_runtime->initDomain", - grid_array + self.sim._pbc + ([True] if self.load_balancer is not None else [])) - - if self.load_balancer is not None: - PrintCode(self.sim, "pairs_runtime->getDomainPartitioner()->initWorkloadBalancer" - f"({LoadBalancingAlgorithms.c_keyword(self.load_balancer)}, {self.regrid_min}, {self.regrid_max});") - - # Call_Void(self.sim, "pairs_runtime->getDomainPartitioner()->initWorkloadBalancer", - # [self.load_balancer, self.regrid_min, self.regrid_max]) - def update(self): Call_Void(self.sim, "pairs_runtime->updateDomain", []) Assign(self.sim, self.rank, Call_Int(self.sim, "pairs_runtime->getDomainPartitioner()->getRank", [])) Assign(self.sim, self.nranks, Call_Int(self.sim, "pairs_runtime->getNumberOfNeighborRanks", [])) + Assign(self.sim, self.ntotal_aabbs, Call_Int(self.sim, "pairs_runtime->getNumberOfNeighborAABBs", [])) - for _ in Filter(self.sim, ScalarOp.neq(self.nranks, 0)): - Assign(self.sim, self.ntotal_aabbs, Call_Int(self.sim, "pairs_runtime->getNumberOfNeighborAABBs", [])) - - for _ in Filter(self.sim, self.nranks_capacity < self.nranks): - Assign(self.sim, self.nranks_capacity, self.nranks + 10) - self.ranks.realloc() - self.naabbs.realloc() - self.aabb_offsets.realloc() + for _ in Filter(self.sim, self.nranks_capacity < self.nranks): + Assign(self.sim, self.nranks_capacity, self.nranks + 10) + self.ranks.realloc() + self.naabbs.realloc() + self.aabb_offsets.realloc() - for _ in Filter(self.sim, self.aabb_capacity < self.ntotal_aabbs): - Assign(self.sim, self.aabb_capacity, self.ntotal_aabbs + 20) - self.aabbs.realloc() - - CopyArray(self.sim, self.ranks, Contexts.Host, Actions.WriteOnly, self.nranks) - CopyArray(self.sim, self.naabbs, Contexts.Host, Actions.WriteOnly, self.nranks) - CopyArray(self.sim, self.aabb_offsets, Contexts.Host, Actions.WriteOnly, self.nranks) - CopyArray(self.sim, self.aabbs, Contexts.Host, Actions.WriteOnly, self.ntotal_aabbs * 6) - CopyArray(self.sim, self.subdom, Contexts.Host, Actions.WriteOnly) - - Call_Void(self.sim, "pairs_runtime->copyRuntimeArray", ['ranks', self.ranks, self.nranks]) - Call_Void(self.sim, "pairs_runtime->copyRuntimeArray", ['naabbs', self.naabbs, self.nranks]) - Call_Void(self.sim, "pairs_runtime->copyRuntimeArray", ['aabb_offsets', self.aabb_offsets, self.nranks]) - Call_Void(self.sim, "pairs_runtime->copyRuntimeArray", ['aabbs', self.aabbs, self.ntotal_aabbs * 6]) - Call_Void(self.sim, "pairs_runtime->copyRuntimeArray", ['subdom', self.subdom, self.sim.ndims() * 2]) + for _ in Filter(self.sim, self.aabb_capacity < self.ntotal_aabbs): + Assign(self.sim, self.aabb_capacity, self.ntotal_aabbs + 20) + self.aabbs.realloc() + + CopyArray(self.sim, self.ranks, Contexts.Host, Actions.WriteOnly, self.nranks) + CopyArray(self.sim, self.naabbs, Contexts.Host, Actions.WriteOnly, self.nranks) + CopyArray(self.sim, self.aabb_offsets, Contexts.Host, Actions.WriteOnly, self.nranks) + CopyArray(self.sim, self.aabbs, Contexts.Host, Actions.WriteOnly, self.ntotal_aabbs * 6) + CopyArray(self.sim, self.subdom, Contexts.Host, Actions.WriteOnly) + + Call_Void(self.sim, "pairs_runtime->copyRuntimeArray", ['ranks', self.ranks, self.nranks]) + Call_Void(self.sim, "pairs_runtime->copyRuntimeArray", ['naabbs', self.naabbs, self.nranks]) + Call_Void(self.sim, "pairs_runtime->copyRuntimeArray", ['aabb_offsets', self.aabb_offsets, self.nranks]) + Call_Void(self.sim, "pairs_runtime->copyRuntimeArray", ['aabbs', self.aabbs, self.ntotal_aabbs * 6]) + Call_Void(self.sim, "pairs_runtime->copyRuntimeArray", ['subdom', self.subdom, self.sim.ndims() * 2]) if isinstance(self.sim.grid, MutableGrid): for d in range(self.sim.dims): @@ -229,7 +209,7 @@ class BlockForest: for _ in Filter(self.sim, full_cond): yield i, r, self.ranks[r], pbc_shifts - for _ in Filter(self.sim, ScalarOp.cmp(self.ranks[r] , self.rank)): # if my neighbor is me (cuz I'm the only rank in a dimension that has pbc) + for _ in Filter(self.sim, ScalarOp.cmp(self.ranks[r] , self.rank)): # if my neighbor is me pbc_shifts = [] isghost = Lit(self.sim, 0) diff --git a/src/pairs/sim/global_interaction.py b/src/pairs/sim/global_interaction.py new file mode 100644 index 0000000000000000000000000000000000000000..dca2a775ee301def37a7d334fd9232d878aac42b --- /dev/null +++ b/src/pairs/sim/global_interaction.py @@ -0,0 +1,252 @@ + +from pairs.ir.assign import Assign +from pairs.ir.scalars import ScalarOp +from pairs.ir.block import pairs_inline, pairs_device_block, pairs_host_block +from pairs.ir.branches import Filter +from pairs.ir.loops import For, ParticleFor +from pairs.ir.types import Types +from pairs.ir.device import CopyArray +from pairs.ir.contexts import Contexts +from pairs.ir.actions import Actions +from pairs.ir.sizeof import Sizeof +from pairs.ir.functions import Call_Void +from pairs.ir.cast import Cast +from pairs.sim.flags import Flags +from pairs.sim.lowerable import Lowerable +from pairs.sim.interaction import ParticleInteraction + + +class GlobalLocalInteraction(ParticleInteraction): + def __init__(self, sim, module_name, nbody, cutoff_radius=None, use_cell_lists=False): + super().__init__(sim, module_name, nbody, cutoff_radius, use_cell_lists) + + @pairs_device_block + def lower(self): + self.sim.module_name(f"{self.module_name}_global_local_interactions") + if self.sim._target.is_gpu(): + first_cell_bytes = self.sim.add_temp_var(0) + Assign(self.sim, first_cell_bytes, self.cell_lists.cell_capacity * Sizeof(self.sim, Types.Int32)) + CopyArray(self.sim, self.cell_lists.cell_sizes, Contexts.Host, Actions.ReadOnly, first_cell_bytes) + + for ishape in range(self.maxs): + if self.include_shape(ishape): + # Loop over the global cell + for p in For(self.sim, 0, self.cell_lists.cell_sizes[0]): + i = self.cell_lists.cell_particles[0][p] + # TODO: Skip if the bounding box of the global body doesn't intersect the subdom of this rank + for _ in Filter(self.sim, ScalarOp.and_op( + ScalarOp.cmp(self.sim.particle_shape[i], self.sim.get_shape_id(ishape)), + self.sim.particle_flags[i] & (Flags.Infinite | Flags.Global))): + for jshape in range(self.maxs): + if self.include_interaction(ishape, jshape): + # Globals are presenet in all ranks so they should not interact with ghosts + # TODO: Make this loop the kernel candiate and reduce forces on the global body + for j in ParticleFor(self.sim): + # Here we make make sure not to interact with other global bodies, otherwise + # their contributions will get reduced again over all ranks + for _ in Filter(self.sim, ScalarOp.and_op( + ScalarOp.cmp(self.sim.particle_shape[j], self.sim.get_shape_id(jshape)), + ScalarOp.not_op(self.sim.particle_flags[j] & (Flags.Infinite | Flags.Global)))): + for _ in Filter(self.sim, ScalarOp.neq(i, j)): + self.compute_interaction(i, j, ishape, jshape) + + +class GlobalGlobalInteraction(ParticleInteraction): + def __init__(self, sim, module_name, nbody, cutoff_radius=None, use_cell_lists=False): + super().__init__(sim, module_name, nbody, cutoff_radius, use_cell_lists) + + @pairs_device_block + def lower(self): + self.sim.module_name(f"{self.module_name}_global_global_interactions") + if self.sim._target.is_gpu(): + first_cell_bytes = self.sim.add_temp_var(0) + Assign(self.sim, first_cell_bytes, self.cell_lists.cell_capacity * Sizeof(self.sim, Types.Int32)) + CopyArray(self.sim, self.cell_lists.cell_sizes, Contexts.Host, Actions.ReadOnly, first_cell_bytes) + + for ishape in range(self.maxs): + if self.include_shape(ishape): + # Loop over the global cell + for p in For(self.sim, 0, self.cell_lists.cell_sizes[0]): + i = self.cell_lists.cell_particles[0][p] + for _ in Filter(self.sim, ScalarOp.and_op( + ScalarOp.cmp(self.sim.particle_shape[i], self.sim.get_shape_id(ishape)), + self.sim.particle_flags[i] & (Flags.Infinite | Flags.Global))): + for jshape in range(self.maxs): + if self.include_interaction(ishape, jshape): + # Loop over the global cell + for q in For(self.sim, 0, self.cell_lists.cell_sizes[0]): + j = self.cell_lists.cell_particles[0][q] + # Here we only compute interactions with other global bodies + for _ in Filter(self.sim, ScalarOp.and_op( + ScalarOp.cmp(self.sim.particle_shape[j], self.sim.get_shape_id(jshape)), + (self.sim.particle_flags[j] & (Flags.Infinite | Flags.Global)))): + for _ in Filter(self.sim, ScalarOp.neq(i, j)): + self.compute_interaction(i, j, ishape, jshape) + +class GlobalReduction: + def __init__(self, sim, module_name, particle_interaction): + self.sim = sim + self.module_name = module_name + self.particle_interaction = particle_interaction + self.nglobal_red = sim.add_var('nglobal_red', Types.Int32) # Number of global particles that need reduction + self.nglobal_capacity = sim.add_var('nglobal_capacity', Types.Int32, 64) + self.global_elem_capacity = sim.add_var('global_elem_capacity', Types.Int32, 100) + self.red_buffer = sim.add_array('red_buffer', [self.nglobal_capacity, self.global_elem_capacity], Types.Real, arr_sync=False) + self.intermediate_buffer = sim.add_array('intermediate_buffer', [self.nglobal_capacity, self.global_elem_capacity], Types.Real, arr_sync=False) + self.sorted_idx = sim.add_array('sorted_idx', [self.nglobal_capacity], Types.Int32, arr_sync=False) + self.unsorted_idx = sim.add_array('unsorted_idx', [self.nglobal_capacity], Types.Int32, arr_sync=False) + self.removed_idx = sim.add_array('removed_idx', [self.nglobal_capacity], Types.Boolean, arr_sync=False) + + self.red_props = set() + for ishape in range(self.sim.max_shapes()): + for jshape in range(self.sim.max_shapes()): + if self.particle_interaction.include_interaction(ishape, jshape): + for app in self.particle_interaction.apply_list[ishape*self.sim.max_shapes() + jshape]: + self.red_props.add(app.prop()) + + def global_particles(self): + for p in For(self.sim, 0, self.sim.cell_lists.cell_sizes[0]): + i = self.sim.cell_lists.cell_particles[0][p] + for ishape in range(self.sim.max_shapes()): + if self.particle_interaction.include_shape(ishape): + for _ in Filter(self.sim, ScalarOp.and_op( + ScalarOp.cmp(self.sim.particle_shape[i], self.sim.get_shape_id(ishape)), + self.sim.particle_flags[i] & (Flags.Infinite | Flags.Global))): + yield i + + def get_elems_per_particle(self): + return sum([Types.number_of_elements(self.sim, p.type()) for p in self.red_props]) + + +class SortGlobals(Lowerable): + def __init__(self, global_reduction): + super().__init__(global_reduction.sim) + self.global_reduction = global_reduction + self.sim.add_statement(self) + + @pairs_host_block + def lower(self): + self.sim.module_name(f"{self.global_reduction.module_name}_sort_globals") + nglobal_capacity = self.global_reduction.nglobal_capacity + nglobal_red = self.global_reduction.nglobal_red + unsorted_idx = self.global_reduction.unsorted_idx + sorted_idx = self.global_reduction.sorted_idx + removed_idx = self.global_reduction.removed_idx + uid = self.sim.particle_uid + self.sim.check_resize(nglobal_capacity, nglobal_red) + + Assign(self.sim, nglobal_red, 0) + for i in self.global_reduction.global_particles(): + Assign(self.sim, unsorted_idx[nglobal_red], i) + Assign(self.sim, sorted_idx[nglobal_red], 0) + Assign(self.sim, removed_idx[nglobal_red], 0) + Assign(self.sim, nglobal_red, nglobal_red +1) + + min_uid = self.sim.add_temp_var(0, Types.UInt64) + min_idx = self.sim.add_temp_var(0) + + # Here we sort indices of global bodies with respect to their uid's. + # The sorted uid's will be in identical order on all ranks. This ensures that the + # reduced properties are mapped correctly to each global body during inplace reduction. + for i in For(self.sim, 0, nglobal_red): + Assign(self.sim, min_uid, -1) # TODO: Lit max: UINT64_MAX + Assign(self.sim, min_idx, 0) + for j in For(self.sim, 0, nglobal_red): + for _ in Filter(self.sim, ScalarOp.and_op(uid[unsorted_idx[j]] < min_uid, + ScalarOp.not_op(removed_idx[j]))): + Assign(self.sim, min_uid, uid[unsorted_idx[j]]) + Assign(self.sim, min_idx, j) + + Assign(self.sim, sorted_idx[i], unsorted_idx[min_idx]) + Assign(self.sim, removed_idx[min_idx], 1) + + +class PackGlobals(Lowerable): + def __init__(self, global_reduction, save_state=True): + super().__init__(global_reduction.sim) + self.global_reduction = global_reduction + self.save_state = save_state + self.buffer = global_reduction.intermediate_buffer if save_state else global_reduction.red_buffer + self.sim.add_statement(self) + + @pairs_device_block + def lower(self): + self.sim.module_name(f"{self.global_reduction.module_name}_pack_globals_{'intermediate' if self.save_state else 'reduce'}") + nglobal_red = self.global_reduction.nglobal_red + sorted_idx = self.global_reduction.sorted_idx + nelems_per_particle = self.global_reduction.get_elems_per_particle() + self.buffer.set_stride(1, nelems_per_particle) + + for buffer_idx in For(self.sim, 0, nglobal_red): + i = sorted_idx[buffer_idx] + p_offset = 0 + for p in self.global_reduction.red_props: + if not Types.is_scalar(p.type()): + nelems = Types.number_of_elements(self.sim, p.type()) + for e in range(nelems): + Assign(self.sim, self.buffer[buffer_idx][p_offset + e], p[i][e]) + + p_offset += nelems + else: + cast_fn = lambda x: Cast(self.sim, x, Types.Real) if p.type() != Types.Real else x + Assign(self.sim, self.buffer[buffer_idx][p_offset], cast_fn(p[i])) + p_offset += 1 + + +class ResetReductionProps(Lowerable): + def __init__(self, global_reduction): + super().__init__(global_reduction.sim) + self.global_reduction = global_reduction + self.sim.add_statement(self) + + @pairs_device_block + def lower(self): + self.sim.module_name(f"{self.global_reduction.module_name}_reset_globals") + nglobal_red = self.global_reduction.nglobal_red + sorted_idx = self.global_reduction.sorted_idx + + for buffer_idx in For(self.sim, 0, nglobal_red): + i = sorted_idx[buffer_idx] + for p in self.global_reduction.red_props: + Assign(self.sim, p[i], 0.0) + +class ReduceGlobals(Lowerable): + def __init__(self, global_reduction): + super().__init__(global_reduction.sim) + self.global_reduction = global_reduction + self.sim.add_statement(self) + + @pairs_inline + def lower(self): + nelems_total = self.global_reduction.nglobal_red * self.global_reduction.get_elems_per_particle() + Call_Void( self.sim, "pairs_runtime->allReduceInplaceSum", [self.global_reduction.red_buffer, nelems_total]) + + +class UnpackGlobals(Lowerable): + def __init__(self, global_reduction): + super().__init__(global_reduction.sim) + self.global_reduction = global_reduction + self.sim.add_statement(self) + + @pairs_device_block + def lower(self): + self.sim.module_name(f"{self.global_reduction.module_name}_unpack_globals") + nglobal_red = self.global_reduction.nglobal_red + sorted_idx = self.global_reduction.sorted_idx + red_buffer = self.global_reduction.red_buffer + intermediate_buffer = self.global_reduction.intermediate_buffer + + for buffer_idx in For(self.sim, 0, nglobal_red): + i = sorted_idx[buffer_idx] + p_offset = 0 + for p in self.global_reduction.red_props: + if not Types.is_scalar(p.type()): + nelems = Types.number_of_elements(self.sim, p.type()) + for e in range(nelems): + Assign(self.sim, p[i][e], red_buffer[buffer_idx][p_offset + e] + intermediate_buffer[buffer_idx][p_offset + e]) + + p_offset += nelems + else: + cast_fn = lambda x: Cast(self.sim, x, p.type()) if p.type() != Types.Real else x + Assign(self.sim, p[i], cast_fn(red_buffer[buffer_idx][p_offset] + intermediate_buffer[buffer_idx][p_offset + e])) + p_offset += 1 \ No newline at end of file diff --git a/src/pairs/sim/instrumentation.py b/src/pairs/sim/instrumentation.py index 7281f13fc9f848038c4df34248d65db7e8e13c8c..6e901986733b92d76aa56f40af8682cf6107685a 100644 --- a/src/pairs/sim/instrumentation.py +++ b/src/pairs/sim/instrumentation.py @@ -1,6 +1,7 @@ from pairs.ir.block import pairs_inline from pairs.ir.functions import Call_Void from pairs.ir.timers import Timers +from pairs.ir.types import Types from pairs.sim.lowerable import FinalLowerable class RegisterTimers(FinalLowerable): @@ -12,9 +13,14 @@ class RegisterTimers(FinalLowerable): for t in range(Timers.Offset): Call_Void(self.sim, "pairs::register_timer", [t, Timers.name(t)]) - for m in self.sim.module_list: - if m.name != 'main' and m.name != 'initialize': - Call_Void(self.sim, "pairs::register_timer", [m.module_id + Timers.Offset, m.name]) + # Interface modules + for m in self.sim.interface_modules(): + if m.name != 'initialize' and m.name != 'end' and m.return_type==Types.Void: + Call_Void(self.sim, "pairs::register_timer", [m.module_id + Timers.Offset, "INTERFACE_MODULES::" + m.name]) + + # Internal modules + for m in self.sim.modules(): + Call_Void(self.sim, "pairs::register_timer", [m.module_id + Timers.Offset, "INTERNAL_MODULES::" + m.name]) class RegisterMarkers(FinalLowerable): @@ -24,6 +30,7 @@ class RegisterMarkers(FinalLowerable): @pairs_inline def lower(self): if self.sim._enable_profiler: - for m in self.sim.module_list: - if m.name != 'main' and m.name != 'initialize' and m.must_profile(): + # Only internal modules are profiled + for m in self.sim.modules(): + if m.must_profile(): Call_Void(self.sim, "LIKWID_MARKER_REGISTER", [m.name]) diff --git a/src/pairs/sim/interaction.py b/src/pairs/sim/interaction.py index 34bfa58dfb0d15b97b0ec87bff06ec8ef9d1f729..901c2d4b7f1930b9841b25e0f57e5e143fea77e0 100644 --- a/src/pairs/sim/interaction.py +++ b/src/pairs/sim/interaction.py @@ -1,13 +1,14 @@ from pairs.ir.assign import Assign from pairs.ir.ast_term import ASTTerm from pairs.ir.scalars import ScalarOp -from pairs.ir.block import Block, pairs_inline -from pairs.ir.branches import Filter +from pairs.ir.block import Block, pairs_block +from pairs.ir.branches import Filter, Branch from pairs.ir.loops import For, ParticleFor -from pairs.ir.math import Sqrt +from pairs.ir.math import Sqrt, Abs, Min, Sign from pairs.ir.select import Select from pairs.ir.types import Types from pairs.ir.vectors import Vector +from pairs.ir.print import Print from pairs.sim.flags import Flags from pairs.sim.lowerable import Lowerable from pairs.sim.shapes import Shapes @@ -46,7 +47,8 @@ class NeighborFor: self.particle = particle self.cell_lists = cell_lists self.neighbor_lists = neighbor_lists - self.shapes = range(sim.max_shapes()) if shapes is None else shapes + # self.shapes = range(sim.max_shapes()) if shapes is None else shapes + self.shapes = [shapes] def __str__(self): return f"NeighborFor<{self.particle}>" @@ -131,6 +133,7 @@ class NeighborFor: class InteractionData: def __init__(self, sim, shape): + self.sim = sim self._i = sim.add_symbol(Types.Int32) self._j = sim.add_symbol(Types.Int32) self._delta = sim.add_symbol(Types.Vector) @@ -139,6 +142,8 @@ class InteractionData: self._contact_point = sim.add_symbol(Types.Vector) self._contact_normal = sim.add_symbol(Types.Vector) self._shape = shape + self.contact_threshold = 0.0 + self.cutoff_condition = None def i(self): return self._i @@ -163,133 +168,274 @@ class InteractionData: def shape(self): return self._shape - + + def pointmass_pointmass(self, i, j, cutoff_radius): + position = self.sim.position() + delta = position[i] - position[j] + squared_distance = delta.x() * delta.x() + \ + delta.y() * delta.y() + \ + delta.z() * delta.z() + separation_dist = cutoff_radius * cutoff_radius + self.cutoff_condition = squared_distance < separation_dist + + self.delta().assign(delta) + self.squared_distance().assign(squared_distance) + + def sphere_halfspace(self, i, j): + position = self.sim.position() + radius = self.sim.property('radius') + normal = self.sim.property('normal') + + d = normal[j][0] * position[j][0] + \ + normal[j][1] * position[j][1] + \ + normal[j][2] * position[j][2] + + k = normal[j][0] * position[i][0] + \ + normal[j][1] * position[i][1] + \ + normal[j][2] * position[i][2] + + penetration_depth = k - radius[i] - d + self.cutoff_condition = penetration_depth < self.contact_threshold + tmp = radius[i] + penetration_depth + contact_normal = normal[j] + contact_point = position[i] - Vector(self.sim, [tmp, tmp, tmp]) * normal[j] + + self.penetration_depth().assign(penetration_depth) + self.contact_point().assign(contact_point) + self.contact_normal().assign(contact_normal) + + def sphere_sphere(self, i, j): + position = self.sim.position() + radius = self.sim.property('radius') + delta = position[i] - position[j] + squared_distance = delta.x() * delta.x() + \ + delta.y() * delta.y() + \ + delta.z() * delta.z() + separation_dist = radius[i] + radius[j] + self.contact_threshold + self.cutoff_condition = squared_distance < separation_dist * separation_dist + distance = Sqrt(self.sim, squared_distance) + penetration_depth = distance - radius[i] - radius[j] + contact_normal = delta * (1.0 / distance) + k = radius[j] + 0.5 * penetration_depth + contact_point = position[j] + contact_normal * k + + self.delta().assign(delta) + self.squared_distance().assign(squared_distance) + self.penetration_depth().assign(penetration_depth) + self.contact_point().assign(contact_point) + self.contact_normal().assign(contact_normal) + + def sphere_box(self, i, j, s_relative=True): + s = i # Sphere + b = j # Box + position = self.sim.position() + edge_length = self.sim.property('edge_length') + rotation_matrix = self.sim.property('rotation_matrix') + radius = self.sim.property('radius') + + l = edge_length[b] * 0.5 + rm = rotation_matrix[b] + delta = position[s] - position[b] + + # Distance to sphere in the box coodinate: p = rm.T * delta + p0 = delta[0]*rm[0] + delta[1]*rm[3] + delta[2]*rm[6] + p1 = delta[0]*rm[1] + delta[1]*rm[4] + delta[2]*rm[7] + p2 = delta[0]*rm[2] + delta[1]*rm[5] + delta[2]*rm[8] + + # Check if the sphere is outside the box. If yes, clamp p to box bounds + outside0 = ScalarOp.or_op( p0 < -l[0], p0 > l[0]) + p0 = ScalarOp.and_op(p0 >= -l[0], p0 <= l[0]) * p0 + (p0 < -l[0])*(-l[0]) + (p0 > l[0])*l[0] + outside1 = ScalarOp.or_op( p1 < -l[1], p1 > l[1]) + p1 = ScalarOp.and_op(p1 >= -l[1], p1 <= l[1]) * p1 + (p1 < -l[1])*(-l[1]) + (p1 > l[1])*l[1] + outside2 = ScalarOp.or_op( p2 < -l[2], p2 > l[2]) + p2 = ScalarOp.and_op(p2 >= -l[2], p2 <= l[2]) * p2 + (p2 < -l[2])*(-l[2]) + (p2 > l[2])*l[2] + + self.cutoff_condition = self.sim.add_temp_var(0) + squared_distance = self.sim.add_temp_var(0.0) + penetration_depth = self.sim.add_temp_var(0.0) + contact_point = self.sim.add_temp_var([0.0, 0.0, 0.0]) + contact_normal = self.sim.add_temp_var([0.0, 0.0, 0.0]) + + for outside in Branch(self.sim, ScalarOp.or_op(ScalarOp.or_op(outside0, outside1), outside2)): + if outside: + # Transfrom p to global coordinate: q = rm * p + q = Vector(self.sim, [ rm[0]*p0 + rm[1]*p1 + rm[2]*p2, + rm[3]*p0 + rm[4]*p1 + rm[5]*p2, + rm[6]*p0 + rm[7]*p1 + rm[8]*p2]) + + # Normal away from the box + n = delta - q + Assign(self.sim, squared_distance, n[0]*n[0] + n[1]*n[1] + n[2]*n[2]) + distance = Sqrt(self.sim, squared_distance) + Assign(self.sim, penetration_depth, distance - radius[s]) + Assign(self.sim, contact_point, position[b] + q) + Assign(self.sim, contact_normal, n * (1.0 / distance)) + Assign(self.sim, self.cutoff_condition, penetration_depth < self.contact_threshold) + else: + # Print(self.sim, "Sphere", s, "(", position[s][0], position[s][1], position[s][2], ") is inside box", b) + # If sphere is inside the box find the closest face + dist0 = l[0] - Abs(self.sim, p0) + dist1 = l[1] - Abs(self.sim, p1) + dist2 = l[2] - Abs(self.sim, p2) + mindist = Min(self.sim, Min(self.sim, dist0, dist1), dist2) + face = Select(self.sim, ScalarOp.cmp(mindist, dist0), 0, + Select(self.sim,ScalarOp.cmp(mindist, dist1), 1, 2)) + + n = self.sim.add_temp_var([0.0, 0.0, 0.0]) + # In this case, the normal away from the box is an axis-aligned unit vector in the box coordinate + # Assign(self.sim, n[face], Sign(self.sim, p[face])) + # FIXME: Issue with index generation (a Select node can't be an index for VectorAccess) + # hence the 3 lines below: + Assign(self.sim, n[0], Select(self.sim, ScalarOp.cmp(face,0), Sign(self.sim, p0), 0.0)) + Assign(self.sim, n[1], Select(self.sim, ScalarOp.cmp(face,1), Sign(self.sim, p1), 0.0)) + Assign(self.sim, n[2], Select(self.sim, ScalarOp.cmp(face,2), Sign(self.sim, p2), 0.0)) + + # Transofram n to global coordinates + Assign(self.sim, contact_normal[0], rm[0]*n[0] + rm[1]*n[1] + rm[2]*n[2]) + Assign(self.sim, contact_normal[1], rm[3]*n[0] + rm[4]*n[1] + rm[5]*n[2]) + Assign(self.sim, contact_normal[2], rm[6]*n[0] + rm[7]*n[1] + rm[8]*n[2]) + + Assign(self.sim, penetration_depth, - mindist - radius[s]) + Assign(self.sim, contact_point, position[s]) + Assign(self.sim, self.cutoff_condition, penetration_depth < self.contact_threshold) + + self.delta().assign(delta) + self.squared_distance().assign(squared_distance) + self.penetration_depth().assign(penetration_depth) + self.contact_point().assign(contact_point) + # Contact normal is always computed towards the sphere + self.contact_normal().assign(contact_normal if s_relative else -contact_normal) class ParticleInteraction(Lowerable): - def __init__(self, sim, nbody, cutoff_radius, use_cell_lists=False, split_kernels=False): + def __init__(self, sim, module_name, nbody, cutoff_radius=None, use_cell_lists=False, run_on_device=True, profile=False): super().__init__(sim) + self.run_on_device = run_on_device + self.profile = profile + self.module_name = module_name self.nbody = nbody - self.cutoff_radius = cutoff_radius self.contact_threshold = 0.0 self.use_cell_lists = use_cell_lists - self.split_kernels = split_kernels - self.nkernels = sim.max_shapes() if split_kernels else 1 + self.maxs = self.sim.max_shapes() self.interactions_data = {} - self.blocks = [Block(sim, []) for _ in range(sim.max_shapes())] - self.apply_list = [set() for _ in range(self.nkernels)] + self.cutoff_radius = cutoff_radius + + if any(self.sim.get_shape_id(s)==Shapes.PointMass for s in range(self.maxs)): + assert cutoff_radius is not None + + # We reserve n*n blocks and apply_lists for n shapes present in the system, but we only use the included i-j interactions + self.blocks = [Block(sim, []) for _ in range(self.maxs * self.maxs)] + self.apply_list = [set() for _ in range(self.maxs * self.maxs)] self.active_block = None + self.cell_lists = self.sim.cell_lists def add_statement(self, stmt): self.active_block.add_statement(stmt) + # Included interactions + # ------------------------------------------------------------------- + # i \ j | Sphere | Halfspace | PointMass | Box | + # ------------------------------------------------------------------- + # Sphere | 1 | 1 | 0 | 1 | + # Halfspace | 0 | 0 | 0 | 0 | + # PointMass | 0 | 0 | 1 | 0 | + # Box | 1 | 0 | 0 | 0 | + + def include_interaction(self, ishape, jshape): + id_i = self.sim.get_shape_id(ishape) + id_j = self.sim.get_shape_id(jshape) + return (id_i == Shapes.PointMass and id_j == Shapes.PointMass) or \ + (id_i == Shapes.Sphere and id_j == Shapes.Sphere) or \ + (id_i == Shapes.Sphere and id_j == Shapes.Halfspace) or \ + (id_i == Shapes.Sphere and id_j == Shapes.Box) or \ + (id_i == Shapes.Box and id_j == Shapes.Sphere) + + # Included kernels + def include_shape(self, ishape): + id_i = self.sim.get_shape_id(ishape) + return (id_i == Shapes.PointMass) or \ + (id_i == Shapes.Sphere) or \ + (id_i == Shapes.Box) + def __iter__(self): self.sim.add_statement(self) self.sim.enter(self) # Neighbors vary across iterations - for shape in range(self.sim.max_shapes()): - apply_list_id = shape if self.split_kernels else 0 - self.sim.use_apply_list(self.apply_list[apply_list_id]) - self.active_block = self.blocks[shape] - self.interactions_data[shape] = InteractionData(self.sim, shape) - yield self.interactions_data[shape] - self.sim.release_apply_list() + for ishape in range(self.sim.max_shapes()): + for jshape in range(self.sim.max_shapes()): + if self.include_interaction(ishape, jshape): + apply_list_id = ishape*self.maxs + jshape + self.sim.use_apply_list(self.apply_list[apply_list_id]) + self.active_block = self.blocks[ishape*self.maxs + jshape] + self.interactions_data[ishape*self.maxs + jshape] = InteractionData(self.sim, ishape*self.maxs + jshape) + yield self.interactions_data[ishape*self.maxs + jshape] + self.sim.release_apply_list() self.sim.leave() self.active_block = None - @pairs_inline + def apply_reductions(self, i, ishape, jshape): + prop_reductions = {} + for app in self.apply_list[ishape*self.maxs + jshape]: + prop = app.prop() + reduction = app.reduction_variable() + if prop not in prop_reductions: + prop_reductions[prop] = reduction + else: + prop_reductions[prop] = prop_reductions[prop] + reduction + + for prop, reduction in prop_reductions.items(): + Assign(self.sim, prop[i], prop[i] + reduction) + + def compute_interaction(self, i, j, ishape, jshape): + interaction_data = self.interactions_data[ishape*self.maxs + jshape] + interaction_data.i().assign(i) + interaction_data.j().assign(j) + ishape_id = self.sim.get_shape_id(ishape) + jshape_id = self.sim.get_shape_id(jshape) + + if ishape_id == Shapes.PointMass and jshape_id == Shapes.PointMass: + interaction_data.pointmass_pointmass(i, j, self.cutoff_radius) + + if ishape_id == Shapes.Sphere and jshape_id == Shapes.Sphere: + interaction_data.sphere_sphere(i, j) + + if ishape_id == Shapes.Sphere and jshape_id == Shapes.Halfspace: + interaction_data.sphere_halfspace(i, j) + + if ishape_id == Shapes.Sphere and jshape_id == Shapes.Box: + interaction_data.sphere_box(i, j) + + if ishape_id == Shapes.Box and jshape_id == Shapes.Sphere: + interaction_data.sphere_box(j, i, False) + + # Apply reductions for this i-j interaction: + # ------------------------------------------------------------- + for app in self.apply_list[ishape*self.maxs + jshape]: + app.add_reduction_variable() + + # The i-j block is executed only if the cutoff_condition of the i-j interaction is met + self.sim.add_statement(Filter(self.sim, interaction_data.cutoff_condition, self.blocks[ishape*self.maxs + jshape])) + self.apply_reductions(i, ishape, jshape) + + @pairs_block def lower(self): + self.sim.module_name(f"{self.module_name}_local_interactions") if self.nbody == 2: - position = self.sim.position() - cell_lists = self.sim.cell_lists neighbor_lists = None if self.use_cell_lists else self.sim.neighbor_lists - - for kernel in range(self.nkernels): - for i in ParticleFor(self.sim): - for _ in Filter(self.sim, ScalarOp.cmp(self.sim.particle_flags[i] & Flags.Fixed, 0)): - for app in self.apply_list[kernel]: - app.add_reduction_variable() - - shapes = [kernel] if self.split_kernels else None - interaction = kernel - for neigh in NeighborFor(self.sim, i, cell_lists, neighbor_lists, shapes): - interaction_data = self.interactions_data[interaction] - shape = interaction_data.shape() - shape_id = self.sim.get_shape_id(shape) - j = neigh.particle_index() - - if shape_id == Shapes.PointMass: - delta = position[i] - position[j] - squared_distance = delta.x() * delta.x() + \ - delta.y() * delta.y() + \ - delta.z() * delta.z() - separation_dist = self.cutoff_radius * self.cutoff_radius - cutoff_condition = squared_distance < separation_dist - distance = Sqrt(self.sim, squared_distance) - penetration_depth = None - contact_normal = None - contact_point = None - - elif shape_id == Shapes.Sphere: - radius = self.sim.property('radius') - delta = position[i] - position[j] - squared_distance = delta.x() * delta.x() + \ - delta.y() * delta.y() + \ - delta.z() * delta.z() - separation_dist = radius[i] + radius[j] + self.contact_threshold - cutoff_condition = squared_distance < separation_dist * separation_dist - distance = Sqrt(self.sim, squared_distance) - penetration_depth = distance - radius[i] - radius[j] - contact_normal = delta * (1.0 / distance) - k = radius[j] + 0.5 * penetration_depth - contact_point = position[j] + contact_normal * k - - elif shape_id == Shapes.Halfspace: - radius = self.sim.property('radius') - normal = self.sim.property('normal') - - d = normal[j][0] * position[j][0] + \ - normal[j][1] * position[j][1] + \ - normal[j][2] * position[j][2] - - k = normal[j][0] * position[i][0] + \ - normal[j][1] * position[i][1] + \ - normal[j][2] * position[i][2] - - penetration_depth = k - radius[i] - d - cutoff_condition = penetration_depth < self.contact_threshold - tmp = radius[i] + penetration_depth - contact_normal = normal[j] - contact_point = position[i] - Vector(self.sim, [tmp, tmp, tmp]) * normal[j] - - else: - raise Exception("Invalid shape id.") - - interaction_data.i().assign(i) - interaction_data.j().assign(j) - interaction_data.delta().assign(delta) - interaction_data.squared_distance().assign(squared_distance) - interaction_data.penetration_depth().assign(penetration_depth) - interaction_data.contact_point().assign(contact_point) - interaction_data.contact_normal().assign(contact_normal) - self.sim.add_statement( - Filter(self.sim, cutoff_condition, self.blocks[shape])) - interaction += 1 - - prop_reductions = {} - for app in self.apply_list[kernel]: - prop = app.prop() - reduction = app.reduction_variable() - - if prop not in prop_reductions: - prop_reductions[prop] = reduction - - else: - prop_reductions[prop] = prop_reductions[prop] + reduction - - for prop, reduction in prop_reductions.items(): - Assign(self.sim, prop[i], prop[i] + reduction) + for ishape in range(self.maxs): + if self.include_shape(ishape): + # A kernel for each ishape + for i in ParticleFor(self.sim): + for _ in Filter(self.sim, ScalarOp.and_op( + ScalarOp.cmp(self.sim.particle_shape[i], self.sim.get_shape_id(ishape)), + ScalarOp.not_op(self.sim.particle_flags[i] & (Flags.Infinite | Flags.Global)))): + for jshape in range(self.maxs): + if self.include_interaction(ishape, jshape): + # Inner loops for each jshaped neighbor + for neigh in NeighborFor(self.sim, i, self.cell_lists, neighbor_lists, jshape): + j = neigh.particle_index() + self.compute_interaction(i, j, ishape, jshape) else: raise Exception("Interactions among more than two particles are currently not supported.") diff --git a/src/pairs/sim/load_balancing_algorithms.py b/src/pairs/sim/load_balancing_algorithms.py deleted file mode 100644 index 165d151cf4c936ef2184ab9dabbb98b0e634df2f..0000000000000000000000000000000000000000 --- a/src/pairs/sim/load_balancing_algorithms.py +++ /dev/null @@ -1,13 +0,0 @@ -class LoadBalancingAlgorithms: - Morton = 0 - Hilbert = 1 - Diffusive = 3 - Metis = 2 - - def c_keyword(algorithm): - return "Hilbert" if algorithm == LoadBalancingAlgorithms.Hilbert else \ - "Morton" if algorithm == LoadBalancingAlgorithms.Morton else \ - "Diffusive" if algorithm == LoadBalancingAlgorithms.Diffusive else \ - "Metis" if algorithm == LoadBalancingAlgorithms.Metis else \ - "Invalid" - \ No newline at end of file diff --git a/src/pairs/sim/shapes.py b/src/pairs/sim/shapes.py index 3f768c668e2c1f619159ff4e9b11e5b986b48363..a5f3a23b79303cd29737cc35ed7100c418fc1767 100644 --- a/src/pairs/sim/shapes.py +++ b/src/pairs/sim/shapes.py @@ -2,3 +2,4 @@ class Shapes: Sphere = 0 Halfspace = 1 PointMass = 2 + Box = 3 \ No newline at end of file diff --git a/src/pairs/sim/simulation.py b/src/pairs/sim/simulation.py index f7360b4e4e8d28c6f4a5407f5af9162188c0a890..3f8f084063186bea8350059ecc0d52aec9f0212a 100644 --- a/src/pairs/sim/simulation.py +++ b/src/pairs/sim/simulation.py @@ -1,37 +1,22 @@ from pairs.ir.arrays import Arrays from pairs.ir.block import Block -from pairs.ir.branches import Filter from pairs.ir.features import Features, FeatureProperties from pairs.ir.kernel import Kernel from pairs.ir.layouts import Layouts -from pairs.ir.module import Module, ModuleCall +from pairs.ir.module import Module from pairs.ir.properties import Properties, ContactProperties from pairs.ir.symbols import Symbol from pairs.ir.types import Types from pairs.ir.variables import Variables #from pairs.graph.graphviz import ASTGraph -from pairs.mapping.funcs import compute, setup -from pairs.sim.arrays import DeclareArrays -from pairs.sim.cell_lists import CellLists, BuildCellLists, BuildCellListsStencil, PartitionCellLists, BuildCellNeighborLists -from pairs.sim.comm import Comm, Synchronize, Borders, Exchange, ReverseComm -from pairs.sim.contact_history import ContactHistory, BuildContactHistory, ClearUnusedContactHistory, ResetContactHistoryUsageStatus -from pairs.sim.copper_fcc_lattice import CopperFCCLattice -from pairs.sim.dem_sc_grid import DEMSCGrid -from pairs.sim.domain import InitializeDomain, UpdateDomain +from pairs.mapping.funcs import compute +from pairs.sim.cell_lists import CellLists, BuildCellLists, PartitionCellLists, BuildCellNeighborLists +from pairs.sim.comm import Comm +from pairs.sim.contact_history import ContactHistory from pairs.sim.domain_partitioners import DomainPartitioners from pairs.sim.domain_partitioning import BlockForest, DimensionRanges -from pairs.sim.load_balancing_algorithms import LoadBalancingAlgorithms -from pairs.sim.features import AllocateFeatureProperties -from pairs.sim.grid import Grid2D, Grid3D, MutableGrid -from pairs.sim.instrumentation import RegisterMarkers, RegisterTimers -from pairs.sim.lattice import ParticleLattice +from pairs.sim.grid import Grid3D from pairs.sim.neighbor_lists import NeighborLists, BuildNeighborLists -from pairs.sim.properties import AllocateProperties, AllocateContactProperties, ResetVolatileProperties -from pairs.sim.read_from_file import ReadParticleData -from pairs.sim.thermo import ComputeThermo -from pairs.sim.timestep import Timestep -from pairs.sim.variables import DeclareVariables -from pairs.sim.vtk import VTKWrite from pairs.transformations import Transformations from pairs.code_gen.interface import InterfaceModules @@ -44,17 +29,14 @@ class Simulation: code_gen, shapes, dims=3, - timesteps=100, double_prec=False, use_contact_history=False, particle_capacity=800000, - neighbor_capacity=100, - generate_whole_program=False): + neighbor_capacity=100): # Code generator for the simulation self.code_gen = code_gen self.code_gen.assign_simulation(self) - self._generate_whole_program = generate_whole_program # Data structures to be generated self.position_prop = None @@ -66,7 +48,6 @@ class Simulation: self.contact_properties = ContactProperties(self) # General capacities, sizes and particle properties - self.sim_timestep = self.add_var('sim_timestep', Types.Int32, runtime=True) self.particle_capacity = \ self.add_var('particle_capacity', Types.Int32, particle_capacity, runtime=True) self.neighbor_capacity = self.add_var('neighbor_capacity', Types.Int32, neighbor_capacity) @@ -93,23 +74,13 @@ class Simulation: self._capture_statements = True self._block = Block(self, []) - # Different segments of particle code/functions - self.create_domain = Block(self, []) - self.create_domain_at_initialization = False - - self.setup_particles = Block(self, []) + # Interface modules + self.interface_module_list = [] + + # Internal modules and kernels self.module_list = [] self.kernel_list = [] - # Individual user-defined and interface modules are created only when generate_whole_program is False - self.udf_module_list = [] - self.interface_module_list = [] - - # User-defined functions to be called by other subroutines (used only when generate_whole_program is True) - self.setup_functions = [] - self.pre_step_functions = [] - self.functions = [] - # Structures to generated resize code for capacities self._check_properties_resize = False self._resizes_to_check = {} @@ -131,7 +102,6 @@ class Simulation: self._module_name = None # Current module name self._double_prec = double_prec # Use double-precision FP arithmetic self.dims = dims # Number of dimensions - self.ntimesteps = timesteps # Number of time-steps self.reneighbor_frequency = 1 # Re-neighbor frequency self.rebalance_frequency = 0 # Re-balance frequency for dynamic load balancing self._target = None # Hardware target info @@ -155,14 +125,6 @@ class Simulation: else: raise Exception("Invalid domain partitioner.") - def set_workload_balancer(self, algorithm=LoadBalancingAlgorithms.Morton, - regrid_min=100, regrid_max=1000, rebalance_frequency=0): - assert self._partitioner == DomainPartitioners.BlockForest, "Load balancing is only supported by BlockForest." - self.rebalance_frequency = rebalance_frequency - self._dom_part.load_balancer = algorithm - self._dom_part.regrid_min = regrid_min - self._dom_part.regrid_max = regrid_max - def partitioner(self): return self._partitioner @@ -181,45 +143,25 @@ class Simulation: def max_shapes(self): return len(self._shapes) - def add_udf_module(self, module): - assert isinstance(module, Module), "add_udf_module(): Given parameter is not of type Module!" - assert module.user_defined and not module.interface - if module.name not in [m.name for m in self.udf_module_list]: - self.udf_module_list.append(module) - def add_interface_module(self, module): assert isinstance(module, Module), "add_interface_module(): Given parameter is not of type Module!" - assert module.interface and not module.user_defined + assert module.interface if module.name not in [m.name for m in self.interface_module_list]: self.interface_module_list.append(module) def add_module(self, module): assert isinstance(module, Module), "add_module(): Given parameter is not of type Module!" - assert not module.interface and not module.user_defined + assert not module.interface if module.name not in [m.name for m in self.module_list]: self.module_list.append(module) def interface_modules(self): + """List interface modules""" return self.interface_module_list - - def udf_modules(self): - return self.udf_module_list - + def modules(self): - """List simulation modules, with main always in the last position""" - - sorted_mods = [] - main_mod = None - for m in self.module_list: - if m.name != 'main': - sorted_mods.append(m) - else: - main_mod = m - - if main_mod is not None: - sorted_mods += [main_mod] - - return sorted_mods + """List internal modules""" + return self.module_list def add_kernel(self, kernel): assert isinstance(kernel, Kernel), "add_kernel(): Given parameter is not of type Kernel!" @@ -301,45 +243,22 @@ class Simulation: assert self.var(var_name) is None, f"Variable already defined: {var_name}" return self.vars.add(var_name, var_type, init_value, runtime) - def add_temp_var(self, init_value): - return self.vars.add_temp(init_value) + def add_temp_var(self, init_value, type=None): + return self.vars.add_temp(init_value, type) - def add_symbol(self, sym_type): - return Symbol(self, sym_type) + def add_symbol(self, sym_type, name=None): + return Symbol(self, sym_type, name) def var(self, var_name): return self.vars.find(var_name) def set_domain(self, grid): - """Set domain bounds. - If the domain is set through this function, the 'set_domain' module won't be generated in the modular version. - Use this function only if you do not need to set domain at runtime. - This function is required only for whole-program generation.""" - self.create_domain_at_initialization = True + """Set domain bounds if they are known at P4IRS compile time""" self.grid = Grid3D(self, grid[0], grid[1], grid[2], grid[3], grid[4], grid[5]) - self.create_domain.add_statement(InitializeDomain(self)) def reneighbor_every(self, frequency): self.reneighbor_frequency = frequency - def create_particle_lattice(self, grid, spacing, props={}): - self.setup_particles.add_statement(ParticleLattice(self, grid, spacing, props, self.position())) - - def read_particle_data(self, filename, prop_names, shape_id): - """Generate statement to read particle data from file""" - props = [self.property(prop_name) for prop_name in prop_names] - self.setup_particles.add_statement(ReadParticleData(self, filename, props, shape_id)) - - def copper_fcc_lattice(self, nx, ny, nz, rho, temperature, ntypes): - """Specific initialization for MD Copper FCC lattice case""" - self.setup_particles.add_statement(CopperFCCLattice(self, nx, ny, nz, rho, temperature, ntypes)) - - def dem_sc_grid(self, xmax, ymax, zmax, spacing, diameter, min_diameter, max_diameter, initial_velocity, particle_density, ntypes): - """Specific initialization for DEM grid""" - self.setup_particles.add_statement( - DEMSCGrid(self, xmax, ymax, zmax, spacing, diameter, min_diameter, max_diameter, - initial_velocity, particle_density, ntypes)) - def build_cell_lists(self, spacing=None, store_neighbors_per_cell=False): """Add routines to build the linked-cells acceleration structure. Leave spacing as None so it can be set at runtime.""" @@ -358,11 +277,8 @@ class Simulation: self.neighbor_lists = NeighborLists(self, self.cell_lists) return self.neighbor_lists - def compute(self, func, cutoff_radius=None, symbols={}, parameters={}, pre_step=False, skip_first=False): - return compute(self, func, cutoff_radius, symbols, parameters, pre_step, skip_first) - - def setup(self, func, symbols={}): - return setup(self, func, symbols) + def compute(self, func, cutoff_radius=None, symbols={}, parameters={}, compute_globals=False, run_on_device=True, profile=False): + return compute(self, func, cutoff_radius, symbols, parameters, compute_globals, run_on_device, profile) def init_block(self): """Initialize new block in this simulation instance""" @@ -386,60 +302,15 @@ class Simulation: else: raise Exception("Two sizes assigned to same capacity!") - def build_setup_module_with_statements(self): - """Build a Module in the setup part of the program using the last initialized block""" - - self.setup_functions.append( - Module(self, - name=self._module_name, - block=Block(self, self._block), - resizes_to_check=self._resizes_to_check, - check_properties_resize=self._check_properties_resize, - run_on_device=True)) - - def build_pre_step_module_with_statements(self, run_on_device=True, skip_first=False, profile=False): - """Build a Module in the pre-step part of the program using the last initialized block""" - module = Module(self, name=self._module_name, - block=Block(self, self._block), - resizes_to_check=self._resizes_to_check, - check_properties_resize=self._check_properties_resize, - run_on_device=run_on_device) - - if profile: - module.profile() - - if skip_first: - self.pre_step_functions.append((module, {'skip_first': True})) - - else: - self.pre_step_functions.append(module) - - def build_module_with_statements(self, run_on_device=True, skip_first=False, profile=False): - """Build a Module in the compute part of the program using the last initialized block""" - module = Module(self, name=self._module_name, - block=Block(self, self._block), - resizes_to_check=self._resizes_to_check, - check_properties_resize=self._check_properties_resize, - run_on_device=run_on_device) - if profile: - module.profile() - - if skip_first: - self.functions.append((module, {'skip_first': True})) - - else: - self.functions.append(module) - - def build_user_defined_function(self, run_on_device=True): + def build_interface_module_with_statements(self, run_on_device=False): """Build a user-defined Module that will be callable seperately as part of the interface""" Module(self, name=self._module_name, block=Block(self, self._block), resizes_to_check=self._resizes_to_check, check_properties_resize=self._check_properties_resize, run_on_device=run_on_device, - user_defined=True) + interface=True) - def capture_statements(self, capture=True): """When toggled, all constructed statements are captured and automatically added to the last initialized block""" self._capture_statements = capture @@ -524,130 +395,11 @@ class Simulation: self._comm = Comm(self, self._dom_part) self.create_update_cells_block() - if self._generate_whole_program: - self.generate_program() - else: - self.generate_library() - - def generate_library(self): InterfaceModules(self).create_all() - - # User defined functions are wrapped inside seperate interface modules here. - # The udf's have the same name as their interface module but they get implemented in the pairs::internal scope. - for m in self.udf_module_list: - module = Module(self, name=m.name, block=Block(self, m), interface=True) - module._id = m._id - Transformations(self.interface_modules(), self._target).apply_all() # Generate library self.code_gen.generate_library() # Generate getters for the runtime functions - self.code_gen.generate_interfaces() - - def generate_program(self): - assert self.grid, "No domain is created. Set domain bounds with 'set_domain'." - - reverse_comm_module = ReverseComm(self._comm, reduce=True) - - # Params that determine when a method must be called only when reneighboring - every_reneighbor_params = {'every': self.reneighbor_frequency} - - timestep_procedures = [] - - # First steps executed during each time-step in the simulation - timestep_procedures += self.pre_step_functions - - # Rebalancing routines - if self.rebalance_frequency: - update_domain_procedures = Block.from_list(self, [ - Exchange(self._comm), - UpdateDomain(self), - Borders(self._comm), - ResetVolatileProperties(self), - BuildCellListsStencil(self, self.cell_lists), - self.update_cells_procedures - ]) - - timestep_procedures.append((update_domain_procedures, {'every': self.rebalance_frequency})) - - # Communication routines - timestep_procedures += [(Exchange(self._comm), every_reneighbor_params), - (Borders(self._comm), Synchronize(self._comm), every_reneighbor_params)] - - # Update acceleration data structures - timestep_procedures += [(self.update_cells_procedures, every_reneighbor_params)] - - # Add routines for contact history management - if self._use_contact_history: - if self.neighbor_lists is not None: - timestep_procedures.append( - (BuildContactHistory(self, self._contact_history, self.cell_lists), - every_reneighbor_params)) - - timestep_procedures.append(ResetContactHistoryUsageStatus(self, self._contact_history)) - - # Reset volatile properties - timestep_procedures += [ResetVolatileProperties(self)] - - # Add computational kernels - timestep_procedures += self.functions - - # For whole-program-generation, add reverse_comm wherever needed in the timestep loop (eg: after computational kernels) like this: - timestep_procedures += [reverse_comm_module] - - # Clear unused contact history - if self._use_contact_history: - timestep_procedures.append(ClearUnusedContactHistory(self, self._contact_history)) - - # Add routine to calculate thermal data - if self._compute_thermo != 0: - timestep_procedures.append( - (ComputeThermo(self), {'every': self._compute_thermo})) - - - # Data structures and timer/markers initialization - inits = Block.from_list(self, [ - DeclareVariables(self), - DeclareArrays(self), - AllocateProperties(self), - AllocateContactProperties(self), - AllocateFeatureProperties(self), - RegisterTimers(self), - RegisterMarkers(self) - ]) - - # Construct the time-step loop - timestep = Timestep(self, self.ntimesteps, timestep_procedures) - self.enter(timestep.block) - - # Add routine to write VTK data when set - if self.vtk_file is not None: - timestep.add(VTKWrite(self, self.vtk_file, timestep.timestep(), self.vtk_frequency)) - - self.leave() - - # Combine everything into a whole program - # Initialization and setup functions, together with time-step loop - # UpdateDomain is added after setup_particles because particles must be already present in the simulation - body = Block.from_list(self, [ - self.create_domain, - self.setup_particles, - UpdateDomain(self), - self.setup_functions, - BuildCellListsStencil(self, self.cell_lists), - timestep.as_block() - ]) - - program = Module(self, name='main', block=Block.merge_blocks(inits, body)) - - # Apply transformations - transformations = Transformations(program, self._target) - transformations.apply_all() - - # Generate whole program - self.code_gen.generate_program(program) - - # Generate getters for the runtime functions - self.code_gen.generate_interfaces() + self.code_gen.generate_interfaces() \ No newline at end of file diff --git a/src/pairs/sim/thermo.py b/src/pairs/sim/thermo.py deleted file mode 100644 index 557a756deb80888aca531f1f27e78499f7e53077..0000000000000000000000000000000000000000 --- a/src/pairs/sim/thermo.py +++ /dev/null @@ -1,19 +0,0 @@ -from pairs.ir.assign import Assign -from pairs.ir.block import pairs_inline -from pairs.ir.functions import Call_Int, Call_Void -from pairs.ir.particle_attributes import ParticleAttributeList -from pairs.ir.types import Types -from pairs.sim.grid import Grid3D -from pairs.sim.lowerable import Lowerable - - -class ComputeThermo(Lowerable): - def __init__(self, sim): - super().__init__(sim) - - @pairs_inline - def lower(self): - xprd = self.sim.grid.length(0) - yprd = self.sim.grid.length(1) - zprd = self.sim.grid.length(2) - Call_Void(self.sim, "pairs::compute_thermo", [self.sim.nlocal, xprd, yprd, zprd, 1]) diff --git a/src/pairs/sim/timestep.py b/src/pairs/sim/timestep.py deleted file mode 100644 index abef09a055507f431af554bf7163603e35f8e3cc..0000000000000000000000000000000000000000 --- a/src/pairs/sim/timestep.py +++ /dev/null @@ -1,72 +0,0 @@ -from pairs.ir.scalars import ScalarOp -from pairs.ir.block import Block -from pairs.ir.branches import Branch, Filter -from pairs.ir.functions import Call_Void -from pairs.ir.loops import For -from pairs.ir.timers import Timers - - -class Timestep: - def __init__(self, sim, nsteps, item_list=None): - self.sim = sim - self.block = Block(sim, []) - self.timestep_loop = For(sim, 0, nsteps + 1, self.block) if self.sim._generate_whole_program else None - - if item_list is not None: - for item in item_list: - if isinstance(item, tuple): - stmt_else = None - - if len(item) == 2: - stmt, params = item - - if len(item) == 3: - stmt, stmt_else, params = item - - exec_every = 0 if 'every' not in params else params['every'] - skip_first = False if 'skip_first' not in params else params['skip_first'] - self.add(stmt, exec_every, stmt_else, skip_first) - - else: - self.add(item) - - def timestep(self): - return self.timestep_loop.iter() if self.sim._generate_whole_program else self.sim.sim_timestep - - def add(self, item, exec_every=0, item_else=None, skip_first=False): - assert exec_every >= 0, "exec_every parameter must be higher or equal than zero!" - stmts = item if not isinstance(item, Block) else item.statements() - stmts_else = None - ts = self.timestep() - self.sim.enter(self.block) - - if item_else is not None: - stmts_else = item_else if not isinstance(item_else, Block) else item_else.statements() - - if exec_every > 0: - cond = ScalarOp.or_op(ScalarOp.cmp((ts + 1) % exec_every, 0), ScalarOp.cmp(ts, 0)) - one_way = True if stmts_else is None else False - - self.block.add_statement( - Branch(self.sim, ScalarOp.inline(cond), one_way, - Block(self.sim, stmts), - Block(self.sim, stmts_else) if not one_way else None)) - - elif skip_first: - self.block.add_statement(Filter(self.sim, ScalarOp.inline(ts > 0), Block(self.sim, stmts))) - - else: - self.block.add_statement(stmts) - - self.sim.leave() - - def as_block(self): - _capture = self.sim._capture_statements - self.sim.capture_statements(False) - - block = Block(self.sim, [Call_Void(self.sim, "pairs::start_timer", [Timers.All]), - self.timestep_loop if self.sim._generate_whole_program else self.block, - Call_Void(self.sim, "pairs::stop_timer", [Timers.All])]) - - self.sim.capture_statements(_capture) - return block diff --git a/src/pairs/transformations/__init__.py b/src/pairs/transformations/__init__.py index 733d5c10fbaec621d37db6a7009e64493f752719..314fd176652708d4286f7f17dc3e7e64ffebfa5f 100644 --- a/src/pairs/transformations/__init__.py +++ b/src/pairs/transformations/__init__.py @@ -73,6 +73,7 @@ class Transformations: def add_device_copies(self): if self._target.is_gpu(): + self.analysis().fetch_device_copies() self.apply(AddDeviceCopies(), [self._module_resizes]) self.analysis().fetch_modules_references() @@ -104,14 +105,11 @@ class Transformations: self.licm() self.modularize() self.add_device_kernels() + self.add_host_references_to_modules() self.add_device_copies() self.lower(True) self.add_expression_declarations() - self.add_host_references_to_modules() + # self.add_host_references_to_modules() self.add_device_references_to_modules() - - # TODO: Place stop timers before the function returns - # or simply don't instrument modules that have a non-void return type - # to avoid having to deal with returns within conditional blocks - # self.add_instrumentation() + self.add_instrumentation() diff --git a/src/pairs/transformations/devices.py b/src/pairs/transformations/devices.py index d33f30ef174ba06b07d34908aff49926ea982fe2..1eb6f1d36a7d97036de2336257508f0879dcd825 100644 --- a/src/pairs/transformations/devices.py +++ b/src/pairs/transformations/devices.py @@ -36,15 +36,19 @@ class AddDeviceCopies(Mutator): if isinstance(s, ModuleCall): copy_context = Contexts.Device if s.module.run_on_device else Contexts.Host clear_context = Contexts.Host if s.module.run_on_device else Contexts.Device - new_stmts += [ - Call_Void(ast_node.sim, "pairs::start_timer", [Timers.DeviceTransfers]) - ] for array, action in s.module.arrays().items(): - new_stmts += [CopyArray(s.sim, array, copy_context, action)] + # TODO: Add device copies only if they are not mannualy taken care of inside the module + # if array not in s.module.device_copies(): + new_stmts += [CopyArray(s.sim, array, copy_context, action)] + # TODO: Add copyToHost for host references in device modules + # if array in s.module.host_references(): + # new_stmts += [CopyArray(s.sim, array, Contexts.Host, action)] for prop, action in s.module.properties().items(): new_stmts += [CopyProperty(s.sim, prop, copy_context, action)] + # if prop in s.module.host_references(): + # new_stmts += [CopyProperty(s.sim, prop, Contexts.Host, action)] for fp, action in s.module.feature_properties().items(): new_stmts += [CopyFeatureProperty(s.sim, fp, copy_context, action)] @@ -59,28 +63,20 @@ class AddDeviceCopies(Mutator): for var, action in s.module.variables().items(): if action != Actions.ReadOnly and var.device_flag: new_stmts += [CopyVar(s.sim, var, Contexts.Device, action)] + # if var not in s.module.device_copies() and var in s.module.host_references(): + # new_stmts += [CopyVar(s.sim, var, Contexts.Host, action)] - new_stmts += [ - Call_Void(ast_node.sim, "pairs::stop_timer", [Timers.DeviceTransfers]) - ] new_stmts.append(s) if isinstance(s, ModuleCall): if s.module.run_on_device: - new_stmts += [ - Call_Void(ast_node.sim, "pairs::start_timer", [Timers.DeviceTransfers]) - ] - for var, action in s.module.variables().items(): if action != Actions.ReadOnly and var.device_flag: new_stmts += [CopyVar(s.sim, var, Contexts.Host, action)] if self.module_resizes[s.module]: new_stmts += [CopyArray(s.sim, s.sim.resizes, Contexts.Host, Actions.Ignore)] - new_stmts += [ - Call_Void(ast_node.sim, "pairs::stop_timer", [Timers.DeviceTransfers]) - ] ast_node.stmts = new_stmts return ast_node diff --git a/src/pairs/transformations/instrumentation.py b/src/pairs/transformations/instrumentation.py index 88b73c0d8406b97392d267ace3b1e1bbbb3ca068..5d039b8cb3ed8ee628b098a6126491b49402e107 100644 --- a/src/pairs/transformations/instrumentation.py +++ b/src/pairs/transformations/instrumentation.py @@ -1,8 +1,8 @@ from pairs.ir.block import Block from pairs.ir.functions import Call_Void -from pairs.ir.module import ModuleCall from pairs.ir.mutator import Mutator from pairs.ir.timers import Timers +from pairs.ir.types import Types class AddModulesInstrumentation(Mutator): @@ -12,7 +12,8 @@ class AddModulesInstrumentation(Mutator): def mutate_ModuleCall(self, ast_node): ast_node._module = self.mutate(ast_node._module) module = ast_node._module - if module.name == 'main' or module.name == 'initialize': + + if module.name == 'initialize' or module.name == 'end' or module.return_type!=Types.Void: return ast_node if module.must_profile():